From 799900f45a75a194acbc416aa2b993b0fe95fe68 Mon Sep 17 00:00:00 2001
From: tevorbl <38593695+tevorbl@users.noreply.github.com>
Date: Thu, 28 May 2020 13:06:08 +0100
Subject: [PATCH 1/5] update platform linux68k

latest version of musashi engine
includes floating point emulation
(plus a few patches to add in missing opcodes needed by ack - see tags JFF & TBB)

added a few missing linux syscalls in sim.c

pascal now runs pretty well
quick test with modula2 passes

c gets the floating point numbers wrong, so more work needed here

other languages untested

plat/linux68k/emu/build.lua is probably not quite right - the softfloat directory is compiled in the wrong place
---
 Makefile                                      |    4 +-
 plat/linux68k/emu/build.lua                   |   10 +-
 plat/linux68k/emu/m68kconf.h                  |   80 +-
 .../musashi/example/{makefile => Makefile}    |   84 +-
 plat/linux68k/emu/musashi/example/example.txt |  580 +-
 plat/linux68k/emu/musashi/example/m68k.h      |    1 +
 plat/linux68k/emu/musashi/example/m68k_in.c   |    1 +
 plat/linux68k/emu/musashi/example/m68kconf.h  |  412 +-
 plat/linux68k/emu/musashi/example/m68kcpu.c   |    1 +
 plat/linux68k/emu/musashi/example/m68kcpu.h   |    1 +
 plat/linux68k/emu/musashi/example/m68kdasm.c  |    1 +
 plat/linux68k/emu/musashi/example/m68kfpu.c   |    1 +
 plat/linux68k/emu/musashi/example/m68kmake.c  |    1 +
 plat/linux68k/emu/musashi/example/m68kmmu.h   |    1 +
 plat/linux68k/emu/musashi/example/osd.h       |   12 +-
 plat/linux68k/emu/musashi/example/osd_dos.c   |   32 +-
 plat/linux68k/emu/musashi/example/osd_linux.c |   46 +
 plat/linux68k/emu/musashi/example/sim.c       | 1124 ++--
 plat/linux68k/emu/musashi/example/sim.h       |   30 +-
 plat/linux68k/emu/musashi/m68k.h              |   67 +-
 plat/linux68k/emu/musashi/m68k_in.c           | 1648 +++---
 plat/linux68k/emu/musashi/m68kconf.h          |   55 +-
 plat/linux68k/emu/musashi/m68kcpu.c           |  476 +-
 plat/linux68k/emu/musashi/m68kcpu.h           |  758 ++-
 plat/linux68k/emu/musashi/m68kdasm.c          |  609 +-
 plat/linux68k/emu/musashi/m68kfpu.c           | 1819 ++++++
 plat/linux68k/emu/musashi/m68kmake.c          |  187 +-
 plat/linux68k/emu/musashi/m68kmmu.h           |  321 ++
 plat/linux68k/emu/musashi/readme.txt          |   51 +-
 .../linux68k/emu/musashi/softfloat/README.txt |   78 +
 plat/linux68k/emu/musashi/softfloat/mamesf.h  |   61 +
 plat/linux68k/emu/musashi/softfloat/milieu.h  |   42 +
 .../emu/musashi/softfloat/softfloat-macros    |  732 +++
 .../musashi/softfloat/softfloat-specialize    |  470 ++
 .../emu/musashi/softfloat/softfloat.c         | 4940 +++++++++++++++++
 .../emu/musashi/softfloat/softfloat.h         |  460 ++
 plat/linux68k/emu/sim.c                       |   99 +-
 plat/linux68k/emu/sim.h                       |    2 +-
 38 files changed, 12899 insertions(+), 2398 deletions(-)
 rename plat/linux68k/emu/musashi/example/{makefile => Makefile} (67%)
 mode change 100755 => 100644
 create mode 120000 plat/linux68k/emu/musashi/example/m68k.h
 create mode 120000 plat/linux68k/emu/musashi/example/m68k_in.c
 create mode 120000 plat/linux68k/emu/musashi/example/m68kcpu.c
 create mode 120000 plat/linux68k/emu/musashi/example/m68kcpu.h
 create mode 120000 plat/linux68k/emu/musashi/example/m68kdasm.c
 create mode 120000 plat/linux68k/emu/musashi/example/m68kfpu.c
 create mode 120000 plat/linux68k/emu/musashi/example/m68kmake.c
 create mode 120000 plat/linux68k/emu/musashi/example/m68kmmu.h
 create mode 100644 plat/linux68k/emu/musashi/example/osd_linux.c
 create mode 100644 plat/linux68k/emu/musashi/m68kfpu.c
 create mode 100644 plat/linux68k/emu/musashi/m68kmmu.h
 create mode 100644 plat/linux68k/emu/musashi/softfloat/README.txt
 create mode 100644 plat/linux68k/emu/musashi/softfloat/mamesf.h
 create mode 100644 plat/linux68k/emu/musashi/softfloat/milieu.h
 create mode 100644 plat/linux68k/emu/musashi/softfloat/softfloat-macros
 create mode 100644 plat/linux68k/emu/musashi/softfloat/softfloat-specialize
 create mode 100644 plat/linux68k/emu/musashi/softfloat/softfloat.c
 create mode 100644 plat/linux68k/emu/musashi/softfloat/softfloat.h

diff --git a/Makefile b/Makefile
index d8fa04026..616f2651b 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,8 @@ DEFAULT_PLATFORM = pc86
 
 # Where should the ACK put its temporary files?
 
-ACK_TEMP_DIR = /tmp
+#ACK_TEMP_DIR = /tmp
+ACK_TEMP_DIR = tmp
 
 # Where is the ACK going to be installed, eventually? If you don't want to
 # install it and just want to run the ACK from the build directory
@@ -31,6 +32,7 @@ LDFLAGS =
 
 AR = ar
 CC = gcc
+#CC = clang
 
 # Which build system to use; use 'ninja' or 'make' (in lower case). Leave
 # blank to autodetect.
diff --git a/plat/linux68k/emu/build.lua b/plat/linux68k/emu/build.lua
index 3fe02056a..6579ef929 100644
--- a/plat/linux68k/emu/build.lua
+++ b/plat/linux68k/emu/build.lua
@@ -11,15 +11,15 @@ normalrule {
 		"+m68kmake",
 		"./musashi/m68k_in.c",
 		"./musashi/m68kcpu.h",
+		"./musashi/m68kmmu.h",
 		"./m68kconf.h",
 		"./musashi/m68kcpu.c",
+		"./musashi/m68kfpu.c",
 		"./musashi/m68kdasm.c",
 		"./musashi/m68k.h",
+		"./musashi/softfloat",
 	},
 	outleaves = {
-		"m68kopac.c",
-		"m68kopdm.c",
-		"m68kopnz.c",
 		"m68kops.c",
 		"m68kops.h",
 		"m68kcpu.h",
@@ -29,7 +29,7 @@ normalrule {
 		"m68k.h",
 	},
 	commands = {
-		"cp %{ins[2]} %{ins[3]} %{ins[4]} %{ins[5]} %{ins[6]} %{ins[7]} %{dir}",
+		"cp -R %{ins[2]} %{ins[3]} %{ins[4]} %{ins[5]} %{ins[6]} %{ins[7]} %{ins[8]} %{ins[9]} %{ins[10]} %{dir}",
 		"cd %{dir} && %{ins[1]}"
 	}
 }
@@ -50,9 +50,9 @@ cprogram {
 	srcs = {
 		"./sim.c",
 		matching(filenamesof("+m68k_engine"), "%.c$"),
+		"./musashi/softfloat/softfloat.c",
 	},
 	deps = {
 		"+headers",
 	}
 }
-
diff --git a/plat/linux68k/emu/m68kconf.h b/plat/linux68k/emu/m68kconf.h
index c5c9b1aef..023cf006b 100755
--- a/plat/linux68k/emu/m68kconf.h
+++ b/plat/linux68k/emu/m68kconf.h
@@ -3,20 +3,28 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 3.32
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
- * This code may be freely used for non-commercial purposes as long as this
- * copyright notice remains unaltered in the source code and any binary files
- * containing this code in compiled form.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
  *
- * All other lisencing terms must be negotiated with the author
- * (Karl Stenerud).
- *
- * The latest version of this code can be obtained at:
- * http://kstenerud.cjb.net
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
  */
 
 
@@ -59,6 +67,8 @@
 #define M68K_EMULATE_010            OPT_ON
 #define M68K_EMULATE_EC020          OPT_ON
 #define M68K_EMULATE_020            OPT_ON
+#define M68K_EMULATE_030            OPT_ON
+#define M68K_EMULATE_040            OPT_ON
 
 
 /* If ON, the CPU will call m68k_read_immediate_xx() for immediate addressing
@@ -101,6 +111,35 @@
 #define M68K_EMULATE_RESET          OPT_OFF
 #define M68K_RESET_CALLBACK()       cpu_pulse_reset()
 
+/* If ON, CPU will call the callback when it encounters a cmpi.l #v, dn
+ * instruction.
+ */
+#define M68K_CMPILD_HAS_CALLBACK     OPT_OFF
+#define M68K_CMPILD_CALLBACK(v,r)    your_cmpild_handler_function(v,r)
+
+
+/* If ON, CPU will call the callback when it encounters a rte
+ * instruction.
+ */
+#define M68K_RTE_HAS_CALLBACK       OPT_OFF
+#define M68K_RTE_CALLBACK()         your_rte_handler_function()
+
+/* If ON, CPU will call the callback when it encounters a tas
+ * instruction.
+ */
+#define M68K_TAS_HAS_CALLBACK       OPT_OFF
+#define M68K_TAS_CALLBACK()         your_tas_handler_function()
+
+/* If ON, CPU will call the callback when it encounters an illegal instruction
+ * passing the opcode as argument. If the callback returns 1, then it's considered
+ * as a normal instruction, and the illegal exception in canceled. If it returns 0,
+ * the exception occurs normally.
+ * The callback looks like int callback(int opcode)
+ * You should put OPT_SPECIFY_HANDLER here if you cant to use it, otherwise it will
+ * use a dummy default handler and you'll have to call m68k_set_illg_instr_callback explicitely
+ */
+#define M68K_ILLG_HAS_CALLBACK	    OPT_OFF
+#define M68K_ILLG_CALLBACK(opcode)  op_illg(opcode)
 
 /* If ON, CPU will call the set fc callback on every memory access to
  * differentiate between user/supervisor, program/data access like a real
@@ -111,7 +150,6 @@
 #define M68K_EMULATE_FC             OPT_OFF
 #define M68K_SET_FC_CALLBACK(A)     cpu_set_fc(A)
 
-
 /* If ON, CPU will call the pc changed callback when it changes the PC by a
  * large value.  This allows host programs to be nicer when it comes to
  * fetching immediate data and instructions on a banked memory system.
@@ -124,11 +162,11 @@
  * instruction.
  */
 #define M68K_INSTRUCTION_HOOK       OPT_SPECIFY_HANDLER
-#define M68K_INSTRUCTION_CALLBACK() cpu_instr_callback()
+#define M68K_INSTRUCTION_CALLBACK(pc) cpu_instr_callback(pc)
 
 
 /* If ON, the CPU will emulate the 4-byte prefetch queue of a real 68000 */
-#define M68K_EMULATE_PREFETCH       OPT_ON
+#define M68K_EMULATE_PREFETCH       OPT_OFF /*xxx*/
 
 
 /* If ON, the CPU will generate address error exceptions if it tries to
@@ -146,6 +184,9 @@
 #define M68K_LOG_1010_1111          OPT_OFF
 #define M68K_LOG_FILEHANDLE         stderr
 
+/* Emulate PMMU : if you enable this, there will be a test to see if the current chip has some enabled pmmu added to every memory access,
+ * so enable this only if it's useful */
+#define M68K_EMULATE_PMMU   OPT_OFF
 
 /* ----------------------------- COMPATIBILITY ---------------------------- */
 
@@ -157,19 +198,9 @@
 /* If ON, the enulation core will use 64-bit integers to speed up some
  * operations.
 */
-#define M68K_USE_64_BIT  OPT_OFF
+#define M68K_USE_64_BIT  OPT_ON
 
 
-/* Set to your compiler's static inline keyword to enable it, or
- * set it to blank to disable it.
- * If you define INLINE in the makefile, it will override this value.
- * NOTE: not enabling inline functions will SEVERELY slow down emulation.
- */
-#ifndef INLINE
-//#define INLINE static __inline__
-#define INLINE static
-#endif /* INLINE */
-
 #endif /* M68K_COMPILE_FOR_MAME */
 
 #include "sim.h"
@@ -185,7 +216,6 @@
 #define m68k_write_memory_16(A, V) cpu_write_word(A, V)
 #define m68k_write_memory_32(A, V) cpu_write_long(A, V)
 
-
 /* ======================================================================== */
 /* ============================== END OF FILE ============================= */
 /* ======================================================================== */
diff --git a/plat/linux68k/emu/musashi/example/makefile b/plat/linux68k/emu/musashi/example/Makefile
old mode 100755
new mode 100644
similarity index 67%
rename from plat/linux68k/emu/musashi/example/makefile
rename to plat/linux68k/emu/musashi/example/Makefile
index d5c0bd431..961581ffa
--- a/plat/linux68k/emu/musashi/example/makefile
+++ b/plat/linux68k/emu/musashi/example/Makefile
@@ -1,42 +1,42 @@
-EXENAME          = sim
-
-OSD_DOS          = osd_dos.c
-
-OSDFILES         = $(OSD_DOS)
-MAINFILES        = sim.c
-MUSASHIFILES     = m68kcpu.c m68kdasm.c
-MUSASHIGENCFILES = m68kops.c m68kopac.c m68kopdm.c m68kopnz.c
-MUSASHIGENHFILES = m68kops.h
-MUSASHIGENERATOR = m68kmake
-
-EXE = .exe
-EXEPATH = .\\
-# EXE =
-# EXEPATH = ./
-
-.CFILES   = $(MAINFILES) $(OSDFILES) $(MUSASHIFILES) $(MUSASHIGENCFILES)
-.OFILES   = $(.CFILES:%.c=%.o)
-
-CC        = gcc
-WARNINGS  = -Wall -pedantic
-CFLAGS    = $(WARNINGS)
-LFLAGS    = $(WARNINGS)
-
-TARGET = $(EXENAME)$(EXE)
-
-DELETEFILES = $(MUSASHIGENCFILES) $(MUSASHIGENHFILES) $(.OFILES) $(TARGET) $(MUSASHIGENERATOR)$(EXE)
-
-
-all: $(TARGET)
-
-clean:
-	rm -f $(DELETEFILES)
-
-$(TARGET): $(MUSASHIGENHFILES) $(.OFILES) makefile
-	$(CC) -o $@ $(.OFILES) $(LFLAGS)
-
-$(MUSASHIGENCFILES) $(MUSASHIGENHFILES): $(MUSASHIGENERATOR)$(EXE)
-	$(EXEPATH)$(MUSASHIGENERATOR)$(EXE)
-
-$(MUSASHIGENERATOR)$(EXE):  $(MUSASHIGENERATOR).c
-	$(CC) -o  $(MUSASHIGENERATOR)$(EXE)  $(MUSASHIGENERATOR).c
+EXENAME          = sim
+
+OSD_DOS          = osd_dos.c
+
+OSDFILES         = osd_linux.c # $(OSD_DOS)
+MAINFILES        = sim.c
+MUSASHIFILES     = m68kcpu.c m68kdasm.c softfloat/softfloat.c
+MUSASHIGENCFILES = m68kops.c
+MUSASHIGENHFILES = m68kops.h
+MUSASHIGENERATOR = m68kmake
+
+# EXE = .exe
+# EXEPATH = .\\
+EXE =
+EXEPATH = ./
+
+.CFILES   = $(MAINFILES) $(OSDFILES) $(MUSASHIFILES) $(MUSASHIGENCFILES)
+.OFILES   = $(.CFILES:%.c=%.o)
+
+CC        = gcc
+WARNINGS  = -Wall -Wextra -pedantic
+CFLAGS    = $(WARNINGS)
+LFLAGS    = $(WARNINGS)
+
+TARGET = $(EXENAME)$(EXE)
+
+DELETEFILES = $(MUSASHIGENCFILES) $(MUSASHIGENHFILES) $(.OFILES) $(TARGET) $(MUSASHIGENERATOR)$(EXE)
+
+
+all: $(TARGET)
+
+clean:
+	rm -f $(DELETEFILES)
+
+$(TARGET): $(MUSASHIGENHFILES) $(.OFILES) Makefile
+	$(CC) -o $@ $(.OFILES) $(LFLAGS) -lm
+
+$(MUSASHIGENCFILES) $(MUSASHIGENHFILES): $(MUSASHIGENERATOR)$(EXE)
+	$(EXEPATH)$(MUSASHIGENERATOR)$(EXE)
+
+$(MUSASHIGENERATOR)$(EXE):  $(MUSASHIGENERATOR).c
+	$(CC) -o  $(MUSASHIGENERATOR)$(EXE)  $(MUSASHIGENERATOR).c
diff --git a/plat/linux68k/emu/musashi/example/example.txt b/plat/linux68k/emu/musashi/example/example.txt
index d701fe61f..fc679f6ee 100755
--- a/plat/linux68k/emu/musashi/example/example.txt
+++ b/plat/linux68k/emu/musashi/example/example.txt
@@ -1,290 +1,290 @@
-EXAMPLE:
--------
-As an example, I'll build an imaginary hardware platform.
-
-
-The system is fairly simple, comprising a 68000, an input device, an output
-device, a non-maskable-interrupt device, and an interrupt controller.
-
-
-The input device receives input from the user and asserts its interrupt
-request line until its value is read.  Reading from the input device's
-memory-mapped port will both clear its interrupt request and read an ASCII
-representation (8 bits) of what the user entered.
-
-The output device reads value when it is selected through its memory-mapped
-port and outputs it to a display.  The value it reads will be interpreted as
-an ASCII value and output to the display. The output device is fairly slow
-(it can only process 1 byte per second), and so it asserts its interrupt
-request line when it is ready to receive a byte.  Writing to the output device
-sends a byte to it.  If the output device is not ready, the write is ignored.
-Reading from the output device returns 0 and clears its interrupt request line
-until another byte is written to it and 1 second elapses.
-
-The non-maskable-interrupt (NMI) device, as can be surmised from the name,
-generates a non-maskable-interrupt.  This is connected to some kind of external
-switch that the user can push to generate a NMI.
-
-Since there are 3 devices interrupting the CPU, an interrupt controller is
-needed.  The interrupt controller takes 7 inputs and encodes the highest
-priority asserted line on the 3 output pins.  the input device is wired to IN2
-and the output device is wired to IN1 on the controller.  The NMI device is
-wired to IN7 and all the other inputs are wired low.
-
-The bus is also connected to a 1K ROM and a 256 byte RAM.
-Beware: This platform places ROM and RAM in the same address range and uses
-        the FC pins to select the correct address space!
-        (You didn't expect me to make it easy, did you? =)
-
-
-
-Here is the schematic in all its ASCII splendour:
--------------------------------------------------
-
-     NMI     TIED
-    SWITCH   LOW
-      |       |
-      | +-+-+-+
-      | | | | | +------------------------------------------------+
-      | | | | | | +------------------------------------+         |
-      | | | | | | |                                    |         |
-     +-------------+                                   |         |
-     |7 6 5 4 3 2 1|                                   |         |
-     |             |                                   |         |
-     | INT CONTRLR |                                   |         |
-     |             |                                   |         |
-     |i i i        |                                   |         |
-     |2 1 0        |                                   |         |
-     +-------------+                                   |         |
-      | | |                                            |         |
-      | | |     +--------------------------------+--+  |         |
-      o o o     |                                |  |  |         |
-    +--------------+  +-------+  +----------+  +---------+  +----------+
-    | I I I     a  |  |       |  |          |  | r  a  i |  |    i     |
-    | 2 1 0    23  |  |       |  |          |  | e  c    |  |          |
-    |              |  |       |  |          |  | a  k    |  |          |
-    |              |  |       |  |          |  | d       |  |          |
-    |              |  |       |  |          |  |         |  |          |
-    |    M68000    |  |  ROM  |  |   RAM    |  |   IN    |  |   OUT    |
-    |              |  |       |  |          |  |         |  |          |
-    |            a9|--|a9     |--|          |--|         |--|          |
-    |            a8|--|a8     |--|          |--|         |--|          |
-    |            a7|--|a7     |--|a7        |--|         |--|          |
-    |            a6|--|a6     |--|a6        |--|         |--|          |
-    |            a5|--|a5     |--|a5        |--|         |--|          |
-    |            a4|--|a4     |--|a4        |--|         |--|          |
-    |            a3|--|a3     |--|a3        |--|         |--|          |
-    |            a2|--|a2     |--|a2        |--|         |--|          |
-    |            a1|--|a1     |--|a1        |--|         |--|          |
-    |            a0|--|a0     |--|a0        |--|         |--|          |
-    |              |  |       |  |          |  |         |  |          |
-    |           d15|--|d15    |--|d15       |--|         |--|          |
-    |           d14|--|d14    |--|d14       |--|         |--|          |
-    |           d13|--|d13    |--|d13       |--|         |--|          |
-    |           d12|--|d12    |--|d12       |--|         |--|          |
-    |           d11|--|d11    |--|d11       |--|         |--|          |
-    |           d10|--|d10    |--|d10       |--|         |--|          |
-    |            d9|--|d9     |--|d9        |--|         |--|          |
-    |            d8|--|d8     |--|d8        |--|         |--|          |
-    |            d7|--|d7     |--|d7        |--|d7       |--|d7        |
-    |            d6|--|d6     |--|d6        |--|d6       |--|d6        |
-    |            d5|--|d5     |--|d5        |--|d5       |--|d5        |
-    |            d4|--|d4     |--|d4        |--|d4       |--|d4        |
-    |            d3|--|d3     |--|d3        |--|d3       |--|d3        |
-    |            d2|--|d2     |--|d2        |--|d2       |--|d2        |
-    |            d1|--|d1     |--|d1        |--|d1       |--|d1  w     |
-    |            d0|--|d0     |--|d0        |--|d0       |--|d0  r     |
-    |              |  |       |  |          |  |         |  |    i   a |
-    | a      F F F |  |       |  |          |  |         |  |    t   c |
-    |22  rW  2 1 0 |  |  cs   |  | cs   rW  |  |         |  |    e   k |
-    +--------------+  +-------+  +----------+  +---------+  +----------+
-      |   |  | | |        |         |    |                       |   |
-      |   |  | | |        |         |    |                       |   |
-      |   |  | | |    +-------+  +-----+ |                     +---+ |
-      |   |  | | |    |  IC1  |  | IC2 | |                     |AND| |
-      |   |  | | |    |a b c d|  |a b c| |                     +---+ |
-      |   |  | | |    +-------+  +-----+ |                      | |  |
-      |   |  | | |     | | | |    | | |  |                      | +--+
-      |   |  | | |     | | | |    | | |  |                      | |
-      |   |  | | |     | | | |    | | |  |                      | |
-      |   |  | | |     | | | |    | | |  |                      | |
-      |   |  | | +-----)-)-+-)----)-)-+  |                      | |
-      |   |  | +-------)-+---)----)-+    |                      | |
-      |   |  +---------+-----)----+      |                      | |
-      |   |                  |           |                      | |
-      |   +------------------+-----------+----------------------+ |
-      |                                                           |
-      +-----------------------------------------------------------+
-
-IC1: output=1 if a=0 and b=1 and c=0 and d=0
-IC2: output=1 if a=0 and b=0 and c=1
-
-
-
-Here is the listing for program.bin:
------------------------------------
-
-                        INPUT_ADDRESS   equ $800000
-                        OUTPUT_ADDRESS  equ $400000
-                        CIRCULAR_BUFFER equ $c0
-                        CAN_OUTPUT      equ $d0
-                        STACK_AREA      equ $100
-                        
-                        vector_table:
-00000000 0000 0100      	dc.l STACK_AREA				*  0: SP
-00000004 0000 00c0      	dc.l init					*  1: PC
-00000008 0000 0148      	dc.l unhandled_exception	*  2: bus error
-0000000c 0000 0148      	dc.l unhandled_exception	*  3: address error
-00000010 0000 0148      	dc.l unhandled_exception	*  4: illegal instruction
-00000014 0000 0148      	dc.l unhandled_exception	*  5: zero divide
-00000018 0000 0148      	dc.l unhandled_exception	*  6: chk
-0000001c 0000 0148      	dc.l unhandled_exception	*  7: trapv
-00000020 0000 0148      	dc.l unhandled_exception	*  8: privilege violation
-00000024 0000 0148      	dc.l unhandled_exception	*  9: trace
-00000028 0000 0148      	dc.l unhandled_exception	* 10: 1010
-0000002c 0000 0148      	dc.l unhandled_exception	* 11: 1111
-00000030 0000 0148      	dc.l unhandled_exception	* 12: -
-00000034 0000 0148      	dc.l unhandled_exception	* 13: -
-00000038 0000 0148      	dc.l unhandled_exception	* 14: -
-0000003c 0000 0148      	dc.l unhandled_exception	* 15: uninitialized interrupt
-00000040 0000 0148      	dc.l unhandled_exception	* 16: -
-00000044 0000 0148      	dc.l unhandled_exception	* 17: -
-00000048 0000 0148      	dc.l unhandled_exception	* 18: -
-0000004c 0000 0148      	dc.l unhandled_exception	* 19: -
-00000050 0000 0148      	dc.l unhandled_exception	* 20: -
-00000054 0000 0148      	dc.l unhandled_exception	* 21: -
-00000058 0000 0148      	dc.l unhandled_exception	* 22: -
-0000005c 0000 0148      	dc.l unhandled_exception	* 23: -
-00000060 0000 0148      	dc.l unhandled_exception	* 24: spurious interrupt
-00000064 0000 0136      	dc.l output_ready			* 25: l1 irq
-00000068 0000 010e      	dc.l input_ready			* 26: l2 irq
-0000006c 0000 0148      	dc.l unhandled_exception	* 27: l3 irq
-00000070 0000 0148      	dc.l unhandled_exception	* 28: l4 irq
-00000074 0000 0148      	dc.l unhandled_exception	* 29: l5 irq
-00000078 0000 0148      	dc.l unhandled_exception	* 30: l6 irq
-0000007c 0000 014e      	dc.l nmi					* 31: l7 irq
-00000080 0000 0148      	dc.l unhandled_exception	* 32: trap 0
-00000084 0000 0148      	dc.l unhandled_exception	* 33: trap 1
-00000088 0000 0148      	dc.l unhandled_exception	* 34: trap 2
-0000008c 0000 0148      	dc.l unhandled_exception	* 35: trap 3
-00000090 0000 0148      	dc.l unhandled_exception	* 36: trap 4
-00000094 0000 0148      	dc.l unhandled_exception	* 37: trap 5
-00000098 0000 0148      	dc.l unhandled_exception	* 38: trap 6
-0000009c 0000 0148      	dc.l unhandled_exception	* 39: trap 7
-000000a0 0000 0148      	dc.l unhandled_exception	* 40: trap 8
-000000a4 0000 0148      	dc.l unhandled_exception	* 41: trap 9
-000000a8 0000 0148      	dc.l unhandled_exception	* 42: trap 10
-000000ac 0000 0148      	dc.l unhandled_exception	* 43: trap 11
-000000b0 0000 0148      	dc.l unhandled_exception	* 44: trap 12
-000000b4 0000 0148      	dc.l unhandled_exception	* 45: trap 13
-000000b8 0000 0148      	dc.l unhandled_exception	* 46: trap 14
-000000bc 0000 0148      	dc.l unhandled_exception	* 47: trap 15
-                        * This is the end of the useful part of the table.
-                        * We will now do the Capcom thing and put code starting at $c0.
-                        
-                        init:
-                        * Copy the exception vector table to RAM.
-000000c0 227c 0000 0000 	move.l  #0, a1						* a1 is RAM index
-000000c6 303c 002f      	move.w  #47, d0						* d0 is counter (48 vectors)
-000000ca 41fa 0006      	lea.l   (copy_table,PC), a0			* a0 is scratch
-000000ce 2208           	move.l  a0, d1						* d1 is ROM index
-000000d0 4481           	neg.l   d1
-                        copy_table:
-000000d2 22fb 18fe      	dc.l    $22fb18fe					* stoopid as68k generates 020 code here
-                        *	move.l  (copy_table,PC,d1.l), (a1)+
-000000d6 5841           	addq    #4, d1
-000000d8 51c8 fff8      	dbf     d0, copy_table
-                        
-                        main_init:
-                        * Initialize main program
-000000dc 11fc 0000 00d0 	move.b  #0, CAN_OUTPUT
-000000e2 4df8 00c0      	lea.l   CIRCULAR_BUFFER, a6
-000000e6 7c00           	moveq   #0, d6						* output buffer ptr
-000000e8 7e00           	moveq   #0, d7						* input buffer ptr
-000000ea 027c f8ff      	andi    #$f8ff, SR					* clear interrupt mask
-                        main:
-                        * Main program
-000000ee 4a38 00d0      	tst.b   CAN_OUTPUT					* can we output?
-000000f2 67fa           	beq     main
-000000f4 be06           	cmp.b   d6, d7						* is there data?
-000000f6 67f6           	beq     main
-000000f8 11fc 0000 00d0 	move.b  #0, CAN_OUTPUT
-000000fe 13f6 6000 0040 	move.b  (0,a6,d6.w), OUTPUT_ADDRESS	* write data
-         0000
-00000106 5246           	addq    #1, d6
-00000108 0206 000f      	andi.b  #15, d6						* update circular buffer
-0000010c 60e0           	bra     main
-                        
-                        
-                        input_ready:
-0000010e 2f00           	move.l  d0, -(a7)
-00000110 2f01           	move.l  d1, -(a7)
-00000112 1239 0080 0000 	move.b  INPUT_ADDRESS, d1			* read data
-00000118 1007           	move.b  d7, d0						* check if buffer full
-0000011a 5240           	addq    #1, d0
-0000011c 0200 000f      	andi.b  #15, d0
-00000120 bc00           	cmp.b   d0, d6
-00000122 6700 000c      	beq     input_ready_quit			* throw away if full
-00000126 1d81 7000      	move.b  d1, (0,a6,d7.w)				* store the data
-0000012a 5247           	addq    #1, d7
-0000012c 0207 000f      	andi.b  #15, d7						* update circular buffer
-                        input_ready_quit:
-00000130 221f           	move.l  (a7)+, d1
-00000132 201f           	move.l  (a7)+, d0
-00000134 4e73           	rte
-                        
-                        output_ready:
-00000136 2f00           	move.l  d0, -(a7)
-00000138 11fc 0001 00d0 	move.b  #1, CAN_OUTPUT
-0000013e 1039 0040 0000 	move.b  OUTPUT_ADDRESS, d0			* acknowledge the interrupt
-00000144 201f           	move.l  (a7)+, d0
-00000146 4e73           	rte
-                        
-                        unhandled_exception:
-00000148 4e72 2700      	stop	#$2700						* wait for NMI
-0000014c 60fa           	bra     unhandled_exception			* shouldn't get here
-                        
-                        nmi:
-                        * perform a soft reset
-0000014e 46fc 2700      	move    #$2700, SR					* set status register
-00000152 2e7a feac      	move.l  (vector_table,PC), a7		* reset stack pointer
-00000156 4e70           	reset								* reset peripherals
-00000158 4efa feaa      	jmp     (vector_table+4,PC)			* reset program counter
-                        
-                        END
-
-
-
-Compiling the example host environment:
---------------------------------------
-
-I've only put in an os-dependant portion for dos/windows, so you'll either
-have to compile for that system or make your own osd code based on osd_dos.c
-and modify the makefile accordingly.
-
-I compiled this example using the compiler from mingw (www.mingw.org) but you
-could also use djgpp (www.delorie.com).
-
-- Copy the m68k files to a directory.  Then extract the files from example.zip to
-  the same directory, overwriting m68kconf.h.  program.bin is the actual 68000
-  program you will be running.
-- Make your own osd_get_key() in the same fashion as in osd_dos.c if you're not
-  compiling for dos/windows.
-- Type make
-- Perform the necessary animal sacrifices.
-- Type sim program.bin
-
-
-Keys:
-	ESC           - quits the simulator
-	~             - generates an NMI interrupt
-	Any other key - Genearate input for the input device
-
-
-Note: I've cheated a bit in the emulation.  There is no speed control
-      to set the speed the CPU runs at; it simply runs as fast as your
-      processor can run it.
-      To add speed control, you will need a high-precision timestamp
-      function (like the RDTSC instruction for newer Pentium CPUs)
-      and a bit of arithmetic to make the cycles argument for m68k_execute().
-      I'll leave that as an excercise to the reader.
+EXAMPLE:
+-------
+As an example, I'll build an imaginary hardware platform.
+
+
+The system is fairly simple, comprising a 68000, an input device, an output
+device, a non-maskable-interrupt device, and an interrupt controller.
+
+
+The input device receives input from the user and asserts its interrupt
+request line until its value is read.  Reading from the input device's
+memory-mapped port will both clear its interrupt request and read an ASCII
+representation (8 bits) of what the user entered.
+
+The output device reads value when it is selected through its memory-mapped
+port and outputs it to a display.  The value it reads will be interpreted as
+an ASCII value and output to the display. The output device is fairly slow
+(it can only process 1 byte per second), and so it asserts its interrupt
+request line when it is ready to receive a byte.  Writing to the output device
+sends a byte to it.  If the output device is not ready, the write is ignored.
+Reading from the output device returns 0 and clears its interrupt request line
+until another byte is written to it and 1 second elapses.
+
+The non-maskable-interrupt (NMI) device, as can be surmised from the name,
+generates a non-maskable-interrupt.  This is connected to some kind of external
+switch that the user can push to generate a NMI.
+
+Since there are 3 devices interrupting the CPU, an interrupt controller is
+needed.  The interrupt controller takes 7 inputs and encodes the highest
+priority asserted line on the 3 output pins.  the input device is wired to IN2
+and the output device is wired to IN1 on the controller.  The NMI device is
+wired to IN7 and all the other inputs are wired low.
+
+The bus is also connected to a 1K ROM and a 256 byte RAM.
+Beware: This platform places ROM and RAM in the same address range and uses
+        the FC pins to select the correct address space!
+        (You didn't expect me to make it easy, did you? =)
+
+
+
+Here is the schematic in all its ASCII splendour:
+-------------------------------------------------
+
+     NMI     TIED
+    SWITCH   LOW
+      |       |
+      | +-+-+-+
+      | | | | | +------------------------------------------------+
+      | | | | | | +------------------------------------+         |
+      | | | | | | |                                    |         |
+     +-------------+                                   |         |
+     |7 6 5 4 3 2 1|                                   |         |
+     |             |                                   |         |
+     | INT CONTRLR |                                   |         |
+     |             |                                   |         |
+     |i i i        |                                   |         |
+     |2 1 0        |                                   |         |
+     +-------------+                                   |         |
+      | | |                                            |         |
+      | | |     +--------------------------------+--+  |         |
+      o o o     |                                |  |  |         |
+    +--------------+  +-------+  +----------+  +---------+  +----------+
+    | I I I     a  |  |       |  |          |  | r  a  i |  |    i     |
+    | 2 1 0    23  |  |       |  |          |  | e  c    |  |          |
+    |              |  |       |  |          |  | a  k    |  |          |
+    |              |  |       |  |          |  | d       |  |          |
+    |              |  |       |  |          |  |         |  |          |
+    |    M68000    |  |  ROM  |  |   RAM    |  |   IN    |  |   OUT    |
+    |              |  |       |  |          |  |         |  |          |
+    |            a9|--|a9     |--|          |--|         |--|          |
+    |            a8|--|a8     |--|          |--|         |--|          |
+    |            a7|--|a7     |--|a7        |--|         |--|          |
+    |            a6|--|a6     |--|a6        |--|         |--|          |
+    |            a5|--|a5     |--|a5        |--|         |--|          |
+    |            a4|--|a4     |--|a4        |--|         |--|          |
+    |            a3|--|a3     |--|a3        |--|         |--|          |
+    |            a2|--|a2     |--|a2        |--|         |--|          |
+    |            a1|--|a1     |--|a1        |--|         |--|          |
+    |            a0|--|a0     |--|a0        |--|         |--|          |
+    |              |  |       |  |          |  |         |  |          |
+    |           d15|--|d15    |--|d15       |--|         |--|          |
+    |           d14|--|d14    |--|d14       |--|         |--|          |
+    |           d13|--|d13    |--|d13       |--|         |--|          |
+    |           d12|--|d12    |--|d12       |--|         |--|          |
+    |           d11|--|d11    |--|d11       |--|         |--|          |
+    |           d10|--|d10    |--|d10       |--|         |--|          |
+    |            d9|--|d9     |--|d9        |--|         |--|          |
+    |            d8|--|d8     |--|d8        |--|         |--|          |
+    |            d7|--|d7     |--|d7        |--|d7       |--|d7        |
+    |            d6|--|d6     |--|d6        |--|d6       |--|d6        |
+    |            d5|--|d5     |--|d5        |--|d5       |--|d5        |
+    |            d4|--|d4     |--|d4        |--|d4       |--|d4        |
+    |            d3|--|d3     |--|d3        |--|d3       |--|d3        |
+    |            d2|--|d2     |--|d2        |--|d2       |--|d2        |
+    |            d1|--|d1     |--|d1        |--|d1       |--|d1  w     |
+    |            d0|--|d0     |--|d0        |--|d0       |--|d0  r     |
+    |              |  |       |  |          |  |         |  |    i   a |
+    | a      F F F |  |       |  |          |  |         |  |    t   c |
+    |22  rW  2 1 0 |  |  cs   |  | cs   rW  |  |         |  |    e   k |
+    +--------------+  +-------+  +----------+  +---------+  +----------+
+      |   |  | | |        |         |    |                       |   |
+      |   |  | | |        |         |    |                       |   |
+      |   |  | | |    +-------+  +-----+ |                     +---+ |
+      |   |  | | |    |  IC1  |  | IC2 | |                     |AND| |
+      |   |  | | |    |a b c d|  |a b c| |                     +---+ |
+      |   |  | | |    +-------+  +-----+ |                      | |  |
+      |   |  | | |     | | | |    | | |  |                      | +--+
+      |   |  | | |     | | | |    | | |  |                      | |
+      |   |  | | |     | | | |    | | |  |                      | |
+      |   |  | | |     | | | |    | | |  |                      | |
+      |   |  | | +-----)-)-+-)----)-)-+  |                      | |
+      |   |  | +-------)-+---)----)-+    |                      | |
+      |   |  +---------+-----)----+      |                      | |
+      |   |                  |           |                      | |
+      |   +------------------+-----------+----------------------+ |
+      |                                                           |
+      +-----------------------------------------------------------+
+
+IC1: output=1 if a=0 and b=1 and c=0 and d=0
+IC2: output=1 if a=0 and b=0 and c=1
+
+
+
+Here is the listing for program.bin:
+-----------------------------------
+
+                        INPUT_ADDRESS   equ $800000
+                        OUTPUT_ADDRESS  equ $400000
+                        CIRCULAR_BUFFER equ $c0
+                        CAN_OUTPUT      equ $d0
+                        STACK_AREA      equ $100
+                        
+                        vector_table:
+00000000 0000 0100      	dc.l STACK_AREA				*  0: SP
+00000004 0000 00c0      	dc.l init					*  1: PC
+00000008 0000 0148      	dc.l unhandled_exception	*  2: bus error
+0000000c 0000 0148      	dc.l unhandled_exception	*  3: address error
+00000010 0000 0148      	dc.l unhandled_exception	*  4: illegal instruction
+00000014 0000 0148      	dc.l unhandled_exception	*  5: zero divide
+00000018 0000 0148      	dc.l unhandled_exception	*  6: chk
+0000001c 0000 0148      	dc.l unhandled_exception	*  7: trapv
+00000020 0000 0148      	dc.l unhandled_exception	*  8: privilege violation
+00000024 0000 0148      	dc.l unhandled_exception	*  9: trace
+00000028 0000 0148      	dc.l unhandled_exception	* 10: 1010
+0000002c 0000 0148      	dc.l unhandled_exception	* 11: 1111
+00000030 0000 0148      	dc.l unhandled_exception	* 12: -
+00000034 0000 0148      	dc.l unhandled_exception	* 13: -
+00000038 0000 0148      	dc.l unhandled_exception	* 14: -
+0000003c 0000 0148      	dc.l unhandled_exception	* 15: uninitialized interrupt
+00000040 0000 0148      	dc.l unhandled_exception	* 16: -
+00000044 0000 0148      	dc.l unhandled_exception	* 17: -
+00000048 0000 0148      	dc.l unhandled_exception	* 18: -
+0000004c 0000 0148      	dc.l unhandled_exception	* 19: -
+00000050 0000 0148      	dc.l unhandled_exception	* 20: -
+00000054 0000 0148      	dc.l unhandled_exception	* 21: -
+00000058 0000 0148      	dc.l unhandled_exception	* 22: -
+0000005c 0000 0148      	dc.l unhandled_exception	* 23: -
+00000060 0000 0148      	dc.l unhandled_exception	* 24: spurious interrupt
+00000064 0000 0136      	dc.l output_ready			* 25: l1 irq
+00000068 0000 010e      	dc.l input_ready			* 26: l2 irq
+0000006c 0000 0148      	dc.l unhandled_exception	* 27: l3 irq
+00000070 0000 0148      	dc.l unhandled_exception	* 28: l4 irq
+00000074 0000 0148      	dc.l unhandled_exception	* 29: l5 irq
+00000078 0000 0148      	dc.l unhandled_exception	* 30: l6 irq
+0000007c 0000 014e      	dc.l nmi					* 31: l7 irq
+00000080 0000 0148      	dc.l unhandled_exception	* 32: trap 0
+00000084 0000 0148      	dc.l unhandled_exception	* 33: trap 1
+00000088 0000 0148      	dc.l unhandled_exception	* 34: trap 2
+0000008c 0000 0148      	dc.l unhandled_exception	* 35: trap 3
+00000090 0000 0148      	dc.l unhandled_exception	* 36: trap 4
+00000094 0000 0148      	dc.l unhandled_exception	* 37: trap 5
+00000098 0000 0148      	dc.l unhandled_exception	* 38: trap 6
+0000009c 0000 0148      	dc.l unhandled_exception	* 39: trap 7
+000000a0 0000 0148      	dc.l unhandled_exception	* 40: trap 8
+000000a4 0000 0148      	dc.l unhandled_exception	* 41: trap 9
+000000a8 0000 0148      	dc.l unhandled_exception	* 42: trap 10
+000000ac 0000 0148      	dc.l unhandled_exception	* 43: trap 11
+000000b0 0000 0148      	dc.l unhandled_exception	* 44: trap 12
+000000b4 0000 0148      	dc.l unhandled_exception	* 45: trap 13
+000000b8 0000 0148      	dc.l unhandled_exception	* 46: trap 14
+000000bc 0000 0148      	dc.l unhandled_exception	* 47: trap 15
+                        * This is the end of the useful part of the table.
+                        * We will now do the Capcom thing and put code starting at $c0.
+                        
+                        init:
+                        * Copy the exception vector table to RAM.
+000000c0 227c 0000 0000 	move.l  #0, a1						* a1 is RAM index
+000000c6 303c 002f      	move.w  #47, d0						* d0 is counter (48 vectors)
+000000ca 41fa 0006      	lea.l   (copy_table,PC), a0			* a0 is scratch
+000000ce 2208           	move.l  a0, d1						* d1 is ROM index
+000000d0 4481           	neg.l   d1
+                        copy_table:
+000000d2 22fb 18fe      	dc.l    $22fb18fe					* stoopid as68k generates 020 code here
+                        *	move.l  (copy_table,PC,d1.l), (a1)+
+000000d6 5841           	addq    #4, d1
+000000d8 51c8 fff8      	dbf     d0, copy_table
+                        
+                        main_init:
+                        * Initialize main program
+000000dc 11fc 0000 00d0 	move.b  #0, CAN_OUTPUT
+000000e2 4df8 00c0      	lea.l   CIRCULAR_BUFFER, a6
+000000e6 7c00           	moveq   #0, d6						* output buffer ptr
+000000e8 7e00           	moveq   #0, d7						* input buffer ptr
+000000ea 027c f8ff      	andi    #$f8ff, SR					* clear interrupt mask
+                        main:
+                        * Main program
+000000ee 4a38 00d0      	tst.b   CAN_OUTPUT					* can we output?
+000000f2 67fa           	beq     main
+000000f4 be06           	cmp.b   d6, d7						* is there data?
+000000f6 67f6           	beq     main
+000000f8 11fc 0000 00d0 	move.b  #0, CAN_OUTPUT
+000000fe 13f6 6000 0040 	move.b  (0,a6,d6.w), OUTPUT_ADDRESS	* write data
+         0000
+00000106 5246           	addq    #1, d6
+00000108 0206 000f      	andi.b  #15, d6						* update circular buffer
+0000010c 60e0           	bra     main
+                        
+                        
+                        input_ready:
+0000010e 2f00           	move.l  d0, -(a7)
+00000110 2f01           	move.l  d1, -(a7)
+00000112 1239 0080 0000 	move.b  INPUT_ADDRESS, d1			* read data
+00000118 1007           	move.b  d7, d0						* check if buffer full
+0000011a 5240           	addq    #1, d0
+0000011c 0200 000f      	andi.b  #15, d0
+00000120 bc00           	cmp.b   d0, d6
+00000122 6700 000c      	beq     input_ready_quit			* throw away if full
+00000126 1d81 7000      	move.b  d1, (0,a6,d7.w)				* store the data
+0000012a 5247           	addq    #1, d7
+0000012c 0207 000f      	andi.b  #15, d7						* update circular buffer
+                        input_ready_quit:
+00000130 221f           	move.l  (a7)+, d1
+00000132 201f           	move.l  (a7)+, d0
+00000134 4e73           	rte
+                        
+                        output_ready:
+00000136 2f00           	move.l  d0, -(a7)
+00000138 11fc 0001 00d0 	move.b  #1, CAN_OUTPUT
+0000013e 1039 0040 0000 	move.b  OUTPUT_ADDRESS, d0			* acknowledge the interrupt
+00000144 201f           	move.l  (a7)+, d0
+00000146 4e73           	rte
+                        
+                        unhandled_exception:
+00000148 4e72 2700      	stop	#$2700						* wait for NMI
+0000014c 60fa           	bra     unhandled_exception			* shouldn't get here
+                        
+                        nmi:
+                        * perform a soft reset
+0000014e 46fc 2700      	move    #$2700, SR					* set status register
+00000152 2e7a feac      	move.l  (vector_table,PC), a7		* reset stack pointer
+00000156 4e70           	reset								* reset peripherals
+00000158 4efa feaa      	jmp     (vector_table+4,PC)			* reset program counter
+                        
+                        END
+
+
+
+Compiling the example host environment:
+--------------------------------------
+
+I've only put in an os-dependant portion for dos/windows, so you'll either
+have to compile for that system or make your own osd code based on osd_dos.c
+and modify the makefile accordingly.
+
+I compiled this example using the compiler from mingw (www.mingw.org) but you
+could also use djgpp (www.delorie.com).
+
+- Copy the m68k files to a directory.  Then extract the files from example.zip to
+  the same directory, overwriting m68kconf.h.  program.bin is the actual 68000
+  program you will be running.
+- Make your own osd_get_key() in the same fashion as in osd_dos.c if you're not
+  compiling for dos/windows.
+- Type make
+- Perform the necessary animal sacrifices.
+- Type sim program.bin
+
+
+Keys:
+	ESC           - quits the simulator
+	~             - generates an NMI interrupt
+	Any other key - Genearate input for the input device
+
+
+Note: I've cheated a bit in the emulation.  There is no speed control
+      to set the speed the CPU runs at; it simply runs as fast as your
+      processor can run it.
+      To add speed control, you will need a high-precision timestamp
+      function (like the RDTSC instruction for newer Pentium CPUs)
+      and a bit of arithmetic to make the cycles argument for m68k_execute().
+      I'll leave that as an excercise to the reader.
diff --git a/plat/linux68k/emu/musashi/example/m68k.h b/plat/linux68k/emu/musashi/example/m68k.h
new file mode 120000
index 000000000..1ca6bd3d8
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68k.h
@@ -0,0 +1 @@
+../m68k.h
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68k_in.c b/plat/linux68k/emu/musashi/example/m68k_in.c
new file mode 120000
index 000000000..faeffd9a1
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68k_in.c
@@ -0,0 +1 @@
+../m68k_in.c
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68kconf.h b/plat/linux68k/emu/musashi/example/m68kconf.h
index 05cc6ade4..248979218 100755
--- a/plat/linux68k/emu/musashi/example/m68kconf.h
+++ b/plat/linux68k/emu/musashi/example/m68kconf.h
@@ -1,192 +1,220 @@
-/* ======================================================================== */
-/* ========================= LICENSING & COPYRIGHT ======================== */
-/* ======================================================================== */
-/*
- *                                  MUSASHI
- *                                Version 3.4
- *
- * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
- *
- * This code may be freely used for non-commercial purposes as long as this
- * copyright notice remains unaltered in the source code and any binary files
- * containing this code in compiled form.
- *
- * All other lisencing terms must be negotiated with the author
- * (Karl Stenerud).
- *
- * The latest version of this code can be obtained at:
- * http://kstenerud.cjb.net
- */
-
-
-
-#ifndef M68KCONF__HEADER
-#define M68KCONF__HEADER
-
-
-/* Configuration switches.
- * Use OPT_SPECIFY_HANDLER for configuration options that allow callbacks.
- * OPT_SPECIFY_HANDLER causes the core to link directly to the function
- * or macro you specify, rather than using callback functions whose pointer
- * must be passed in using m68k_set_xxx_callback().
- */
-#define OPT_OFF             0
-#define OPT_ON              1
-#define OPT_SPECIFY_HANDLER 2
-
-
-/* ======================================================================== */
-/* ============================== MAME STUFF ============================== */
-/* ======================================================================== */
-
-/* If you're compiling this for MAME, only change M68K_COMPILE_FOR_MAME
- * to OPT_ON and use m68kmame.h to configure the 68k core.
- */
-#ifndef M68K_COMPILE_FOR_MAME
-#define M68K_COMPILE_FOR_MAME      OPT_OFF
-#endif /* M68K_COMPILE_FOR_MAME */
-
-
-#if M68K_COMPILE_FOR_MAME == OPT_OFF
-
-
-/* ======================================================================== */
-/* ============================= CONFIGURATION ============================ */
-/* ======================================================================== */
-
-/* Turn ON if you want to use the following M68K variants */
-#define M68K_EMULATE_010            OPT_ON
-#define M68K_EMULATE_EC020          OPT_ON
-#define M68K_EMULATE_020            OPT_ON
-
-
-/* If ON, the CPU will call m68k_read_immediate_xx() for immediate addressing
- * and m68k_read_pcrelative_xx() for PC-relative addressing.
- * If off, all read requests from the CPU will be redirected to m68k_read_xx()
- */
-#define M68K_SEPARATE_READS         OPT_OFF
-
-/* If ON, the CPU will call m68k_write_32_pd() when it executes move.l with a
- * predecrement destination EA mode instead of m68k_write_32().
- * To simulate real 68k behavior, m68k_write_32_pd() must first write the high
- * word to [address+2], and then write the low word to [address].
- */
-#define M68K_SIMULATE_PD_WRITES     OPT_OFF
-
-/* If ON, CPU will call the interrupt acknowledge callback when it services an
- * interrupt.
- * If off, all interrupts will be autovectored and all interrupt requests will
- * auto-clear when the interrupt is serviced.
- */
-#define M68K_EMULATE_INT_ACK        OPT_SPECIFY_HANDLER
-#define M68K_INT_ACK_CALLBACK(A)    cpu_irq_ack(A)
-
-
-/* If ON, CPU will call the breakpoint acknowledge callback when it encounters
- * a breakpoint instruction and it is running a 68010+.
- */
-#define M68K_EMULATE_BKPT_ACK       OPT_OFF
-#define M68K_BKPT_ACK_CALLBACK()    your_bkpt_ack_handler_function()
-
-
-/* If ON, the CPU will monitor the trace flags and take trace exceptions
- */
-#define M68K_EMULATE_TRACE          OPT_OFF
-
-
-/* If ON, CPU will call the output reset callback when it encounters a reset
- * instruction.
- */
-#define M68K_EMULATE_RESET          OPT_SPECIFY_HANDLER
-#define M68K_RESET_CALLBACK()       cpu_pulse_reset()
-
-
-/* If ON, CPU will call the set fc callback on every memory access to
- * differentiate between user/supervisor, program/data access like a real
- * 68000 would.  This should be enabled and the callback should be set if you
- * want to properly emulate the m68010 or higher. (moves uses function codes
- * to read/write data from different address spaces)
- */
-#define M68K_EMULATE_FC             OPT_SPECIFY_HANDLER
-#define M68K_SET_FC_CALLBACK(A)     cpu_set_fc(A)
-
-
-/* If ON, CPU will call the pc changed callback when it changes the PC by a
- * large value.  This allows host programs to be nicer when it comes to
- * fetching immediate data and instructions on a banked memory system.
- */
-#define M68K_MONITOR_PC             OPT_OFF
-#define M68K_SET_PC_CALLBACK(A)     your_pc_changed_handler_function(A)
-
-
-/* If ON, CPU will call the instruction hook callback before every
- * instruction.
- */
-#define M68K_INSTRUCTION_HOOK       OPT_SPECIFY_HANDLER
-#define M68K_INSTRUCTION_CALLBACK() cpu_instr_callback()
-
-
-/* If ON, the CPU will emulate the 4-byte prefetch queue of a real 68000 */
-#define M68K_EMULATE_PREFETCH       OPT_ON
-
-
-/* If ON, the CPU will generate address error exceptions if it tries to
- * access a word or longword at an odd address.
- * NOTE: This is only emulated properly for 68000 mode.
- */
-#define M68K_EMULATE_ADDRESS_ERROR  OPT_ON
-
-
-/* Turn ON to enable logging of illegal instruction calls.
- * M68K_LOG_FILEHANDLE must be #defined to a stdio file stream.
- * Turn on M68K_LOG_1010_1111 to log all 1010 and 1111 calls.
- */
-#define M68K_LOG_ENABLE             OPT_OFF
-#define M68K_LOG_1010_1111          OPT_OFF
-#define M68K_LOG_FILEHANDLE         some_file_handle
-
-
-/* ----------------------------- COMPATIBILITY ---------------------------- */
-
-/* The following options set optimizations that violate the current ANSI
- * standard, but will be compliant under the forthcoming C9X standard.
- */
-
-
-/* If ON, the enulation core will use 64-bit integers to speed up some
- * operations.
-*/
-#define M68K_USE_64_BIT  OPT_OFF
-
-
-/* Set to your compiler's static inline keyword to enable it, or
- * set it to blank to disable it.
- * If you define INLINE in the makefile, it will override this value.
- * NOTE: not enabling inline functions will SEVERELY slow down emulation.
- */
-#ifndef INLINE
-#define INLINE static __inline__
-#endif /* INLINE */
-
-#endif /* M68K_COMPILE_FOR_MAME */
-
-#include "sim.h"
-
-#define m68k_read_memory_8(A) cpu_read_byte(A)
-#define m68k_read_memory_16(A) cpu_read_word(A)
-#define m68k_read_memory_32(A) cpu_read_long(A)
-
-#define m68k_read_disassembler_16(A) cpu_read_word_dasm(A)
-#define m68k_read_disassembler_32(A) cpu_read_long_dasm(A)
-
-#define m68k_write_memory_8(A, V) cpu_write_byte(A, V)
-#define m68k_write_memory_16(A, V) cpu_write_word(A, V)
-#define m68k_write_memory_32(A, V) cpu_write_long(A, V)
-
-
-/* ======================================================================== */
-/* ============================== END OF FILE ============================= */
-/* ======================================================================== */
-
-#endif /* M68KCONF__HEADER */
+/* ======================================================================== */
+/* ========================= LICENSING & COPYRIGHT ======================== */
+/* ======================================================================== */
+/*
+ *                                  MUSASHI
+ *                                Version 3.32
+ *
+ * A portable Motorola M680x0 processor emulation engine.
+ * Copyright Karl Stenerud.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+
+
+#ifndef M68KCONF__HEADER
+#define M68KCONF__HEADER
+
+
+/* Configuration switches.
+ * Use OPT_SPECIFY_HANDLER for configuration options that allow callbacks.
+ * OPT_SPECIFY_HANDLER causes the core to link directly to the function
+ * or macro you specify, rather than using callback functions whose pointer
+ * must be passed in using m68k_set_xxx_callback().
+ */
+#define OPT_OFF             0
+#define OPT_ON              1
+#define OPT_SPECIFY_HANDLER 2
+
+
+/* ======================================================================== */
+/* ============================== MAME STUFF ============================== */
+/* ======================================================================== */
+
+/* If you're compiling this for MAME, only change M68K_COMPILE_FOR_MAME
+ * to OPT_ON and use m68kmame.h to configure the 68k core.
+ */
+#ifndef M68K_COMPILE_FOR_MAME
+#define M68K_COMPILE_FOR_MAME      OPT_OFF
+#endif /* M68K_COMPILE_FOR_MAME */
+
+
+#if M68K_COMPILE_FOR_MAME == OPT_OFF
+
+
+/* ======================================================================== */
+/* ============================= CONFIGURATION ============================ */
+/* ======================================================================== */
+
+/* Turn ON if you want to use the following M68K variants */
+#define M68K_EMULATE_010            OPT_ON
+#define M68K_EMULATE_EC020          OPT_ON
+#define M68K_EMULATE_020            OPT_ON
+#define M68K_EMULATE_040            OPT_ON
+
+
+/* If ON, the CPU will call m68k_read_immediate_xx() for immediate addressing
+ * and m68k_read_pcrelative_xx() for PC-relative addressing.
+ * If off, all read requests from the CPU will be redirected to m68k_read_xx()
+ */
+#define M68K_SEPARATE_READS         OPT_OFF
+
+/* If ON, the CPU will call m68k_write_32_pd() when it executes move.l with a
+ * predecrement destination EA mode instead of m68k_write_32().
+ * To simulate real 68k behavior, m68k_write_32_pd() must first write the high
+ * word to [address+2], and then write the low word to [address].
+ */
+#define M68K_SIMULATE_PD_WRITES     OPT_OFF
+
+/* If ON, CPU will call the interrupt acknowledge callback when it services an
+ * interrupt.
+ * If off, all interrupts will be autovectored and all interrupt requests will
+ * auto-clear when the interrupt is serviced.
+ */
+#define M68K_EMULATE_INT_ACK        OPT_SPECIFY_HANDLER
+#define M68K_INT_ACK_CALLBACK(A)    cpu_irq_ack(A)
+
+
+/* If ON, CPU will call the breakpoint acknowledge callback when it encounters
+ * a breakpoint instruction and it is running a 68010+.
+ */
+#define M68K_EMULATE_BKPT_ACK       OPT_OFF
+#define M68K_BKPT_ACK_CALLBACK()    your_bkpt_ack_handler_function()
+
+
+/* If ON, the CPU will monitor the trace flags and take trace exceptions
+ */
+#define M68K_EMULATE_TRACE          OPT_OFF
+
+
+/* If ON, CPU will call the output reset callback when it encounters a reset
+ * instruction.
+ */
+#define M68K_EMULATE_RESET          OPT_SPECIFY_HANDLER
+#define M68K_RESET_CALLBACK()       cpu_pulse_reset()
+
+/* If ON, CPU will call the callback when it encounters a cmpi.l #v, dn
+ * instruction.
+ */
+#define M68K_CMPILD_HAS_CALLBACK     OPT_OFF
+#define M68K_CMPILD_CALLBACK(v,r)    your_cmpild_handler_function(v,r)
+
+
+/* If ON, CPU will call the callback when it encounters a rte
+ * instruction.
+ */
+#define M68K_RTE_HAS_CALLBACK       OPT_OFF
+#define M68K_RTE_CALLBACK()         your_rte_handler_function()
+
+/* If ON, CPU will call the callback when it encounters a tas
+ * instruction.
+ */
+#define M68K_TAS_HAS_CALLBACK       OPT_OFF
+#define M68K_TAS_CALLBACK()         your_tas_handler_function()
+
+/* If ON, CPU will call the callback when it encounters an illegal instruction
+ * passing the opcode as argument. If the callback returns 1, then it's considered
+ * as a normal instruction, and the illegal exception in canceled. If it returns 0,
+ * the exception occurs normally.
+ * The callback looks like int callback(int opcode)
+ * You should put OPT_SPECIFY_HANDLER here if you cant to use it, otherwise it will
+ * use a dummy default handler and you'll have to call m68k_set_illg_instr_callback explicitely
+ */
+#define M68K_ILLG_HAS_CALLBACK	    OPT_OFF
+#define M68K_ILLG_CALLBACK(opcode)  op_illg(opcode)
+
+/* If ON, CPU will call the set fc callback on every memory access to
+ * differentiate between user/supervisor, program/data access like a real
+ * 68000 would.  This should be enabled and the callback should be set if you
+ * want to properly emulate the m68010 or higher. (moves uses function codes
+ * to read/write data from different address spaces)
+ */
+#define M68K_EMULATE_FC             OPT_SPECIFY_HANDLER
+#define M68K_SET_FC_CALLBACK(A)     cpu_set_fc(A)
+
+/* If ON, CPU will call the pc changed callback when it changes the PC by a
+ * large value.  This allows host programs to be nicer when it comes to
+ * fetching immediate data and instructions on a banked memory system.
+ */
+#define M68K_MONITOR_PC             OPT_OFF
+#define M68K_SET_PC_CALLBACK(A)     your_pc_changed_handler_function(A)
+
+
+/* If ON, CPU will call the instruction hook callback before every
+ * instruction.
+ */
+#define M68K_INSTRUCTION_HOOK       OPT_SPECIFY_HANDLER
+#define M68K_INSTRUCTION_CALLBACK(pc) cpu_instr_callback(pc)
+
+
+/* If ON, the CPU will emulate the 4-byte prefetch queue of a real 68000 */
+#define M68K_EMULATE_PREFETCH       OPT_ON
+
+
+/* If ON, the CPU will generate address error exceptions if it tries to
+ * access a word or longword at an odd address.
+ * NOTE: This is only emulated properly for 68000 mode.
+ */
+#define M68K_EMULATE_ADDRESS_ERROR  OPT_ON
+
+
+/* Turn ON to enable logging of illegal instruction calls.
+ * M68K_LOG_FILEHANDLE must be #defined to a stdio file stream.
+ * Turn on M68K_LOG_1010_1111 to log all 1010 and 1111 calls.
+ */
+#define M68K_LOG_ENABLE             OPT_OFF
+#define M68K_LOG_1010_1111          OPT_OFF
+#define M68K_LOG_FILEHANDLE         some_file_handle
+
+
+/* ----------------------------- COMPATIBILITY ---------------------------- */
+
+/* The following options set optimizations that violate the current ANSI
+ * standard, but will be compliant under the forthcoming C9X standard.
+ */
+
+
+/* If ON, the enulation core will use 64-bit integers to speed up some
+ * operations.
+*/
+#define M68K_USE_64_BIT  OPT_ON
+
+
+#include "sim.h"
+
+#define m68k_read_memory_8(A) cpu_read_byte(A)
+#define m68k_read_memory_16(A) cpu_read_word(A)
+#define m68k_read_memory_32(A) cpu_read_long(A)
+
+#define m68k_read_disassembler_16(A) cpu_read_word_dasm(A)
+#define m68k_read_disassembler_32(A) cpu_read_long_dasm(A)
+
+#define m68k_write_memory_8(A, V) cpu_write_byte(A, V)
+#define m68k_write_memory_16(A, V) cpu_write_word(A, V)
+#define m68k_write_memory_32(A, V) cpu_write_long(A, V)
+
+
+#endif /* M68K_COMPILE_FOR_MAME */
+
+/* ======================================================================== */
+/* ============================== END OF FILE ============================= */
+/* ======================================================================== */
+
+#endif /* M68KCONF__HEADER */
diff --git a/plat/linux68k/emu/musashi/example/m68kcpu.c b/plat/linux68k/emu/musashi/example/m68kcpu.c
new file mode 120000
index 000000000..b07102e80
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68kcpu.c
@@ -0,0 +1 @@
+../m68kcpu.c
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68kcpu.h b/plat/linux68k/emu/musashi/example/m68kcpu.h
new file mode 120000
index 000000000..bfe2ed2e4
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68kcpu.h
@@ -0,0 +1 @@
+../m68kcpu.h
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68kdasm.c b/plat/linux68k/emu/musashi/example/m68kdasm.c
new file mode 120000
index 000000000..c17dfc31c
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68kdasm.c
@@ -0,0 +1 @@
+../m68kdasm.c
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68kfpu.c b/plat/linux68k/emu/musashi/example/m68kfpu.c
new file mode 120000
index 000000000..ee8a01028
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68kfpu.c
@@ -0,0 +1 @@
+../m68kfpu.c
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68kmake.c b/plat/linux68k/emu/musashi/example/m68kmake.c
new file mode 120000
index 000000000..81816fbc0
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68kmake.c
@@ -0,0 +1 @@
+../m68kmake.c
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68kmmu.h b/plat/linux68k/emu/musashi/example/m68kmmu.h
new file mode 120000
index 000000000..032b33f31
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68kmmu.h
@@ -0,0 +1 @@
+../m68kmmu.h
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/osd.h b/plat/linux68k/emu/musashi/example/osd.h
index 5825095a3..1ac9e0bd6 100755
--- a/plat/linux68k/emu/musashi/example/osd.h
+++ b/plat/linux68k/emu/musashi/example/osd.h
@@ -1,6 +1,6 @@
-#ifndef HEADER__OSD
-#define HEADER__OSD
-
-int osd_get_char(void);
-
-#endif /* HEADER__OSD */
+#ifndef HEADER__OSD
+#define HEADER__OSD
+
+int osd_get_char(void);
+
+#endif /* HEADER__OSD */
diff --git a/plat/linux68k/emu/musashi/example/osd_dos.c b/plat/linux68k/emu/musashi/example/osd_dos.c
index 38ab69791..46fe3a69c 100755
--- a/plat/linux68k/emu/musashi/example/osd_dos.c
+++ b/plat/linux68k/emu/musashi/example/osd_dos.c
@@ -1,16 +1,16 @@
-#include "osd.h"
-
-/* OS-dependant code to get a character from the user.
- * This function must not block, and must either return an ASCII code or -1.
- */
-#include <conio.h>
-int osd_get_char(void)
-{
-	int ch = -1;
-	if(kbhit())
-	{
-		while(kbhit())
-			ch = getch();
-	}
-	return ch;
-}
+#include "osd.h"
+
+/* OS-dependant code to get a character from the user.
+ * This function must not block, and must either return an ASCII code or -1.
+ */
+#include <conio.h>
+int osd_get_char(void)
+{
+	int ch = -1;
+	if(kbhit())
+	{
+		while(kbhit())
+			ch = getch();
+	}
+	return ch;
+}
diff --git a/plat/linux68k/emu/musashi/example/osd_linux.c b/plat/linux68k/emu/musashi/example/osd_linux.c
new file mode 100644
index 000000000..0fbe789dc
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/osd_linux.c
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <termios.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/time.h>
+
+void changemode(int dir)
+{
+  static struct termios oldt, newt;
+
+  if ( dir == 1 )
+  {
+    tcgetattr( STDIN_FILENO, &oldt);
+    newt = oldt;
+    newt.c_lflag &= ~( ICANON | ECHO );
+    tcsetattr( STDIN_FILENO, TCSANOW, &newt);
+  }
+  else
+    tcsetattr( STDIN_FILENO, TCSANOW, &oldt);
+}
+
+int kbhit (void)
+{
+  struct timeval tv;
+  fd_set rdfs;
+
+  tv.tv_sec = 0;
+  tv.tv_usec = 0;
+
+  FD_ZERO(&rdfs);
+  FD_SET (STDIN_FILENO, &rdfs);
+
+  select(STDIN_FILENO+1, &rdfs, NULL, NULL, &tv);
+  return FD_ISSET(STDIN_FILENO, &rdfs);
+
+}
+
+int osd_get_char() {
+    changemode(1);
+    int ch = -1;
+    while(kbhit())
+	ch = getchar();
+    changemode(0);
+    return ch;
+}
+
diff --git a/plat/linux68k/emu/musashi/example/sim.c b/plat/linux68k/emu/musashi/example/sim.c
index 1e5555cd0..173045ef6 100755
--- a/plat/linux68k/emu/musashi/example/sim.c
+++ b/plat/linux68k/emu/musashi/example/sim.c
@@ -1,561 +1,563 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <time.h>
-#include "sim.h"
-#include "m68k.h"
-#include "osd.h"
-
-void disassemble_program();
-
-/* Memory-mapped IO ports */
-#define INPUT_ADDRESS 0x800000
-#define OUTPUT_ADDRESS 0x400000
-
-/* IRQ connections */
-#define IRQ_NMI_DEVICE 7
-#define IRQ_INPUT_DEVICE 2
-#define IRQ_OUTPUT_DEVICE 1
-
-/* Time between characters sent to output device (seconds) */
-#define OUTPUT_DEVICE_PERIOD 1
-
-/* ROM and RAM sizes */
-#define MAX_ROM 0xfff
-#define MAX_RAM 0xff
-
-
-/* Read/write macros */
-#define READ_BYTE(BASE, ADDR) (BASE)[ADDR]
-#define READ_WORD(BASE, ADDR) (((BASE)[ADDR]<<8) |			\
-							  (BASE)[(ADDR)+1])
-#define READ_LONG(BASE, ADDR) (((BASE)[ADDR]<<24) |			\
-							  ((BASE)[(ADDR)+1]<<16) |		\
-							  ((BASE)[(ADDR)+2]<<8) |		\
-							  (BASE)[(ADDR)+3])
-
-#define WRITE_BYTE(BASE, ADDR, VAL) (BASE)[ADDR] = (VAL)&0xff
-#define WRITE_WORD(BASE, ADDR, VAL) (BASE)[ADDR] = ((VAL)>>8) & 0xff;		\
-									(BASE)[(ADDR)+1] = (VAL)&0xff
-#define WRITE_LONG(BASE, ADDR, VAL) (BASE)[ADDR] = ((VAL)>>24) & 0xff;		\
-									(BASE)[(ADDR)+1] = ((VAL)>>16)&0xff;	\
-									(BASE)[(ADDR)+2] = ((VAL)>>8)&0xff;		\
-									(BASE)[(ADDR)+3] = (VAL)&0xff
-
-
-/* Prototypes */
-void exit_error(char* fmt, ...);
-
-unsigned int cpu_read_byte(unsigned int address);
-unsigned int cpu_read_word(unsigned int address);
-unsigned int cpu_read_long(unsigned int address);
-void cpu_write_byte(unsigned int address, unsigned int value);
-void cpu_write_word(unsigned int address, unsigned int value);
-void cpu_write_long(unsigned int address, unsigned int value);
-void cpu_pulse_reset(void);
-void cpu_set_fc(unsigned int fc);
-int cpu_irq_ack(int level);
-
-void nmi_device_reset(void);
-void nmi_device_update(void);
-int nmi_device_ack(void);
-
-void input_device_reset(void);
-void input_device_update(void);
-int input_device_ack(void);
-unsigned int input_device_read(void);
-void input_device_write(unsigned int value);
-
-void output_device_reset(void);
-void output_device_update(void);
-int output_device_ack(void);
-unsigned int output_device_read(void);
-void output_device_write(unsigned int value);
-
-void int_controller_set(unsigned int value);
-void int_controller_clear(unsigned int value);
-
-void get_user_input(void);
-
-
-/* Data */
-unsigned int g_quit = 0;                        /* 1 if we want to quit */
-unsigned int g_nmi = 0;                         /* 1 if nmi pending */
-
-int          g_input_device_value = -1;         /* Current value in input device */
-
-unsigned int g_output_device_ready = 0;         /* 1 if output device is ready */
-time_t       g_output_device_last_output;       /* Time of last char output */
-
-unsigned int g_int_controller_pending = 0;      /* list of pending interrupts */
-unsigned int g_int_controller_highest_int = 0;  /* Highest pending interrupt */
-
-unsigned char g_rom[MAX_ROM+1];                 /* ROM */
-unsigned char g_ram[MAX_RAM+1];                 /* RAM */
-unsigned int  g_fc;                             /* Current function code from CPU */
-
-
-/* Exit with an error message.  Use printf syntax. */
-void exit_error(char* fmt, ...)
-{
-	static int guard_val = 0;
-	char buff[100];
-	unsigned int pc;
-	va_list args;
-
-	if(guard_val)
-		return;
-	else
-		guard_val = 1;
-
-	va_start(args, fmt);
-	vfprintf(stderr, fmt, args);
-	va_end(args);
-	fprintf(stderr, "\n");
-	pc = m68k_get_reg(NULL, M68K_REG_PPC);
-	m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
-	fprintf(stderr, "At %04x: %s\n", pc, buff);
-
-	exit(EXIT_FAILURE);
-}
-
-
-/* Read data from RAM, ROM, or a device */
-unsigned int cpu_read_byte(unsigned int address)
-{
-	if(g_fc & 2)	/* Program */
-	{
-		if(address > MAX_ROM)
-			exit_error("Attempted to read byte from ROM address %08x", address);
-		return READ_BYTE(g_rom, address);
-	}
-
-	/* Otherwise it's data space */
-	switch(address)
-	{
-		case INPUT_ADDRESS:
-			return input_device_read();
-		case OUTPUT_ADDRESS:
-			return output_device_read();
-		default:
-			break;
-	}
-	if(address > MAX_RAM)
-		exit_error("Attempted to read byte from RAM address %08x", address);
-		return READ_BYTE(g_ram, address);
-}
-
-unsigned int cpu_read_word(unsigned int address)
-{
-	if(g_fc & 2)	/* Program */
-	{
-		if(address > MAX_ROM)
-			exit_error("Attempted to read word from ROM address %08x", address);
-		return READ_WORD(g_rom, address);
-	}
-
-	/* Otherwise it's data space */
-	switch(address)
-	{
-		case INPUT_ADDRESS:
-			return input_device_read();
-		case OUTPUT_ADDRESS:
-			return output_device_read();
-		default:
-			break;
-	}
-	if(address > MAX_RAM)
-		exit_error("Attempted to read word from RAM address %08x", address);
-		return READ_WORD(g_ram, address);
-}
-
-unsigned int cpu_read_long(unsigned int address)
-{
-	if(g_fc & 2)	/* Program */
-	{
-		if(address > MAX_ROM)
-			exit_error("Attempted to read long from ROM address %08x", address);
-		return READ_LONG(g_rom, address);
-	}
-
-	/* Otherwise it's data space */
-	switch(address)
-	{
-		case INPUT_ADDRESS:
-			return input_device_read();
-		case OUTPUT_ADDRESS:
-			return output_device_read();
-		default:
-			break;
-	}
-	if(address > MAX_RAM)
-		exit_error("Attempted to read long from RAM address %08x", address);
-		return READ_LONG(g_ram, address);
-}
-
-
-unsigned int cpu_read_word_dasm(unsigned int address)
-{
-	if(address > MAX_ROM)
-		exit_error("Disassembler attempted to read word from ROM address %08x", address);
-	return READ_WORD(g_rom, address);
-}
-
-unsigned int cpu_read_long_dasm(unsigned int address)
-{
-	if(address > MAX_ROM)
-		exit_error("Dasm attempted to read long from ROM address %08x", address);
-	return READ_LONG(g_rom, address);
-}
-
-
-/* Write data to RAM or a device */
-void cpu_write_byte(unsigned int address, unsigned int value)
-{
-	if(g_fc & 2)	/* Program */
-		exit_error("Attempted to write %02x to ROM address %08x", value&0xff, address);
-
-	/* Otherwise it's data space */
-	switch(address)
-	{
-		case INPUT_ADDRESS:
-			input_device_write(value&0xff);
-			return;
-		case OUTPUT_ADDRESS:
-			output_device_write(value&0xff);
-			return;
-		default:
-			break;
-	}
-	if(address > MAX_RAM)
-		exit_error("Attempted to write %02x to RAM address %08x", value&0xff, address);
-	WRITE_BYTE(g_ram, address, value);
-}
-
-void cpu_write_word(unsigned int address, unsigned int value)
-{
-	if(g_fc & 2)	/* Program */
-		exit_error("Attempted to write %04x to ROM address %08x", value&0xffff, address);
-
-	/* Otherwise it's data space */
-	switch(address)
-	{
-		case INPUT_ADDRESS:
-			input_device_write(value&0xffff);
-			return;
-		case OUTPUT_ADDRESS:
-			output_device_write(value&0xffff);
-			return;
-		default:
-			break;
-	}
-	if(address > MAX_RAM)
-		exit_error("Attempted to write %04x to RAM address %08x", value&0xffff, address);
-	WRITE_WORD(g_ram, address, value);
-}
-
-void cpu_write_long(unsigned int address, unsigned int value)
-{
-	if(g_fc & 2)	/* Program */
-		exit_error("Attempted to write %08x to ROM address %08x", value, address);
-
-	/* Otherwise it's data space */
-	switch(address)
-	{
-		case INPUT_ADDRESS:
-			input_device_write(value);
-			return;
-		case OUTPUT_ADDRESS:
-			output_device_write(value);
-			return;
-		default:
-			break;
-	}
-	if(address > MAX_RAM)
-		exit_error("Attempted to write %08x to RAM address %08x", value, address);
-	WRITE_LONG(g_ram, address, value);
-}
-
-/* Called when the CPU pulses the RESET line */
-void cpu_pulse_reset(void)
-{
-	nmi_device_reset();
-	output_device_reset();
-	input_device_reset();
-}
-
-/* Called when the CPU changes the function code pins */
-void cpu_set_fc(unsigned int fc)
-{
-	g_fc = fc;
-}
-
-/* Called when the CPU acknowledges an interrupt */
-int cpu_irq_ack(int level)
-{
-	switch(level)
-	{
-		case IRQ_NMI_DEVICE:
-			return nmi_device_ack();
-		case IRQ_INPUT_DEVICE:
-			return input_device_ack();
-		case IRQ_OUTPUT_DEVICE:
-			return output_device_ack();
-	}
-	return M68K_INT_ACK_SPURIOUS;
-}
-
-
-
-
-/* Implementation for the NMI device */
-void nmi_device_reset(void)
-{
-	g_nmi = 0;
-}
-
-void nmi_device_update(void)
-{
-	if(g_nmi)
-	{
-		g_nmi = 0;
-		int_controller_set(IRQ_NMI_DEVICE);
-	}
-}
-
-int nmi_device_ack(void)
-{
-	printf("\nNMI\n");fflush(stdout);
-	int_controller_clear(IRQ_NMI_DEVICE);
-	return M68K_INT_ACK_AUTOVECTOR;
-}
-
-
-/* Implementation for the input device */
-void input_device_reset(void)
-{
-	g_input_device_value = -1;
-	int_controller_clear(IRQ_INPUT_DEVICE);
-}
-
-void input_device_update(void)
-{
-	if(g_input_device_value >= 0)
-		int_controller_set(IRQ_INPUT_DEVICE);
-}
-
-int input_device_ack(void)
-{
-	return M68K_INT_ACK_AUTOVECTOR;
-}
-
-unsigned int input_device_read(void)
-{
-	int value = g_input_device_value > 0 ? g_input_device_value : 0;
-	int_controller_clear(IRQ_INPUT_DEVICE);
-	g_input_device_value = -1;
-	return value;
-}
-
-void input_device_write(unsigned int value)
-{
-}
-
-
-/* Implementation for the output device */
-void output_device_reset(void)
-{
-	g_output_device_last_output = time(NULL);
-	g_output_device_ready = 0;
-	int_controller_clear(IRQ_OUTPUT_DEVICE);
-}
-
-void output_device_update(void)
-{
-	if(!g_output_device_ready)
-	{
-		if((time(NULL) - g_output_device_last_output) >= OUTPUT_DEVICE_PERIOD)
-		{
-			g_output_device_ready = 1;
-			int_controller_set(IRQ_OUTPUT_DEVICE);
-		}
-	}
-}
-
-int output_device_ack(void)
-{
-	return M68K_INT_ACK_AUTOVECTOR;
-}
-
-unsigned int output_device_read(void)
-{
-	int_controller_clear(IRQ_OUTPUT_DEVICE);
-	return 0;
-}
-
-void output_device_write(unsigned int value)
-{
-	char ch;
-	if(g_output_device_ready)
-	{
-		ch = value & 0xff;
-		printf("%c", ch);
-		g_output_device_last_output = time(NULL);
-		g_output_device_ready = 0;
-		int_controller_clear(IRQ_OUTPUT_DEVICE);
-	}
-}
-
-
-/* Implementation for the interrupt controller */
-void int_controller_set(unsigned int value)
-{
-	unsigned int old_pending = g_int_controller_pending;
-
-	g_int_controller_pending |= (1<<value);
-
-	if(old_pending != g_int_controller_pending && value > g_int_controller_highest_int)
-	{
-		g_int_controller_highest_int = value;
-		m68k_set_irq(g_int_controller_highest_int);
-	}
-}
-
-void int_controller_clear(unsigned int value)
-{
-	g_int_controller_pending &= ~(1<<value);
-
-	for(g_int_controller_highest_int = 7;g_int_controller_highest_int > 0;g_int_controller_highest_int--)
-		if(g_int_controller_pending & (1<<g_int_controller_highest_int))
-			break;
-
-	m68k_set_irq(g_int_controller_highest_int);
-}
-
-
-/* Parse user input and update any devices that need user input */
-void get_user_input(void)
-{
-	static int last_ch = -1;
-	int ch = osd_get_char();
-
-	if(ch >= 0)
-	{
-		switch(ch)
-		{
-			case 0x1b:
-				g_quit = 1;
-				break;
-			case '~':
-				if(last_ch != ch)
-					g_nmi = 1;
-				break;
-			default:
-				g_input_device_value = ch;
-		}
-	}
-	last_ch = ch;
-}
-
-/* Disassembler */
-void make_hex(char* buff, unsigned int pc, unsigned int length)
-{
-	char* ptr = buff;
-
-	for(;length>0;length -= 2)
-	{
-		sprintf(ptr, "%04x", cpu_read_word_dasm(pc));
-		pc += 2;
-		ptr += 4;
-		if(length > 2)
-			*ptr++ = ' ';
-	}
-}
-
-void disassemble_program()
-{
-	unsigned int pc;
-	unsigned int instr_size;
-	char buff[100];
-	char buff2[100];
-
-	pc = cpu_read_long_dasm(4);
-
-	while(pc <= 0x16e)
-	{
-		instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
-		make_hex(buff2, pc, instr_size);
-		printf("%03x: %-20s: %s\n", pc, buff2, buff);
-		pc += instr_size;
-	}
-	fflush(stdout);
-}
-
-void cpu_instr_callback()
-{
-/* The following code would print out instructions as they are executed */
-/*
-	static char buff[100];
-	static char buff2[100];
-	static unsigned int pc;
-	static unsigned int instr_size;
-
-	pc = m68k_get_reg(NULL, M68K_REG_PC);
-	instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
-	make_hex(buff2, pc, instr_size);
-	printf("E %03x: %-20s: %s\n", pc, buff2, buff);
-	fflush(stdout);
-*/
-}
-
-
-
-/* The main loop */
-int main(int argc, char* argv[])
-{
-	FILE* fhandle;
-
-	if(argc != 2)
-	{
-		printf("Usage: sim <program file>\n");
-		exit(-1);
-	}
-
-	if((fhandle = fopen(argv[1], "rb")) == NULL)
-		exit_error("Unable to open %s", argv[1]);
-
-	if(fread(g_rom, 1, MAX_ROM+1, fhandle) <= 0)
-		exit_error("Error reading %s", argv[1]);
-
-//	disassemble_program();
-
-	m68k_init();
-	m68k_set_cpu_type(M68K_CPU_TYPE_68000);
-	m68k_pulse_reset();
-	input_device_reset();
-	output_device_reset();
-	nmi_device_reset();
-
-	g_quit = 0;
-	while(!g_quit)
-	{
-		// Our loop requires some interleaving to allow us to update the
-		// input, output, and nmi devices.
-
-		get_user_input();
-
-		// Values to execute determine the interleave rate.
-		// Smaller values allow for more accurate interleaving with multiple
-		// devices/CPUs but is more processor intensive.
-		// 100000 is usually a good value to start at, then work from there.
-
-		// Note that I am not emulating the correct clock speed!
-		m68k_execute(100000);
-		output_device_update();
-		input_device_update();
-		nmi_device_update();
-	}
-
-	return 0;
-}
-
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <time.h>
+#include "sim.h"
+#include "m68k.h"
+#include "osd.h"
+
+void disassemble_program();
+
+/* Memory-mapped IO ports */
+#define INPUT_ADDRESS 0x800000
+#define OUTPUT_ADDRESS 0x400000
+
+/* IRQ connections */
+#define IRQ_NMI_DEVICE 7
+#define IRQ_INPUT_DEVICE 2
+#define IRQ_OUTPUT_DEVICE 1
+
+/* Time between characters sent to output device (seconds) */
+#define OUTPUT_DEVICE_PERIOD 1
+
+/* ROM and RAM sizes */
+#define MAX_ROM 0xfff
+#define MAX_RAM 0xff
+
+
+/* Read/write macros */
+#define READ_BYTE(BASE, ADDR) (BASE)[ADDR]
+#define READ_WORD(BASE, ADDR) (((BASE)[ADDR]<<8) |			\
+							  (BASE)[(ADDR)+1])
+#define READ_LONG(BASE, ADDR) (((BASE)[ADDR]<<24) |			\
+							  ((BASE)[(ADDR)+1]<<16) |		\
+							  ((BASE)[(ADDR)+2]<<8) |		\
+							  (BASE)[(ADDR)+3])
+
+#define WRITE_BYTE(BASE, ADDR, VAL) (BASE)[ADDR] = (VAL)&0xff
+#define WRITE_WORD(BASE, ADDR, VAL) (BASE)[ADDR] = ((VAL)>>8) & 0xff;		\
+									(BASE)[(ADDR)+1] = (VAL)&0xff
+#define WRITE_LONG(BASE, ADDR, VAL) (BASE)[ADDR] = ((VAL)>>24) & 0xff;		\
+									(BASE)[(ADDR)+1] = ((VAL)>>16)&0xff;	\
+									(BASE)[(ADDR)+2] = ((VAL)>>8)&0xff;		\
+									(BASE)[(ADDR)+3] = (VAL)&0xff
+
+
+/* Prototypes */
+void exit_error(char* fmt, ...);
+
+unsigned int cpu_read_byte(unsigned int address);
+unsigned int cpu_read_word(unsigned int address);
+unsigned int cpu_read_long(unsigned int address);
+void cpu_write_byte(unsigned int address, unsigned int value);
+void cpu_write_word(unsigned int address, unsigned int value);
+void cpu_write_long(unsigned int address, unsigned int value);
+void cpu_pulse_reset(void);
+void cpu_set_fc(unsigned int fc);
+int cpu_irq_ack(int level);
+
+void nmi_device_reset(void);
+void nmi_device_update(void);
+int nmi_device_ack(void);
+
+void input_device_reset(void);
+void input_device_update(void);
+int input_device_ack(void);
+unsigned int input_device_read(void);
+void input_device_write(unsigned int value);
+
+void output_device_reset(void);
+void output_device_update(void);
+int output_device_ack(void);
+unsigned int output_device_read(void);
+void output_device_write(unsigned int value);
+
+void int_controller_set(unsigned int value);
+void int_controller_clear(unsigned int value);
+
+void get_user_input(void);
+
+
+/* Data */
+unsigned int g_quit = 0;                        /* 1 if we want to quit */
+unsigned int g_nmi = 0;                         /* 1 if nmi pending */
+
+int          g_input_device_value = -1;         /* Current value in input device */
+
+unsigned int g_output_device_ready = 0;         /* 1 if output device is ready */
+time_t       g_output_device_last_output;       /* Time of last char output */
+
+unsigned int g_int_controller_pending = 0;      /* list of pending interrupts */
+unsigned int g_int_controller_highest_int = 0;  /* Highest pending interrupt */
+
+unsigned char g_rom[MAX_ROM+1];                 /* ROM */
+unsigned char g_ram[MAX_RAM+1];                 /* RAM */
+unsigned int  g_fc;                             /* Current function code from CPU */
+
+
+/* Exit with an error message.  Use printf syntax. */
+void exit_error(char* fmt, ...)
+{
+	static int guard_val = 0;
+	char buff[100];
+	unsigned int pc;
+	va_list args;
+
+	if(guard_val)
+		return;
+	else
+		guard_val = 1;
+
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	fprintf(stderr, "\n");
+	pc = m68k_get_reg(NULL, M68K_REG_PPC);
+	m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
+	fprintf(stderr, "At %04x: %s\n", pc, buff);
+
+	exit(EXIT_FAILURE);
+}
+
+
+/* Read data from RAM, ROM, or a device */
+unsigned int cpu_read_byte(unsigned int address)
+{
+	if(g_fc & 2)	/* Program */
+	{
+		if(address > MAX_ROM)
+			exit_error("Attempted to read byte from ROM address %08x", address);
+		return READ_BYTE(g_rom, address);
+	}
+
+	/* Otherwise it's data space */
+	switch(address)
+	{
+		case INPUT_ADDRESS:
+			return input_device_read();
+		case OUTPUT_ADDRESS:
+			return output_device_read();
+		default:
+			break;
+	}
+	if(address > MAX_RAM)
+		exit_error("Attempted to read byte from RAM address %08x", address);
+	return READ_BYTE(g_ram, address);
+}
+
+unsigned int cpu_read_word(unsigned int address)
+{
+	if(g_fc & 2)	/* Program */
+	{
+		if(address > MAX_ROM)
+			exit_error("Attempted to read word from ROM address %08x", address);
+		return READ_WORD(g_rom, address);
+	}
+
+	/* Otherwise it's data space */
+	switch(address)
+	{
+		case INPUT_ADDRESS:
+			return input_device_read();
+		case OUTPUT_ADDRESS:
+			return output_device_read();
+		default:
+			break;
+	}
+	if(address > MAX_RAM)
+		exit_error("Attempted to read word from RAM address %08x", address);
+	return READ_WORD(g_ram, address);
+}
+
+unsigned int cpu_read_long(unsigned int address)
+{
+	if(g_fc & 2)	/* Program */
+	{
+		if(address > MAX_ROM)
+			exit_error("Attempted to read long from ROM address %08x", address);
+		return READ_LONG(g_rom, address);
+	}
+
+	/* Otherwise it's data space */
+	switch(address)
+	{
+		case INPUT_ADDRESS:
+			return input_device_read();
+		case OUTPUT_ADDRESS:
+			return output_device_read();
+		default:
+			break;
+	}
+	if(address > MAX_RAM)
+		exit_error("Attempted to read long from RAM address %08x", address);
+	return READ_LONG(g_ram, address);
+}
+
+
+unsigned int cpu_read_word_dasm(unsigned int address)
+{
+	if(address > MAX_ROM)
+		exit_error("Disassembler attempted to read word from ROM address %08x", address);
+	return READ_WORD(g_rom, address);
+}
+
+unsigned int cpu_read_long_dasm(unsigned int address)
+{
+	if(address > MAX_ROM)
+		exit_error("Dasm attempted to read long from ROM address %08x", address);
+	return READ_LONG(g_rom, address);
+}
+
+
+/* Write data to RAM or a device */
+void cpu_write_byte(unsigned int address, unsigned int value)
+{
+	if(g_fc & 2)	/* Program */
+		exit_error("Attempted to write %02x to ROM address %08x", value&0xff, address);
+
+	/* Otherwise it's data space */
+	switch(address)
+	{
+		case INPUT_ADDRESS:
+			input_device_write(value&0xff);
+			return;
+		case OUTPUT_ADDRESS:
+			output_device_write(value&0xff);
+			return;
+		default:
+			break;
+	}
+	if(address > MAX_RAM)
+		exit_error("Attempted to write %02x to RAM address %08x", value&0xff, address);
+	WRITE_BYTE(g_ram, address, value);
+}
+
+void cpu_write_word(unsigned int address, unsigned int value)
+{
+	if(g_fc & 2)	/* Program */
+		exit_error("Attempted to write %04x to ROM address %08x", value&0xffff, address);
+
+	/* Otherwise it's data space */
+	switch(address)
+	{
+		case INPUT_ADDRESS:
+			input_device_write(value&0xffff);
+			return;
+		case OUTPUT_ADDRESS:
+			output_device_write(value&0xffff);
+			return;
+		default:
+			break;
+	}
+	if(address > MAX_RAM)
+		exit_error("Attempted to write %04x to RAM address %08x", value&0xffff, address);
+	WRITE_WORD(g_ram, address, value);
+}
+
+void cpu_write_long(unsigned int address, unsigned int value)
+{
+	if(g_fc & 2)	/* Program */
+		exit_error("Attempted to write %08x to ROM address %08x", value, address);
+
+	/* Otherwise it's data space */
+	switch(address)
+	{
+		case INPUT_ADDRESS:
+			input_device_write(value);
+			return;
+		case OUTPUT_ADDRESS:
+			output_device_write(value);
+			return;
+		default:
+			break;
+	}
+	if(address > MAX_RAM)
+		exit_error("Attempted to write %08x to RAM address %08x", value, address);
+	WRITE_LONG(g_ram, address, value);
+}
+
+/* Called when the CPU pulses the RESET line */
+void cpu_pulse_reset(void)
+{
+	nmi_device_reset();
+	output_device_reset();
+	input_device_reset();
+}
+
+/* Called when the CPU changes the function code pins */
+void cpu_set_fc(unsigned int fc)
+{
+	g_fc = fc;
+}
+
+/* Called when the CPU acknowledges an interrupt */
+int cpu_irq_ack(int level)
+{
+	switch(level)
+	{
+		case IRQ_NMI_DEVICE:
+			return nmi_device_ack();
+		case IRQ_INPUT_DEVICE:
+			return input_device_ack();
+		case IRQ_OUTPUT_DEVICE:
+			return output_device_ack();
+	}
+	return M68K_INT_ACK_SPURIOUS;
+}
+
+
+
+
+/* Implementation for the NMI device */
+void nmi_device_reset(void)
+{
+	g_nmi = 0;
+}
+
+void nmi_device_update(void)
+{
+	if(g_nmi)
+	{
+		g_nmi = 0;
+		int_controller_set(IRQ_NMI_DEVICE);
+	}
+}
+
+int nmi_device_ack(void)
+{
+	printf("\nNMI\n");fflush(stdout);
+	int_controller_clear(IRQ_NMI_DEVICE);
+	return M68K_INT_ACK_AUTOVECTOR;
+}
+
+
+/* Implementation for the input device */
+void input_device_reset(void)
+{
+	g_input_device_value = -1;
+	int_controller_clear(IRQ_INPUT_DEVICE);
+}
+
+void input_device_update(void)
+{
+	if(g_input_device_value >= 0)
+		int_controller_set(IRQ_INPUT_DEVICE);
+}
+
+int input_device_ack(void)
+{
+	return M68K_INT_ACK_AUTOVECTOR;
+}
+
+unsigned int input_device_read(void)
+{
+	int value = g_input_device_value > 0 ? g_input_device_value : 0;
+	int_controller_clear(IRQ_INPUT_DEVICE);
+	g_input_device_value = -1;
+	return value;
+}
+
+void input_device_write(unsigned int value)
+{
+	(void)value;
+}
+
+
+/* Implementation for the output device */
+void output_device_reset(void)
+{
+	g_output_device_last_output = time(NULL);
+	g_output_device_ready = 0;
+	int_controller_clear(IRQ_OUTPUT_DEVICE);
+}
+
+void output_device_update(void)
+{
+	if(!g_output_device_ready)
+	{
+		if((time(NULL) - g_output_device_last_output) >= OUTPUT_DEVICE_PERIOD)
+		{
+			g_output_device_ready = 1;
+			int_controller_set(IRQ_OUTPUT_DEVICE);
+		}
+	}
+}
+
+int output_device_ack(void)
+{
+	return M68K_INT_ACK_AUTOVECTOR;
+}
+
+unsigned int output_device_read(void)
+{
+	int_controller_clear(IRQ_OUTPUT_DEVICE);
+	return 0;
+}
+
+void output_device_write(unsigned int value)
+{
+	char ch;
+	if(g_output_device_ready)
+	{
+		ch = value & 0xff;
+		printf("%c", ch);
+		g_output_device_last_output = time(NULL);
+		g_output_device_ready = 0;
+		int_controller_clear(IRQ_OUTPUT_DEVICE);
+	}
+}
+
+
+/* Implementation for the interrupt controller */
+void int_controller_set(unsigned int value)
+{
+	unsigned int old_pending = g_int_controller_pending;
+
+	g_int_controller_pending |= (1<<value);
+
+	if(old_pending != g_int_controller_pending && value > g_int_controller_highest_int)
+	{
+		g_int_controller_highest_int = value;
+		m68k_set_irq(g_int_controller_highest_int);
+	}
+}
+
+void int_controller_clear(unsigned int value)
+{
+	g_int_controller_pending &= ~(1<<value);
+
+	for(g_int_controller_highest_int = 7;g_int_controller_highest_int > 0;g_int_controller_highest_int--)
+		if(g_int_controller_pending & (1<<g_int_controller_highest_int))
+			break;
+
+	m68k_set_irq(g_int_controller_highest_int);
+}
+
+
+/* Parse user input and update any devices that need user input */
+void get_user_input(void)
+{
+	static int last_ch = -1;
+	int ch = osd_get_char();
+
+	if(ch >= 0)
+	{
+		switch(ch)
+		{
+			case 0x1b:
+				g_quit = 1;
+				break;
+			case '~':
+				if(last_ch != ch)
+					g_nmi = 1;
+				break;
+			default:
+				g_input_device_value = ch;
+		}
+	}
+	last_ch = ch;
+}
+
+/* Disassembler */
+void make_hex(char* buff, unsigned int pc, unsigned int length)
+{
+	char* ptr = buff;
+
+	for(;length>0;length -= 2)
+	{
+		sprintf(ptr, "%04x", cpu_read_word_dasm(pc));
+		pc += 2;
+		ptr += 4;
+		if(length > 2)
+			*ptr++ = ' ';
+	}
+}
+
+void disassemble_program()
+{
+	unsigned int pc;
+	unsigned int instr_size;
+	char buff[100];
+	char buff2[100];
+
+	pc = cpu_read_long_dasm(4);
+
+	while(pc <= 0x16e)
+	{
+		instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
+		make_hex(buff2, pc, instr_size);
+		printf("%03x: %-20s: %s\n", pc, buff2, buff);
+		pc += instr_size;
+	}
+	fflush(stdout);
+}
+
+void cpu_instr_callback(int pc)
+{
+	(void)pc;
+/* The following code would print out instructions as they are executed */
+/*
+	static char buff[100];
+	static char buff2[100];
+	static unsigned int pc;
+	static unsigned int instr_size;
+
+	pc = m68k_get_reg(NULL, M68K_REG_PC);
+	instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
+	make_hex(buff2, pc, instr_size);
+	printf("E %03x: %-20s: %s\n", pc, buff2, buff);
+	fflush(stdout);
+*/
+}
+
+
+
+/* The main loop */
+int main(int argc, char* argv[])
+{
+	FILE* fhandle;
+
+	if(argc != 2)
+	{
+		printf("Usage: sim <program file>\n");
+		exit(-1);
+	}
+
+	if((fhandle = fopen(argv[1], "rb")) == NULL)
+		exit_error("Unable to open %s", argv[1]);
+
+	if(fread(g_rom, 1, MAX_ROM+1, fhandle) <= 0)
+		exit_error("Error reading %s", argv[1]);
+
+//	disassemble_program();
+
+	m68k_init();
+	m68k_set_cpu_type(M68K_CPU_TYPE_68000);
+	m68k_pulse_reset();
+	input_device_reset();
+	output_device_reset();
+	nmi_device_reset();
+
+	g_quit = 0;
+	while(!g_quit)
+	{
+		// Our loop requires some interleaving to allow us to update the
+		// input, output, and nmi devices.
+
+		get_user_input();
+
+		// Values to execute determine the interleave rate.
+		// Smaller values allow for more accurate interleaving with multiple
+		// devices/CPUs but is more processor intensive.
+		// 100000 is usually a good value to start at, then work from there.
+
+		// Note that I am not emulating the correct clock speed!
+		m68k_execute(100000);
+		output_device_update();
+		input_device_update();
+		nmi_device_update();
+	}
+
+	return 0;
+}
+
diff --git a/plat/linux68k/emu/musashi/example/sim.h b/plat/linux68k/emu/musashi/example/sim.h
index 75db0cba1..e778006b4 100755
--- a/plat/linux68k/emu/musashi/example/sim.h
+++ b/plat/linux68k/emu/musashi/example/sim.h
@@ -1,15 +1,15 @@
-#ifndef SIM__HEADER
-#define SIM__HEADER
-
-unsigned int cpu_read_byte(unsigned int address);
-unsigned int cpu_read_word(unsigned int address);
-unsigned int cpu_read_long(unsigned int address);
-void cpu_write_byte(unsigned int address, unsigned int value);
-void cpu_write_word(unsigned int address, unsigned int value);
-void cpu_write_long(unsigned int address, unsigned int value);
-void cpu_pulse_reset(void);
-void cpu_set_fc(unsigned int fc);
-int  cpu_irq_ack(int level);
-void cpu_instr_callback();
-
-#endif /* SIM__HEADER */
+#ifndef SIM__HEADER
+#define SIM__HEADER
+
+unsigned int cpu_read_byte(unsigned int address);
+unsigned int cpu_read_word(unsigned int address);
+unsigned int cpu_read_long(unsigned int address);
+void cpu_write_byte(unsigned int address, unsigned int value);
+void cpu_write_word(unsigned int address, unsigned int value);
+void cpu_write_long(unsigned int address, unsigned int value);
+void cpu_pulse_reset(void);
+void cpu_set_fc(unsigned int fc);
+int  cpu_irq_ack(int level);
+void cpu_instr_callback(int pc);
+
+#endif /* SIM__HEADER */
diff --git a/plat/linux68k/emu/musashi/m68k.h b/plat/linux68k/emu/musashi/m68k.h
index e2f0b273f..555b314f0 100755
--- a/plat/linux68k/emu/musashi/m68k.h
+++ b/plat/linux68k/emu/musashi/m68k.h
@@ -3,10 +3,10 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 3.32
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -30,14 +30,29 @@
 #ifndef M68K__HEADER
 #define M68K__HEADER
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef ARRAY_LENGTH
+#define ARRAY_LENGTH(x)         (sizeof(x) / sizeof(x[0]))
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#define TRUE 1
+#endif
 
 /* ======================================================================== */
 /* ============================= CONFIGURATION ============================ */
 /* ======================================================================== */
 
 /* Import the configuration for this build */
+#ifdef MUSASHI_CNF
+#include MUSASHI_CNF
+#else
 #include "m68kconf.h"
-
+#endif
 
 /* ======================================================================== */
 /* ============================ GENERAL DEFINES =========================== */
@@ -83,8 +98,12 @@ enum
 	M68K_CPU_TYPE_68010,
 	M68K_CPU_TYPE_68EC020,
 	M68K_CPU_TYPE_68020,
-	M68K_CPU_TYPE_68030,	/* Supported by disassembler ONLY */
-	M68K_CPU_TYPE_68040		/* Supported by disassembler ONLY */
+	M68K_CPU_TYPE_68EC030,
+	M68K_CPU_TYPE_68030,
+	M68K_CPU_TYPE_68EC040,
+	M68K_CPU_TYPE_68LC040,
+	M68K_CPU_TYPE_68040,
+	M68K_CPU_TYPE_SCC68070
 };
 
 /* Registers used by m68k_get_reg() and m68k_set_reg() */
@@ -241,6 +260,20 @@ void m68k_set_reset_instr_callback(void  (*callback)(void));
  */
 void m68k_set_pc_changed_callback(void  (*callback)(unsigned int new_pc));
 
+/* Set the callback for the TAS instruction.
+ * You must enable M68K_TAS_HAS_CALLBACK in m68kconf.h.
+ * The CPU calls this callback every time it encounters a TAS instruction.
+ * Default behavior: return 1, allow writeback.
+ */
+void m68k_set_tas_instr_callback(int  (*callback)(void));
+
+/* Set the callback for illegal instructions.
+ * You must enable M68K_ILLG_HAS_CALLBACK in m68kconf.h.
+ * The CPU calls this callback every time it encounters an illegal instruction
+ * which must return 1 if it handles the instruction normally or 0 if it's really an illegal instruction.
+ * Default behavior: return 0, exception will occur.
+ */
+void m68k_set_illg_instr_callback(int  (*callback)(int));
 
 /* Set the callback for CPU function code changes.
  * You must enable M68K_EMULATE_FC in m68kconf.h.
@@ -258,7 +291,7 @@ void m68k_set_fc_callback(void  (*callback)(unsigned int new_fc));
  * instruction cycle.
  * Default behavior: do nothing.
  */
-void m68k_set_instr_hook_callback(void  (*callback)(void));
+void m68k_set_instr_hook_callback(void  (*callback)(unsigned int pc));
 
 
@@ -304,11 +337,21 @@ void m68k_end_timeslice(void);          /* End timeslice now */
  */
 void m68k_set_irq(unsigned int int_level);
 
+/* Set the virtual irq lines, where the highest level
+ * active line is automatically selected.  If you use this function,
+ * do not use m68k_set_irq.
+ */
+void m68k_set_virq(unsigned int level, unsigned int active);
+unsigned int m68k_get_virq(unsigned int level);
 
 /* Halt the CPU as if you pulsed the HALT pin. */
 void m68k_pulse_halt(void);
 
 
+/* Trigger a bus error exception */
+void m68k_pulse_bus_error(void);
+
+
 /* Context switching to allow multiple CPUs */
 
 /* Get the size of the cpu context in bytes */
@@ -321,7 +364,7 @@ unsigned int m68k_get_context(void* dst);
 void m68k_set_context(void* dst);
 
 /* Register the CPU state information */
-void m68k_state_register(const char *type);
+void m68k_state_register(const char *type, int index);
 
 
 /* Peek at the internals of a CPU context.  This can either be a context
@@ -341,6 +384,11 @@ unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cp
  */
 unsigned int m68k_disassemble(char* str_buff, unsigned int pc, unsigned int cpu_type);
 
+/* Same as above but accepts raw opcode data directly rather than fetching
+ * via the read/write interfaces.
+ */
+unsigned int m68k_disassemble_raw(char* str_buff, unsigned int pc, const unsigned char* opdata, const unsigned char* argdata, unsigned int cpu_type);
+
 
 /* ======================================================================== */
 /* ============================== MAME STUFF ============================== */
@@ -355,4 +403,9 @@ unsigned int m68k_disassemble(char* str_buff, unsigned int pc, unsigned int cpu_
 /* ============================== END OF FILE ============================= */
 /* ======================================================================== */
 
+#ifdef __cplusplus
+}
+#endif
+
+
 #endif /* M68K__HEADER */
diff --git a/plat/linux68k/emu/musashi/m68k_in.c b/plat/linux68k/emu/musashi/m68k_in.c
index bc4217f5d..6e2831f04 100755
--- a/plat/linux68k/emu/musashi/m68k_in.c
+++ b/plat/linux68k/emu/musashi/m68k_in.c
@@ -8,10 +8,10 @@ must fix:
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 3.32
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -131,9 +131,10 @@ M68KMAKE_TABLE_HEADER
 /* ========================= OPCODE TABLE BUILDER ========================= */
 /* ======================================================================== */
 
+#include <stdio.h>
 #include "m68kops.h"
 
-#define NUM_CPU_TYPES 3
+#define NUM_CPU_TYPES 5
 
 void  (*m68ki_instruction_jump_table[0x10000])(void); /* opcode handler jump table */
 unsigned char m68ki_cycles[NUM_CPU_TYPES][0x10000]; /* Cycles used by CPU type */
@@ -149,23 +150,23 @@ typedef struct
 
 
 /* Opcode handler table */
-static opcode_handler_struct m68k_opcode_handler_table[] =
+static const opcode_handler_struct m68k_opcode_handler_table[] =
 {
-/*   function                      mask    match    000  010  020 */
+/*   function                      mask    match    000  010  020  040 */
 
 
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 M68KMAKE_TABLE_FOOTER
 
-	{0, 0, 0, {0, 0, 0}}
+	{0, 0, 0, {0, 0, 0, 0, 0}}
 };
 
 
 /* Build the opcode handler jump table */
 void m68ki_build_opcode_table(void)
 {
-	opcode_handler_struct *ostruct;
+	const opcode_handler_struct *ostruct;
 	int cycle_cost;
 	int instr;
 	int i;
@@ -278,7 +279,11 @@ void m68ki_build_opcode_table(void)
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 M68KMAKE_OPCODE_HANDLER_HEADER
 
+#include <stdio.h>
 #include "m68kcpu.h"
+extern void m68040_fpu_op0(void);
+extern void m68040_fpu_op1(void);
+extern void m68881_mmu_ops();
 
 /* ======================================================================== */
 /* ========================= INSTRUCTION HANDLERS ========================= */
@@ -368,523 +373,528 @@ cpu cycles:  Base number of cycles required to execute this opcode on the
 
 
-              spec  spec                    allowed ea  mode   cpu cycles
-name    size  proc   ea   bit pattern       A+-DXWLdxI  0 1 2  000 010 020  comments
-======  ====  ====  ====  ================  ==========  = = =  === === ===  =============
+              spec  spec                    allowed ea  mode       cpu cycles
+name    size  proc   ea   bit pattern       A+-DXWLdxI  0 1 2 3 4  000 010 020 030 040  comments
+======  ====  ====  ====  ================  ==========  = = = = =  === === === === === =============
 M68KMAKE_TABLE_START
-1010       0  .     .     1010............  ..........  U U U    4   4   4
-1111       0  .     .     1111............  ..........  U U U    4   4   4
-abcd       8  rr    .     1100...100000...  ..........  U U U    6   6   4
-abcd       8  mm    ax7   1100111100001...  ..........  U U U   18  18  16
-abcd       8  mm    ay7   1100...100001111  ..........  U U U   18  18  16
-abcd       8  mm    axy7  1100111100001111  ..........  U U U   18  18  16
-abcd       8  mm    .     1100...100001...  ..........  U U U   18  18  16
-add        8  er    d     1101...000000...  ..........  U U U    4   4   2
-add        8  er    .     1101...000......  A+-DXWLdxI  U U U    4   4   2
-add       16  er    d     1101...001000...  ..........  U U U    4   4   2
-add       16  er    a     1101...001001...  ..........  U U U    4   4   2
-add       16  er    .     1101...001......  A+-DXWLdxI  U U U    4   4   2
-add       32  er    d     1101...010000...  ..........  U U U    6   6   2
-add       32  er    a     1101...010001...  ..........  U U U    6   6   2
-add       32  er    .     1101...010......  A+-DXWLdxI  U U U    6   6   2
-add        8  re    .     1101...100......  A+-DXWL...  U U U    8   8   4
-add       16  re    .     1101...101......  A+-DXWL...  U U U    8   8   4
-add       32  re    .     1101...110......  A+-DXWL...  U U U   12  12   4
-adda      16  .     d     1101...011000...  ..........  U U U    8   8   2
-adda      16  .     a     1101...011001...  ..........  U U U    8   8   2
-adda      16  .     .     1101...011......  A+-DXWLdxI  U U U    8   8   2
-adda      32  .     d     1101...111000...  ..........  U U U    6   6   2
-adda      32  .     a     1101...111001...  ..........  U U U    6   6   2
-adda      32  .     .     1101...111......  A+-DXWLdxI  U U U    6   6   2
-addi       8  .     d     0000011000000...  ..........  U U U    8   8   2
-addi       8  .     .     0000011000......  A+-DXWL...  U U U   12  12   4
-addi      16  .     d     0000011001000...  ..........  U U U    8   8   2
-addi      16  .     .     0000011001......  A+-DXWL...  U U U   12  12   4
-addi      32  .     d     0000011010000...  ..........  U U U   16  14   2
-addi      32  .     .     0000011010......  A+-DXWL...  U U U   20  20   4
-addq       8  .     d     0101...000000...  ..........  U U U    4   4   2
-addq       8  .     .     0101...000......  A+-DXWL...  U U U    8   8   4
-addq      16  .     d     0101...001000...  ..........  U U U    4   4   2
-addq      16  .     a     0101...001001...  ..........  U U U    4   4   2
-addq      16  .     .     0101...001......  A+-DXWL...  U U U    8   8   4
-addq      32  .     d     0101...010000...  ..........  U U U    8   8   2
-addq      32  .     a     0101...010001...  ..........  U U U    8   8   2
-addq      32  .     .     0101...010......  A+-DXWL...  U U U   12  12   4
-addx       8  rr    .     1101...100000...  ..........  U U U    4   4   2
-addx      16  rr    .     1101...101000...  ..........  U U U    4   4   2
-addx      32  rr    .     1101...110000...  ..........  U U U    8   6   2
-addx       8  mm    ax7   1101111100001...  ..........  U U U   18  18  12
-addx       8  mm    ay7   1101...100001111  ..........  U U U   18  18  12
-addx       8  mm    axy7  1101111100001111  ..........  U U U   18  18  12
-addx       8  mm    .     1101...100001...  ..........  U U U   18  18  12
-addx      16  mm    .     1101...101001...  ..........  U U U   18  18  12
-addx      32  mm    .     1101...110001...  ..........  U U U   30  30  12
-and        8  er    d     1100...000000...  ..........  U U U    4   4   2
-and        8  er    .     1100...000......  A+-DXWLdxI  U U U    4   4   2
-and       16  er    d     1100...001000...  ..........  U U U    4   4   2
-and       16  er    .     1100...001......  A+-DXWLdxI  U U U    4   4   2
-and       32  er    d     1100...010000...  ..........  U U U    6   6   2
-and       32  er    .     1100...010......  A+-DXWLdxI  U U U    6   6   2
-and        8  re    .     1100...100......  A+-DXWL...  U U U    8   8   4
-and       16  re    .     1100...101......  A+-DXWL...  U U U    8   8   4
-and       32  re    .     1100...110......  A+-DXWL...  U U U   12  12   4
-andi       8  toc   .     0000001000111100  ..........  U U U   20  16  12
-andi      16  tos   .     0000001001111100  ..........  S S S   20  16  12
-andi       8  .     d     0000001000000...  ..........  U U U    8   8   2
-andi       8  .     .     0000001000......  A+-DXWL...  U U U   12  12   4
-andi      16  .     d     0000001001000...  ..........  U U U    8   8   2
-andi      16  .     .     0000001001......  A+-DXWL...  U U U   12  12   4
-andi      32  .     d     0000001010000...  ..........  U U U   14  14   2
-andi      32  .     .     0000001010......  A+-DXWL...  U U U   20  20   4
-asr        8  s     .     1110...000000...  ..........  U U U    6   6   6
-asr       16  s     .     1110...001000...  ..........  U U U    6   6   6
-asr       32  s     .     1110...010000...  ..........  U U U    8   8   6
-asr        8  r     .     1110...000100...  ..........  U U U    6   6   6
-asr       16  r     .     1110...001100...  ..........  U U U    6   6   6
-asr       32  r     .     1110...010100...  ..........  U U U    8   8   6
-asr       16  .     .     1110000011......  A+-DXWL...  U U U    8   8   5
-asl        8  s     .     1110...100000...  ..........  U U U    6   6   8
-asl       16  s     .     1110...101000...  ..........  U U U    6   6   8
-asl       32  s     .     1110...110000...  ..........  U U U    8   8   8
-asl        8  r     .     1110...100100...  ..........  U U U    6   6   8
-asl       16  r     .     1110...101100...  ..........  U U U    6   6   8
-asl       32  r     .     1110...110100...  ..........  U U U    8   8   8
-asl       16  .     .     1110000111......  A+-DXWL...  U U U    8   8   6
-bcc        8  .     .     0110............  ..........  U U U   10  10   6
-bcc       16  .     .     0110....00000000  ..........  U U U   10  10   6
-bcc       32  .     .     0110....11111111  ..........  . . U    .   .   6
-bchg       8  r     .     0000...101......  A+-DXWL...  U U U    8   8   4
-bchg      32  r     d     0000...101000...  ..........  U U U    8   8   4
-bchg       8  s     .     0000100001......  A+-DXWL...  U U U   12  12   4
-bchg      32  s     d     0000100001000...  ..........  U U U   12  12   4
-bclr       8  r     .     0000...110......  A+-DXWL...  U U U    8  10   4
-bclr      32  r     d     0000...110000...  ..........  U U U   10  10   4
-bclr       8  s     .     0000100010......  A+-DXWL...  U U U   12  12   4
-bclr      32  s     d     0000100010000...  ..........  U U U   14  14   4
-bfchg     32  .     d     1110101011000...  ..........  . . U    .   .  12  timing not quite correct
-bfchg     32  .     .     1110101011......  A..DXWL...  . . U    .   .  20
-bfclr     32  .     d     1110110011000...  ..........  . . U    .   .  12
-bfclr     32  .     .     1110110011......  A..DXWL...  . . U    .   .  20
-bfexts    32  .     d     1110101111000...  ..........  . . U    .   .   8
-bfexts    32  .     .     1110101111......  A..DXWLdx.  . . U    .   .  15
-bfextu    32  .     d     1110100111000...  ..........  . . U    .   .   8
-bfextu    32  .     .     1110100111......  A..DXWLdx.  . . U    .   .  15
-bfffo     32  .     d     1110110111000...  ..........  . . U    .   .  18
-bfffo     32  .     .     1110110111......  A..DXWLdx.  . . U    .   .  28
-bfins     32  .     d     1110111111000...  ..........  . . U    .   .  10
-bfins     32  .     .     1110111111......  A..DXWL...  . . U    .   .  17
-bfset     32  .     d     1110111011000...  ..........  . . U    .   .  12
-bfset     32  .     .     1110111011......  A..DXWL...  . . U    .   .  20
-bftst     32  .     d     1110100011000...  ..........  . . U    .   .   6
-bftst     32  .     .     1110100011......  A..DXWLdx.  . . U    .   .  13
-bkpt       0  .     .     0100100001001...  ..........  . U U    .  10  10
-bra        8  .     .     01100000........  ..........  U U U   10  10  10
-bra       16  .     .     0110000000000000  ..........  U U U   10  10  10
-bra       32  .     .     0110000011111111  ..........  U U U    .   .  10
-bset      32  r     d     0000...111000...  ..........  U U U    8   8   4
-bset       8  r     .     0000...111......  A+-DXWL...  U U U    8   8   4
-bset       8  s     .     0000100011......  A+-DXWL...  U U U   12  12   4
-bset      32  s     d     0000100011000...  ..........  U U U   12  12   4
-bsr        8  .     .     01100001........  ..........  U U U   18  18   7
-bsr       16  .     .     0110000100000000  ..........  U U U   18  18   7
-bsr       32  .     .     0110000111111111  ..........  . . U    .   .   7
-btst       8  r     .     0000...100......  A+-DXWLdxI  U U U    4   4   4
-btst      32  r     d     0000...100000...  ..........  U U U    6   6   4
-btst       8  s     .     0000100000......  A+-DXWLdx.  U U U    8   8   4
-btst      32  s     d     0000100000000...  ..........  U U U   10  10   4
-callm     32  .     .     0000011011......  A..DXWLdx.  . . U    .   .  60  not properly emulated
-cas        8  .     .     0000101011......  A+-DXWL...  . . U    .   .  12
-cas       16  .     .     0000110011......  A+-DXWL...  . . U    .   .  12
-cas       32  .     .     0000111011......  A+-DXWL...  . . U    .   .  12
-cas2      16  .     .     0000110011111100  ..........  . . U    .   .  12
-cas2      32  .     .     0000111011111100  ..........  . . U    .   .  12
-chk       16  .     d     0100...110000...  ..........  U U U   10   8   8
-chk       16  .     .     0100...110......  A+-DXWLdxI  U U U   10   8   8
-chk       32  .     d     0100...100000...  ..........  . . U    .   .   8
-chk       32  .     .     0100...100......  A+-DXWLdxI  . . U    .   .   8
-chk2cmp2   8  .     pcdi  0000000011111010  ..........  . . U    .   .  23
-chk2cmp2   8  .     pcix  0000000011111011  ..........  . . U    .   .  23
-chk2cmp2   8  .     .     0000000011......  A..DXWL...  . . U    .   .  18
-chk2cmp2  16  .     pcdi  0000001011111010  ..........  . . U    .   .  23
-chk2cmp2  16  .     pcix  0000001011111011  ..........  . . U    .   .  23
-chk2cmp2  16  .     .     0000001011......  A..DXWL...  . . U    .   .  18
-chk2cmp2  32  .     pcdi  0000010011111010  ..........  . . U    .   .  23
-chk2cmp2  32  .     pcix  0000010011111011  ..........  . . U    .   .  23
-chk2cmp2  32  .     .     0000010011......  A..DXWL...  . . U    .   .  18
-clr        8  .     d     0100001000000...  ..........  U U U    4   4   2
-clr        8  .     .     0100001000......  A+-DXWL...  U U U    8   4   4
-clr       16  .     d     0100001001000...  ..........  U U U    4   4   2
-clr       16  .     .     0100001001......  A+-DXWL...  U U U    8   4   4
-clr       32  .     d     0100001010000...  ..........  U U U    6   6   2
-clr       32  .     .     0100001010......  A+-DXWL...  U U U   12   6   4
-cmp        8  .     d     1011...000000...  ..........  U U U    4   4   2
-cmp        8  .     .     1011...000......  A+-DXWLdxI  U U U    4   4   2
-cmp       16  .     d     1011...001000...  ..........  U U U    4   4   2
-cmp       16  .     a     1011...001001...  ..........  U U U    4   4   2
-cmp       16  .     .     1011...001......  A+-DXWLdxI  U U U    4   4   2
-cmp       32  .     d     1011...010000...  ..........  U U U    6   6   2
-cmp       32  .     a     1011...010001...  ..........  U U U    6   6   2
-cmp       32  .     .     1011...010......  A+-DXWLdxI  U U U    6   6   2
-cmpa      16  .     d     1011...011000...  ..........  U U U    6   6   4
-cmpa      16  .     a     1011...011001...  ..........  U U U    6   6   4
-cmpa      16  .     .     1011...011......  A+-DXWLdxI  U U U    6   6   4
-cmpa      32  .     d     1011...111000...  ..........  U U U    6   6   4
-cmpa      32  .     a     1011...111001...  ..........  U U U    6   6   4
-cmpa      32  .     .     1011...111......  A+-DXWLdxI  U U U    6   6   4
-cmpi       8  .     d     0000110000000...  ..........  U U U    8   8   2
-cmpi       8  .     .     0000110000......  A+-DXWL...  U U U    8   8   2
-cmpi       8  .     pcdi  0000110000111010  ..........  . . U    .   .   7
-cmpi       8  .     pcix  0000110000111011  ..........  . . U    .   .   9
-cmpi      16  .     d     0000110001000...  ..........  U U U    8   8   2
-cmpi      16  .     .     0000110001......  A+-DXWL...  U U U    8   8   2
-cmpi      16  .     pcdi  0000110001111010  ..........  . . U    .   .   7
-cmpi      16  .     pcix  0000110001111011  ..........  . . U    .   .   9
-cmpi      32  .     d     0000110010000...  ..........  U U U   14  12   2
-cmpi      32  .     .     0000110010......  A+-DXWL...  U U U   12  12   2
-cmpi      32  .     pcdi  0000110010111010  ..........  . . U    .   .   7
-cmpi      32  .     pcix  0000110010111011  ..........  . . U    .   .   9
-cmpm       8  .     ax7   1011111100001...  ..........  U U U   12  12   9
-cmpm       8  .     ay7   1011...100001111  ..........  U U U   12  12   9
-cmpm       8  .     axy7  1011111100001111  ..........  U U U   12  12   9
-cmpm       8  .     .     1011...100001...  ..........  U U U   12  12   9
-cmpm      16  .     .     1011...101001...  ..........  U U U   12  12   9
-cmpm      32  .     .     1011...110001...  ..........  U U U   20  20   9
-cpbcc     32  .     .     1111...01.......  ..........  . . U    .   .   4  unemulated
-cpdbcc    32  .     .     1111...001001...  ..........  . . U    .   .   4  unemulated
-cpgen     32  .     .     1111...000......  ..........  . . U    .   .   4  unemulated
-cpscc     32  .     .     1111...001......  ..........  . . U    .   .   4  unemulated
-cptrapcc  32  .     .     1111...001111...  ..........  . . U    .   .   4  unemulated
-dbt       16  .     .     0101000011001...  ..........  U U U   12  12   6
-dbf       16  .     .     0101000111001...  ..........  U U U   12  12   6
-dbcc      16  .     .     0101....11001...  ..........  U U U   12  12   6
-divs      16  .     d     1000...111000...  ..........  U U U  158 122  56
-divs      16  .     .     1000...111......  A+-DXWLdxI  U U U  158 122  56
-divu      16  .     d     1000...011000...  ..........  U U U  140 108  44
-divu      16  .     .     1000...011......  A+-DXWLdxI  U U U  140 108  44
-divl      32  .     d     0100110001000...  ..........  . . U    .   .  84
-divl      32  .     .     0100110001......  A+-DXWLdxI  . . U    .   .  84
-eor        8  .     d     1011...100000...  ..........  U U U    4   4   2
-eor        8  .     .     1011...100......  A+-DXWL...  U U U    8   8   4
-eor       16  .     d     1011...101000...  ..........  U U U    4   4   2
-eor       16  .     .     1011...101......  A+-DXWL...  U U U    8   8   4
-eor       32  .     d     1011...110000...  ..........  U U U    8   6   2
-eor       32  .     .     1011...110......  A+-DXWL...  U U U   12  12   4
-eori       8  toc   .     0000101000111100  ..........  U U U   20  16  12
-eori      16  tos   .     0000101001111100  ..........  S S S   20  16  12
-eori       8  .     d     0000101000000...  ..........  U U U    8   8   2
-eori       8  .     .     0000101000......  A+-DXWL...  U U U   12  12   4
-eori      16  .     d     0000101001000...  ..........  U U U    8   8   2
-eori      16  .     .     0000101001......  A+-DXWL...  U U U   12  12   4
-eori      32  .     d     0000101010000...  ..........  U U U   16  14   2
-eori      32  .     .     0000101010......  A+-DXWL...  U U U   20  20   4
-exg       32  dd    .     1100...101000...  ..........  U U U    6   6   2
-exg       32  aa    .     1100...101001...  ..........  U U U    6   6   2
-exg       32  da    .     1100...110001...  ..........  U U U    6   6   2
-ext       16  .     .     0100100010000...  ..........  U U U    4   4   4
-ext       32  .     .     0100100011000...  ..........  U U U    4   4   4
-extb      32  .     .     0100100111000...  ..........  . . U    .   .   4
-illegal    0  .     .     0100101011111100  ..........  U U U    4   4   4
-jmp       32  .     .     0100111011......  A..DXWLdx.  U U U    4   4   0
-jsr       32  .     .     0100111010......  A..DXWLdx.  U U U   12  12   0
-lea       32  .     .     0100...111......  A..DXWLdx.  U U U    0   0   2
-link      16  .     a7    0100111001010111  ..........  U U U   16  16   5
-link      16  .     .     0100111001010...  ..........  U U U   16  16   5
-link      32  .     a7    0100100000001111  ..........  . . U    .   .   6
-link      32  .     .     0100100000001...  ..........  . . U    .   .   6
-lsr        8  s     .     1110...000001...  ..........  U U U    6   6   4
-lsr       16  s     .     1110...001001...  ..........  U U U    6   6   4
-lsr       32  s     .     1110...010001...  ..........  U U U    8   8   4
-lsr        8  r     .     1110...000101...  ..........  U U U    6   6   6
-lsr       16  r     .     1110...001101...  ..........  U U U    6   6   6
-lsr       32  r     .     1110...010101...  ..........  U U U    8   8   6
-lsr       16  .     .     1110001011......  A+-DXWL...  U U U    8   8   5
-lsl        8  s     .     1110...100001...  ..........  U U U    6   6   4
-lsl       16  s     .     1110...101001...  ..........  U U U    6   6   4
-lsl       32  s     .     1110...110001...  ..........  U U U    8   8   4
-lsl        8  r     .     1110...100101...  ..........  U U U    6   6   6
-lsl       16  r     .     1110...101101...  ..........  U U U    6   6   6
-lsl       32  r     .     1110...110101...  ..........  U U U    8   8   6
-lsl       16  .     .     1110001111......  A+-DXWL...  U U U    8   8   5
-move       8  d     d     0001...000000...  ..........  U U U    4   4   2
-move       8  d     .     0001...000......  A+-DXWLdxI  U U U    4   4   2
-move       8  ai    d     0001...010000...  ..........  U U U    8   8   4
-move       8  ai    .     0001...010......  A+-DXWLdxI  U U U    8   8   4
-move       8  pi    d     0001...011000...  ..........  U U U    8   8   4
-move       8  pi    .     0001...011......  A+-DXWLdxI  U U U    8   8   4
-move       8  pi7   d     0001111011000...  ..........  U U U    8   8   4
-move       8  pi7   .     0001111011......  A+-DXWLdxI  U U U    8   8   4
-move       8  pd    d     0001...100000...  ..........  U U U    8   8   5
-move       8  pd    .     0001...100......  A+-DXWLdxI  U U U    8   8   5
-move       8  pd7   d     0001111100000...  ..........  U U U    8   8   5
-move       8  pd7   .     0001111100......  A+-DXWLdxI  U U U    8   8   5
-move       8  di    d     0001...101000...  ..........  U U U   12  12   5
-move       8  di    .     0001...101......  A+-DXWLdxI  U U U   12  12   5
-move       8  ix    d     0001...110000...  ..........  U U U   14  14   7
-move       8  ix    .     0001...110......  A+-DXWLdxI  U U U   14  14   7
-move       8  aw    d     0001000111000...  ..........  U U U   12  12   4
-move       8  aw    .     0001000111......  A+-DXWLdxI  U U U   12  12   4
-move       8  al    d     0001001111000...  ..........  U U U   16  16   6
-move       8  al    .     0001001111......  A+-DXWLdxI  U U U   16  16   6
-move      16  d     d     0011...000000...  ..........  U U U    4   4   2
-move      16  d     a     0011...000001...  ..........  U U U    4   4   2
-move      16  d     .     0011...000......  A+-DXWLdxI  U U U    4   4   2
-move      16  ai    d     0011...010000...  ..........  U U U    8   8   4
-move      16  ai    a     0011...010001...  ..........  U U U    8   8   4
-move      16  ai    .     0011...010......  A+-DXWLdxI  U U U    8   8   4
-move      16  pi    d     0011...011000...  ..........  U U U    8   8   4
-move      16  pi    a     0011...011001...  ..........  U U U    8   8   4
-move      16  pi    .     0011...011......  A+-DXWLdxI  U U U    8   8   4
-move      16  pd    d     0011...100000...  ..........  U U U    8   8   5
-move      16  pd    a     0011...100001...  ..........  U U U    8   8   5
-move      16  pd    .     0011...100......  A+-DXWLdxI  U U U    8   8   5
-move      16  di    d     0011...101000...  ..........  U U U   12  12   5
-move      16  di    a     0011...101001...  ..........  U U U   12  12   5
-move      16  di    .     0011...101......  A+-DXWLdxI  U U U   12  12   5
-move      16  ix    d     0011...110000...  ..........  U U U   14  14   7
-move      16  ix    a     0011...110001...  ..........  U U U   14  14   7
-move      16  ix    .     0011...110......  A+-DXWLdxI  U U U   14  14   7
-move      16  aw    d     0011000111000...  ..........  U U U   12  12   4
-move      16  aw    a     0011000111001...  ..........  U U U   12  12   4
-move      16  aw    .     0011000111......  A+-DXWLdxI  U U U   12  12   4
-move      16  al    d     0011001111000...  ..........  U U U   16  16   6
-move      16  al    a     0011001111001...  ..........  U U U   16  16   6
-move      16  al    .     0011001111......  A+-DXWLdxI  U U U   16  16   6
-move      32  d     d     0010...000000...  ..........  U U U    4   4   2
-move      32  d     a     0010...000001...  ..........  U U U    4   4   2
-move      32  d     .     0010...000......  A+-DXWLdxI  U U U    4   4   2
-move      32  ai    d     0010...010000...  ..........  U U U   12  12   4
-move      32  ai    a     0010...010001...  ..........  U U U   12  12   4
-move      32  ai    .     0010...010......  A+-DXWLdxI  U U U   12  12   4
-move      32  pi    d     0010...011000...  ..........  U U U   12  12   4
-move      32  pi    a     0010...011001...  ..........  U U U   12  12   4
-move      32  pi    .     0010...011......  A+-DXWLdxI  U U U   12  12   4
-move      32  pd    d     0010...100000...  ..........  U U U   12  14   5
-move      32  pd    a     0010...100001...  ..........  U U U   12  14   5
-move      32  pd    .     0010...100......  A+-DXWLdxI  U U U   12  14   5
-move      32  di    d     0010...101000...  ..........  U U U   16  16   5
-move      32  di    a     0010...101001...  ..........  U U U   16  16   5
-move      32  di    .     0010...101......  A+-DXWLdxI  U U U   16  16   5
-move      32  ix    d     0010...110000...  ..........  U U U   18  18   7
-move      32  ix    a     0010...110001...  ..........  U U U   18  18   7
-move      32  ix    .     0010...110......  A+-DXWLdxI  U U U   18  18   7
-move      32  aw    d     0010000111000...  ..........  U U U   16  16   4
-move      32  aw    a     0010000111001...  ..........  U U U   16  16   4
-move      32  aw    .     0010000111......  A+-DXWLdxI  U U U   16  16   4
-move      32  al    d     0010001111000...  ..........  U U U   20  20   6
-move      32  al    a     0010001111001...  ..........  U U U   20  20   6
-move      32  al    .     0010001111......  A+-DXWLdxI  U U U   20  20   6
-movea     16  .     d     0011...001000...  ..........  U U U    4   4   2
-movea     16  .     a     0011...001001...  ..........  U U U    4   4   2
-movea     16  .     .     0011...001......  A+-DXWLdxI  U U U    4   4   2
-movea     32  .     d     0010...001000...  ..........  U U U    4   4   2
-movea     32  .     a     0010...001001...  ..........  U U U    4   4   2
-movea     32  .     .     0010...001......  A+-DXWLdxI  U U U    4   4   2
-move      16  frc   d     0100001011000...  ..........  . U U    .   4   4
-move      16  frc   .     0100001011......  A+-DXWL...  . U U    .   8   4
-move      16  toc   d     0100010011000...  ..........  U U U   12  12   4
-move      16  toc   .     0100010011......  A+-DXWLdxI  U U U   12  12   4
-move      16  frs   d     0100000011000...  ..........  U S S    6   4   8 U only for 000
-move      16  frs   .     0100000011......  A+-DXWL...  U S S    8   8   8 U only for 000
-move      16  tos   d     0100011011000...  ..........  S S S   12  12   8
-move      16  tos   .     0100011011......  A+-DXWLdxI  S S S   12  12   8
-move      32  fru   .     0100111001101...  ..........  S S S    4   6   2
-move      32  tou   .     0100111001100...  ..........  S S S    4   6   2
-movec     32  cr    .     0100111001111010  ..........  . S S    .  12   6
-movec     32  rc    .     0100111001111011  ..........  . S S    .  10  12
-movem     16  re    pd    0100100010100...  ..........  U U U    8   8   4
-movem     16  re    .     0100100010......  A..DXWL...  U U U    4   4   4  cycles for hypothetical d addressing mode (020 unverified)
-movem     32  re    pd    0100100011100...  ..........  U U U    8   8   4
-movem     32  re    .     0100100011......  A..DXWL...  U U U    0   0   4  cycles for hypothetical d addressing mode (020 unverified)
-movem     16  er    pi    0100110010011...  ..........  U U U   12  12   8
-movem     16  er    pcdi  0100110010111010  ..........  U U U   16  16   9
-movem     16  er    pcix  0100110010111011  ..........  U U U   18  18  11
-movem     16  er    .     0100110010......  A..DXWL...  U U U    8   8   8  cycles for hypothetical d addressing mode (020 unverified)
-movem     32  er    pi    0100110011011...  ..........  U U U   12  12   8
-movem     32  er    pcdi  0100110011111010  ..........  U U U   16  16   9
-movem     32  er    pcix  0100110011111011  ..........  U U U   18  18  11
-movem     32  er    .     0100110011......  A..DXWL...  U U U    4   4   8  cycles for hypothetical d addressing mode (020 unverified)
-movep     16  er    .     0000...100001...  ..........  U U U   16  16  12
-movep     32  er    .     0000...101001...  ..........  U U U   24  24  18
-movep     16  re    .     0000...110001...  ..........  U U U   16  16  11
-movep     32  re    .     0000...111001...  ..........  U U U   24  24  17
-moveq     32  .     .     0111...0........  ..........  U U U    4   4   2
-moves      8  .     .     0000111000......  A+-DXWL...  . S S    .  14   5
-moves     16  .     .     0000111001......  A+-DXWL...  . S S    .  14   5
-moves     32  .     .     0000111010......  A+-DXWL...  . S S    .  16   5
-muls      16  .     d     1100...111000...  ..........  U U U   54  32  27
-muls      16  .     .     1100...111......  A+-DXWLdxI  U U U   54  32  27
-mulu      16  .     d     1100...011000...  ..........  U U U   54  30  27
-mulu      16  .     .     1100...011......  A+-DXWLdxI  U U U   54  30  27
-mull      32  .     d     0100110000000...  ..........  . . U    .   .  43
-mull      32  .     .     0100110000......  A+-DXWLdxI  . . U    .   .  43
-nbcd       8  .     d     0100100000000...  ..........  U U U    6   6   6
-nbcd       8  .     .     0100100000......  A+-DXWL...  U U U    8   8   6
-neg        8  .     d     0100010000000...  ..........  U U U    4   4   2
-neg        8  .     .     0100010000......  A+-DXWL...  U U U    8   8   4
-neg       16  .     d     0100010001000...  ..........  U U U    4   4   2
-neg       16  .     .     0100010001......  A+-DXWL...  U U U    8   8   4
-neg       32  .     d     0100010010000...  ..........  U U U    6   6   2
-neg       32  .     .     0100010010......  A+-DXWL...  U U U   12  12   4
-negx       8  .     d     0100000000000...  ..........  U U U    4   4   2
-negx       8  .     .     0100000000......  A+-DXWL...  U U U    8   8   4
-negx      16  .     d     0100000001000...  ..........  U U U    4   4   2
-negx      16  .     .     0100000001......  A+-DXWL...  U U U    8   8   4
-negx      32  .     d     0100000010000...  ..........  U U U    6   6   2
-negx      32  .     .     0100000010......  A+-DXWL...  U U U   12  12   4
-nop        0  .     .     0100111001110001  ..........  U U U    4   4   2
-not        8  .     d     0100011000000...  ..........  U U U    4   4   2
-not        8  .     .     0100011000......  A+-DXWL...  U U U    8   8   4
-not       16  .     d     0100011001000...  ..........  U U U    4   4   2
-not       16  .     .     0100011001......  A+-DXWL...  U U U    8   8   4
-not       32  .     d     0100011010000...  ..........  U U U    6   6   2
-not       32  .     .     0100011010......  A+-DXWL...  U U U   12  12   4
-or         8  er    d     1000...000000...  ..........  U U U    4   4   2
-or         8  er    .     1000...000......  A+-DXWLdxI  U U U    4   4   2
-or        16  er    d     1000...001000...  ..........  U U U    4   4   2
-or        16  er    .     1000...001......  A+-DXWLdxI  U U U    4   4   2
-or        32  er    d     1000...010000...  ..........  U U U    6   6   2
-or        32  er    .     1000...010......  A+-DXWLdxI  U U U    6   6   2
-or         8  re    .     1000...100......  A+-DXWL...  U U U    8   8   4
-or        16  re    .     1000...101......  A+-DXWL...  U U U    8   8   4
-or        32  re    .     1000...110......  A+-DXWL...  U U U   12  12   4
-ori        8  toc   .     0000000000111100  ..........  U U U   20  16  12
-ori       16  tos   .     0000000001111100  ..........  S S S   20  16  12
-ori        8  .     d     0000000000000...  ..........  U U U    8   8   2
-ori        8  .     .     0000000000......  A+-DXWL...  U U U   12  12   4
-ori       16  .     d     0000000001000...  ..........  U U U    8   8   2
-ori       16  .     .     0000000001......  A+-DXWL...  U U U   12  12   4
-ori       32  .     d     0000000010000...  ..........  U U U   16  14   2
-ori       32  .     .     0000000010......  A+-DXWL...  U U U   20  20   4
-pack      16  rr    .     1000...101000...  ..........  . . U    .   .   6
-pack      16  mm    ax7   1000111101001...  ..........  . . U    .   .  13
-pack      16  mm    ay7   1000...101001111  ..........  . . U    .   .  13
-pack      16  mm    axy7  1000111101001111  ..........  . . U    .   .  13
-pack      16  mm    .     1000...101001...  ..........  . . U    .   .  13
-pea       32  .     .     0100100001......  A..DXWLdx.  U U U    6   6   5
-reset      0  .     .     0100111001110000  ..........  S S S    0   0   0
-ror        8  s     .     1110...000011...  ..........  U U U    6   6   8
-ror       16  s     .     1110...001011...  ..........  U U U    6   6   8
-ror       32  s     .     1110...010011...  ..........  U U U    8   8   8
-ror        8  r     .     1110...000111...  ..........  U U U    6   6   8
-ror       16  r     .     1110...001111...  ..........  U U U    6   6   8
-ror       32  r     .     1110...010111...  ..........  U U U    8   8   8
-ror       16  .     .     1110011011......  A+-DXWL...  U U U    8   8   7
-rol        8  s     .     1110...100011...  ..........  U U U    6   6   8
-rol       16  s     .     1110...101011...  ..........  U U U    6   6   8
-rol       32  s     .     1110...110011...  ..........  U U U    8   8   8
-rol        8  r     .     1110...100111...  ..........  U U U    6   6   8
-rol       16  r     .     1110...101111...  ..........  U U U    6   6   8
-rol       32  r     .     1110...110111...  ..........  U U U    8   8   8
-rol       16  .     .     1110011111......  A+-DXWL...  U U U    8   8   7
-roxr       8  s     .     1110...000010...  ..........  U U U    6   6  12
-roxr      16  s     .     1110...001010...  ..........  U U U    6   6  12
-roxr      32  s     .     1110...010010...  ..........  U U U    8   8  12
-roxr       8  r     .     1110...000110...  ..........  U U U    6   6  12
-roxr      16  r     .     1110...001110...  ..........  U U U    6   6  12
-roxr      32  r     .     1110...010110...  ..........  U U U    8   8  12
-roxr      16  .     .     1110010011......  A+-DXWL...  U U U    8   8   5
-roxl       8  s     .     1110...100010...  ..........  U U U    6   6  12
-roxl      16  s     .     1110...101010...  ..........  U U U    6   6  12
-roxl      32  s     .     1110...110010...  ..........  U U U    8   8  12
-roxl       8  r     .     1110...100110...  ..........  U U U    6   6  12
-roxl      16  r     .     1110...101110...  ..........  U U U    6   6  12
-roxl      32  r     .     1110...110110...  ..........  U U U    8   8  12
-roxl      16  .     .     1110010111......  A+-DXWL...  U U U    8   8   5
-rtd       32  .     .     0100111001110100  ..........  . U U    .  16  10
-rte       32  .     .     0100111001110011  ..........  S S S   20  24  20  bus fault not emulated
-rtm       32  .     .     000001101100....  ..........  . . U    .   .  19  not properly emulated
-rtr       32  .     .     0100111001110111  ..........  U U U   20  20  14
-rts       32  .     .     0100111001110101  ..........  U U U   16  16  10
-sbcd       8  rr    .     1000...100000...  ..........  U U U    6   6   4
-sbcd       8  mm    ax7   1000111100001...  ..........  U U U   18  18  16
-sbcd       8  mm    ay7   1000...100001111  ..........  U U U   18  18  16
-sbcd       8  mm    axy7  1000111100001111  ..........  U U U   18  18  16
-sbcd       8  mm    .     1000...100001...  ..........  U U U   18  18  16
-st         8  .     d     0101000011000...  ..........  U U U    6   4   4
-st         8  .     .     0101000011......  A+-DXWL...  U U U    8   8   6
-sf         8  .     d     0101000111000...  ..........  U U U    4   4   4
-sf         8  .     .     0101000111......  A+-DXWL...  U U U    8   8   6
-scc        8  .     d     0101....11000...  ..........  U U U    4   4   4
-scc        8  .     .     0101....11......  A+-DXWL...  U U U    8   8   6
-stop       0  .     .     0100111001110010  ..........  S S S    4   4   8
-sub        8  er    d     1001...000000...  ..........  U U U    4   4   2
-sub        8  er    .     1001...000......  A+-DXWLdxI  U U U    4   4   2
-sub       16  er    d     1001...001000...  ..........  U U U    4   4   2
-sub       16  er    a     1001...001001...  ..........  U U U    4   4   2
-sub       16  er    .     1001...001......  A+-DXWLdxI  U U U    4   4   2
-sub       32  er    d     1001...010000...  ..........  U U U    6   6   2
-sub       32  er    a     1001...010001...  ..........  U U U    6   6   2
-sub       32  er    .     1001...010......  A+-DXWLdxI  U U U    6   6   2
-sub        8  re    .     1001...100......  A+-DXWL...  U U U    8   8   4
-sub       16  re    .     1001...101......  A+-DXWL...  U U U    8   8   4
-sub       32  re    .     1001...110......  A+-DXWL...  U U U   12  12   4
-suba      16  .     d     1001...011000...  ..........  U U U    8   8   2
-suba      16  .     a     1001...011001...  ..........  U U U    8   8   2
-suba      16  .     .     1001...011......  A+-DXWLdxI  U U U    8   8   2
-suba      32  .     d     1001...111000...  ..........  U U U    6   6   2
-suba      32  .     a     1001...111001...  ..........  U U U    6   6   2
-suba      32  .     .     1001...111......  A+-DXWLdxI  U U U    6   6   2
-subi       8  .     d     0000010000000...  ..........  U U U    8   8   2
-subi       8  .     .     0000010000......  A+-DXWL...  U U U   12  12   4
-subi      16  .     d     0000010001000...  ..........  U U U    8   8   2
-subi      16  .     .     0000010001......  A+-DXWL...  U U U   12  12   4
-subi      32  .     d     0000010010000...  ..........  U U U   16  14   2
-subi      32  .     .     0000010010......  A+-DXWL...  U U U   20  20   4
-subq       8  .     d     0101...100000...  ..........  U U U    4   4   2
-subq       8  .     .     0101...100......  A+-DXWL...  U U U    8   8   4
-subq      16  .     d     0101...101000...  ..........  U U U    4   4   2
-subq      16  .     a     0101...101001...  ..........  U U U    8   4   2
-subq      16  .     .     0101...101......  A+-DXWL...  U U U    8   8   4
-subq      32  .     d     0101...110000...  ..........  U U U    8   8   2
-subq      32  .     a     0101...110001...  ..........  U U U    8   8   2
-subq      32  .     .     0101...110......  A+-DXWL...  U U U   12  12   4
-subx       8  rr    .     1001...100000...  ..........  U U U    4   4   2
-subx      16  rr    .     1001...101000...  ..........  U U U    4   4   2
-subx      32  rr    .     1001...110000...  ..........  U U U    8   6   2
-subx       8  mm    ax7   1001111100001...  ..........  U U U   18  18  12
-subx       8  mm    ay7   1001...100001111  ..........  U U U   18  18  12
-subx       8  mm    axy7  1001111100001111  ..........  U U U   18  18  12
-subx       8  mm    .     1001...100001...  ..........  U U U   18  18  12
-subx      16  mm    .     1001...101001...  ..........  U U U   18  18  12
-subx      32  mm    .     1001...110001...  ..........  U U U   30  30  12
-swap      32  .     .     0100100001000...  ..........  U U U    4   4   4
-tas        8  .     d     0100101011000...  ..........  U U U    4   4   4
-tas        8  .     .     0100101011......  A+-DXWL...  U U U   14  14  12
-trap       0  .     .     010011100100....  ..........  U U U    4   4   4
-trapt      0  .     .     0101000011111100  ..........  . . U    .   .   4
-trapt     16  .     .     0101000011111010  ..........  . . U    .   .   6
-trapt     32  .     .     0101000011111011  ..........  . . U    .   .   8
-trapf      0  .     .     0101000111111100  ..........  . . U    .   .   4
-trapf     16  .     .     0101000111111010  ..........  . . U    .   .   6
-trapf     32  .     .     0101000111111011  ..........  . . U    .   .   8
-trapcc     0  .     .     0101....11111100  ..........  . . U    .   .   4
-trapcc    16  .     .     0101....11111010  ..........  . . U    .   .   6
-trapcc    32  .     .     0101....11111011  ..........  . . U    .   .   8
-trapv      0  .     .     0100111001110110  ..........  U U U    4   4   4
-tst        8  .     d     0100101000000...  ..........  U U U    4   4   2
-tst        8  .     .     0100101000......  A+-DXWL...  U U U    4   4   2
-tst        8  .     pcdi  0100101000111010  ..........  . . U    .   .   7
-tst        8  .     pcix  0100101000111011  ..........  . . U    .   .   9
-tst        8  .     i     0100101000111100  ..........  . . U    .   .   6
-tst       16  .     d     0100101001000...  ..........  U U U    4   4   2
-tst       16  .     a     0100101001001...  ..........  . . U    .   .   2
-tst       16  .     .     0100101001......  A+-DXWL...  U U U    4   4   2
-tst       16  .     pcdi  0100101001111010  ..........  . . U    .   .   7
-tst       16  .     pcix  0100101001111011  ..........  . . U    .   .   9
-tst       16  .     i     0100101001111100  ..........  . . U    .   .   6
-tst       32  .     d     0100101010000...  ..........  U U U    4   4   2
-tst       32  .     a     0100101010001...  ..........  . . U    .   .   2
-tst       32  .     .     0100101010......  A+-DXWL...  U U U    4   4   2
-tst       32  .     pcdi  0100101010111010  ..........  . . U    .   .   7
-tst       32  .     pcix  0100101010111011  ..........  . . U    .   .   9
-tst       32  .     i     0100101010111100  ..........  . . U    .   .   6
-unlk      32  .     a7    0100111001011111  ..........  U U U   12  12   6
-unlk      32  .     .     0100111001011...  ..........  U U U   12  12   6
-unpk      16  rr    .     1000...110000...  ..........  . . U    .   .   8
-unpk      16  mm    ax7   1000111110001...  ..........  . . U    .   .  13
-unpk      16  mm    ay7   1000...110001111  ..........  . . U    .   .  13
-unpk      16  mm    axy7  1000111110001111  ..........  . . U    .   .  13
-unpk      16  mm    .     1000...110001...  ..........  . . U    .   .  13
+1010       0  .     .     1010............  ..........  U U U U U   4   4   4   4   4
+1111       0  .     .     1111............  ..........  U U U U U   4   4   4   4   4
+040fpu0   32  .     .     11110010........  ..........  . . . . U   .   .   .   .   0
+040fpu1   32  .     .     11110011........  ..........  . . . . U   .   .   .   .   0
+abcd       8  rr    .     1100...100000...  ..........  U U U U U   6   6   4   4   4
+abcd       8  mm    ax7   1100111100001...  ..........  U U U U U  18  18  16  16  16
+abcd       8  mm    ay7   1100...100001111  ..........  U U U U U  18  18  16  16  16
+abcd       8  mm    axy7  1100111100001111  ..........  U U U U U  18  18  16  16  16
+abcd       8  mm    .     1100...100001...  ..........  U U U U U  18  18  16  16  16
+add        8  er    d     1101...000000...  ..........  U U U U U   4   4   2   2   2
+add        8  er    .     1101...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+add       16  er    d     1101...001000...  ..........  U U U U U   4   4   2   2   2
+add       16  er    a     1101...001001...  ..........  U U U U U   4   4   2   2   2
+add       16  er    .     1101...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+add       32  er    d     1101...010000...  ..........  U U U U U   6   6   2   2   2
+add       32  er    a     1101...010001...  ..........  U U U U U   6   6   2   2   2
+add       32  er    .     1101...010......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+add        8  re    .     1101...100......  A+-DXWL...  U U U U U   8   8   4   4   4
+add       16  re    .     1101...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+add       32  re    .     1101...110......  A+-DXWL...  U U U U U  12  12   4   4   4
+adda      16  .     d     1101...011000...  ..........  U U U U U   8   8   2   2   2
+adda      16  .     a     1101...011001...  ..........  U U U U U   8   8   2   2   2
+adda      16  .     .     1101...011......  A+-DXWLdxI  U U U U U   8   8   2   2   2
+adda      32  .     d     1101...111000...  ..........  U U U U U   6   6   2   2   2
+adda      32  .     a     1101...111001...  ..........  U U U U U   6   6   2   2   2
+adda      32  .     .     1101...111......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+addi       8  .     d     0000011000000...  ..........  U U U U U   8   8   2   2   2
+addi       8  .     .     0000011000......  A+-DXWL...  U U U U U  12  12   4   4   4
+addi      16  .     d     0000011001000...  ..........  U U U U U   8   8   2   2   2
+addi      16  .     .     0000011001......  A+-DXWL...  U U U U U  12  12   4   4   4
+addi      32  .     d     0000011010000...  ..........  U U U U U  16  14   2   2   2
+addi      32  .     .     0000011010......  A+-DXWL...  U U U U U  20  20   4   4   4
+addq       8  .     d     0101...000000...  ..........  U U U U U   4   4   2   2   2
+addq       8  .     .     0101...000......  A+-DXWL...  U U U U U   8   8   4   4   4
+addq      16  .     d     0101...001000...  ..........  U U U U U   4   4   2   2   2
+addq      16  .     a     0101...001001...  ..........  U U U U U   4   4   2   2   2
+addq      16  .     .     0101...001......  A+-DXWL...  U U U U U   8   8   4   4   4
+addq      32  .     d     0101...010000...  ..........  U U U U U   8   8   2   2   2
+addq      32  .     a     0101...010001...  ..........  U U U U U   8   8   2   2   2
+addq      32  .     .     0101...010......  A+-DXWL...  U U U U U  12  12   4   4   4
+addx       8  rr    .     1101...100000...  ..........  U U U U U   4   4   2   2   2
+addx      16  rr    .     1101...101000...  ..........  U U U U U   4   4   2   2   2
+addx      32  rr    .     1101...110000...  ..........  U U U U U   8   6   2   2   2
+addx       8  mm    ax7   1101111100001...  ..........  U U U U U  18  18  12  12  12
+addx       8  mm    ay7   1101...100001111  ..........  U U U U U  18  18  12  12  12
+addx       8  mm    axy7  1101111100001111  ..........  U U U U U  18  18  12  12  12
+addx       8  mm    .     1101...100001...  ..........  U U U U U  18  18  12  12  12
+addx      16  mm    .     1101...101001...  ..........  U U U U U  18  18  12  12  12
+addx      32  mm    .     1101...110001...  ..........  U U U U U  30  30  12  12  12
+and        8  er    d     1100...000000...  ..........  U U U U U   4   4   2   2   2
+and        8  er    .     1100...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+and       16  er    d     1100...001000...  ..........  U U U U U   4   4   2   2   2
+and       16  er    .     1100...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+and       32  er    d     1100...010000...  ..........  U U U U U   6   6   2   2   2
+and       32  er    .     1100...010......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+and        8  re    .     1100...100......  A+-DXWL...  U U U U U   8   8   4   4   4
+and       16  re    .     1100...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+and       32  re    .     1100...110......  A+-DXWL...  U U U U U  12  12   4   4   4
+andi      16  toc   .     0000001000111100  ..........  U U U U U  20  16  12  12  12
+andi      16  tos   .     0000001001111100  ..........  S S S S S  20  16  12  12  12
+andi       8  .     d     0000001000000...  ..........  U U U U U   8   8   2   2   2
+andi       8  .     .     0000001000......  A+-DXWL...  U U U U U  12  12   4   4   4
+andi      16  .     d     0000001001000...  ..........  U U U U U   8   8   2   2   2
+andi      16  .     .     0000001001......  A+-DXWL...  U U U U U  12  12   4   4   4
+andi      32  .     d     0000001010000...  ..........  U U U U U  14  14   2   2   2
+andi      32  .     .     0000001010......  A+-DXWL...  U U U U U  20  20   4   4   4
+asr        8  s     .     1110...000000...  ..........  U U U U U   6   6   6   6   6
+asr       16  s     .     1110...001000...  ..........  U U U U U   6   6   6   6   6
+asr       32  s     .     1110...010000...  ..........  U U U U U   8   8   6   6   6
+asr        8  r     .     1110...000100...  ..........  U U U U U   6   6   6   6   6
+asr       16  r     .     1110...001100...  ..........  U U U U U   6   6   6   6   6
+asr       32  r     .     1110...010100...  ..........  U U U U U   8   8   6   6   6
+asr       16  .     .     1110000011......  A+-DXWL...  U U U U U   8   8   5   5   5
+asl        8  s     .     1110...100000...  ..........  U U U U U   6   6   8   8   8
+asl       16  s     .     1110...101000...  ..........  U U U U U   6   6   8   8   8
+asl       32  s     .     1110...110000...  ..........  U U U U U   8   8   8   8   8
+asl        8  r     .     1110...100100...  ..........  U U U U U   6   6   8   8   8
+asl       16  r     .     1110...101100...  ..........  U U U U U   6   6   8   8   8
+asl       32  r     .     1110...110100...  ..........  U U U U U   8   8   8   8   8
+asl       16  .     .     1110000111......  A+-DXWL...  U U U U U   8   8   6   6   6
+bcc        8  .     .     0110............  ..........  U U U U U  10  10   6   6   6
+bcc       16  .     .     0110....00000000  ..........  U U U U U  10  10   6   6   6
+bcc       32  .     .     0110....11111111  ..........  U U U U U  10  10   6   6   6
+bchg       8  r     .     0000...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+bchg      32  r     d     0000...101000...  ..........  U U U U U   8   8   4   4   4
+bchg       8  s     .     0000100001......  A+-DXWL...  U U U U U  12  12   4   4   4
+bchg      32  s     d     0000100001000...  ..........  U U U U U  12  12   4   4   4
+bclr       8  r     .     0000...110......  A+-DXWL...  U U U U U   8  10   4   4   4
+bclr      32  r     d     0000...110000...  ..........  U U U U U  10  10   4   4   4
+bclr       8  s     .     0000100010......  A+-DXWL...  U U U U U  12  12   4   4   4
+bclr      32  s     d     0000100010000...  ..........  U U U U U  14  14   4   4   4
+bfchg     32  .     d     1110101011000...  ..........  . . U U U   .   .  12  12  12  timing not quite correct
+bfchg     32  .     .     1110101011......  A..DXWL...  . . U U U   .   .  20  20  20
+bfclr     32  .     d     1110110011000...  ..........  . . U U U   .   .  12  12  12
+bfclr     32  .     .     1110110011......  A..DXWL...  . . U U U   .   .  20  20  20
+bfexts    32  .     d     1110101111000...  ..........  . . U U U   .   .   8   8   8
+bfexts    32  .     .     1110101111......  A..DXWLdx.  . . U U U   .   .  15  15  15
+bfextu    32  .     d     1110100111000...  ..........  . . U U U   .   .   8   8   8
+bfextu    32  .     .     1110100111......  A..DXWLdx.  . . U U U   .   .  15  15  15
+bfffo     32  .     d     1110110111000...  ..........  . . U U U   .   .  18  18  18
+bfffo     32  .     .     1110110111......  A..DXWLdx.  . . U U U   .   .  28  28  28
+bfins     32  .     d     1110111111000...  ..........  . . U U U   .   .  10  10  10
+bfins     32  .     .     1110111111......  A..DXWL...  . . U U U   .   .  17  17  17
+bfset     32  .     d     1110111011000...  ..........  . . U U U   .   .  12  12  12
+bfset     32  .     .     1110111011......  A..DXWL...  . . U U U   .   .  20  20  20
+bftst     32  .     d     1110100011000...  ..........  . . U U U   .   .   6   6   6
+bftst     32  .     .     1110100011......  A..DXWLdx.  . . U U U   .   .  13  13  13
+bkpt       0  .     .     0100100001001...  ..........  . U U U U   .  10  10  10  10
+bra        8  .     .     01100000........  ..........  U U U U U  10  10  10  10  10
+bra       16  .     .     0110000000000000  ..........  U U U U U  10  10  10  10  10
+bra       32  .     .     0110000011111111  ..........  U U U U U  10  10  10  10  10
+bset      32  r     d     0000...111000...  ..........  U U U U U   8   8   4   4   4
+bset       8  r     .     0000...111......  A+-DXWL...  U U U U U   8   8   4   4   4
+bset       8  s     .     0000100011......  A+-DXWL...  U U U U U  12  12   4   4   4
+bset      32  s     d     0000100011000...  ..........  U U U U U  12  12   4   4   4
+bsr        8  .     .     01100001........  ..........  U U U U U  18  18   7   7   7
+bsr       16  .     .     0110000100000000  ..........  U U U U U  18  18   7   7   7
+bsr       32  .     .     0110000111111111  ..........  U U U U U  18  18   7   7   7
+btst       8  r     .     0000...100......  A+-DXWLdxI  U U U U U   4   4   4   4   4
+btst      32  r     d     0000...100000...  ..........  U U U U U   6   6   4   4   4
+btst       8  s     .     0000100000......  A+-DXWLdx.  U U U U U   8   8   4   4   4
+btst      32  s     d     0000100000000...  ..........  U U U U U  10  10   4   4   4
+callm     32  .     .     0000011011......  A..DXWLdx.  . . U U U   .   .  60  60  60  not properly emulated
+cas        8  .     .     0000101011......  A+-DXWL...  . . U U U   .   .  12  12  12
+cas       16  .     .     0000110011......  A+-DXWL...  . . U U U   .   .  12  12  12
+cas       32  .     .     0000111011......  A+-DXWL...  . . U U U   .   .  12  12  12
+cas2      16  .     .     0000110011111100  ..........  . . U U U   .   .  12  12  12
+cas2      32  .     .     0000111011111100  ..........  . . U U U   .   .  12  12  12
+chk       16  .     d     0100...110000...  ..........  U U U U U  10   8   8   8   8
+chk       16  .     .     0100...110......  A+-DXWLdxI  U U U U U  10   8   8   8   8
+chk       32  .     d     0100...100000...  ..........  . . U U U   .   .   8   8   8
+chk       32  .     .     0100...100......  A+-DXWLdxI  . . U U U   .   .   8   8   8
+chk2cmp2   8  .     pcdi  0000000011111010  ..........  . . U U U   .   .  23  23  23
+chk2cmp2   8  .     pcix  0000000011111011  ..........  . . U U U   .   .  23  23  23
+chk2cmp2   8  .     .     0000000011......  A..DXWL...  . . U U U   .   .  18  18  18
+chk2cmp2  16  .     pcdi  0000001011111010  ..........  . . U U U   .   .  23  23  23
+chk2cmp2  16  .     pcix  0000001011111011  ..........  . . U U U   .   .  23  23  23
+chk2cmp2  16  .     .     0000001011......  A..DXWL...  . . U U U   .   .  18  18  18
+chk2cmp2  32  .     pcdi  0000010011111010  ..........  . . U U U   .   .  23  23  23
+chk2cmp2  32  .     pcix  0000010011111011  ..........  . . U U U   .   .  23  23  23
+chk2cmp2  32  .     .     0000010011......  A..DXWL...  . . U U U   .   .  18  18  18
+clr        8  .     d     0100001000000...  ..........  U U U U U   4   4   2   2   2
+clr        8  .     .     0100001000......  A+-DXWL...  U U U U U   8   4   4   4   4
+clr       16  .     d     0100001001000...  ..........  U U U U U   4   4   2   2   2
+clr       16  .     .     0100001001......  A+-DXWL...  U U U U U   8   4   4   4   4
+clr       32  .     d     0100001010000...  ..........  U U U U U   6   6   2   2   2
+clr       32  .     .     0100001010......  A+-DXWL...  U U U U U  12   6   4   4   4
+cmp        8  .     d     1011...000000...  ..........  U U U U U   4   4   2   2   2
+cmp        8  .     .     1011...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+cmp       16  .     d     1011...001000...  ..........  U U U U U   4   4   2   2   2
+cmp       16  .     a     1011...001001...  ..........  U U U U U   4   4   2   2   2
+cmp       16  .     .     1011...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+cmp       32  .     d     1011...010000...  ..........  U U U U U   6   6   2   2   2
+cmp       32  .     a     1011...010001...  ..........  U U U U U   6   6   2   2   2
+cmp       32  .     .     1011...010......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+cmpa      16  .     d     1011...011000...  ..........  U U U U U   6   6   4   4   4
+cmpa      16  .     a     1011...011001...  ..........  U U U U U   6   6   4   4   4
+cmpa      16  .     .     1011...011......  A+-DXWLdxI  U U U U U   6   6   4   4   4
+cmpa      32  .     d     1011...111000...  ..........  U U U U U   6   6   4   4   4
+cmpa      32  .     a     1011...111001...  ..........  U U U U U   6   6   4   4   4
+cmpa      32  .     .     1011...111......  A+-DXWLdxI  U U U U U   6   6   4   4   4
+cmpi       8  .     d     0000110000000...  ..........  U U U U U   8   8   2   2   2
+cmpi       8  .     .     0000110000......  A+-DXWL...  U U U U U   8   8   2   2   2
+cmpi       8  .     pcdi  0000110000111010  ..........  . . U U U   .   .   7   7   7
+cmpi       8  .     pcix  0000110000111011  ..........  . . U U U   .   .   9   9   9
+cmpi      16  .     d     0000110001000...  ..........  U U U U U   8   8   2   2   2
+cmpi      16  .     .     0000110001......  A+-DXWL...  U U U U U   8   8   2   2   2
+cmpi      16  .     pcdi  0000110001111010  ..........  . . U U U   .   .   7   7   7
+cmpi      16  .     pcix  0000110001111011  ..........  . . U U U   .   .   9   9   9
+cmpi      32  .     d     0000110010000...  ..........  U U U U U  14  12   2   2   2
+cmpi      32  .     .     0000110010......  A+-DXWL...  U U U U U  12  12   2   2   2
+cmpi      32  .     pcdi  0000110010111010  ..........  . . U U U   .   .   7   7   7
+cmpi      32  .     pcix  0000110010111011  ..........  . . U U U   .   .   9   9   9
+cmpm       8  .     ax7   1011111100001...  ..........  U U U U U  12  12   9   9   9
+cmpm       8  .     ay7   1011...100001111  ..........  U U U U U  12  12   9   9   9
+cmpm       8  .     axy7  1011111100001111  ..........  U U U U U  12  12   9   9   9
+cmpm       8  .     .     1011...100001...  ..........  U U U U U  12  12   9   9   9
+cmpm      16  .     .     1011...101001...  ..........  U U U U U  12  12   9   9   9
+cmpm      32  .     .     1011...110001...  ..........  U U U U U  20  20   9   9   9
+cpbcc     32  .     .     1111...01.......  ..........  . . U U .   .   .   4   4   .  unemulated
+cpdbcc    32  .     .     1111...001001...  ..........  . . U U .   .   .   4   4   .  unemulated
+cpgen     32  .     .     1111...000......  ..........  . . U U .   .   .   4   4   .  unemulated
+cpscc     32  .     .     1111...001......  ..........  . . U U .   .   .   4   4   .  unemulated
+cptrapcc  32  .     .     1111...001111...  ..........  . . U U .   .   .   4   4   .  unemulated
+dbt       16  .     .     0101000011001...  ..........  U U U U U  12  12   6   6   6
+dbf       16  .     .     0101000111001...  ..........  U U U U U  12  12   6   6   6
+dbcc      16  .     .     0101....11001...  ..........  U U U U U  12  12   6   6   6
+divs      16  .     d     1000...111000...  ..........  U U U U U 158 122  56  56  56
+divs      16  .     .     1000...111......  A+-DXWLdxI  U U U U U 158 122  56  56  56
+divu      16  .     d     1000...011000...  ..........  U U U U U 140 108  44  44  44
+divu      16  .     .     1000...011......  A+-DXWLdxI  U U U U U 140 108  44  44  44
+divl      32  .     d     0100110001000...  ..........  . . U U U   .   .  84  84  84
+divl      32  .     .     0100110001......  A+-DXWLdxI  . . U U U   .   .  84  84  84
+eor        8  .     d     1011...100000...  ..........  U U U U U   4   4   2   2   2
+eor        8  .     .     1011...100......  A+-DXWL...  U U U U U   8   8   4   4   4
+eor       16  .     d     1011...101000...  ..........  U U U U U   4   4   2   2   2
+eor       16  .     .     1011...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+eor       32  .     d     1011...110000...  ..........  U U U U U   8   6   2   2   2
+eor       32  .     .     1011...110......  A+-DXWL...  U U U U U  12  12   4   4   4
+eori      16  toc   .     0000101000111100  ..........  U U U U U  20  16  12  12  12
+eori      16  tos   .     0000101001111100  ..........  S S S S S  20  16  12  12  12
+eori       8  .     d     0000101000000...  ..........  U U U U U   8   8   2   2   2
+eori       8  .     .     0000101000......  A+-DXWL...  U U U U U  12  12   4   4   4
+eori      16  .     d     0000101001000...  ..........  U U U U U   8   8   2   2   2
+eori      16  .     .     0000101001......  A+-DXWL...  U U U U U  12  12   4   4   4
+eori      32  .     d     0000101010000...  ..........  U U U U U  16  14   2   2   2
+eori      32  .     .     0000101010......  A+-DXWL...  U U U U U  20  20   4   4   4
+exg       32  dd    .     1100...101000...  ..........  U U U U U   6   6   2   2   2
+exg       32  aa    .     1100...101001...  ..........  U U U U U   6   6   2   2   2
+exg       32  da    .     1100...110001...  ..........  U U U U U   6   6   2   2   2
+ext       16  .     .     0100100010000...  ..........  U U U U U   4   4   4   4   4
+ext       32  .     .     0100100011000...  ..........  U U U U U   4   4   4   4   4
+extb      32  .     .     0100100111000...  ..........  . . U U U   .   .   4   4   4
+illegal    0  .     .     0100101011111100  ..........  U U U U U   4   4   4   4   4
+jmp       32  .     .     0100111011......  A..DXWLdx.  U U U U U   4   4   0   0   0
+jsr       32  .     .     0100111010......  A..DXWLdx.  U U U U U  12  12   0   0   0
+lea       32  .     .     0100...111......  A..DXWLdx.  U U U U U   0   0   2   2   2
+link      16  .     a7    0100111001010111  ..........  U U U U U  16  16   5   5   5
+link      16  .     .     0100111001010...  ..........  U U U U U  16  16   5   5   5
+link      32  .     a7    0100100000001111  ..........  . . U U U   .   .   6   6   6
+link      32  .     .     0100100000001...  ..........  . . U U U   .   .   6   6   6
+lsr        8  s     .     1110...000001...  ..........  U U U U U   6   6   4   4   4
+lsr       16  s     .     1110...001001...  ..........  U U U U U   6   6   4   4   4
+lsr       32  s     .     1110...010001...  ..........  U U U U U   8   8   4   4   4
+lsr        8  r     .     1110...000101...  ..........  U U U U U   6   6   6   6   6
+lsr       16  r     .     1110...001101...  ..........  U U U U U   6   6   6   6   6
+lsr       32  r     .     1110...010101...  ..........  U U U U U   8   8   6   6   6
+lsr       16  .     .     1110001011......  A+-DXWL...  U U U U U   8   8   5   5   5
+lsl        8  s     .     1110...100001...  ..........  U U U U U   6   6   4   4   4
+lsl       16  s     .     1110...101001...  ..........  U U U U U   6   6   4   4   4
+lsl       32  s     .     1110...110001...  ..........  U U U U U   8   8   4   4   4
+lsl        8  r     .     1110...100101...  ..........  U U U U U   6   6   6   6   6
+lsl       16  r     .     1110...101101...  ..........  U U U U U   6   6   6   6   6
+lsl       32  r     .     1110...110101...  ..........  U U U U U   8   8   6   6   6
+lsl       16  .     .     1110001111......  A+-DXWL...  U U U U U   8   8   5   5   5
+move       8  d     d     0001...000000...  ..........  U U U U U   4   4   2   2   2
+move       8  d     .     0001...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+move       8  ai    d     0001...010000...  ..........  U U U U U   8   8   4   4   4
+move       8  ai    .     0001...010......  A+-DXWLdxI  U U U U U   8   8   4   4   4
+move       8  pi    d     0001...011000...  ..........  U U U U U   8   8   4   4   4
+move       8  pi    .     0001...011......  A+-DXWLdxI  U U U U U   8   8   4   4   4
+move       8  pi7   d     0001111011000...  ..........  U U U U U   8   8   4   4   4
+move       8  pi7   .     0001111011......  A+-DXWLdxI  U U U U U   8   8   4   4   4
+move       8  pd    d     0001...100000...  ..........  U U U U U   8   8   5   5   5
+move       8  pd    .     0001...100......  A+-DXWLdxI  U U U U U   8   8   5   5   5
+move       8  pd7   d     0001111100000...  ..........  U U U U U   8   8   5   5   5
+move       8  pd7   .     0001111100......  A+-DXWLdxI  U U U U U   8   8   5   5   5
+move       8  di    d     0001...101000...  ..........  U U U U U  12  12   5   5   5
+move       8  di    .     0001...101......  A+-DXWLdxI  U U U U U  12  12   5   5   5
+move       8  ix    d     0001...110000...  ..........  U U U U U  14  14   7   7   7
+move       8  ix    .     0001...110......  A+-DXWLdxI  U U U U U  14  14   7   7   7
+move       8  aw    d     0001000111000...  ..........  U U U U U  12  12   4   4   4
+move       8  aw    .     0001000111......  A+-DXWLdxI  U U U U U  12  12   4   4   4
+move       8  al    d     0001001111000...  ..........  U U U U U  16  16   6   6   6
+move       8  al    .     0001001111......  A+-DXWLdxI  U U U U U  16  16   6   6   6
+move      16  d     d     0011...000000...  ..........  U U U U U   4   4   2   2   2
+move      16  d     a     0011...000001...  ..........  U U U U U   4   4   2   2   2
+move      16  d     .     0011...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+move      16  ai    d     0011...010000...  ..........  U U U U U   8   8   4   4   4
+move      16  ai    a     0011...010001...  ..........  U U U U U   8   8   4   4   4
+move      16  ai    .     0011...010......  A+-DXWLdxI  U U U U U   8   8   4   4   4
+move      16  pi    d     0011...011000...  ..........  U U U U U   8   8   4   4   4
+move      16  pi    a     0011...011001...  ..........  U U U U U   8   8   4   4   4
+move      16  pi    .     0011...011......  A+-DXWLdxI  U U U U U   8   8   4   4   4
+move      16  pd    d     0011...100000...  ..........  U U U U U   8   8   5   5   5
+move      16  pd    a     0011...100001...  ..........  U U U U U   8   8   5   5   5
+move      16  pd    .     0011...100......  A+-DXWLdxI  U U U U U   8   8   5   5   5
+move      16  di    d     0011...101000...  ..........  U U U U U  12  12   5   5   5
+move      16  di    a     0011...101001...  ..........  U U U U U  12  12   5   5   5
+move      16  di    .     0011...101......  A+-DXWLdxI  U U U U U  12  12   5   5   5
+move      16  ix    d     0011...110000...  ..........  U U U U U  14  14   7   7   7
+move      16  ix    a     0011...110001...  ..........  U U U U U  14  14   7   7   7
+move      16  ix    .     0011...110......  A+-DXWLdxI  U U U U U  14  14   7   7   7
+move      16  aw    d     0011000111000...  ..........  U U U U U  12  12   4   4   4
+move      16  aw    a     0011000111001...  ..........  U U U U U  12  12   4   4   4
+move      16  aw    .     0011000111......  A+-DXWLdxI  U U U U U  12  12   4   4   4
+move      16  al    d     0011001111000...  ..........  U U U U U  16  16   6   6   6
+move      16  al    a     0011001111001...  ..........  U U U U U  16  16   6   6   6
+move      16  al    .     0011001111......  A+-DXWLdxI  U U U U U  16  16   6   6   6
+move      32  d     d     0010...000000...  ..........  U U U U U   4   4   2   2   2
+move      32  d     a     0010...000001...  ..........  U U U U U   4   4   2   2   2
+move      32  d     .     0010...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+move      32  ai    d     0010...010000...  ..........  U U U U U  12  12   4   4   4
+move      32  ai    a     0010...010001...  ..........  U U U U U  12  12   4   4   4
+move      32  ai    .     0010...010......  A+-DXWLdxI  U U U U U  12  12   4   4   4
+move      32  pi    d     0010...011000...  ..........  U U U U U  12  12   4   4   4
+move      32  pi    a     0010...011001...  ..........  U U U U U  12  12   4   4   4
+move      32  pi    .     0010...011......  A+-DXWLdxI  U U U U U  12  12   4   4   4
+move      32  pd    d     0010...100000...  ..........  U U U U U  12  14   5   5   5
+move      32  pd    a     0010...100001...  ..........  U U U U U  12  14   5   5   5
+move      32  pd    .     0010...100......  A+-DXWLdxI  U U U U U  12  14   5   5   5
+move      32  di    d     0010...101000...  ..........  U U U U U  16  16   5   5   5
+move      32  di    a     0010...101001...  ..........  U U U U U  16  16   5   5   5
+move      32  di    .     0010...101......  A+-DXWLdxI  U U U U U  16  16   5   5   5
+move      32  ix    d     0010...110000...  ..........  U U U U U  18  18   7   7   7
+move      32  ix    a     0010...110001...  ..........  U U U U U  18  18   7   7   7
+move      32  ix    .     0010...110......  A+-DXWLdxI  U U U U U  18  18   7   7   7
+move      32  aw    d     0010000111000...  ..........  U U U U U  16  16   4   4   4
+move      32  aw    a     0010000111001...  ..........  U U U U U  16  16   4   4   4
+move      32  aw    .     0010000111......  A+-DXWLdxI  U U U U U  16  16   4   4   4
+move      32  al    d     0010001111000...  ..........  U U U U U  20  20   6   6   6
+move      32  al    a     0010001111001...  ..........  U U U U U  20  20   6   6   6
+move      32  al    .     0010001111......  A+-DXWLdxI  U U U U U  20  20   6   6   6
+movea     16  .     d     0011...001000...  ..........  U U U U U   4   4   2   2   2
+movea     16  .     a     0011...001001...  ..........  U U U U U   4   4   2   2   2
+movea     16  .     .     0011...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+movea     32  .     d     0010...001000...  ..........  U U U U U   4   4   2   2   2
+movea     32  .     a     0010...001001...  ..........  U U U U U   4   4   2   2   2
+movea     32  .     .     0010...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+move      16  frc   d     0100001011000...  ..........  . U U U U   .   4   4   4   4
+move      16  frc   .     0100001011......  A+-DXWL...  . U U U U   .   8   4   4   4
+move      16  toc   d     0100010011000...  ..........  U U U U U  12  12   4   4   4
+move      16  toc   .     0100010011......  A+-DXWLdxI  U U U U U  12  12   4   4   4
+move      16  frs   d     0100000011000...  ..........  U S S S S   6   4   8   8   8  U only for 000
+move      16  frs   .     0100000011......  A+-DXWL...  U S S S S   8   8   8   8   8  U only for 000
+move      16  tos   d     0100011011000...  ..........  S S S S S  12  12   8   8   8
+move      16  tos   .     0100011011......  A+-DXWLdxI  S S S S S  12  12   8   8   8
+move      32  fru   .     0100111001101...  ..........  S S S S S   4   6   2   2   2
+move      32  tou   .     0100111001100...  ..........  S S S S S   4   6   2   2   2
+movec     32  cr    .     0100111001111010  ..........  . S S S S   .  12   6   6   6
+movec     32  rc    .     0100111001111011  ..........  . S S S S   .  10  12  12  12
+movem     16  re    pd    0100100010100...  ..........  U U U U U   8   8   4   4   4
+movem     16  re    .     0100100010......  A..DXWL...  U U U U U   8   8   4   4   4
+movem     32  re    pd    0100100011100...  ..........  U U U U U   8   8   4   4   4
+movem     32  re    .     0100100011......  A..DXWL...  U U U U U   8   8   4   4   4
+movem     16  er    pi    0100110010011...  ..........  U U U U U  12  12   8   8   8
+movem     16  er    pcdi  0100110010111010  ..........  U U U U U  16  16   9   9   9
+movem     16  er    pcix  0100110010111011  ..........  U U U U U  18  18  11  11  11
+movem     16  er    .     0100110010......  A..DXWL...  U U U U U  12  12   8   8   8
+movem     32  er    pi    0100110011011...  ..........  U U U U U  12  12   8   8   8
+movem     32  er    pcdi  0100110011111010  ..........  U U U U U  16  16   9   9   9
+movem     32  er    pcix  0100110011111011  ..........  U U U U U  18  18  11  11  11
+movem     32  er    .     0100110011......  A..DXWL...  U U U U U  12  12   8   8   8
+movep     16  er    .     0000...100001...  ..........  U U U U U  16  16  12  12  12
+movep     32  er    .     0000...101001...  ..........  U U U U U  24  24  18  18  18
+movep     16  re    .     0000...110001...  ..........  U U U U U  16  16  11  11  11
+movep     32  re    .     0000...111001...  ..........  U U U U U  24  24  17  17  17
+moveq     32  .     .     0111...0........  ..........  U U U U U   4   4   2   2   2
+moves      8  .     .     0000111000......  A+-DXWL...  . S S S S   .  14   5   5   5
+moves     16  .     .     0000111001......  A+-DXWL...  . S S S S   .  14   5   5   5
+moves     32  .     .     0000111010......  A+-DXWL...  . S S S S   .  16   5   5   5
+move16    32  .     .     1111011000100...  ..........  . . . . U   .   .   .   .   4  TODO: correct timing
+muls      16  .     d     1100...111000...  ..........  U U U U U  54  32  27  27  27
+muls      16  .     .     1100...111......  A+-DXWLdxI  U U U U U  54  32  27  27  27
+mulu      16  .     d     1100...011000...  ..........  U U U U U  54  30  27  27  27
+mulu      16  .     .     1100...011......  A+-DXWLdxI  U U U U U  54  30  27  27  27
+mull      32  .     d     0100110000000...  ..........  . . U U U   .   .  43  43  43
+mull      32  .     .     0100110000......  A+-DXWLdxI  . . U U U   .   .  43  43  43
+nbcd       8  .     d     0100100000000...  ..........  U U U U U   6   6   6   6   6
+nbcd       8  .     .     0100100000......  A+-DXWL...  U U U U U   8   8   6   6   6
+neg        8  .     d     0100010000000...  ..........  U U U U U   4   4   2   2   2
+neg        8  .     .     0100010000......  A+-DXWL...  U U U U U   8   8   4   4   4
+neg       16  .     d     0100010001000...  ..........  U U U U U   4   4   2   2   2
+neg       16  .     .     0100010001......  A+-DXWL...  U U U U U   8   8   4   4   4
+neg       32  .     d     0100010010000...  ..........  U U U U U   6   6   2   2   2
+neg       32  .     .     0100010010......  A+-DXWL...  U U U U U  12  12   4   4   4
+negx       8  .     d     0100000000000...  ..........  U U U U U   4   4   2   2   2
+negx       8  .     .     0100000000......  A+-DXWL...  U U U U U   8   8   4   4   4
+negx      16  .     d     0100000001000...  ..........  U U U U U   4   4   2   2   2
+negx      16  .     .     0100000001......  A+-DXWL...  U U U U U   8   8   4   4   4
+negx      32  .     d     0100000010000...  ..........  U U U U U   6   6   2   2   2
+negx      32  .     .     0100000010......  A+-DXWL...  U U U U U  12  12   4   4   4
+nop        0  .     .     0100111001110001  ..........  U U U U U   4   4   2   2   2
+not        8  .     d     0100011000000...  ..........  U U U U U   4   4   2   2   2
+not        8  .     .     0100011000......  A+-DXWL...  U U U U U   8   8   4   4   4
+not       16  .     d     0100011001000...  ..........  U U U U U   4   4   2   2   2
+not       16  .     .     0100011001......  A+-DXWL...  U U U U U   8   8   4   4   4
+not       32  .     d     0100011010000...  ..........  U U U U U   6   6   2   2   2
+not       32  .     .     0100011010......  A+-DXWL...  U U U U U  12  12   4   4   4
+or         8  er    d     1000...000000...  ..........  U U U U U   4   4   2   2   2
+or         8  er    .     1000...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+or        16  er    d     1000...001000...  ..........  U U U U U   4   4   2   2   2
+or        16  er    .     1000...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+or        32  er    d     1000...010000...  ..........  U U U U U   6   6   2   2   2
+or        32  er    .     1000...010......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+or         8  re    .     1000...100......  A+-DXWL...  U U U U U   8   8   4   4   4
+or        16  re    .     1000...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+or        32  re    .     1000...110......  A+-DXWL...  U U U U U  12  12   4   4   4
+ori       16  toc   .     0000000000111100  ..........  U U U U U  20  16  12  12  12
+ori       16  tos   .     0000000001111100  ..........  S S S S S  20  16  12  12  12
+ori        8  .     d     0000000000000...  ..........  U U U U U   8   8   2   2   2
+ori        8  .     .     0000000000......  A+-DXWL...  U U U U U  12  12   4   4   4
+ori       16  .     d     0000000001000...  ..........  U U U U U   8   8   2   2   2
+ori       16  .     .     0000000001......  A+-DXWL...  U U U U U  12  12   4   4   4
+ori       32  .     d     0000000010000...  ..........  U U U U U  16  14   2   2   2
+ori       32  .     .     0000000010......  A+-DXWL...  U U U U U  20  20   4   4   4
+pack      16  rr    .     1000...101000...  ..........  . . U U U   .   .   6   6   6
+pack      16  mm    ax7   1000111101001...  ..........  . . U U U   .   .  13  13  13
+pack      16  mm    ay7   1000...101001111  ..........  . . U U U   .   .  13  13  13
+pack      16  mm    axy7  1000111101001111  ..........  . . U U U   .   .  13  13  13
+pack      16  mm    .     1000...101001...  ..........  . . U U U   .   .  13  13  13
+pea       32  .     .     0100100001......  A..DXWLdx.  U U U U U   6   6   5   5   5
+pflush    32  .     .     1111010100011000  ..........  . . . . S   .   .   .   .   4   TODO: correct timing
+pmmu      32  .     .     1111000.........  ..........  . . S S S   .   .   8   8   8
+reset      0  .     .     0100111001110000  ..........  S S S S S   0   0   0   0   0
+ror        8  s     .     1110...000011...  ..........  U U U U U   6   6   8   8   8
+ror       16  s     .     1110...001011...  ..........  U U U U U   6   6   8   8   8
+ror       32  s     .     1110...010011...  ..........  U U U U U   8   8   8   8   8
+ror        8  r     .     1110...000111...  ..........  U U U U U   6   6   8   8   8
+ror       16  r     .     1110...001111...  ..........  U U U U U   6   6   8   8   8
+ror       32  r     .     1110...010111...  ..........  U U U U U   8   8   8   8   8
+ror       16  .     .     1110011011......  A+-DXWL...  U U U U U   8   8   7   7   7
+rol        8  s     .     1110...100011...  ..........  U U U U U   6   6   8   8   8
+rol       16  s     .     1110...101011...  ..........  U U U U U   6   6   8   8   8
+rol       32  s     .     1110...110011...  ..........  U U U U U   8   8   8   8   8
+rol        8  r     .     1110...100111...  ..........  U U U U U   6   6   8   8   8
+rol       16  r     .     1110...101111...  ..........  U U U U U   6   6   8   8   8
+rol       32  r     .     1110...110111...  ..........  U U U U U   8   8   8   8   8
+rol       16  .     .     1110011111......  A+-DXWL...  U U U U U   8   8   7   7   7
+roxr       8  s     .     1110...000010...  ..........  U U U U U   6   6  12  12  12
+roxr      16  s     .     1110...001010...  ..........  U U U U U   6   6  12  12  12
+roxr      32  s     .     1110...010010...  ..........  U U U U U   8   8  12  12  12
+roxr       8  r     .     1110...000110...  ..........  U U U U U   6   6  12  12  12
+roxr      16  r     .     1110...001110...  ..........  U U U U U   6   6  12  12  12
+roxr      32  r     .     1110...010110...  ..........  U U U U U   8   8  12  12  12
+roxr      16  .     .     1110010011......  A+-DXWL...  U U U U U   8   8   5   5   5
+roxl       8  s     .     1110...100010...  ..........  U U U U U   6   6  12  12  12
+roxl      16  s     .     1110...101010...  ..........  U U U U U   6   6  12  12  12
+roxl      32  s     .     1110...110010...  ..........  U U U U U   8   8  12  12  12
+roxl       8  r     .     1110...100110...  ..........  U U U U U   6   6  12  12  12
+roxl      16  r     .     1110...101110...  ..........  U U U U U   6   6  12  12  12
+roxl      32  r     .     1110...110110...  ..........  U U U U U   8   8  12  12  12
+roxl      16  .     .     1110010111......  A+-DXWL...  U U U U U   8   8   5   5   5
+rtd       32  .     .     0100111001110100  ..........  . U U U U   .  16  10  10  10
+rte       32  .     .     0100111001110011  ..........  S S S S S  20  24  20  20  20  bus fault not emulated
+rtm       32  .     .     000001101100....  ..........  . . U U U   .   .  19  19  19  not properly emulated
+rtr       32  .     .     0100111001110111  ..........  U U U U U  20  20  14  14  14
+rts       32  .     .     0100111001110101  ..........  U U U U U  16  16  10  10  10
+sbcd       8  rr    .     1000...100000...  ..........  U U U U U   6   6   4   4   4
+sbcd       8  mm    ax7   1000111100001...  ..........  U U U U U  18  18  16  16  16
+sbcd       8  mm    ay7   1000...100001111  ..........  U U U U U  18  18  16  16  16
+sbcd       8  mm    axy7  1000111100001111  ..........  U U U U U  18  18  16  16  16
+sbcd       8  mm    .     1000...100001...  ..........  U U U U U  18  18  16  16  16
+st         8  .     d     0101000011000...  ..........  U U U U U   6   4   4   4   4
+st         8  .     .     0101000011......  A+-DXWL...  U U U U U   8   8   6   6   6
+sf         8  .     d     0101000111000...  ..........  U U U U U   4   4   4   4   4
+sf         8  .     .     0101000111......  A+-DXWL...  U U U U U   8   8   6   6   6
+scc        8  .     d     0101....11000...  ..........  U U U U U   4   4   4   4   4
+scc        8  .     .     0101....11......  A+-DXWL...  U U U U U   8   8   6   6   6
+stop       0  .     .     0100111001110010  ..........  S S S S S   4   4   8   8   8
+sub        8  er    d     1001...000000...  ..........  U U U U U   4   4   2   2   2
+sub        8  er    .     1001...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+sub       16  er    d     1001...001000...  ..........  U U U U U   4   4   2   2   2
+sub       16  er    a     1001...001001...  ..........  U U U U U   4   4   2   2   2
+sub       16  er    .     1001...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+sub       32  er    d     1001...010000...  ..........  U U U U U   6   6   2   2   2
+sub       32  er    a     1001...010001...  ..........  U U U U U   6   6   2   2   2
+sub       32  er    .     1001...010......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+sub        8  re    .     1001...100......  A+-DXWL...  U U U U U   8   8   4   4   4
+sub       16  re    .     1001...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+sub       32  re    .     1001...110......  A+-DXWL...  U U U U U  12  12   4   4   4
+suba      16  .     d     1001...011000...  ..........  U U U U U   8   8   2   2   2
+suba      16  .     a     1001...011001...  ..........  U U U U U   8   8   2   2   2
+suba      16  .     .     1001...011......  A+-DXWLdxI  U U U U U   8   8   2   2   2
+suba      32  .     d     1001...111000...  ..........  U U U U U   6   6   2   2   2
+suba      32  .     a     1001...111001...  ..........  U U U U U   6   6   2   2   2
+suba      32  .     .     1001...111......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+subi       8  .     d     0000010000000...  ..........  U U U U U   8   8   2   2   2
+subi       8  .     .     0000010000......  A+-DXWL...  U U U U U  12  12   4   4   4
+subi      16  .     d     0000010001000...  ..........  U U U U U   8   8   2   2   2
+subi      16  .     .     0000010001......  A+-DXWL...  U U U U U  12  12   4   4   4
+subi      32  .     d     0000010010000...  ..........  U U U U U  16  14   2   2   2
+subi      32  .     .     0000010010......  A+-DXWL...  U U U U U  20  20   4   4   4
+subq       8  .     d     0101...100000...  ..........  U U U U U   4   4   2   2   2
+subq       8  .     .     0101...100......  A+-DXWL...  U U U U U   8   8   4   4   4
+subq      16  .     d     0101...101000...  ..........  U U U U U   4   4   2   2   2
+subq      16  .     a     0101...101001...  ..........  U U U U U   8   4   2   2   2
+subq      16  .     .     0101...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+subq      32  .     d     0101...110000...  ..........  U U U U U   8   8   2   2   2
+subq      32  .     a     0101...110001...  ..........  U U U U U   8   8   2   2   2
+subq      32  .     .     0101...110......  A+-DXWL...  U U U U U  12  12   4   4   4
+subx       8  rr    .     1001...100000...  ..........  U U U U U   4   4   2   2   2
+subx      16  rr    .     1001...101000...  ..........  U U U U U   4   4   2   2   2
+subx      32  rr    .     1001...110000...  ..........  U U U U U   8   6   2   2   2
+subx       8  mm    ax7   1001111100001...  ..........  U U U U U  18  18  12  12  12
+subx       8  mm    ay7   1001...100001111  ..........  U U U U U  18  18  12  12  12
+subx       8  mm    axy7  1001111100001111  ..........  U U U U U  18  18  12  12  12
+subx       8  mm    .     1001...100001...  ..........  U U U U U  18  18  12  12  12
+subx      16  mm    .     1001...101001...  ..........  U U U U U  18  18  12  12  12
+subx      32  mm    .     1001...110001...  ..........  U U U U U  30  30  12  12  12
+swap      32  .     .     0100100001000...  ..........  U U U U U   4   4   4   4   4
+tas        8  .     d     0100101011000...  ..........  U U U U U   4   4   4   4   4
+tas        8  .     .     0100101011......  A+-DXWL...  U U U U U  14  14  12  12  12
+trap       0  .     .     010011100100....  ..........  U U U U U   4   4   4   4   4
+trapt      0  .     .     0101000011111100  ..........  . . U U U   .   .   4   4   4
+trapt     16  .     .     0101000011111010  ..........  . . U U U   .   .   6   6   6
+trapt     32  .     .     0101000011111011  ..........  . . U U U   .   .   8   8   8
+trapf      0  .     .     0101000111111100  ..........  . . U U U   .   .   4   4   4
+trapf     16  .     .     0101000111111010  ..........  . . U U U   .   .   6   6   6
+trapf     32  .     .     0101000111111011  ..........  . . U U U   .   .   8   8   8
+trapcc     0  .     .     0101....11111100  ..........  . . U U U   .   .   4   4   4
+trapcc    16  .     .     0101....11111010  ..........  . . U U U   .   .   6   6   6
+trapcc    32  .     .     0101....11111011  ..........  . . U U U   .   .   8   8   8
+trapv      0  .     .     0100111001110110  ..........  U U U U U   4   4   4   4   4
+tst        8  .     d     0100101000000...  ..........  U U U U U   4   4   2   2   2
+tst        8  .     .     0100101000......  A+-DXWL...  U U U U U   4   4   2   2   2
+tst        8  .     pcdi  0100101000111010  ..........  . . U U U   .   .   7   7   7
+tst        8  .     pcix  0100101000111011  ..........  . . U U U   .   .   9   9   9
+tst        8  .     i     0100101000111100  ..........  . . U U U   .   .   6   6   6
+tst       16  .     d     0100101001000...  ..........  U U U U U   4   4   2   2   2
+tst       16  .     a     0100101001001...  ..........  . . U U U   .   .   2   2   2
+tst       16  .     .     0100101001......  A+-DXWL...  U U U U U   4   4   2   2   2
+tst       16  .     pcdi  0100101001111010  ..........  . . U U U   .   .   7   7   7
+tst       16  .     pcix  0100101001111011  ..........  . . U U U   .   .   9   9   9
+tst       16  .     i     0100101001111100  ..........  . . U U U   .   .   6   6   6
+tst       32  .     d     0100101010000...  ..........  U U U U U   4   4   2   2   2
+tst       32  .     a     0100101010001...  ..........  . . U U U   .   .   2   2   2
+tst       32  .     .     0100101010......  A+-DXWL...  U U U U U   4   4   2   2   2
+tst       32  .     pcdi  0100101010111010  ..........  . . U U U   .   .   7   7   7
+tst       32  .     pcix  0100101010111011  ..........  . . U U U   .   .   9   9   9
+tst       32  .     i     0100101010111100  ..........  . . U U U   .   .   6   6   6
+unlk      32  .     a7    0100111001011111  ..........  U U U U U  12  12   6   6   6
+unlk      32  .     .     0100111001011...  ..........  U U U U U  12  12   6   6   6
+unpk      16  rr    .     1000...110000...  ..........  . . U U U   .   .   8   8   8
+unpk      16  mm    ax7   1000111110001...  ..........  . . U U U   .   .  13  13  13
+unpk      16  mm    ay7   1000...110001111  ..........  . . U U U   .   .  13  13  13
+unpk      16  mm    axy7  1000111110001111  ..........  . . U U U   .   .  13  13  13
+unpk      16  mm    .     1000...110001...  ..........  . . U U U   .   .  13  13  13
 
 
@@ -903,6 +913,29 @@ M68KMAKE_OP(1111, 0, ., .)
 }
 
 
+M68KMAKE_OP(040fpu0, 32, ., .)
+{
+	if(CPU_TYPE_IS_030_PLUS(CPU_TYPE))
+	{
+		m68040_fpu_op0();
+		return;
+	}
+	m68ki_exception_1111();
+}
+
+
+M68KMAKE_OP(040fpu1, 32, ., .)
+{
+	if(CPU_TYPE_IS_030_PLUS(CPU_TYPE))
+	{
+		m68040_fpu_op1();
+		return;
+	}
+	m68ki_exception_1111();
+}
+
+
+
 M68KMAKE_OP(abcd, 8, rr, .)
 {
 	uint* r_dst = &DX;
@@ -1227,8 +1260,8 @@ M68KMAKE_OP(adda, 16, ., a)
 
 M68KMAKE_OP(adda, 16, ., .)
 {
-	signed short src = MAKE_INT_16(M68KMAKE_GET_OPER_AY_16);
 	uint* r_dst = &AX;
+	uint src = MAKE_INT_16(M68KMAKE_GET_OPER_AY_16);
 
 	*r_dst = MASK_OUT_ABOVE_32(*r_dst + src);
 }
@@ -1807,7 +1840,7 @@ M68KMAKE_OP(andi, 32, ., .)
 }
 
 
-M68KMAKE_OP(andi, 8, toc, .)
+M68KMAKE_OP(andi, 16, toc, .)
 {
 	m68ki_set_ccr(m68ki_get_ccr() & OPER_I_8());
 }
@@ -1833,6 +1866,9 @@ M68KMAKE_OP(asr, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = src >> shift;
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	if(GET_MSB_8(src))
 		res |= m68ki_shift_8_table[shift];
 
@@ -1852,6 +1888,9 @@ M68KMAKE_OP(asr, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = src >> shift;
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	if(GET_MSB_16(src))
 		res |= m68ki_shift_16_table[shift];
 
@@ -1871,6 +1910,9 @@ M68KMAKE_OP(asr, 32, s, .)
 	uint src = *r_dst;
 	uint res = src >> shift;
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	if(GET_MSB_32(src))
 		res |= m68ki_shift_32_table[shift];
 
@@ -2064,6 +2106,9 @@ M68KMAKE_OP(asl, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = MASK_OUT_ABOVE_8(src << shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_8(*r_dst) | res;
 
 	FLAG_X = FLAG_C = src << shift;
@@ -2081,6 +2126,9 @@ M68KMAKE_OP(asl, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = MASK_OUT_ABOVE_16(src << shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_16(*r_dst) | res;
 
 	FLAG_N = NFLAG_16(res);
@@ -2098,6 +2146,9 @@ M68KMAKE_OP(asl, 32, s, .)
 	uint src = *r_dst;
 	uint res = MASK_OUT_ABOVE_32(src << shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_N = NFLAG_32(res);
@@ -2277,7 +2328,16 @@ M68KMAKE_OP(bcc, 32, ., .)
 		REG_PC += 4;
 		return;
 	}
-	m68ki_exception_illegal();
+	else
+	{
+		if(M68KMAKE_CC)
+		{
+			m68ki_trace_t0();			   /* auto-disable (see m68kcpu.h) */
+			m68ki_branch_8(MASK_OUT_ABOVE_8(REG_IR));
+			return;
+		}
+		USE_CYCLES(CYC_BCC_NOTAKE_B);
+	}
 }
 
 
@@ -3121,7 +3181,13 @@ M68KMAKE_OP(bra, 32, ., .)
 			USE_ALL_CYCLES();
 		return;
 	}
-	m68ki_exception_illegal();
+	else
+	{
+		m68ki_trace_t0();				   /* auto-disable (see m68kcpu.h) */
+		m68ki_branch_8(MASK_OUT_ABOVE_8(REG_IR));
+		if(REG_PC == REG_PPC)
+			USE_ALL_CYCLES();
+	}
 }
 
 
@@ -3196,7 +3262,12 @@ M68KMAKE_OP(bsr, 32, ., .)
 		m68ki_branch_32(offset);
 		return;
 	}
-	m68ki_exception_illegal();
+	else
+	{
+		m68ki_trace_t0();				   /* auto-disable (see m68kcpu.h) */
+		m68ki_push_32(REG_PC);
+		m68ki_branch_8(MASK_OUT_ABOVE_8(REG_IR));
+	}
 }
 
 
@@ -3369,8 +3440,8 @@ M68KMAKE_OP(cas2, 16, ., .)
 				return;
 			}
 		}
-		*compare1 = BIT_1F(word2) ? MAKE_INT_16(dest1) : MASK_OUT_BELOW_16(*compare1) | dest1;
-		*compare2 = BIT_F(word2) ? MAKE_INT_16(dest2) : MASK_OUT_BELOW_16(*compare2) | dest2;
+		*compare1 = BIT_1F(word2) ? (uint)MAKE_INT_16(dest1) : MASK_OUT_BELOW_16(*compare1) | dest1;
+		*compare2 = BIT_F(word2) ? (uint)MAKE_INT_16(dest2) : MASK_OUT_BELOW_16(*compare2) | dest2;
 		return;
 	}
 	m68ki_exception_illegal();
@@ -3509,28 +3580,24 @@ M68KMAKE_OP(chk2cmp2, 8, ., pcdi)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15]&0xff;
+		sint compare = REG_DA[(word2 >> 12) & 15]&0xff;
 		uint ea = EA_PCDI_8();
-		uint lower_bound = m68ki_read_pcrel_8(ea);
-		uint upper_bound = m68ki_read_pcrel_8(ea + 1);
+		sint lower_bound = m68ki_read_pcrel_8(ea);
+		sint upper_bound = m68ki_read_pcrel_8(ea + 1);
 
 		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_8(compare) - MAKE_INT_8(lower_bound);
-		else
-			FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+			compare = (int32)(int8)compare;
+      
+		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
+
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
 
-		FLAG_C = upper_bound - compare;
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
 	}
+
+      
 	m68ki_exception_illegal();
 }
 
@@ -3540,27 +3607,21 @@ M68KMAKE_OP(chk2cmp2, 8, ., pcix)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15]&0xff;
+		sint compare = REG_DA[(word2 >> 12) & 15]&0xff;
 		uint ea = EA_PCIX_8();
-		uint lower_bound = m68ki_read_pcrel_8(ea);
-		uint upper_bound = m68ki_read_pcrel_8(ea + 1);
+		sint lower_bound = m68ki_read_pcrel_8(ea);
+		sint upper_bound = m68ki_read_pcrel_8(ea + 1);
 
 		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_8(compare) - MAKE_INT_8(lower_bound);
-		else
-			FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+			compare = (int32)(int8)compare;
+		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||, faster operation short circuits
+
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
 
-		FLAG_C = upper_bound - compare;
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
+
 	}
 	m68ki_exception_illegal();
 }
@@ -3571,24 +3632,18 @@ M68KMAKE_OP(chk2cmp2, 8, ., .)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15]&0xff;
+		sint compare = REG_DA[(word2 >> 12) & 15]&0xff;
 		uint ea = M68KMAKE_GET_EA_AY_8;
-		uint lower_bound = m68ki_read_8(ea);
-		uint upper_bound = m68ki_read_8(ea + 1);
+		sint lower_bound = (int8)m68ki_read_8(ea);
+		sint upper_bound = (int8)m68ki_read_8(ea + 1);
 
 		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_8(compare) - MAKE_INT_8(lower_bound);
-		else
-			FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+			compare = (int32)(int8)compare;
+      
+ 		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
+
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
 
-		FLAG_C = upper_bound - compare;
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -3602,29 +3657,17 @@ M68KMAKE_OP(chk2cmp2, 16, ., pcdi)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15]&0xffff;
+		sint compare = REG_DA[(word2 >> 12) & 15]&0xffff;
 		uint ea = EA_PCDI_16();
-		uint lower_bound = m68ki_read_pcrel_16(ea);
-		uint upper_bound = m68ki_read_pcrel_16(ea + 2);
+		sint lower_bound = (int16)m68ki_read_pcrel_16(ea);
+		sint upper_bound = (int16)m68ki_read_pcrel_16(ea + 2);
 
 		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_16(compare) - MAKE_INT_16(lower_bound);
-		else
-			FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		FLAG_C = CFLAG_16(FLAG_C);
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+			compare = (int32)(int16)compare;
+ 		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
+
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
 
-		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_16(upper_bound) - MAKE_INT_16(compare);
-		else
-			FLAG_C = upper_bound - compare;
-		FLAG_C = CFLAG_16(FLAG_C);
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -3638,29 +3681,17 @@ M68KMAKE_OP(chk2cmp2, 16, ., pcix)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15]&0xffff;
+		sint compare = REG_DA[(word2 >> 12) & 15]&0xffff;
 		uint ea = EA_PCIX_16();
-		uint lower_bound = m68ki_read_pcrel_16(ea);
-		uint upper_bound = m68ki_read_pcrel_16(ea + 2);
+		sint lower_bound = (int16)m68ki_read_pcrel_16(ea);
+		sint upper_bound = (int16)m68ki_read_pcrel_16(ea + 2);
 
 		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_16(compare) - MAKE_INT_16(lower_bound);
-		else
-			FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		FLAG_C = CFLAG_16(FLAG_C);
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+			compare = (int32)(int16)compare;
+ 		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
+
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
 
-		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_16(upper_bound) - MAKE_INT_16(compare);
-		else
-			FLAG_C = upper_bound - compare;
-		FLAG_C = CFLAG_16(FLAG_C);
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -3674,30 +3705,17 @@ M68KMAKE_OP(chk2cmp2, 16, ., .)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15]&0xffff;
+		sint compare = REG_DA[(word2 >> 12) & 15]&0xffff;
 		uint ea = M68KMAKE_GET_EA_AY_16;
-		uint lower_bound = m68ki_read_16(ea);
-		uint upper_bound = m68ki_read_16(ea + 2);
+		sint lower_bound = (int16)m68ki_read_16(ea);
+		sint upper_bound = (int16)m68ki_read_16(ea + 2);
 
 		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_16(compare) - MAKE_INT_16(lower_bound);
-		else
-			FLAG_C = compare - lower_bound;
+			compare = (int32)(int16)compare;
+ 		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
 
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		FLAG_C = CFLAG_16(FLAG_C);
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
-		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_16(upper_bound) - MAKE_INT_16(compare);
-		else
-			FLAG_C = upper_bound - compare;
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
 
-		FLAG_C = CFLAG_16(FLAG_C);
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -3711,23 +3729,15 @@ M68KMAKE_OP(chk2cmp2, 32, ., pcdi)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15];
+		sint compare = REG_DA[(word2 >> 12) & 15];
 		uint ea = EA_PCDI_32();
-		uint lower_bound = m68ki_read_pcrel_32(ea);
-		uint upper_bound = m68ki_read_pcrel_32(ea + 4);
+		sint lower_bound = m68ki_read_pcrel_32(ea);
+		sint upper_bound = m68ki_read_pcrel_32(ea + 4);
 
-		FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		FLAG_C = CFLAG_SUB_32(lower_bound, compare, FLAG_C);
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
 
-		FLAG_C = upper_bound - compare;
-		FLAG_C = CFLAG_SUB_32(compare, upper_bound, FLAG_C);
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
+    
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -3741,23 +3751,15 @@ M68KMAKE_OP(chk2cmp2, 32, ., pcix)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15];
+		sint compare = REG_DA[(word2 >> 12) & 15];
 		uint ea = EA_PCIX_32();
-		uint lower_bound = m68ki_read_pcrel_32(ea);
-		uint upper_bound = m68ki_read_pcrel_32(ea + 4);
+		sint lower_bound = m68ki_read_32(ea);
+		sint upper_bound = m68ki_read_32(ea + 4);
 
-		FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		FLAG_C = CFLAG_SUB_32(lower_bound, compare, FLAG_C);
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
 
-		FLAG_C = upper_bound - compare;
-		FLAG_C = CFLAG_SUB_32(compare, upper_bound, FLAG_C);
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
+    
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -3769,25 +3771,18 @@ M68KMAKE_OP(chk2cmp2, 32, ., pcix)
 M68KMAKE_OP(chk2cmp2, 32, ., .)
 {
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
-	{
+	{   
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15];
+		// JFF changed the logic. chk2/cmp2 uses signed values, not unsigned
+		sint compare = REG_DA[(word2 >> 12) & 15];
 		uint ea = M68KMAKE_GET_EA_AY_32;
-		uint lower_bound = m68ki_read_32(ea);
-		uint upper_bound = m68ki_read_32(ea + 4);
+		sint lower_bound = m68ki_read_32(ea);
+		sint upper_bound = m68ki_read_32(ea + 4);
 
-		FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		FLAG_C = CFLAG_SUB_32(lower_bound, compare, FLAG_C);
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
 
-		FLAG_C = upper_bound - compare;
-		FLAG_C = CFLAG_SUB_32(compare, upper_bound, FLAG_C);
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
+    
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -4174,6 +4169,7 @@ M68KMAKE_OP(cmpi, 32, ., d)
 	uint dst = DY;
 	uint res = dst - src;
 
+	m68ki_cmpild_callback(src, REG_IR & 7);		   /* auto-disable (see m68kcpu.h) */
 	FLAG_N = NFLAG_32(res);
 	FLAG_Z = MASK_OUT_ABOVE_32(res);
 	FLAG_V = VFLAG_SUB_32(src, dst, res);
@@ -4367,6 +4363,8 @@ M68KMAKE_OP(cptrapcc, 32, ., .)
 		M68K_DO_LOG((M68K_LOG_FILEHANDLE "%s at %08x: called unimplemented instruction %04x (%s)\n",
 					 m68ki_cpu_names[CPU_TYPE], ADDRESS_68K(REG_PC - 2), REG_IR,
 					 m68k_disassemble_quick(ADDRESS_68K(REG_PC - 2))));
+    // JFF: unsupported, but at least if the trap doesn't occur, app should still work, so at least PC increase is correct
+    REG_PC += 4;  
 		return;
 	}
 	m68ki_exception_1111();
@@ -5127,7 +5125,7 @@ M68KMAKE_OP(eori, 32, ., .)
 }
 
 
-M68KMAKE_OP(eori, 8, toc, .)
+M68KMAKE_OP(eori, 16, toc, .)
 {
 	m68ki_set_ccr(m68ki_get_ccr() ^ OPER_I_8());
 }
@@ -5302,6 +5300,9 @@ M68KMAKE_OP(lsr, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = src >> shift;
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_8(*r_dst) | res;
 
 	FLAG_N = NFLAG_CLEAR;
@@ -5318,6 +5319,9 @@ M68KMAKE_OP(lsr, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = src >> shift;
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_16(*r_dst) | res;
 
 	FLAG_N = NFLAG_CLEAR;
@@ -5334,6 +5338,9 @@ M68KMAKE_OP(lsr, 32, s, .)
 	uint src = *r_dst;
 	uint res = src >> shift;
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_N = NFLAG_CLEAR;
@@ -5475,6 +5482,9 @@ M68KMAKE_OP(lsl, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = MASK_OUT_ABOVE_8(src << shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_8(*r_dst) | res;
 
 	FLAG_N = NFLAG_8(res);
@@ -5491,6 +5501,9 @@ M68KMAKE_OP(lsl, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = MASK_OUT_ABOVE_16(src << shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_16(*r_dst) | res;
 
 	FLAG_N = NFLAG_16(res);
@@ -5507,6 +5520,9 @@ M68KMAKE_OP(lsl, 32, s, .)
 	uint src = *r_dst;
 	uint res = MASK_OUT_ABOVE_32(src << shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_N = NFLAG_32(res);
@@ -6388,7 +6404,8 @@ M68KMAKE_OP(move, 32, pd, d)
 	uint res = DY;
 	uint ea = EA_AX_PD_32();
 
-	m68ki_write_32(ea, res);
+	m68ki_write_16(ea+2, res & 0xFFFF );
+	m68ki_write_16(ea, (res >> 16) & 0xFFFF );
 
 	FLAG_N = NFLAG_32(res);
 	FLAG_Z = res;
@@ -6402,7 +6419,8 @@ M68KMAKE_OP(move, 32, pd, a)
 	uint res = AY;
 	uint ea = EA_AX_PD_32();
 
-	m68ki_write_32(ea, res);
+	m68ki_write_16(ea+2, res & 0xFFFF );
+	m68ki_write_16(ea, (res >> 16) & 0xFFFF );
 
 	FLAG_N = NFLAG_32(res);
 	FLAG_Z = res;
@@ -6416,7 +6434,8 @@ M68KMAKE_OP(move, 32, pd, .)
 	uint res = M68KMAKE_GET_OPER_AY_32;
 	uint ea = EA_AX_PD_32();
 
-	m68ki_write_32(ea, res);
+	m68ki_write_16(ea+2, res & 0xFFFF );
+	m68ki_write_16(ea, (res >> 16) & 0xFFFF );
 
 	FLAG_N = NFLAG_32(res);
 	FLAG_Z = res;
@@ -6787,6 +6806,70 @@ M68KMAKE_OP(movec, 32, cr, .)
 				}
 				m68ki_exception_illegal();
 				return;
+			case 0x003:				/* TC */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x004:				/* ITT0 */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x005:				/* ITT1 */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x006:				/* DTT0 */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x007:				/* DTT1 */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x805:				/* MMUSR */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x806:				/* URP */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x807:				/* SRP */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
 			default:
 				m68ki_exception_illegal();
 				return;
@@ -6817,9 +6900,22 @@ M68KMAKE_OP(movec, 32, rc, .)
 				REG_DFC = REG_DA[(word2 >> 12) & 15] & 7;
 				return;
 			case 0x002:			   /* CACR */
+				/* Only EC020 and later have CACR */
 				if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 				{
-					REG_CACR = REG_DA[(word2 >> 12) & 15];
+					/* 68030 can write all bits except 5-7, 040 can write all */
+					if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+					{
+						REG_CACR = REG_DA[(word2 >> 12) & 15];
+					}
+					else if (CPU_TYPE_IS_030_PLUS(CPU_TYPE))
+					{
+						REG_CACR = REG_DA[(word2 >> 12) & 15] & 0xff1f;
+					}
+					else
+					{
+						REG_CACR = REG_DA[(word2 >> 12) & 15] & 0x0f;
+					}
 					return;
 				}
 				m68ki_exception_illegal();
@@ -6865,6 +6961,70 @@ M68KMAKE_OP(movec, 32, rc, .)
 				}
 				m68ki_exception_illegal();
 				return;
+			case 0x003:			/* TC */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x004:			/* ITT0 */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x005:			/* ITT1 */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x006:			/* DTT0 */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x007:			/* DTT1 */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x805:			/* MMUSR */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x806:			/* URP */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x807:			/* SRP */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
 			default:
 				m68ki_exception_illegal();
 				return;
@@ -6927,7 +7087,8 @@ M68KMAKE_OP(movem, 32, re, pd)
 		if(register_list & (1 << i))
 		{
 			ea -= 4;
-			m68ki_write_32(ea, REG_DA[15-i]);
+			m68ki_write_16(ea+2, REG_DA[15-i] & 0xFFFF );
+			m68ki_write_16(ea, (REG_DA[15-i] >> 16) & 0xFFFF );
 			count++;
 		}
 	AY = ea;
@@ -7260,6 +7421,22 @@ M68KMAKE_OP(moveq, 32, ., .)
 }
 
 
+M68KMAKE_OP(move16, 32, ., .)
+{
+	uint16 w2 = OPER_I_16();
+	int ax = REG_IR & 7;
+	int ay = (w2 >> 12) & 7;
+
+	m68ki_write_32(REG_A[ay],    m68ki_read_32(REG_A[ax]));
+	m68ki_write_32(REG_A[ay]+4,  m68ki_read_32(REG_A[ax]+4));
+	m68ki_write_32(REG_A[ay]+8,  m68ki_read_32(REG_A[ax]+8));
+	m68ki_write_32(REG_A[ay]+12, m68ki_read_32(REG_A[ax]+12));
+
+	REG_A[ax] += 16;
+	REG_A[ay] += 16;
+}
+
+
 M68KMAKE_OP(muls, 16, ., d)
 {
 	uint* r_dst = &DX;
@@ -8090,7 +8267,7 @@ M68KMAKE_OP(ori, 32, ., .)
 }
 
 
-M68KMAKE_OP(ori, 8, toc, .)
+M68KMAKE_OP(ori, 16, toc, .)
 {
 	m68ki_set_ccr(m68ki_get_ccr() | OPER_I_8());
 }
@@ -8198,6 +8375,27 @@ M68KMAKE_OP(pea, 32, ., .)
 	m68ki_push_32(ea);
 }
 
+M68KMAKE_OP(pflush, 32, ., .)
+{
+	if ((CPU_TYPE_IS_EC020_PLUS(CPU_TYPE)) && (HAS_PMMU))
+	{
+		fprintf(stderr,"68040: unhandled PFLUSH\n");
+		return;
+	}
+	m68ki_exception_1111();
+}
+
+M68KMAKE_OP(pmmu, 32, ., .)
+{
+	if ((CPU_TYPE_IS_EC020_PLUS(CPU_TYPE)) && (HAS_PMMU))
+	{
+		m68881_mmu_ops();
+	}
+	else
+	{
+		m68ki_exception_1111();
+	}
+}
 
 M68KMAKE_OP(reset, 0, ., .)
 {
@@ -8219,6 +8417,9 @@ M68KMAKE_OP(ror, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = ROR_8(src, shift);
 
+	if(orig_shift != 0)
+		USE_CYCLES(orig_shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_8(*r_dst) | res;
 
 	FLAG_N = NFLAG_8(res);
@@ -8235,6 +8436,9 @@ M68KMAKE_OP(ror, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = ROR_16(src, shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_16(*r_dst) | res;
 
 	FLAG_N = NFLAG_16(res);
@@ -8251,6 +8455,9 @@ M68KMAKE_OP(ror, 32, s, .)
 	uint64 src = *r_dst;
 	uint res = ROR_32(src, shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_N = NFLAG_32(res);
@@ -8364,6 +8571,9 @@ M68KMAKE_OP(rol, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = ROL_8(src, shift);
 
+	if(orig_shift != 0)
+		USE_CYCLES(orig_shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_8(*r_dst) | res;
 
 	FLAG_N = NFLAG_8(res);
@@ -8380,6 +8590,9 @@ M68KMAKE_OP(rol, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = ROL_16(src, shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_16(*r_dst) | res;
 
 	FLAG_N = NFLAG_16(res);
@@ -8396,6 +8609,9 @@ M68KMAKE_OP(rol, 32, s, .)
 	uint64 src = *r_dst;
 	uint res = ROL_32(src, shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_N = NFLAG_32(res);
@@ -8489,7 +8705,7 @@ M68KMAKE_OP(rol, 32, r, .)
 
 		*r_dst = res;
 
-		FLAG_C = (src >> (32 - shift)) << 8;
+		FLAG_C = (src >> ((32 - shift) & 0x1f)) << 8;
 		FLAG_N = NFLAG_32(res);
 		FLAG_Z = res;
 		FLAG_V = VFLAG_CLEAR;
@@ -8525,6 +8741,9 @@ M68KMAKE_OP(roxr, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = ROR_9(src | (XFLAG_AS_1() << 8), shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	FLAG_C = FLAG_X = res;
 	res = MASK_OUT_ABOVE_8(res);
 
@@ -8543,6 +8762,9 @@ M68KMAKE_OP(roxr, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = ROR_17(src | (XFLAG_AS_1() << 16), shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	FLAG_C = FLAG_X = res >> 8;
 	res = MASK_OUT_ABOVE_16(res);
 
@@ -8563,6 +8785,9 @@ M68KMAKE_OP(roxr, 32, s, .)
 	uint64 src   = *r_dst;
 	uint64 res   = src | (((uint64)XFLAG_AS_1()) << 32);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	res = ROR_33_64(res, shift);
 
 	FLAG_C = FLAG_X = res >> 24;
@@ -8582,6 +8807,9 @@ M68KMAKE_OP(roxr, 32, s, .)
 	uint res = MASK_OUT_ABOVE_32((ROR_33(src, shift) & ~(1 << (32 - shift))) | (XFLAG_AS_1() << (32 - shift)));
 	uint new_x_flag = src & (1 << (shift - 1));
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_C = FLAG_X = (new_x_flag != 0)<<8;
@@ -8737,6 +8965,9 @@ M68KMAKE_OP(roxl, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = ROL_9(src | (XFLAG_AS_1() << 8), shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	FLAG_C = FLAG_X = res;
 	res = MASK_OUT_ABOVE_8(res);
 
@@ -8755,6 +8986,9 @@ M68KMAKE_OP(roxl, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = ROL_17(src | (XFLAG_AS_1() << 16), shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	FLAG_C = FLAG_X = res >> 8;
 	res = MASK_OUT_ABOVE_16(res);
 
@@ -8775,6 +9009,9 @@ M68KMAKE_OP(roxl, 32, s, .)
 	uint64 src   = *r_dst;
 	uint64 res   = src | (((uint64)XFLAG_AS_1()) << 32);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	res = ROL_33_64(res, shift);
 
 	FLAG_C = FLAG_X = res >> 24;
@@ -8794,6 +9031,9 @@ M68KMAKE_OP(roxl, 32, s, .)
 	uint res = MASK_OUT_ABOVE_32((ROL_33(src, shift) & ~(1 << (shift - 1))) | (XFLAG_AS_1() << (shift - 1)));
 	uint new_x_flag = src & (1 << (32 - shift));
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_C = FLAG_X = (new_x_flag != 0)<<8;
@@ -8966,6 +9206,7 @@ M68KMAKE_OP(rte, 32, ., .)
 		uint new_pc;
 		uint format_word;
 
+		m68ki_rte_callback();		   /* auto-disable (see m68kcpu.h) */
 		m68ki_trace_t0();			   /* auto-disable (see m68kcpu.h) */
 
 		if(CPU_TYPE_IS_000(CPU_TYPE))
@@ -8994,10 +9235,36 @@ M68KMAKE_OP(rte, 32, ., .)
 				CPU_INSTR_MODE = INSTRUCTION_YES;
 				CPU_RUN_MODE = RUN_MODE_NORMAL;
 				return;
+			} else if (format_word == 8) {
+				/* Format 8 stack frame -- 68010 only. 29 word bus/address error */
+				new_sr = m68ki_pull_16();
+				new_pc = m68ki_pull_32();
+				m68ki_fake_pull_16();	/* format word */
+				m68ki_fake_pull_16();	/* special status word */
+				m68ki_fake_pull_32();	/* fault address */
+				m68ki_fake_pull_16();	/* unused/reserved */
+				m68ki_fake_pull_16();	/* data output buffer */
+				m68ki_fake_pull_16();	/* unused/reserved */
+				m68ki_fake_pull_16();	/* data input buffer */
+				m68ki_fake_pull_16();	/* unused/reserved */
+				m68ki_fake_pull_16();	/* instruction input buffer */
+				m68ki_fake_pull_32();	/* internal information, 16 words */
+				m68ki_fake_pull_32();	/* (actually, we use 8 DWORDs) */
+				m68ki_fake_pull_32();
+				m68ki_fake_pull_32();
+				m68ki_fake_pull_32();
+				m68ki_fake_pull_32();
+				m68ki_fake_pull_32();
+				m68ki_fake_pull_32();
+				m68ki_jump(new_pc);
+				m68ki_set_sr(new_sr);
+				CPU_INSTR_MODE = INSTRUCTION_YES;
+				CPU_RUN_MODE = RUN_MODE_NORMAL;
+				return;
 			}
 			CPU_INSTR_MODE = INSTRUCTION_YES;
 			CPU_RUN_MODE = RUN_MODE_NORMAL;
-			/* Not handling bus fault (9) */
+			/* Not handling other exception types (9) */
 			m68ki_exception_format_error();
 			return;
 		}
@@ -9456,8 +9723,8 @@ M68KMAKE_OP(suba, 16, ., a)
 
 M68KMAKE_OP(suba, 16, ., .)
 {
-	signed short src = MAKE_INT_16(M68KMAKE_GET_OPER_AY_16);
 	uint* r_dst = &AX;
+	uint src = MAKE_INT_16(M68KMAKE_GET_OPER_AY_16);
 
 	*r_dst = MASK_OUT_ABOVE_32(*r_dst - src);
 }
@@ -9888,12 +10155,20 @@ M68KMAKE_OP(tas, 8, ., .)
 {
 	uint ea = M68KMAKE_GET_EA_AY_8;
 	uint dst = m68ki_read_8(ea);
+	uint allow_writeback;
 
 	FLAG_Z = dst;
 	FLAG_N = NFLAG_8(dst);
 	FLAG_V = VFLAG_CLEAR;
 	FLAG_C = CFLAG_CLEAR;
-	m68ki_write_8(ea, dst | 0x80);
+
+	/* The Genesis/Megadrive games Gargoyles and Ex-Mutants need the TAS writeback
+       disabled in order to function properly.  Some Amiga software may also rely
+       on this, but only when accessing specific addresses so additional functionality
+       will be needed. */
+	allow_writeback = m68ki_tas_callback();
+
+	if (allow_writeback==1) m68ki_write_8(ea, dst | 0x80);
 }
 
 
@@ -9919,6 +10194,7 @@ M68KMAKE_OP(trapt, 16, ., .)
 {
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
+        REG_PC += 2; // JFF else stackframe & return addresses are incorrect
 		m68ki_exception_trap(EXCEPTION_TRAPV);	/* HJB 990403 */
 		return;
 	}
@@ -9930,6 +10206,7 @@ M68KMAKE_OP(trapt, 32, ., .)
 {
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
+        REG_PC += 4; // JFF else stackframe & return addresses are incorrect
 		m68ki_exception_trap(EXCEPTION_TRAPV);	/* HJB 990403 */
 		return;
 	}
@@ -9985,12 +10262,13 @@ M68KMAKE_OP(trapcc, 16, ., .)
 {
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
+     	REG_PC += 2;  /* JFF increase before or 1) stackframe is incorrect 2) RTE address is wrong if trap is taken */
 		if(M68KMAKE_CC)
 		{
 			m68ki_exception_trap(EXCEPTION_TRAPV);	/* HJB 990403 */
 			return;
 		}
-		REG_PC += 2;
+
 		return;
 	}
 	m68ki_exception_illegal();
@@ -10001,12 +10279,12 @@ M68KMAKE_OP(trapcc, 32, ., .)
 {
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
+		REG_PC += 4;  /* JFF increase before or 1) stackframe is incorrect 2) RTE address is wrong if trap is taken */
 		if(M68KMAKE_CC)
 		{
 			m68ki_exception_trap(EXCEPTION_TRAPV);	/* HJB 990403 */
 			return;
 		}
-		REG_PC += 4;
 		return;
 	}
 	m68ki_exception_illegal();
diff --git a/plat/linux68k/emu/musashi/m68kconf.h b/plat/linux68k/emu/musashi/m68kconf.h
index 7edd073aa..8844952df 100755
--- a/plat/linux68k/emu/musashi/m68kconf.h
+++ b/plat/linux68k/emu/musashi/m68kconf.h
@@ -3,10 +3,10 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 3.32
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -52,7 +52,7 @@
  * to OPT_ON and use m68kmame.h to configure the 68k core.
  */
 #ifndef M68K_COMPILE_FOR_MAME
-#define M68K_COMPILE_FOR_MAME      OPT_ON
+#define M68K_COMPILE_FOR_MAME      OPT_OFF
 #endif /* M68K_COMPILE_FOR_MAME */
 
 
@@ -67,6 +67,8 @@
 #define M68K_EMULATE_010            OPT_ON
 #define M68K_EMULATE_EC020          OPT_ON
 #define M68K_EMULATE_020            OPT_ON
+#define M68K_EMULATE_030            OPT_ON
+#define M68K_EMULATE_040            OPT_ON
 
 
 /* If ON, the CPU will call m68k_read_immediate_xx() for immediate addressing
@@ -109,6 +111,35 @@
 #define M68K_EMULATE_RESET          OPT_OFF
 #define M68K_RESET_CALLBACK()       your_reset_handler_function()
 
+/* If ON, CPU will call the callback when it encounters a cmpi.l #v, dn
+ * instruction.
+ */
+#define M68K_CMPILD_HAS_CALLBACK     OPT_OFF
+#define M68K_CMPILD_CALLBACK(v,r)    your_cmpild_handler_function(v,r)
+
+
+/* If ON, CPU will call the callback when it encounters a rte
+ * instruction.
+ */
+#define M68K_RTE_HAS_CALLBACK       OPT_OFF
+#define M68K_RTE_CALLBACK()         your_rte_handler_function()
+
+/* If ON, CPU will call the callback when it encounters a tas
+ * instruction.
+ */
+#define M68K_TAS_HAS_CALLBACK       OPT_OFF
+#define M68K_TAS_CALLBACK()         your_tas_handler_function()
+
+/* If ON, CPU will call the callback when it encounters an illegal instruction
+ * passing the opcode as argument. If the callback returns 1, then it's considered
+ * as a normal instruction, and the illegal exception in canceled. If it returns 0,
+ * the exception occurs normally.
+ * The callback looks like int callback(int opcode)
+ * You should put OPT_SPECIFY_HANDLER here if you cant to use it, otherwise it will
+ * use a dummy default handler and you'll have to call m68k_set_illg_instr_callback explicitely
+ */
+#define M68K_ILLG_HAS_CALLBACK	    OPT_OFF
+#define M68K_ILLG_CALLBACK(opcode)  op_illg(opcode)
 
 /* If ON, CPU will call the set fc callback on every memory access to
  * differentiate between user/supervisor, program/data access like a real
@@ -119,7 +150,6 @@
 #define M68K_EMULATE_FC             OPT_OFF
 #define M68K_SET_FC_CALLBACK(A)     your_set_fc_handler_function(A)
 
-
 /* If ON, CPU will call the pc changed callback when it changes the PC by a
  * large value.  This allows host programs to be nicer when it comes to
  * fetching immediate data and instructions on a banked memory system.
@@ -132,7 +162,7 @@
  * instruction.
  */
 #define M68K_INSTRUCTION_HOOK       OPT_OFF
-#define M68K_INSTRUCTION_CALLBACK() your_instruction_hook_function()
+#define M68K_INSTRUCTION_CALLBACK(pc) your_instruction_hook_function(pc)
 
 
 /* If ON, the CPU will emulate the 4-byte prefetch queue of a real 68000 */
@@ -154,6 +184,9 @@
 #define M68K_LOG_1010_1111          OPT_OFF
 #define M68K_LOG_FILEHANDLE         some_file_handle
 
+/* Emulate PMMU : if you enable this, there will be a test to see if the current chip has some enabled pmmu added to every memory access,
+ * so enable this only if it's useful */
+#define M68K_EMULATE_PMMU   OPT_ON
 
 /* ----------------------------- COMPATIBILITY ---------------------------- */
 
@@ -165,21 +198,11 @@
 /* If ON, the enulation core will use 64-bit integers to speed up some
  * operations.
 */
-#define M68K_USE_64_BIT  OPT_OFF
+#define M68K_USE_64_BIT  OPT_ON
 
 
-/* Set to your compiler's static inline keyword to enable it, or
- * set it to blank to disable it.
- * If you define INLINE in the makefile, it will override this value.
- * NOTE: not enabling inline functions will SEVERELY slow down emulation.
- */
-#ifndef INLINE
-#define INLINE static __inline__
-#endif /* INLINE */
-
 #endif /* M68K_COMPILE_FOR_MAME */
 
-
 /* ======================================================================== */
 /* ============================== END OF FILE ============================= */
 /* ======================================================================== */
diff --git a/plat/linux68k/emu/musashi/m68kcpu.c b/plat/linux68k/emu/musashi/m68kcpu.c
index 89820c0f0..040980abd 100755
--- a/plat/linux68k/emu/musashi/m68kcpu.c
+++ b/plat/linux68k/emu/musashi/m68kcpu.c
@@ -3,10 +3,10 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 4.60
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -38,9 +38,19 @@
 /* ================================ INCLUDES ============================== */
 /* ======================================================================== */
 
+extern void m68040_fpu_op0(void);
+extern void m68040_fpu_op1(void);
+extern void m68881_mmu_ops();
+extern unsigned char m68ki_cycles[][0x10000];
+extern void (*m68ki_instruction_jump_table[0x10000])(void); /* opcode handler jump table */
+extern void m68ki_build_opcode_table(void);
+
 #include "m68kops.h"
 #include "m68kcpu.h"
 
+#include "m68kfpu.c"
+#include "m68kmmu.h" // uses some functions from m68kfpu.c which are static !
+
 /* ======================================================================== */
 /* ================================= DATA ================================= */
 /* ======================================================================== */
@@ -51,7 +61,7 @@ uint m68ki_tracing = 0;
 uint m68ki_address_space;
 
 #ifdef M68K_LOG_ENABLE
-char* m68ki_cpu_names[9] =
+const char *const m68ki_cpu_names[] =
 {
 	"Invalid CPU",
 	"M68000",
@@ -69,15 +79,21 @@ char* m68ki_cpu_names[9] =
 m68ki_cpu_core m68ki_cpu = {0};
 
 #if M68K_EMULATE_ADDRESS_ERROR
+#ifdef _BSD_SETJMP_H
+sigjmp_buf m68ki_aerr_trap;
+#else
 jmp_buf m68ki_aerr_trap;
+#endif
 #endif /* M68K_EMULATE_ADDRESS_ERROR */
 
 uint    m68ki_aerr_address;
 uint    m68ki_aerr_write_mode;
 uint    m68ki_aerr_fc;
 
+jmp_buf m68ki_bus_error_jmp_buf;
+
 /* Used by shift & rotate instructions */
-uint8 m68ki_shift_8_table[65] =
+const uint8 m68ki_shift_8_table[65] =
 {
 	0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff, 0xff, 0xff, 0xff,
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
@@ -86,7 +102,7 @@ uint8 m68ki_shift_8_table[65] =
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 	0xff, 0xff, 0xff, 0xff, 0xff
 };
-uint16 m68ki_shift_16_table[65] =
+const uint16 m68ki_shift_16_table[65] =
 {
 	0x0000, 0x8000, 0xc000, 0xe000, 0xf000, 0xf800, 0xfc00, 0xfe00, 0xff00,
 	0xff80, 0xffc0, 0xffe0, 0xfff0, 0xfff8, 0xfffc, 0xfffe, 0xffff, 0xffff,
@@ -97,7 +113,7 @@ uint16 m68ki_shift_16_table[65] =
 	0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
 	0xffff, 0xffff
 };
-uint m68ki_shift_32_table[65] =
+const uint m68ki_shift_32_table[65] =
 {
 	0x00000000, 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, 0xf8000000,
 	0xfc000000, 0xfe000000, 0xff000000, 0xff800000, 0xffc00000, 0xffe00000,
@@ -116,16 +132,16 @@ uint m68ki_shift_32_table[65] =
 /* Number of clock cycles to use for exception processing.
  * I used 4 for any vectors that are undocumented for processing times.
  */
-uint8 m68ki_exception_cycle_table[3][256] =
+const uint8 m68ki_exception_cycle_table[5][256] =
 {
 	{ /* 000 */
-		  4, /*  0: Reset - Initial Stack Pointer                      */
+		 40, /*  0: Reset - Initial Stack Pointer                      */
 		  4, /*  1: Reset - Initial Program Counter                    */
 		 50, /*  2: Bus Error                             (unemulated) */
 		 50, /*  3: Address Error                         (unemulated) */
 		 34, /*  4: Illegal Instruction                                */
-		 38, /*  5: Divide by Zero -- ASG: changed from 42             */
-		 40, /*  6: CHK -- ASG: chanaged from 44                       */
+		 38, /*  5: Divide by Zero                                     */
+		 40, /*  6: CHK                                                */
 		 34, /*  7: TRAPV                                              */
 		 34, /*  8: Privilege Violation                                */
 		 34, /*  9: Trace                                              */
@@ -151,7 +167,7 @@ uint8 m68ki_exception_cycle_table[3][256] =
 		 44, /* 29: Level 5 Interrupt Autovector                       */
 		 44, /* 30: Level 6 Interrupt Autovector                       */
 		 44, /* 31: Level 7 Interrupt Autovector                       */
-		 34, /* 32: TRAP #0 -- ASG: chanaged from 38                   */
+		 34, /* 32: TRAP #0                                            */
 		 34, /* 33: TRAP #1                                            */
 		 34, /* 34: TRAP #2                                            */
 		 34, /* 35: TRAP #3                                            */
@@ -192,7 +208,7 @@ uint8 m68ki_exception_cycle_table[3][256] =
 		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
 	},
 	{ /* 010 */
-		  4, /*  0: Reset - Initial Stack Pointer                      */
+		 40, /*  0: Reset - Initial Stack Pointer                      */
 		  4, /*  1: Reset - Initial Program Counter                    */
 		126, /*  2: Bus Error                             (unemulated) */
 		126, /*  3: Address Error                         (unemulated) */
@@ -336,10 +352,156 @@ uint8 m68ki_exception_cycle_table[3][256] =
 		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
 		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
 		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
+	},
+	{ /* 030 - not correct */
+		  4, /*  0: Reset - Initial Stack Pointer                      */
+		  4, /*  1: Reset - Initial Program Counter                    */
+		 50, /*  2: Bus Error                             (unemulated) */
+		 50, /*  3: Address Error                         (unemulated) */
+		 20, /*  4: Illegal Instruction                                */
+		 38, /*  5: Divide by Zero                                     */
+		 40, /*  6: CHK                                                */
+		 20, /*  7: TRAPV                                              */
+		 34, /*  8: Privilege Violation                                */
+		 25, /*  9: Trace                                              */
+		 20, /* 10: 1010                                               */
+		 20, /* 11: 1111                                               */
+		  4, /* 12: RESERVED                                           */
+		  4, /* 13: Coprocessor Protocol Violation        (unemulated) */
+		  4, /* 14: Format Error                                       */
+		 30, /* 15: Uninitialized Interrupt                            */
+		  4, /* 16: RESERVED                                           */
+		  4, /* 17: RESERVED                                           */
+		  4, /* 18: RESERVED                                           */
+		  4, /* 19: RESERVED                                           */
+		  4, /* 20: RESERVED                                           */
+		  4, /* 21: RESERVED                                           */
+		  4, /* 22: RESERVED                                           */
+		  4, /* 23: RESERVED                                           */
+		 30, /* 24: Spurious Interrupt                                 */
+		 30, /* 25: Level 1 Interrupt Autovector                       */
+		 30, /* 26: Level 2 Interrupt Autovector                       */
+		 30, /* 27: Level 3 Interrupt Autovector                       */
+		 30, /* 28: Level 4 Interrupt Autovector                       */
+		 30, /* 29: Level 5 Interrupt Autovector                       */
+		 30, /* 30: Level 6 Interrupt Autovector                       */
+		 30, /* 31: Level 7 Interrupt Autovector                       */
+		 20, /* 32: TRAP #0                                            */
+		 20, /* 33: TRAP #1                                            */
+		 20, /* 34: TRAP #2                                            */
+		 20, /* 35: TRAP #3                                            */
+		 20, /* 36: TRAP #4                                            */
+		 20, /* 37: TRAP #5                                            */
+		 20, /* 38: TRAP #6                                            */
+		 20, /* 39: TRAP #7                                            */
+		 20, /* 40: TRAP #8                                            */
+		 20, /* 41: TRAP #9                                            */
+		 20, /* 42: TRAP #10                                           */
+		 20, /* 43: TRAP #11                                           */
+		 20, /* 44: TRAP #12                                           */
+		 20, /* 45: TRAP #13                                           */
+		 20, /* 46: TRAP #14                                           */
+		 20, /* 47: TRAP #15                                           */
+		  4, /* 48: FP Branch or Set on Unknown Condition (unemulated) */
+		  4, /* 49: FP Inexact Result                     (unemulated) */
+		  4, /* 50: FP Divide by Zero                     (unemulated) */
+		  4, /* 51: FP Underflow                          (unemulated) */
+		  4, /* 52: FP Operand Error                      (unemulated) */
+		  4, /* 53: FP Overflow                           (unemulated) */
+		  4, /* 54: FP Signaling NAN                      (unemulated) */
+		  4, /* 55: FP Unimplemented Data Type            (unemulated) */
+		  4, /* 56: MMU Configuration Error               (unemulated) */
+		  4, /* 57: MMU Illegal Operation Error           (unemulated) */
+		  4, /* 58: MMU Access Level Violation Error      (unemulated) */
+		  4, /* 59: RESERVED                                           */
+		  4, /* 60: RESERVED                                           */
+		  4, /* 61: RESERVED                                           */
+		  4, /* 62: RESERVED                                           */
+		  4, /* 63: RESERVED                                           */
+		     /* 64-255: User Defined                                   */
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
+	},
+	{ /* 040 */ // TODO: these values are not correct
+		  4, /*  0: Reset - Initial Stack Pointer                      */
+		  4, /*  1: Reset - Initial Program Counter                    */
+		 50, /*  2: Bus Error                             (unemulated) */
+		 50, /*  3: Address Error                         (unemulated) */
+		 20, /*  4: Illegal Instruction                                */
+		 38, /*  5: Divide by Zero                                     */
+		 40, /*  6: CHK                                                */
+		 20, /*  7: TRAPV                                              */
+		 34, /*  8: Privilege Violation                                */
+		 25, /*  9: Trace                                              */
+		 20, /* 10: 1010                                               */
+		 20, /* 11: 1111                                               */
+		  4, /* 12: RESERVED                                           */
+		  4, /* 13: Coprocessor Protocol Violation        (unemulated) */
+		  4, /* 14: Format Error                                       */
+		 30, /* 15: Uninitialized Interrupt                            */
+		  4, /* 16: RESERVED                                           */
+		  4, /* 17: RESERVED                                           */
+		  4, /* 18: RESERVED                                           */
+		  4, /* 19: RESERVED                                           */
+		  4, /* 20: RESERVED                                           */
+		  4, /* 21: RESERVED                                           */
+		  4, /* 22: RESERVED                                           */
+		  4, /* 23: RESERVED                                           */
+		 30, /* 24: Spurious Interrupt                                 */
+		 30, /* 25: Level 1 Interrupt Autovector                       */
+		 30, /* 26: Level 2 Interrupt Autovector                       */
+		 30, /* 27: Level 3 Interrupt Autovector                       */
+		 30, /* 28: Level 4 Interrupt Autovector                       */
+		 30, /* 29: Level 5 Interrupt Autovector                       */
+		 30, /* 30: Level 6 Interrupt Autovector                       */
+		 30, /* 31: Level 7 Interrupt Autovector                       */
+		 20, /* 32: TRAP #0                                            */
+		 20, /* 33: TRAP #1                                            */
+		 20, /* 34: TRAP #2                                            */
+		 20, /* 35: TRAP #3                                            */
+		 20, /* 36: TRAP #4                                            */
+		 20, /* 37: TRAP #5                                            */
+		 20, /* 38: TRAP #6                                            */
+		 20, /* 39: TRAP #7                                            */
+		 20, /* 40: TRAP #8                                            */
+		 20, /* 41: TRAP #9                                            */
+		 20, /* 42: TRAP #10                                           */
+		 20, /* 43: TRAP #11                                           */
+		 20, /* 44: TRAP #12                                           */
+		 20, /* 45: TRAP #13                                           */
+		 20, /* 46: TRAP #14                                           */
+		 20, /* 47: TRAP #15                                           */
+		  4, /* 48: FP Branch or Set on Unknown Condition (unemulated) */
+		  4, /* 49: FP Inexact Result                     (unemulated) */
+		  4, /* 50: FP Divide by Zero                     (unemulated) */
+		  4, /* 51: FP Underflow                          (unemulated) */
+		  4, /* 52: FP Operand Error                      (unemulated) */
+		  4, /* 53: FP Overflow                           (unemulated) */
+		  4, /* 54: FP Signaling NAN                      (unemulated) */
+		  4, /* 55: FP Unimplemented Data Type            (unemulated) */
+		  4, /* 56: MMU Configuration Error               (unemulated) */
+		  4, /* 57: MMU Illegal Operation Error           (unemulated) */
+		  4, /* 58: MMU Access Level Violation Error      (unemulated) */
+		  4, /* 59: RESERVED                                           */
+		  4, /* 60: RESERVED                                           */
+		  4, /* 61: RESERVED                                           */
+		  4, /* 62: RESERVED                                           */
+		  4, /* 63: RESERVED                                           */
+		     /* 64-255: User Defined                                   */
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
 	}
 };
 
-uint8 m68ki_ea_idx_cycle_table[64] =
+const uint8 m68ki_ea_idx_cycle_table[64] =
 {
 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 	 0, /* ..01.000 no memory indirect, base NULL             */
@@ -390,6 +552,31 @@ static void default_reset_instr_callback(void)
 {
 }
 
+/* Called when a cmpi.l #v, dn instruction is executed */
+static void default_cmpild_instr_callback(unsigned int val, int reg)
+{
+	(void)val;
+	(void)reg;
+}
+
+/* Called when a rte instruction is executed */
+static void default_rte_instr_callback(void)
+{
+}
+
+/* Called when a tas instruction is executed */
+static int default_tas_instr_callback(void)
+{
+	return 1; // allow writeback
+}
+
+/* Called when an illegal instruction is encountered */
+static int default_illg_instr_callback(int opcode)
+{
+	(void)opcode;
+	return 0; // not handled : exception will occur
+}
+
 /* Called when the program counter changed by a large value */
 static unsigned int default_pc_changed_callback_data;
 static void default_pc_changed_callback(unsigned int new_pc)
@@ -405,8 +592,9 @@ static void default_set_fc_callback(unsigned int new_fc)
 }
 
 /* Called every instruction cycle prior to execution */
-static void default_instr_hook_callback(void)
+static void default_instr_hook_callback(unsigned int pc)
 {
+	(void)pc;
 }
 
 
@@ -415,7 +603,6 @@ static void default_instr_hook_callback(void)
 	jmp_buf m68ki_aerr_trap;
 #endif /* M68K_EMULATE_ADDRESS_ERROR */
 
-
 /* ======================================================================== */
 /* ================================= API ================================== */
 /* ======================================================================== */
@@ -474,6 +661,7 @@ unsigned int m68k_get_reg(void* context, m68k_register_t regnum)
 				case CPU_TYPE_010:		return (unsigned int)M68K_CPU_TYPE_68010;
 				case CPU_TYPE_EC020:	return (unsigned int)M68K_CPU_TYPE_68EC020;
 				case CPU_TYPE_020:		return (unsigned int)M68K_CPU_TYPE_68020;
+				case CPU_TYPE_040:		return (unsigned int)M68K_CPU_TYPE_68040;
 			}
 			return M68K_CPU_TYPE_INVALID;
 		default:			return 0;
@@ -502,7 +690,7 @@ void m68k_set_reg(m68k_register_t regnum, unsigned int value)
 		case M68K_REG_A6:	REG_A[6] = MASK_OUT_ABOVE_32(value); return;
 		case M68K_REG_A7:	REG_A[7] = MASK_OUT_ABOVE_32(value); return;
 		case M68K_REG_PC:	m68ki_jump(MASK_OUT_ABOVE_32(value)); return;
-		case M68K_REG_SR:	m68ki_set_sr(value); return;
+		case M68K_REG_SR:	m68ki_set_sr_noint_nosp(value); return;
 		case M68K_REG_SP:	REG_SP = MASK_OUT_ABOVE_32(value); return;
 		case M68K_REG_USP:	if(FLAG_S)
 								REG_USP = MASK_OUT_ABOVE_32(value);
@@ -547,6 +735,26 @@ void m68k_set_reset_instr_callback(void  (*callback)(void))
 	CALLBACK_RESET_INSTR = callback ? callback : default_reset_instr_callback;
 }
 
+void m68k_set_cmpild_instr_callback(void  (*callback)(unsigned int, int))
+{
+	CALLBACK_CMPILD_INSTR = callback ? callback : default_cmpild_instr_callback;
+}
+
+void m68k_set_rte_instr_callback(void  (*callback)(void))
+{
+	CALLBACK_RTE_INSTR = callback ? callback : default_rte_instr_callback;
+}
+
+void m68k_set_tas_instr_callback(int  (*callback)(void))
+{
+	CALLBACK_TAS_INSTR = callback ? callback : default_tas_instr_callback;
+}
+
+void m68k_set_illg_instr_callback(int  (*callback)(int))
+{
+	CALLBACK_ILLG_INSTR = callback ? callback : default_illg_instr_callback;
+}
+
 void m68k_set_pc_changed_callback(void  (*callback)(unsigned int new_pc))
 {
 	CALLBACK_PC_CHANGED = callback ? callback : default_pc_changed_callback;
@@ -557,7 +765,7 @@ void m68k_set_fc_callback(void  (*callback)(unsigned int new_fc))
 	CALLBACK_SET_FC = callback ? callback : default_set_fc_callback;
 }
 
-void m68k_set_instr_hook_callback(void  (*callback)(void))
+void m68k_set_instr_hook_callback(void  (*callback)(unsigned int pc))
 {
 	CALLBACK_INSTR_HOOK = callback ? callback : default_instr_hook_callback;
 }
@@ -582,6 +790,12 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 3;
 			CYC_SHIFT        = 1;
 			CYC_RESET        = 132;
+			HAS_PMMU	 = 0;
+			return;
+		case M68K_CPU_TYPE_SCC68070:
+			m68k_set_cpu_type(M68K_CPU_TYPE_68010);
+			CPU_ADDRESS_MASK = 0xffffffff;
+			CPU_TYPE         = CPU_TYPE_SCC070;
 			return;
 		case M68K_CPU_TYPE_68010:
 			CPU_TYPE         = CPU_TYPE_010;
@@ -598,6 +812,7 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 3;
 			CYC_SHIFT        = 1;
 			CYC_RESET        = 130;
+			HAS_PMMU	 = 0;
 			return;
 		case M68K_CPU_TYPE_68EC020:
 			CPU_TYPE         = CPU_TYPE_EC020;
@@ -614,6 +829,7 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
+			HAS_PMMU	 = 0;
 			return;
 		case M68K_CPU_TYPE_68020:
 			CPU_TYPE         = CPU_TYPE_020;
@@ -630,6 +846,91 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
+			HAS_PMMU	 = 0;
+			return;
+		case M68K_CPU_TYPE_68030:
+			CPU_TYPE         = CPU_TYPE_030;
+			CPU_ADDRESS_MASK = 0xffffffff;
+			CPU_SR_MASK      = 0xf71f; /* T1 T0 S  M  -- I2 I1 I0 -- -- -- X  N  Z  V  C  */
+			CYC_INSTRUCTION  = m68ki_cycles[3];
+			CYC_EXCEPTION    = m68ki_exception_cycle_table[3];
+			CYC_BCC_NOTAKE_B = -2;
+			CYC_BCC_NOTAKE_W = 0;
+			CYC_DBCC_F_NOEXP = 0;
+			CYC_DBCC_F_EXP   = 4;
+			CYC_SCC_R_TRUE   = 0;
+			CYC_MOVEM_W      = 2;
+			CYC_MOVEM_L      = 2;
+			CYC_SHIFT        = 0;
+			CYC_RESET        = 518;
+			HAS_PMMU	       = 1;
+			return;
+		case M68K_CPU_TYPE_68EC030:
+			CPU_TYPE         = CPU_TYPE_EC030;
+			CPU_ADDRESS_MASK = 0xffffffff;
+			CPU_SR_MASK          = 0xf71f; /* T1 T0 S  M  -- I2 I1 I0 -- -- -- X  N  Z  V  C  */
+			CYC_INSTRUCTION  = m68ki_cycles[3];
+			CYC_EXCEPTION    = m68ki_exception_cycle_table[3];
+			CYC_BCC_NOTAKE_B = -2;
+			CYC_BCC_NOTAKE_W = 0;
+			CYC_DBCC_F_NOEXP = 0;
+			CYC_DBCC_F_EXP   = 4;
+			CYC_SCC_R_TRUE   = 0;
+			CYC_MOVEM_W      = 2;
+			CYC_MOVEM_L      = 2;
+			CYC_SHIFT        = 0;
+			CYC_RESET        = 518;
+			HAS_PMMU	       = 0;		/* EC030 lacks the PMMU and is effectively a die-shrink 68020 */
+			return;
+		case M68K_CPU_TYPE_68040:		// TODO: these values are not correct
+			CPU_TYPE         = CPU_TYPE_040;
+			CPU_ADDRESS_MASK = 0xffffffff;
+			CPU_SR_MASK      = 0xf71f; /* T1 T0 S  M  -- I2 I1 I0 -- -- -- X  N  Z  V  C  */
+			CYC_INSTRUCTION  = m68ki_cycles[4];
+			CYC_EXCEPTION    = m68ki_exception_cycle_table[4];
+			CYC_BCC_NOTAKE_B = -2;
+			CYC_BCC_NOTAKE_W = 0;
+			CYC_DBCC_F_NOEXP = 0;
+			CYC_DBCC_F_EXP   = 4;
+			CYC_SCC_R_TRUE   = 0;
+			CYC_MOVEM_W      = 2;
+			CYC_MOVEM_L      = 2;
+			CYC_SHIFT        = 0;
+			CYC_RESET        = 518;
+			HAS_PMMU	 = 1;
+			return;
+		case M68K_CPU_TYPE_68EC040: // Just a 68040 without pmmu apparently...
+			CPU_TYPE         = CPU_TYPE_EC040;
+			CPU_ADDRESS_MASK = 0xffffffff;
+			CPU_SR_MASK      = 0xf71f; /* T1 T0 S  M  -- I2 I1 I0 -- -- -- X  N  Z  V  C  */
+			CYC_INSTRUCTION  = m68ki_cycles[4];
+			CYC_EXCEPTION    = m68ki_exception_cycle_table[4];
+			CYC_BCC_NOTAKE_B = -2;
+			CYC_BCC_NOTAKE_W = 0;
+			CYC_DBCC_F_NOEXP = 0;
+			CYC_DBCC_F_EXP   = 4;
+			CYC_SCC_R_TRUE   = 0;
+			CYC_MOVEM_W      = 2;
+			CYC_MOVEM_L      = 2;
+			CYC_SHIFT        = 0;
+			CYC_RESET        = 518;
+			HAS_PMMU	 = 0;
+			return;
+		case M68K_CPU_TYPE_68LC040:
+			CPU_TYPE         = CPU_TYPE_LC040;
+			m68ki_cpu.sr_mask          = 0xf71f; /* T1 T0 S  M  -- I2 I1 I0 -- -- -- X  N  Z  V  C  */
+			m68ki_cpu.cyc_instruction  = m68ki_cycles[4];
+			m68ki_cpu.cyc_exception    = m68ki_exception_cycle_table[4];
+			m68ki_cpu.cyc_bcc_notake_b = -2;
+			m68ki_cpu.cyc_bcc_notake_w = 0;
+			m68ki_cpu.cyc_dbcc_f_noexp = 0;
+			m68ki_cpu.cyc_dbcc_f_exp   = 4;
+			m68ki_cpu.cyc_scc_r_true   = 0;
+			m68ki_cpu.cyc_movem_w      = 2;
+			m68ki_cpu.cyc_movem_l      = 2;
+			m68ki_cpu.cyc_shift        = 0;
+			m68ki_cpu.cyc_reset        = 518;
+			HAS_PMMU	       = 1;
 			return;
 	}
 }
@@ -638,23 +939,34 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 /* ASG: removed per-instruction interrupt checks */
 int m68k_execute(int num_cycles)
 {
+	/* eat up any reset cycles */
+	if (RESET_CYCLES) {
+	    int rc = RESET_CYCLES;
+	    RESET_CYCLES = 0;
+	    num_cycles -= rc;
+	    if (num_cycles <= 0)
+		return rc;
+	}
+
+	/* Set our pool of clock cycles available */
+	SET_CYCLES(num_cycles);
+	m68ki_initial_cycles = num_cycles;
+
+	/* See if interrupts came in */
+	m68ki_check_interrupts();
+
 	/* Make sure we're not stopped */
 	if(!CPU_STOPPED)
 	{
-		/* Set our pool of clock cycles available */
-		SET_CYCLES(num_cycles);
-		m68ki_initial_cycles = num_cycles;
-
-		/* ASG: update cycles */
-		USE_CYCLES(CPU_INT_CYCLES);
-		CPU_INT_CYCLES = 0;
-
 		/* Return point if we had an address error */
 		m68ki_set_address_error_trap(); /* auto-disable (see m68kcpu.h) */
 
+		m68ki_check_bus_error_trap();
+
 		/* Main loop.  Keep going until we run out of clock cycles */
 		do
 		{
+			int i;
 			/* Set tracing accodring to T1. (T0 is done inside instruction) */
 			m68ki_trace_t1(); /* auto-disable (see m68kcpu.h) */
 
@@ -662,11 +974,16 @@ int m68k_execute(int num_cycles)
 			m68ki_use_data_space(); /* auto-disable (see m68kcpu.h) */
 
 			/* Call external hook to peek at CPU */
-			m68ki_instr_hook(); /* auto-disable (see m68kcpu.h) */
+			m68ki_instr_hook(REG_PC); /* auto-disable (see m68kcpu.h) */
 
 			/* Record previous program counter */
 			REG_PPC = REG_PC;
 
+			/* Record previous D/A register state (in case of bus error) */
+			for (i = 15; i >= 0; i--){
+				REG_DA_SAVE[i] = REG_DA[i];
+			}
+
 			/* Read an instruction and call its handler */
 			REG_IR = m68ki_read_imm_16();
 			m68ki_instruction_jump_table[REG_IR]();
@@ -678,20 +995,12 @@ int m68k_execute(int num_cycles)
 
 		/* set previous PC to current PC for the next entry into the loop */
 		REG_PPC = REG_PC;
-
-		/* ASG: update cycles */
-		USE_CYCLES(CPU_INT_CYCLES);
-		CPU_INT_CYCLES = 0;
-
-		/* return how many clocks we used */
-		return m68ki_initial_cycles - GET_CYCLES();
 	}
+	else
+		SET_CYCLES(0);
 
-	/* We get here if the CPU is stopped or halted */
-	SET_CYCLES(0);
-	CPU_INT_CYCLES = 0;
-
-	return num_cycles;
+	/* return how many clocks we used */
+	return m68ki_initial_cycles - GET_CYCLES();
 }
 
 
@@ -732,9 +1041,29 @@ void m68k_set_irq(unsigned int int_level)
 	/* A transition from < 7 to 7 always interrupts (NMI) */
 	/* Note: Level 7 can also level trigger like a normal IRQ */
 	if(old_level != 0x0700 && CPU_INT_LEVEL == 0x0700)
-		m68ki_exception_interrupt(7); /* Edge triggered level 7 (NMI) */
+		m68ki_cpu.nmi_pending = TRUE;
+}
+
+void m68k_set_virq(unsigned int level, unsigned int active)
+{
+	uint state = m68ki_cpu.virq_state;
+	uint blevel;
+
+	if(active)
+		state |= 1 << level;
 	else
-		m68ki_check_interrupts(); /* Level triggered (IRQ) */
+		state &= ~(1 << level);
+	m68ki_cpu.virq_state = state;
+
+	for(blevel = 7; blevel > 0; blevel--)
+		if(state & (1 << blevel))
+			break;
+	m68k_set_irq(blevel);
+}
+
+unsigned int m68k_get_virq(unsigned int level)
+{
+	return (m68ki_cpu.virq_state & (1 << level)) ? 1 : 0;
 }
 
 void m68k_init(void)
@@ -751,14 +1080,27 @@ void m68k_init(void)
 	m68k_set_int_ack_callback(NULL);
 	m68k_set_bkpt_ack_callback(NULL);
 	m68k_set_reset_instr_callback(NULL);
+	m68k_set_cmpild_instr_callback(NULL);
+	m68k_set_rte_instr_callback(NULL);
+	m68k_set_tas_instr_callback(NULL);
+	m68k_set_illg_instr_callback(NULL);
 	m68k_set_pc_changed_callback(NULL);
 	m68k_set_fc_callback(NULL);
 	m68k_set_instr_hook_callback(NULL);
 }
 
+/* Trigger a Bus Error exception */
+void m68k_pulse_bus_error(void)
+{
+	m68ki_exception_bus_error();
+}
+
 /* Pulse the RESET line on the CPU */
 void m68k_pulse_reset(void)
 {
+	/* Disable the PMMU on reset */
+	m68ki_cpu.pmmu_enabled = 0;
+
 	/* Clear all stop levels and eat up all remaining cycles */
 	CPU_STOPPED = 0;
 	SET_CYCLES(0);
@@ -771,6 +1113,8 @@ void m68k_pulse_reset(void)
 	m68ki_clear_trace();
 	/* Interrupt mask to level 7 */
 	FLAG_INT_MASK = 0x0700;
+	CPU_INT_LEVEL = 0;
+	m68ki_cpu.virq_state = 0;
 	/* Reset VBR */
 	REG_VBR = 0;
 	/* Go to supervisor mode */
@@ -789,6 +1133,8 @@ void m68k_pulse_reset(void)
 	m68ki_jump(REG_PC);
 
 	CPU_RUN_MODE = RUN_MODE_NORMAL;
+
+	RESET_CYCLES = CYC_EXCEPTION[EXCEPTION_RESET];
 }
 
 /* Pulse the HALT line on the CPU */
@@ -797,7 +1143,6 @@ void m68k_pulse_halt(void)
 	CPU_STOPPED |= STOP_LEVEL_HALT;
 }
 
-
 /* Get and set the current CPU context */
 /* This is to allow for multiple CPUs */
 unsigned int m68k_context_size()
@@ -816,20 +1161,16 @@ void m68k_set_context(void* src)
 	if(src) m68ki_cpu = *(m68ki_cpu_core*)src;
 }
 
-
-
 /* ======================================================================== */
 /* ============================== MAME STUFF ============================== */
 /* ======================================================================== */
 
 #if M68K_COMPILE_FOR_MAME == OPT_ON
 
-#include "state.h"
-
 static struct {
 	UINT16 sr;
-	int stopped;
-	int halted;
+	UINT8 stopped;
+	UINT8 halted;
 } m68k_substate;
 
 static void m68k_prepare_substate(void)
@@ -847,29 +1188,26 @@ static void m68k_post_load(void)
 	m68ki_jump(REG_PC);
 }
 
-void m68k_state_register(const char *type)
+void m68k_state_register(const char *type, int index)
 {
-	int cpu = cpu_getactivecpu();
-
-	state_save_register_UINT32(type, cpu, "D"         , REG_D, 8);
-	state_save_register_UINT32(type, cpu, "A"         , REG_A, 8);
-	state_save_register_UINT32(type, cpu, "PPC"       , &REG_PPC, 1);
-	state_save_register_UINT32(type, cpu, "PC"        , &REG_PC, 1);
-	state_save_register_UINT32(type, cpu, "USP"       , &REG_USP, 1);
-	state_save_register_UINT32(type, cpu, "ISP"       , &REG_ISP, 1);
-	state_save_register_UINT32(type, cpu, "MSP"       , &REG_MSP, 1);
-	state_save_register_UINT32(type, cpu, "VBR"       , &REG_VBR, 1);
-	state_save_register_UINT32(type, cpu, "SFC"       , &REG_SFC, 1);
-	state_save_register_UINT32(type, cpu, "DFC"       , &REG_DFC, 1);
-	state_save_register_UINT32(type, cpu, "CACR"      , &REG_CACR, 1);
-	state_save_register_UINT32(type, cpu, "CAAR"      , &REG_CAAR, 1);
-	state_save_register_UINT16(type, cpu, "SR"        , &m68k_substate.sr, 1);
-	state_save_register_UINT32(type, cpu, "INT_LEVEL" , &CPU_INT_LEVEL, 1);
-	state_save_register_UINT32(type, cpu, "INT_CYCLES", &CPU_INT_CYCLES, 1);
-	state_save_register_int   (type, cpu, "STOPPED"   , &m68k_substate.stopped);
-	state_save_register_int   (type, cpu, "HALTED"    , &m68k_substate.halted);
-	state_save_register_UINT32(type, cpu, "PREF_ADDR" , &CPU_PREF_ADDR, 1);
-	state_save_register_UINT32(type, cpu, "PREF_DATA" , &CPU_PREF_DATA, 1);
+	/* Note, D covers A because the dar array is common, REG_A=REG_D+8 */
+	state_save_register_item_array(type, index, REG_D);
+	state_save_register_item(type, index, REG_PPC);
+	state_save_register_item(type, index, REG_PC);
+	state_save_register_item(type, index, REG_USP);
+	state_save_register_item(type, index, REG_ISP);
+	state_save_register_item(type, index, REG_MSP);
+	state_save_register_item(type, index, REG_VBR);
+	state_save_register_item(type, index, REG_SFC);
+	state_save_register_item(type, index, REG_DFC);
+	state_save_register_item(type, index, REG_CACR);
+	state_save_register_item(type, index, REG_CAAR);
+	state_save_register_item(type, index, m68k_substate.sr);
+	state_save_register_item(type, index, CPU_INT_LEVEL);
+	state_save_register_item(type, index, m68k_substate.stopped);
+	state_save_register_item(type, index, m68k_substate.halted);
+	state_save_register_item(type, index, CPU_PREF_ADDR);
+	state_save_register_item(type, index, CPU_PREF_DATA);
 	state_save_register_func_presave(m68k_prepare_substate);
 	state_save_register_func_postload(m68k_post_load);
 }
diff --git a/plat/linux68k/emu/musashi/m68kcpu.h b/plat/linux68k/emu/musashi/m68kcpu.h
index d4e92cc9d..63afc7c48 100755
--- a/plat/linux68k/emu/musashi/m68kcpu.h
+++ b/plat/linux68k/emu/musashi/m68kcpu.h
@@ -3,10 +3,10 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 4.5
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -33,12 +33,15 @@
 #ifndef M68KCPU__HEADER
 #define M68KCPU__HEADER
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #include "m68k.h"
+
 #include <limits.h>
 
-#if M68K_EMULATE_ADDRESS_ERROR
 #include <setjmp.h>
-#endif /* M68K_EMULATE_ADDRESS_ERROR */
 
 /* ======================================================================== */
 /* ==================== ARCHITECTURE-DEPENDANT DEFINES ==================== */
@@ -63,26 +66,37 @@
 #undef sint
 #undef uint
 
-#define sint8  signed   char			/* ASG: changed from char to signed char */
-#define sint16 signed   short
-#define sint32 signed   long
-#define uint8  unsigned char
-#define uint16 unsigned short
-#define uint32 unsigned long
+typedef signed   char  sint8;  		/* ASG: changed from char to signed char */
+typedef signed   short sint16;
+typedef signed   int   sint32; 		/* AWJ: changed from long to int */
+typedef unsigned char  uint8;
+typedef unsigned short uint16;
+typedef unsigned int   uint32; 			/* AWJ: changed from long to int */
 
 /* signed and unsigned int must be at least 32 bits wide */
-#define sint   signed   int
-#define uint   unsigned int
+typedef signed   int sint;
+typedef unsigned int uint;
 
 
 #if M68K_USE_64_BIT
-#define sint64 signed   long long
-#define uint64 unsigned long long
+typedef signed   long long sint64;
+typedef unsigned long long uint64;
 #else
-#define sint64 sint32
-#define uint64 uint32
+typedef sint32 sint64;
+typedef uint32 uint64;
 #endif /* M68K_USE_64_BIT */
 
+/* U64 and S64 are used to wrap long integer constants. */
+#ifdef __GNUC__
+#define U64(val) val##ULL
+#define S64(val) val##LL
+#else
+#define U64(val) val
+#define S64(val) val
+#endif
+
+#include "softfloat/milieu.h"
+#include "softfloat/softfloat.h"
 
 
 /* Allow for architectures that don't have 8-bit sizes */
@@ -93,7 +107,7 @@
 	#define sint8  signed   int
 	#undef  uint8
 	#define uint8  unsigned int
-	INLINE sint MAKE_INT_8(uint value)
+	static inline sint MAKE_INT_8(uint value)
 	{
 		return (value & 0x80) ? value | ~0xff : value & 0xff;
 	}
@@ -108,7 +122,7 @@
 	#define sint16 signed   int
 	#undef  uint16
 	#define uint16 unsigned int
-	INLINE sint MAKE_INT_16(uint value)
+	static inline sint MAKE_INT_16(uint value)
 	{
 		return (value & 0x8000) ? value | ~0xffff : value & 0xffff;
 	}
@@ -116,18 +130,18 @@
 
 
 /* Allow for architectures that don't have 32-bit sizes */
-#if ULONG_MAX == 0xffffffff
+#if UINT_MAX == 0xffffffff
 	#define MAKE_INT_32(A) (sint32)(A)
 #else
 	#undef  sint32
 	#define sint32  signed   int
 	#undef  uint32
 	#define uint32  unsigned int
-	INLINE sint MAKE_INT_32(uint value)
+	static inline sint MAKE_INT_32(uint value)
 	{
 		return (value & 0x80000000) ? value | ~0xffffffff : value & 0xffffffff;
 	}
-#endif /* ULONG_MAX == 0xffffffff */
+#endif /* UINT_MAX == 0xffffffff */
 
 
@@ -137,6 +151,7 @@
 /* ======================================================================== */
 
 /* Exception Vectors handled by emulation */
+#define EXCEPTION_RESET                    0
 #define EXCEPTION_BUS_ERROR                2 /* This one is not emulated! */
 #define EXCEPTION_ADDRESS_ERROR            3 /* This one is partially emulated (doesn't stack a proper frame yet) */
 #define EXCEPTION_ILLEGAL_INSTRUCTION      4
@@ -161,10 +176,17 @@
 #define FUNCTION_CODE_CPU_SPACE          7
 
 /* CPU types for deciding what to emulate */
-#define CPU_TYPE_000   1
-#define CPU_TYPE_010   2
-#define CPU_TYPE_EC020 4
-#define CPU_TYPE_020   8
+#define CPU_TYPE_000	(0x00000001)
+#define CPU_TYPE_008    (0x00000002)
+#define CPU_TYPE_010    (0x00000004)
+#define CPU_TYPE_EC020  (0x00000008)
+#define CPU_TYPE_020    (0x00000010)
+#define CPU_TYPE_EC030  (0x00000020)
+#define CPU_TYPE_030    (0x00000040)
+#define CPU_TYPE_EC040  (0x00000080)
+#define CPU_TYPE_LC040  (0x00000100)
+#define CPU_TYPE_040    (0x00000200)
+#define CPU_TYPE_SCC070 (0x00000400)
 
 /* Different ways to stop the CPU */
 #define STOP_LEVEL_STOP 1
@@ -305,6 +327,7 @@
 #define CPU_TYPE         m68ki_cpu.cpu_type
 
 #define REG_DA           m68ki_cpu.dar /* easy access to data and address regs */
+#define REG_DA_SAVE           m68ki_cpu.dar_save
 #define REG_D            m68ki_cpu.dar
 #define REG_A            (m68ki_cpu.dar+8)
 #define REG_PPC 		 m68ki_cpu.ppc
@@ -321,6 +344,11 @@
 #define REG_CAAR         m68ki_cpu.caar
 #define REG_IR           m68ki_cpu.ir
 
+#define REG_FP           m68ki_cpu.fpr
+#define REG_FPCR         m68ki_cpu.fpcr
+#define REG_FPSR         m68ki_cpu.fpsr
+#define REG_FPIAR        m68ki_cpu.fpiar
+
 #define FLAG_T1          m68ki_cpu.t1_flag
 #define FLAG_T0          m68ki_cpu.t0_flag
 #define FLAG_S           m68ki_cpu.s_flag
@@ -333,7 +361,6 @@
 #define FLAG_INT_MASK    m68ki_cpu.int_mask
 
 #define CPU_INT_LEVEL    m68ki_cpu.int_level /* ASG: changed from CPU_INTS_PENDING */
-#define CPU_INT_CYCLES   m68ki_cpu.int_cycles /* ASG */
 #define CPU_STOPPED      m68ki_cpu.stopped
 #define CPU_PREF_ADDR    m68ki_cpu.pref_addr
 #define CPU_PREF_DATA    m68ki_cpu.pref_data
@@ -353,11 +380,18 @@
 #define CYC_MOVEM_L      m68ki_cpu.cyc_movem_l
 #define CYC_SHIFT        m68ki_cpu.cyc_shift
 #define CYC_RESET        m68ki_cpu.cyc_reset
+#define HAS_PMMU	 m68ki_cpu.has_pmmu
+#define PMMU_ENABLED	 m68ki_cpu.pmmu_enabled
+#define RESET_CYCLES	 m68ki_cpu.reset_cycles
 
 
 #define CALLBACK_INT_ACK     m68ki_cpu.int_ack_callback
 #define CALLBACK_BKPT_ACK    m68ki_cpu.bkpt_ack_callback
 #define CALLBACK_RESET_INSTR m68ki_cpu.reset_instr_callback
+#define CALLBACK_CMPILD_INSTR m68ki_cpu.cmpild_instr_callback
+#define CALLBACK_RTE_INSTR    m68ki_cpu.rte_instr_callback
+#define CALLBACK_TAS_INSTR    m68ki_cpu.tas_instr_callback
+#define CALLBACK_ILLG_INSTR    m68ki_cpu.illg_instr_callback
 #define CALLBACK_PC_CHANGED  m68ki_cpu.pc_changed_callback
 #define CALLBACK_SET_FC      m68ki_cpu.set_fc_callback
 #define CALLBACK_INSTR_HOOK  m68ki_cpu.instr_hook_callback
@@ -369,8 +403,24 @@
 /* These defines are dependant on the configuration defines in m68kconf.h */
 
 /* Disable certain comparisons if we're not using all CPU types */
+#if M68K_EMULATE_040
+#define CPU_TYPE_IS_040_PLUS(A)    ((A) & (CPU_TYPE_040 | CPU_TYPE_EC040))
+	#define CPU_TYPE_IS_040_LESS(A)    1
+#else
+	#define CPU_TYPE_IS_040_PLUS(A)    0
+	#define CPU_TYPE_IS_040_LESS(A)    1
+#endif
+
+#if M68K_EMULATE_030
+#define CPU_TYPE_IS_030_PLUS(A)    ((A) & (CPU_TYPE_030 | CPU_TYPE_EC030 | CPU_TYPE_040 | CPU_TYPE_EC040))
+#define CPU_TYPE_IS_030_LESS(A)    1
+#else
+#define CPU_TYPE_IS_030_PLUS(A)	0
+#define CPU_TYPE_IS_030_LESS(A)    1
+#endif
+
 #if M68K_EMULATE_020
-	#define CPU_TYPE_IS_020_PLUS(A)    ((A) & CPU_TYPE_020)
+#define CPU_TYPE_IS_020_PLUS(A)    ((A) & (CPU_TYPE_020 | CPU_TYPE_030 | CPU_TYPE_EC030 | CPU_TYPE_040 | CPU_TYPE_EC040))
 	#define CPU_TYPE_IS_020_LESS(A)    1
 #else
 	#define CPU_TYPE_IS_020_PLUS(A)    0
@@ -378,7 +428,7 @@
 #endif
 
 #if M68K_EMULATE_EC020
-	#define CPU_TYPE_IS_EC020_PLUS(A)  ((A) & (CPU_TYPE_EC020 | CPU_TYPE_020))
+#define CPU_TYPE_IS_EC020_PLUS(A)  ((A) & (CPU_TYPE_EC020 | CPU_TYPE_020 | CPU_TYPE_030 | CPU_TYPE_EC030 | CPU_TYPE_040 | CPU_TYPE_EC040))
 	#define CPU_TYPE_IS_EC020_LESS(A)  ((A) & (CPU_TYPE_000 | CPU_TYPE_010 | CPU_TYPE_EC020))
 #else
 	#define CPU_TYPE_IS_EC020_PLUS(A)  CPU_TYPE_IS_020_PLUS(A)
@@ -387,8 +437,8 @@
 
 #if M68K_EMULATE_010
 	#define CPU_TYPE_IS_010(A)         ((A) == CPU_TYPE_010)
-	#define CPU_TYPE_IS_010_PLUS(A)    ((A) & (CPU_TYPE_010 | CPU_TYPE_EC020 | CPU_TYPE_020))
-	#define CPU_TYPE_IS_010_LESS(A)    ((A) & (CPU_TYPE_000 | CPU_TYPE_010))
+#define CPU_TYPE_IS_010_PLUS(A)    ((A) & (CPU_TYPE_010 | CPU_TYPE_EC020 | CPU_TYPE_020 | CPU_TYPE_EC030 | CPU_TYPE_030 | CPU_TYPE_040 | CPU_TYPE_EC040))
+#define CPU_TYPE_IS_010_LESS(A)    ((A) & (CPU_TYPE_000 | CPU_TYPE_008 | CPU_TYPE_010))
 #else
 	#define CPU_TYPE_IS_010(A)         0
 	#define CPU_TYPE_IS_010_PLUS(A)    CPU_TYPE_IS_EC020_PLUS(A)
@@ -401,7 +451,7 @@
 	#define CPU_TYPE_IS_020_VARIANT(A) 0
 #endif
 
-#if M68K_EMULATE_020 || M68K_EMULATE_EC020 || M68K_EMULATE_010
+#if M68K_EMULATE_040 || M68K_EMULATE_020 || M68K_EMULATE_EC020 || M68K_EMULATE_010
 	#define CPU_TYPE_IS_000(A)         ((A) == CPU_TYPE_000)
 #else
 	#define CPU_TYPE_IS_000(A)         1
@@ -450,14 +500,54 @@
 	#define m68ki_output_reset()
 #endif /* M68K_EMULATE_RESET */
 
-#if M68K_INSTRUCTION_HOOK
-	#if M68K_INSTRUCTION_HOOK == OPT_SPECIFY_HANDLER
-		#define m68ki_instr_hook() M68K_INSTRUCTION_CALLBACK()
+#if M68K_CMPILD_HAS_CALLBACK
+	#if M68K_CMPILD_HAS_CALLBACK == OPT_SPECIFY_HANDLER
+		#define m68ki_cmpild_callback(v,r) M68K_CMPILD_CALLBACK(v,r)
 	#else
-		#define m68ki_instr_hook() CALLBACK_INSTR_HOOK()
+		#define m68ki_cmpild_callback(v,r) CALLBACK_CMPILD_INSTR(v,r)
 	#endif
 #else
-	#define m68ki_instr_hook()
+	#define m68ki_cmpild_callback(v,r)
+#endif /* M68K_CMPILD_HAS_CALLBACK */
+
+#if M68K_RTE_HAS_CALLBACK
+	#if M68K_RTE_HAS_CALLBACK == OPT_SPECIFY_HANDLER
+		#define m68ki_rte_callback() M68K_RTE_CALLBACK()
+	#else
+		#define m68ki_rte_callback() CALLBACK_RTE_INSTR()
+	#endif
+#else
+	#define m68ki_rte_callback()
+#endif /* M68K_RTE_HAS_CALLBACK */
+
+#if M68K_TAS_HAS_CALLBACK
+	#if M68K_TAS_HAS_CALLBACK == OPT_SPECIFY_HANDLER
+		#define m68ki_tas_callback() M68K_TAS_CALLBACK()
+	#else
+		#define m68ki_tas_callback() CALLBACK_TAS_INSTR()
+	#endif
+#else
+	#define m68ki_tas_callback() 1
+#endif /* M68K_TAS_HAS_CALLBACK */
+
+#if M68K_ILLG_HAS_CALLBACK
+	#if M68K_ILLG_HAS_CALLBACK == OPT_SPECIFY_HANDLER
+		#define m68ki_illg_callback(opcode) M68K_ILLG_CALLBACK(opcode)
+	#else
+		#define m68ki_illg_callback(opcode) CALLBACK_ILLG_INSTR(opcode)
+	#endif
+#else
+	#define m68ki_illg_callback(opcode) 0 // Default is 0 = not handled, exception will occur
+#endif /* M68K_ILLG_HAS_CALLBACK */
+
+#if M68K_INSTRUCTION_HOOK
+	#if M68K_INSTRUCTION_HOOK == OPT_SPECIFY_HANDLER
+		#define m68ki_instr_hook(pc) M68K_INSTRUCTION_CALLBACK(pc)
+	#else
+		#define m68ki_instr_hook(pc) CALLBACK_INSTR_HOOK(pc)
+	#endif
+#else
+	#define m68ki_instr_hook(pc)
 #endif /* M68K_INSTRUCTION_HOOK */
 
 #if M68K_MONITOR_PC
@@ -511,8 +601,32 @@
 /* Address error */
 #if M68K_EMULATE_ADDRESS_ERROR
 	#include <setjmp.h>
-	extern jmp_buf m68ki_aerr_trap;
 
+/* sigjmp() on Mac OS X and *BSD in general saves signal contexts and is super-slow, use sigsetjmp() to tell it not to */
+#ifdef _BSD_SETJMP_H
+extern sigjmp_buf m68ki_aerr_trap;
+#define m68ki_set_address_error_trap(m68k) \
+	if(sigsetjmp(m68ki_aerr_trap, 0) != 0) \
+	{ \
+		m68ki_exception_address_error(m68k); \
+		if(m68ki_stopped) \
+		{ \
+			if (m68ki_remaining_cycles > 0) \
+				m68ki_remaining_cycles = 0; \
+			return m68ki_initial_cycles; \
+		} \
+	}
+
+#define m68ki_check_address_error(ADDR, WRITE_MODE, FC) \
+	if((ADDR)&1) \
+	{ \
+		m68ki_aerr_address = ADDR; \
+		m68ki_aerr_write_mode = WRITE_MODE; \
+		m68ki_aerr_fc = FC; \
+		siglongjmp(m68ki_aerr_trap, 1); \
+	}
+#else
+extern jmp_buf m68ki_aerr_trap;
 	#define m68ki_set_address_error_trap() \
 		if(setjmp(m68ki_aerr_trap) != 0) \
 		{ \
@@ -520,7 +634,6 @@
 			if(CPU_STOPPED) \
 			{ \
 				SET_CYCLES(0); \
-				CPU_INT_CYCLES = 0; \
 				return m68ki_initial_cycles; \
 			} \
 			/* ensure we don't re-enter execution loop after an
@@ -540,16 +653,24 @@
 			m68ki_aerr_fc = FC; \
 			longjmp(m68ki_aerr_trap, 1); \
 		}
+#endif
+
+	#define m68ki_check_address_error_010_less(ADDR, WRITE_MODE, FC) \
+		if (CPU_TYPE_IS_010_LESS(CPU_TYPE)) \
+		{ \
+			m68ki_check_address_error(ADDR, WRITE_MODE, FC) \
+		}
 #else
 	#define m68ki_set_address_error_trap()
 	#define m68ki_check_address_error(ADDR, WRITE_MODE, FC)
+	#define m68ki_check_address_error_010_less(ADDR, WRITE_MODE, FC)
 #endif /* M68K_ADDRESS_ERROR */
 
 /* Logging */
 #if M68K_LOG_ENABLE
 	#include <stdio.h>
 	extern FILE* M68K_LOG_FILEHANDLE
-	extern char* m68ki_cpu_names[];
+	extern const char *const m68ki_cpu_names[];
 
 	#define M68K_DO_LOG(A) if(M68K_LOG_FILEHANDLE) fprintf A
 	#if M68K_LOG_1010_1111
@@ -774,9 +895,6 @@
 #define m68ki_write_32_pd(A, V) m68ki_write_32_fc(A, FLAG_S | FUNCTION_CODE_USER_DATA, V)
 #endif
 
-/* map read immediate 8 to read immediate 16 */
-#define m68ki_read_imm_8() MASK_OUT_ABOVE_8(m68ki_read_imm_16())
-
 /* Map PC-relative reads */
 #define m68ki_read_pcrel_8(A) m68k_read_pcrelative_8(A)
 #define m68ki_read_pcrel_16(A) m68k_read_pcrelative_16(A)
@@ -798,10 +916,18 @@
 /* =============================== PROTOTYPES ============================= */
 /* ======================================================================== */
 
+typedef union
+{
+	uint64 i;
+	double f;
+} fp_reg;
+
 typedef struct
 {
-	uint cpu_type;     /* CPU Type: 68000, 68010, 68EC020, or 68020 */
+	uint cpu_type;     /* CPU Type: 68000, 68008, 68010, 68EC020, 68020, 68EC030, 68030, 68EC040, or 68040 */
 	uint dar[16];      /* Data and Address Registers */
+	uint dar_save[16];  /* Saved Data and Address Registers (pushed onto the
+						   stack when a bus error occurs)*/
 	uint ppc;		   /* Previous program counter */
 	uint pc;           /* Program Counter */
 	uint sp[7];        /* User, Interrupt, and Master Stack Pointers */
@@ -811,6 +937,10 @@ typedef struct
 	uint cacr;         /* Cache Control Register (m68020, unemulated) */
 	uint caar;         /* Cache Address Register (m68020, unemulated) */
 	uint ir;           /* Instruction Register */
+	floatx80 fpr[8];     /* FPU Data Register (m68030/040) */
+	uint fpiar;        /* FPU Instruction Address Register (m68040) */
+	uint fpsr;         /* FPU Status Register (m68040) */
+	uint fpcr;         /* FPU Control Register (m68040) */
 	uint t1_flag;      /* Trace 1 */
 	uint t0_flag;      /* Trace 0 */
 	uint s_flag;       /* Supervisor */
@@ -822,7 +952,6 @@ typedef struct
 	uint c_flag;       /* Carry */
 	uint int_mask;     /* I0-I2 */
 	uint int_level;    /* State of interrupt pins IPL0-IPL2 -- ASG: changed from ints_pending */
-	uint int_cycles;   /* ASG: extra cycles from generated interrupts */
 	uint stopped;      /* Stopped state */
 	uint pref_addr;    /* Last prefetch address */
 	uint pref_data;    /* Data in the prefetch queue */
@@ -830,6 +959,10 @@ typedef struct
 	uint sr_mask;      /* Implemented status register bits */
 	uint instr_mode;   /* Stores whether we are in instruction mode or group 0/1 exception mode */
 	uint run_mode;     /* Stores whether we are processing a reset, bus error, address error, or something else */
+	int    has_pmmu;     /* Indicates if a PMMU available (yes on 030, 040, no on EC030) */
+	int    pmmu_enabled; /* Indicates if the PMMU is enabled */
+	int    fpu_just_reset; /* Indicates the FPU was just reset */
+	uint reset_cycles;
 
 	/* Clocks required for instructions / exceptions */
 	uint cyc_bcc_notake_b;
@@ -841,16 +974,31 @@ typedef struct
 	uint cyc_movem_l;
 	uint cyc_shift;
 	uint cyc_reset;
-	uint8* cyc_instruction;
-	uint8* cyc_exception;
+
+	/* Virtual IRQ lines state */
+	uint virq_state;
+	uint nmi_pending;
+
+	/* PMMU registers */
+	uint mmu_crp_aptr, mmu_crp_limit;
+	uint mmu_srp_aptr, mmu_srp_limit;
+	uint mmu_tc;
+	uint16 mmu_sr;
+
+	const uint8* cyc_instruction;
+	const uint8* cyc_exception;
 
 	/* Callbacks to host */
 	int  (*int_ack_callback)(int int_line);           /* Interrupt Acknowledge */
 	void (*bkpt_ack_callback)(unsigned int data);     /* Breakpoint Acknowledge */
 	void (*reset_instr_callback)(void);               /* Called when a RESET instruction is encountered */
+ 	void (*cmpild_instr_callback)(unsigned int, int); /* Called when a CMPI.L #v, Dn instruction is encountered */
+ 	void (*rte_instr_callback)(void);                 /* Called when a RTE instruction is encountered */
+	int  (*tas_instr_callback)(void);                 /* Called when a TAS instruction is encountered, allows / disallows writeback */
+	int  (*illg_instr_callback)(int);                 /* Called when an illegal instruction is encountered, allows handling */
 	void (*pc_changed_callback)(unsigned int new_pc); /* Called when the PC changes by a large amount */
 	void (*set_fc_callback)(unsigned int new_fc);     /* Called when the CPU function code changes */
-	void (*instr_hook_callback)(void);                /* Called every instruction cycle prior to execution */
+	void (*instr_hook_callback)(unsigned int pc);     /* Called every instruction cycle prior to execution */
 
 } m68ki_cpu_core;
 
@@ -858,132 +1006,22 @@ typedef struct
 extern m68ki_cpu_core m68ki_cpu;
 extern sint           m68ki_remaining_cycles;
 extern uint           m68ki_tracing;
-extern uint8          m68ki_shift_8_table[];
-extern uint16         m68ki_shift_16_table[];
-extern uint           m68ki_shift_32_table[];
-extern uint8          m68ki_exception_cycle_table[][256];
+extern const uint8    m68ki_shift_8_table[];
+extern const uint16   m68ki_shift_16_table[];
+extern const uint     m68ki_shift_32_table[];
+extern const uint8    m68ki_exception_cycle_table[][256];
 extern uint           m68ki_address_space;
-extern uint8          m68ki_ea_idx_cycle_table[];
+extern const uint8    m68ki_ea_idx_cycle_table[];
 
 extern uint           m68ki_aerr_address;
 extern uint           m68ki_aerr_write_mode;
 extern uint           m68ki_aerr_fc;
 
-/* Read data immediately after the program counter */
-INLINE uint m68ki_read_imm_16(void);
-INLINE uint m68ki_read_imm_32(void);
-
-/* Read data with specific function code */
-INLINE uint m68ki_read_8_fc  (uint address, uint fc);
-INLINE uint m68ki_read_16_fc (uint address, uint fc);
-INLINE uint m68ki_read_32_fc (uint address, uint fc);
-
-/* Write data with specific function code */
-INLINE void m68ki_write_8_fc (uint address, uint fc, uint value);
-INLINE void m68ki_write_16_fc(uint address, uint fc, uint value);
-INLINE void m68ki_write_32_fc(uint address, uint fc, uint value);
-#if M68K_SIMULATE_PD_WRITES
-INLINE void m68ki_write_32_pd_fc(uint address, uint fc, uint value);
-#endif /* M68K_SIMULATE_PD_WRITES */
-
-/* Indexed and PC-relative ea fetching */
-INLINE uint m68ki_get_ea_pcdi(void);
-INLINE uint m68ki_get_ea_pcix(void);
-INLINE uint m68ki_get_ea_ix(uint An);
-
-/* Operand fetching */
-INLINE uint OPER_AY_AI_8(void);
-INLINE uint OPER_AY_AI_16(void);
-INLINE uint OPER_AY_AI_32(void);
-INLINE uint OPER_AY_PI_8(void);
-INLINE uint OPER_AY_PI_16(void);
-INLINE uint OPER_AY_PI_32(void);
-INLINE uint OPER_AY_PD_8(void);
-INLINE uint OPER_AY_PD_16(void);
-INLINE uint OPER_AY_PD_32(void);
-INLINE uint OPER_AY_DI_8(void);
-INLINE uint OPER_AY_DI_16(void);
-INLINE uint OPER_AY_DI_32(void);
-INLINE uint OPER_AY_IX_8(void);
-INLINE uint OPER_AY_IX_16(void);
-INLINE uint OPER_AY_IX_32(void);
-
-INLINE uint OPER_AX_AI_8(void);
-INLINE uint OPER_AX_AI_16(void);
-INLINE uint OPER_AX_AI_32(void);
-INLINE uint OPER_AX_PI_8(void);
-INLINE uint OPER_AX_PI_16(void);
-INLINE uint OPER_AX_PI_32(void);
-INLINE uint OPER_AX_PD_8(void);
-INLINE uint OPER_AX_PD_16(void);
-INLINE uint OPER_AX_PD_32(void);
-INLINE uint OPER_AX_DI_8(void);
-INLINE uint OPER_AX_DI_16(void);
-INLINE uint OPER_AX_DI_32(void);
-INLINE uint OPER_AX_IX_8(void);
-INLINE uint OPER_AX_IX_16(void);
-INLINE uint OPER_AX_IX_32(void);
-
-INLINE uint OPER_A7_PI_8(void);
-INLINE uint OPER_A7_PD_8(void);
-
-INLINE uint OPER_AW_8(void);
-INLINE uint OPER_AW_16(void);
-INLINE uint OPER_AW_32(void);
-INLINE uint OPER_AL_8(void);
-INLINE uint OPER_AL_16(void);
-INLINE uint OPER_AL_32(void);
-INLINE uint OPER_PCDI_8(void);
-INLINE uint OPER_PCDI_16(void);
-INLINE uint OPER_PCDI_32(void);
-INLINE uint OPER_PCIX_8(void);
-INLINE uint OPER_PCIX_16(void);
-INLINE uint OPER_PCIX_32(void);
-
-/* Stack operations */
-INLINE void m68ki_push_16(uint value);
-INLINE void m68ki_push_32(uint value);
-INLINE uint m68ki_pull_16(void);
-INLINE uint m68ki_pull_32(void);
-
-/* Program flow operations */
-INLINE void m68ki_jump(uint new_pc);
-INLINE void m68ki_jump_vector(uint vector);
-INLINE void m68ki_branch_8(uint offset);
-INLINE void m68ki_branch_16(uint offset);
-INLINE void m68ki_branch_32(uint offset);
-
-/* Status register operations. */
-INLINE void m68ki_set_s_flag(uint value);            /* Only bit 2 of value should be set (i.e. 4 or 0) */
-INLINE void m68ki_set_sm_flag(uint value);           /* only bits 1 and 2 of value should be set */
-INLINE void m68ki_set_ccr(uint value);               /* set the condition code register */
-INLINE void m68ki_set_sr(uint value);                /* set the status register */
-INLINE void m68ki_set_sr_noint(uint value);          /* set the status register */
-
-/* Exception processing */
-INLINE uint m68ki_init_exception(void);              /* Initial exception processing */
-
-INLINE void m68ki_stack_frame_3word(uint pc, uint sr); /* Stack various frame types */
-INLINE void m68ki_stack_frame_buserr(uint sr);
-
-INLINE void m68ki_stack_frame_0000(uint pc, uint sr, uint vector);
-INLINE void m68ki_stack_frame_0001(uint pc, uint sr, uint vector);
-INLINE void m68ki_stack_frame_0010(uint sr, uint vector);
-INLINE void m68ki_stack_frame_1000(uint pc, uint sr, uint vector);
-INLINE void m68ki_stack_frame_1010(uint sr, uint vector, uint pc);
-INLINE void m68ki_stack_frame_1011(uint sr, uint vector, uint pc);
-
-INLINE void m68ki_exception_trap(uint vector);
-INLINE void m68ki_exception_trapN(uint vector);
-INLINE void m68ki_exception_trace(void);
-INLINE void m68ki_exception_privilege_violation(void);
-INLINE void m68ki_exception_1010(void);
-INLINE void m68ki_exception_1111(void);
-INLINE void m68ki_exception_illegal(void);
-INLINE void m68ki_exception_format_error(void);
-INLINE void m68ki_exception_address_error(void);
-INLINE void m68ki_exception_interrupt(uint int_level);
-INLINE void m68ki_check_interrupts(void);            /* ASG: check for interrupts */
+/* Forward declarations to keep some of the macros happy */
+static inline uint m68ki_read_16_fc (uint address, uint fc);
+static inline uint m68ki_read_32_fc (uint address, uint fc);
+static inline uint m68ki_get_ea_ix(uint An);
+static inline void m68ki_check_interrupts(void);            /* ASG: check for interrupts */
 
 /* quick disassembly (used for logging) */
 char* m68ki_disassemble_quick(unsigned int pc, unsigned int cpu_type);
@@ -996,59 +1034,93 @@ char* m68ki_disassemble_quick(unsigned int pc, unsigned int cpu_type);
 
 /* ---------------------------- Read Immediate ---------------------------- */
 
+extern uint pmmu_translate_addr(uint addr_in);
+
 /* Handles all immediate reads, does address error check, function code setting,
  * and prefetching if they are enabled in m68kconf.h
  */
-INLINE uint m68ki_read_imm_16(void)
+static inline uint m68ki_read_imm_16(void)
 {
 	m68ki_set_fc(FLAG_S | FUNCTION_CODE_USER_PROGRAM); /* auto-disable (see m68kcpu.h) */
 	m68ki_check_address_error(REG_PC, MODE_READ, FLAG_S | FUNCTION_CODE_USER_PROGRAM); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_SEPARATE_READS
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+#endif
+
 #if M68K_EMULATE_PREFETCH
-	if(MASK_OUT_BELOW_2(REG_PC) != CPU_PREF_ADDR)
+{
+	uint result;
+	if(REG_PC != CPU_PREF_ADDR)
 	{
-		CPU_PREF_ADDR = MASK_OUT_BELOW_2(REG_PC);
-		CPU_PREF_DATA = m68k_read_immediate_32(ADDRESS_68K(CPU_PREF_ADDR));
+		CPU_PREF_ADDR = REG_PC;
+		CPU_PREF_DATA = m68k_read_immediate_16(ADDRESS_68K(CPU_PREF_ADDR));
 	}
+	result = MASK_OUT_ABOVE_16(CPU_PREF_DATA);
 	REG_PC += 2;
-	return MASK_OUT_ABOVE_16(CPU_PREF_DATA >> ((2-((REG_PC-2)&2))<<3));
+	CPU_PREF_ADDR = REG_PC;
+	CPU_PREF_DATA = m68k_read_immediate_16(ADDRESS_68K(CPU_PREF_ADDR));
+	return result;
+}
 #else
 	REG_PC += 2;
 	return m68k_read_immediate_16(ADDRESS_68K(REG_PC-2));
 #endif /* M68K_EMULATE_PREFETCH */
 }
-INLINE uint m68ki_read_imm_32(void)
+
+static inline uint m68ki_read_imm_8(void)
 {
+	/* map read immediate 8 to read immediate 16 */
+	return MASK_OUT_ABOVE_8(m68ki_read_imm_16());
+}
+
+static inline uint m68ki_read_imm_32(void)
+{
+#if M68K_SEPARATE_READS
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+#endif
+
 #if M68K_EMULATE_PREFETCH
 	uint temp_val;
 
+        printf("(%s) now at line %d\n", __FILE__, __LINE__);
+
 	m68ki_set_fc(FLAG_S | FUNCTION_CODE_USER_PROGRAM); /* auto-disable (see m68kcpu.h) */
 	m68ki_check_address_error(REG_PC, MODE_READ, FLAG_S | FUNCTION_CODE_USER_PROGRAM); /* auto-disable (see m68kcpu.h) */
-	if(MASK_OUT_BELOW_2(REG_PC) != CPU_PREF_ADDR)
+
+        printf("(%s) now at line %d\n", __FILE__, __LINE__);
+
+        if(REG_PC != CPU_PREF_ADDR)
 	{
-		CPU_PREF_ADDR = MASK_OUT_BELOW_2(REG_PC);
-		CPU_PREF_DATA = m68k_read_immediate_32(ADDRESS_68K(CPU_PREF_ADDR));
+		CPU_PREF_ADDR = REG_PC;
+		CPU_PREF_DATA = m68k_read_immediate_16(ADDRESS_68K(CPU_PREF_ADDR));
 	}
-	temp_val = CPU_PREF_DATA;
+	temp_val = MASK_OUT_ABOVE_16(CPU_PREF_DATA);
 	REG_PC += 2;
-	if(MASK_OUT_BELOW_2(REG_PC) != CPU_PREF_ADDR)
-	{
-		CPU_PREF_ADDR = MASK_OUT_BELOW_2(REG_PC);
-		CPU_PREF_DATA = m68k_read_immediate_32(ADDRESS_68K(CPU_PREF_ADDR));
-		temp_val = MASK_OUT_ABOVE_32((temp_val << 16) | (CPU_PREF_DATA >> 16));
-	}
+	CPU_PREF_ADDR = REG_PC;
+	CPU_PREF_DATA = m68k_read_immediate_16(ADDRESS_68K(CPU_PREF_ADDR));
+
+	temp_val = MASK_OUT_ABOVE_32((temp_val << 16) | MASK_OUT_ABOVE_16(CPU_PREF_DATA));
 	REG_PC += 2;
+	CPU_PREF_ADDR = REG_PC;
+	CPU_PREF_DATA = m68k_read_immediate_16(ADDRESS_68K(CPU_PREF_ADDR));
 
 	return temp_val;
 #else
 	m68ki_set_fc(FLAG_S | FUNCTION_CODE_USER_PROGRAM); /* auto-disable (see m68kcpu.h) */
 	m68ki_check_address_error(REG_PC, MODE_READ, FLAG_S | FUNCTION_CODE_USER_PROGRAM); /* auto-disable (see m68kcpu.h) */
 	REG_PC += 4;
-	return m68k_read_immediate_32(ADDRESS_68K(REG_PC-4));
+        //printf("(%s) now at line %d\n", __FILE__, __LINE__);
+        return m68k_read_immediate_32(ADDRESS_68K(REG_PC-4));
 #endif /* M68K_EMULATE_PREFETCH */
 }
 
-
-
 /* ------------------------- Top level read/write ------------------------- */
 
 /* Handles all memory accesses (except for immediate reads if they are
@@ -1057,58 +1129,106 @@ INLINE uint m68ki_read_imm_32(void)
  * These functions will also check for address error and set the function
  * code if they are enabled in m68kconf.h.
  */
-INLINE uint m68ki_read_8_fc(uint address, uint fc)
+static inline uint m68ki_read_8_fc(uint address, uint fc)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	return m68k_read_memory_8(ADDRESS_68K(address));
 }
-INLINE uint m68ki_read_16_fc(uint address, uint fc)
+static inline uint m68ki_read_16_fc(uint address, uint fc)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
-	m68ki_check_address_error(address, MODE_READ, fc); /* auto-disable (see m68kcpu.h) */
+	m68ki_check_address_error_010_less(address, MODE_READ, fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	return m68k_read_memory_16(ADDRESS_68K(address));
 }
-INLINE uint m68ki_read_32_fc(uint address, uint fc)
+static inline uint m68ki_read_32_fc(uint address, uint fc)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
-	m68ki_check_address_error(address, MODE_READ, fc); /* auto-disable (see m68kcpu.h) */
+	m68ki_check_address_error_010_less(address, MODE_READ, fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	return m68k_read_memory_32(ADDRESS_68K(address));
 }
 
-INLINE void m68ki_write_8_fc(uint address, uint fc, uint value)
+static inline void m68ki_write_8_fc(uint address, uint fc, uint value)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	m68k_write_memory_8(ADDRESS_68K(address), value);
 }
-INLINE void m68ki_write_16_fc(uint address, uint fc, uint value)
+static inline void m68ki_write_16_fc(uint address, uint fc, uint value)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
-	m68ki_check_address_error(address, MODE_WRITE, fc); /* auto-disable (see m68kcpu.h) */
+	m68ki_check_address_error_010_less(address, MODE_WRITE, fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	m68k_write_memory_16(ADDRESS_68K(address), value);
 }
-INLINE void m68ki_write_32_fc(uint address, uint fc, uint value)
+static inline void m68ki_write_32_fc(uint address, uint fc, uint value)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
-	m68ki_check_address_error(address, MODE_WRITE, fc); /* auto-disable (see m68kcpu.h) */
+	m68ki_check_address_error_010_less(address, MODE_WRITE, fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	m68k_write_memory_32(ADDRESS_68K(address), value);
 }
 
 #if M68K_SIMULATE_PD_WRITES
-INLINE void m68ki_write_32_pd_fc(uint address, uint fc, uint value)
+static inline void m68ki_write_32_pd_fc(uint address, uint fc, uint value)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
-	m68ki_check_address_error(address, MODE_WRITE, fc); /* auto-disable (see m68kcpu.h) */
+	m68ki_check_address_error_010_less(address, MODE_WRITE, fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	m68k_write_memory_32_pd(ADDRESS_68K(address), value);
 }
 #endif
 
-
 /* --------------------- Effective Address Calculation -------------------- */
 
 /* The program counter relative addressing modes cause operands to be
  * retrieved from program space, not data space.
  */
-INLINE uint m68ki_get_ea_pcdi(void)
+static inline uint m68ki_get_ea_pcdi(void)
 {
 	uint old_pc = REG_PC;
 	m68ki_use_program_space(); /* auto-disable */
@@ -1116,7 +1236,7 @@ INLINE uint m68ki_get_ea_pcdi(void)
 }
 
 
-INLINE uint m68ki_get_ea_pcix(void)
+static inline uint m68ki_get_ea_pcix(void)
 {
 	m68ki_use_program_space(); /* auto-disable */
 	return m68ki_get_ea_ix(REG_PC);
@@ -1164,7 +1284,7 @@ INLINE uint m68ki_get_ea_pcix(void)
  * 1  011  mem indir with long outer
  * 1  100-111  reserved
  */
-INLINE uint m68ki_get_ea_ix(uint An)
+static inline uint m68ki_get_ea_ix(uint An)
 {
 	/* An = base register */
 	uint extension = m68ki_read_imm_16();
@@ -1217,7 +1337,7 @@ INLINE uint m68ki_get_ea_ix(uint An)
 
 	/* Check if base displacement is present */
 	if(BIT_5(extension))                /* BD SIZE */
-		bd = BIT_4(extension) ? m68ki_read_imm_32() : MAKE_INT_16(m68ki_read_imm_16());
+		bd = BIT_4(extension) ? m68ki_read_imm_32() : (uint32)MAKE_INT_16(m68ki_read_imm_16());
 
 	/* If no indirect action, we are done */
 	if(!(extension&7))                  /* No Memory Indirect */
@@ -1225,7 +1345,7 @@ INLINE uint m68ki_get_ea_ix(uint An)
 
 	/* Check if outer displacement is present */
 	if(BIT_1(extension))                /* I/IS:  od */
-		od = BIT_0(extension) ? m68ki_read_imm_32() : MAKE_INT_16(m68ki_read_imm_16());
+		od = BIT_0(extension) ? m68ki_read_imm_32() : (uint32)MAKE_INT_16(m68ki_read_imm_16());
 
 	/* Postindex */
 	if(BIT_2(extension))                /* I/IS:  0 = preindex, 1 = postindex */
@@ -1237,78 +1357,78 @@ INLINE uint m68ki_get_ea_ix(uint An)
 
 
 /* Fetch operands */
-INLINE uint OPER_AY_AI_8(void)  {uint ea = EA_AY_AI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AY_AI_16(void) {uint ea = EA_AY_AI_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AY_AI_32(void) {uint ea = EA_AY_AI_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AY_PI_8(void)  {uint ea = EA_AY_PI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AY_PI_16(void) {uint ea = EA_AY_PI_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AY_PI_32(void) {uint ea = EA_AY_PI_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AY_PD_8(void)  {uint ea = EA_AY_PD_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AY_PD_16(void) {uint ea = EA_AY_PD_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AY_PD_32(void) {uint ea = EA_AY_PD_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AY_DI_8(void)  {uint ea = EA_AY_DI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AY_DI_16(void) {uint ea = EA_AY_DI_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AY_DI_32(void) {uint ea = EA_AY_DI_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AY_IX_8(void)  {uint ea = EA_AY_IX_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AY_IX_16(void) {uint ea = EA_AY_IX_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AY_IX_32(void) {uint ea = EA_AY_IX_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AY_AI_8(void)  {uint ea = EA_AY_AI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AY_AI_16(void) {uint ea = EA_AY_AI_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AY_AI_32(void) {uint ea = EA_AY_AI_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AY_PI_8(void)  {uint ea = EA_AY_PI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AY_PI_16(void) {uint ea = EA_AY_PI_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AY_PI_32(void) {uint ea = EA_AY_PI_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AY_PD_8(void)  {uint ea = EA_AY_PD_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AY_PD_16(void) {uint ea = EA_AY_PD_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AY_PD_32(void) {uint ea = EA_AY_PD_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AY_DI_8(void)  {uint ea = EA_AY_DI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AY_DI_16(void) {uint ea = EA_AY_DI_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AY_DI_32(void) {uint ea = EA_AY_DI_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AY_IX_8(void)  {uint ea = EA_AY_IX_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AY_IX_16(void) {uint ea = EA_AY_IX_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AY_IX_32(void) {uint ea = EA_AY_IX_32(); return m68ki_read_32(ea);}
 
-INLINE uint OPER_AX_AI_8(void)  {uint ea = EA_AX_AI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AX_AI_16(void) {uint ea = EA_AX_AI_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AX_AI_32(void) {uint ea = EA_AX_AI_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AX_PI_8(void)  {uint ea = EA_AX_PI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AX_PI_16(void) {uint ea = EA_AX_PI_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AX_PI_32(void) {uint ea = EA_AX_PI_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AX_PD_8(void)  {uint ea = EA_AX_PD_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AX_PD_16(void) {uint ea = EA_AX_PD_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AX_PD_32(void) {uint ea = EA_AX_PD_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AX_DI_8(void)  {uint ea = EA_AX_DI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AX_DI_16(void) {uint ea = EA_AX_DI_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AX_DI_32(void) {uint ea = EA_AX_DI_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AX_IX_8(void)  {uint ea = EA_AX_IX_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AX_IX_16(void) {uint ea = EA_AX_IX_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AX_IX_32(void) {uint ea = EA_AX_IX_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AX_AI_8(void)  {uint ea = EA_AX_AI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AX_AI_16(void) {uint ea = EA_AX_AI_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AX_AI_32(void) {uint ea = EA_AX_AI_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AX_PI_8(void)  {uint ea = EA_AX_PI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AX_PI_16(void) {uint ea = EA_AX_PI_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AX_PI_32(void) {uint ea = EA_AX_PI_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AX_PD_8(void)  {uint ea = EA_AX_PD_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AX_PD_16(void) {uint ea = EA_AX_PD_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AX_PD_32(void) {uint ea = EA_AX_PD_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AX_DI_8(void)  {uint ea = EA_AX_DI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AX_DI_16(void) {uint ea = EA_AX_DI_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AX_DI_32(void) {uint ea = EA_AX_DI_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AX_IX_8(void)  {uint ea = EA_AX_IX_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AX_IX_16(void) {uint ea = EA_AX_IX_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AX_IX_32(void) {uint ea = EA_AX_IX_32(); return m68ki_read_32(ea);}
 
-INLINE uint OPER_A7_PI_8(void)  {uint ea = EA_A7_PI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_A7_PD_8(void)  {uint ea = EA_A7_PD_8();  return m68ki_read_8(ea); }
+static inline uint OPER_A7_PI_8(void)  {uint ea = EA_A7_PI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_A7_PD_8(void)  {uint ea = EA_A7_PD_8();  return m68ki_read_8(ea); }
 
-INLINE uint OPER_AW_8(void)     {uint ea = EA_AW_8();     return m68ki_read_8(ea); }
-INLINE uint OPER_AW_16(void)    {uint ea = EA_AW_16();    return m68ki_read_16(ea);}
-INLINE uint OPER_AW_32(void)    {uint ea = EA_AW_32();    return m68ki_read_32(ea);}
-INLINE uint OPER_AL_8(void)     {uint ea = EA_AL_8();     return m68ki_read_8(ea); }
-INLINE uint OPER_AL_16(void)    {uint ea = EA_AL_16();    return m68ki_read_16(ea);}
-INLINE uint OPER_AL_32(void)    {uint ea = EA_AL_32();    return m68ki_read_32(ea);}
-INLINE uint OPER_PCDI_8(void)   {uint ea = EA_PCDI_8();   return m68ki_read_pcrel_8(ea); }
-INLINE uint OPER_PCDI_16(void)  {uint ea = EA_PCDI_16();  return m68ki_read_pcrel_16(ea);}
-INLINE uint OPER_PCDI_32(void)  {uint ea = EA_PCDI_32();  return m68ki_read_pcrel_32(ea);}
-INLINE uint OPER_PCIX_8(void)   {uint ea = EA_PCIX_8();   return m68ki_read_pcrel_8(ea); }
-INLINE uint OPER_PCIX_16(void)  {uint ea = EA_PCIX_16();  return m68ki_read_pcrel_16(ea);}
-INLINE uint OPER_PCIX_32(void)  {uint ea = EA_PCIX_32();  return m68ki_read_pcrel_32(ea);}
+static inline uint OPER_AW_8(void)     {uint ea = EA_AW_8();     return m68ki_read_8(ea); }
+static inline uint OPER_AW_16(void)    {uint ea = EA_AW_16();    return m68ki_read_16(ea);}
+static inline uint OPER_AW_32(void)    {uint ea = EA_AW_32();    return m68ki_read_32(ea);}
+static inline uint OPER_AL_8(void)     {uint ea = EA_AL_8();     return m68ki_read_8(ea); }
+static inline uint OPER_AL_16(void)    {uint ea = EA_AL_16();    return m68ki_read_16(ea);}
+static inline uint OPER_AL_32(void)    {uint ea = EA_AL_32();    return m68ki_read_32(ea);}
+static inline uint OPER_PCDI_8(void)   {uint ea = EA_PCDI_8();   return m68ki_read_pcrel_8(ea); }
+static inline uint OPER_PCDI_16(void)  {uint ea = EA_PCDI_16();  return m68ki_read_pcrel_16(ea);}
+static inline uint OPER_PCDI_32(void)  {uint ea = EA_PCDI_32();  return m68ki_read_pcrel_32(ea);}
+static inline uint OPER_PCIX_8(void)   {uint ea = EA_PCIX_8();   return m68ki_read_pcrel_8(ea); }
+static inline uint OPER_PCIX_16(void)  {uint ea = EA_PCIX_16();  return m68ki_read_pcrel_16(ea);}
+static inline uint OPER_PCIX_32(void)  {uint ea = EA_PCIX_32();  return m68ki_read_pcrel_32(ea);}
 
 
 /* ---------------------------- Stack Functions --------------------------- */
 
 /* Push/pull data from the stack */
-INLINE void m68ki_push_16(uint value)
+static inline void m68ki_push_16(uint value)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP - 2);
 	m68ki_write_16(REG_SP, value);
 }
 
-INLINE void m68ki_push_32(uint value)
+static inline void m68ki_push_32(uint value)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP - 4);
 	m68ki_write_32(REG_SP, value);
 }
 
-INLINE uint m68ki_pull_16(void)
+static inline uint m68ki_pull_16(void)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP + 2);
 	return m68ki_read_16(REG_SP-2);
 }
 
-INLINE uint m68ki_pull_32(void)
+static inline uint m68ki_pull_32(void)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP + 4);
 	return m68ki_read_32(REG_SP-4);
@@ -1318,22 +1438,22 @@ INLINE uint m68ki_pull_32(void)
 /* Increment/decrement the stack as if doing a push/pull but
  * don't do any memory access.
  */
-INLINE void m68ki_fake_push_16(void)
+static inline void m68ki_fake_push_16(void)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP - 2);
 }
 
-INLINE void m68ki_fake_push_32(void)
+static inline void m68ki_fake_push_32(void)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP - 4);
 }
 
-INLINE void m68ki_fake_pull_16(void)
+static inline void m68ki_fake_pull_16(void)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP + 2);
 }
 
-INLINE void m68ki_fake_pull_32(void)
+static inline void m68ki_fake_pull_32(void)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP + 4);
 }
@@ -1345,13 +1465,13 @@ INLINE void m68ki_fake_pull_32(void)
  * These functions will also call the pc_changed callback if it was enabled
  * in m68kconf.h.
  */
-INLINE void m68ki_jump(uint new_pc)
+static inline void m68ki_jump(uint new_pc)
 {
 	REG_PC = new_pc;
 	m68ki_pc_changed(REG_PC);
 }
 
-INLINE void m68ki_jump_vector(uint vector)
+static inline void m68ki_jump_vector(uint vector)
 {
 	REG_PC = (vector<<2) + REG_VBR;
 	REG_PC = m68ki_read_data_32(REG_PC);
@@ -1364,30 +1484,28 @@ INLINE void m68ki_jump_vector(uint vector)
  * So far I've found no problems with not calling pc_changed for 8 or 16
  * bit branches.
  */
-INLINE void m68ki_branch_8(uint offset)
+static inline void m68ki_branch_8(uint offset)
 {
 	REG_PC += MAKE_INT_8(offset);
 }
 
-INLINE void m68ki_branch_16(uint offset)
+static inline void m68ki_branch_16(uint offset)
 {
 	REG_PC += MAKE_INT_16(offset);
 }
 
-INLINE void m68ki_branch_32(uint offset)
+static inline void m68ki_branch_32(uint offset)
 {
 	REG_PC += offset;
 	m68ki_pc_changed(REG_PC);
 }
 
-
-
 /* ---------------------------- Status Register --------------------------- */
 
 /* Set the S flag and change the active stack pointer.
  * Note that value MUST be 4 or 0.
  */
-INLINE void m68ki_set_s_flag(uint value)
+static inline void m68ki_set_s_flag(uint value)
 {
 	/* Backup the old stack pointer */
 	REG_SP_BASE[FLAG_S | ((FLAG_S>>1) & FLAG_M)] = REG_SP;
@@ -1400,7 +1518,7 @@ INLINE void m68ki_set_s_flag(uint value)
 /* Set the S and M flags and change the active stack pointer.
  * Note that value MUST be 0, 2, 4, or 6 (bit2 = S, bit1 = M).
  */
-INLINE void m68ki_set_sm_flag(uint value)
+static inline void m68ki_set_sm_flag(uint value)
 {
 	/* Backup the old stack pointer */
 	REG_SP_BASE[FLAG_S | ((FLAG_S>>1) & FLAG_M)] = REG_SP;
@@ -1412,7 +1530,7 @@ INLINE void m68ki_set_sm_flag(uint value)
 }
 
 /* Set the S and M flags.  Don't touch the stack pointer. */
-INLINE void m68ki_set_sm_flag_nosp(uint value)
+static inline void m68ki_set_sm_flag_nosp(uint value)
 {
 	/* Set the S and M flags */
 	FLAG_S = value & SFLAG_SET;
@@ -1421,7 +1539,7 @@ INLINE void m68ki_set_sm_flag_nosp(uint value)
 
 
 /* Set the condition code register */
-INLINE void m68ki_set_ccr(uint value)
+static inline void m68ki_set_ccr(uint value)
 {
 	FLAG_X = BIT_4(value)  << 4;
 	FLAG_N = BIT_3(value)  << 4;
@@ -1431,7 +1549,7 @@ INLINE void m68ki_set_ccr(uint value)
 }
 
 /* Set the status register but don't check for interrupts */
-INLINE void m68ki_set_sr_noint(uint value)
+static inline void m68ki_set_sr_noint(uint value)
 {
 	/* Mask out the "unimplemented" bits */
 	value &= CPU_SR_MASK;
@@ -1447,7 +1565,7 @@ INLINE void m68ki_set_sr_noint(uint value)
 /* Set the status register but don't check for interrupts nor
  * change the stack pointer
  */
-INLINE void m68ki_set_sr_noint_nosp(uint value)
+static inline void m68ki_set_sr_noint_nosp(uint value)
 {
 	/* Mask out the "unimplemented" bits */
 	value &= CPU_SR_MASK;
@@ -1461,7 +1579,7 @@ INLINE void m68ki_set_sr_noint_nosp(uint value)
 }
 
 /* Set the status register and check for interrupts */
-INLINE void m68ki_set_sr(uint value)
+static inline void m68ki_set_sr(uint value)
 {
 	m68ki_set_sr_noint(value);
 	m68ki_check_interrupts();
@@ -1471,7 +1589,7 @@ INLINE void m68ki_set_sr(uint value)
 /* ------------------------- Exception Processing ------------------------- */
 
 /* Initiate exception processing */
-INLINE uint m68ki_init_exception(void)
+static inline uint m68ki_init_exception(void)
 {
 	/* Save the old status register */
 	uint sr = m68ki_get_sr();
@@ -1486,7 +1604,7 @@ INLINE uint m68ki_init_exception(void)
 }
 
 /* 3 word stack frame (68000 only) */
-INLINE void m68ki_stack_frame_3word(uint pc, uint sr)
+static inline void m68ki_stack_frame_3word(uint pc, uint sr)
 {
 	m68ki_push_32(pc);
 	m68ki_push_16(sr);
@@ -1495,7 +1613,7 @@ INLINE void m68ki_stack_frame_3word(uint pc, uint sr)
 /* Format 0 stack frame.
  * This is the standard stack frame for 68010+.
  */
-INLINE void m68ki_stack_frame_0000(uint pc, uint sr, uint vector)
+static inline void m68ki_stack_frame_0000(uint pc, uint sr, uint vector)
 {
 	/* Stack a 3-word frame if we are 68000 */
 	if(CPU_TYPE == CPU_TYPE_000)
@@ -1511,7 +1629,7 @@ INLINE void m68ki_stack_frame_0000(uint pc, uint sr, uint vector)
 /* Format 1 stack frame (68020).
  * For 68020, this is the 4 word throwaway frame.
  */
-INLINE void m68ki_stack_frame_0001(uint pc, uint sr, uint vector)
+static inline void m68ki_stack_frame_0001(uint pc, uint sr, uint vector)
 {
 	m68ki_push_16(0x1000 | (vector<<2));
 	m68ki_push_32(pc);
@@ -1521,7 +1639,7 @@ INLINE void m68ki_stack_frame_0001(uint pc, uint sr, uint vector)
 /* Format 2 stack frame.
  * This is used only by 68020 for trap exceptions.
  */
-INLINE void m68ki_stack_frame_0010(uint sr, uint vector)
+static inline void m68ki_stack_frame_0010(uint sr, uint vector)
 {
 	m68ki_push_32(REG_PPC);
 	m68ki_push_16(0x2000 | (vector<<2));
@@ -1532,7 +1650,7 @@ INLINE void m68ki_stack_frame_0010(uint sr, uint vector)
 
 /* Bus error stack frame (68000 only).
  */
-INLINE void m68ki_stack_frame_buserr(uint sr)
+static inline void m68ki_stack_frame_buserr(uint sr)
 {
 	m68ki_push_32(REG_PC);
 	m68ki_push_16(sr);
@@ -1549,7 +1667,7 @@ INLINE void m68ki_stack_frame_buserr(uint sr)
 /* Format 8 stack frame (68010).
  * 68010 only.  This is the 29 word bus/address error frame.
  */
-void m68ki_stack_frame_1000(uint pc, uint sr, uint vector)
+static inline void m68ki_stack_frame_1000(uint pc, uint sr, uint vector)
 {
 	/* VERSION
 	 * NUMBER
@@ -1603,7 +1721,7 @@ void m68ki_stack_frame_1000(uint pc, uint sr, uint vector)
  * if the error happens at an instruction boundary.
  * PC stacked is address of next instruction.
  */
-void m68ki_stack_frame_1010(uint sr, uint vector, uint pc)
+static inline void m68ki_stack_frame_1010(uint sr, uint vector, uint pc)
 {
 	/* INTERNAL REGISTER */
 	m68ki_push_16(0);
@@ -1650,7 +1768,7 @@ void m68ki_stack_frame_1010(uint sr, uint vector, uint pc)
  * if the error happens during instruction execution.
  * PC stacked is address of instruction in progress.
  */
-void m68ki_stack_frame_1011(uint sr, uint vector, uint pc)
+static inline void m68ki_stack_frame_1011(uint sr, uint vector, uint pc)
 {
 	/* INTERNAL REGISTERS (18 words) */
 	m68ki_push_32(0);
@@ -1721,7 +1839,7 @@ void m68ki_stack_frame_1011(uint sr, uint vector, uint pc)
 /* Used for Group 2 exceptions.
  * These stack a type 2 frame on the 020.
  */
-INLINE void m68ki_exception_trap(uint vector)
+static inline void m68ki_exception_trap(uint vector)
 {
 	uint sr = m68ki_init_exception();
 
@@ -1737,7 +1855,7 @@ INLINE void m68ki_exception_trap(uint vector)
 }
 
 /* Trap#n stacks a 0 frame but behaves like group2 otherwise */
-INLINE void m68ki_exception_trapN(uint vector)
+static inline void m68ki_exception_trapN(uint vector)
 {
 	uint sr = m68ki_init_exception();
 	m68ki_stack_frame_0000(REG_PC, sr, vector);
@@ -1748,7 +1866,7 @@ INLINE void m68ki_exception_trapN(uint vector)
 }
 
 /* Exception for trace mode */
-INLINE void m68ki_exception_trace(void)
+static inline void m68ki_exception_trace(void)
 {
 	uint sr = m68ki_init_exception();
 
@@ -1775,7 +1893,7 @@ INLINE void m68ki_exception_trace(void)
 }
 
 /* Exception for privilege violation */
-INLINE void m68ki_exception_privilege_violation(void)
+static inline void m68ki_exception_privilege_violation(void)
 {
 	uint sr = m68ki_init_exception();
 
@@ -1793,8 +1911,45 @@ INLINE void m68ki_exception_privilege_violation(void)
 	USE_CYCLES(CYC_EXCEPTION[EXCEPTION_PRIVILEGE_VIOLATION] - CYC_INSTRUCTION[REG_IR]);
 }
 
+extern jmp_buf m68ki_bus_error_jmp_buf;
+
+#define m68ki_check_bus_error_trap() setjmp(m68ki_bus_error_jmp_buf)
+
+/* Exception for bus error */
+static inline void m68ki_exception_bus_error(void)
+{
+	int i;
+
+	/* If we were processing a bus error, address error, or reset,
+	 * this is a catastrophic failure.
+	 * Halt the CPU
+	 */
+	if(CPU_RUN_MODE == RUN_MODE_BERR_AERR_RESET)
+	{
+m68k_read_memory_8(0x00ffff01);
+		CPU_STOPPED = STOP_LEVEL_HALT;
+		return;
+	}
+	CPU_RUN_MODE = RUN_MODE_BERR_AERR_RESET;
+
+	/* Use up some clock cycles and undo the instruction's cycles */
+	USE_CYCLES(CYC_EXCEPTION[EXCEPTION_BUS_ERROR] - CYC_INSTRUCTION[REG_IR]);
+
+	for (i = 15; i >= 0; i--){
+		REG_DA[i] = REG_DA_SAVE[i];
+	}
+
+	uint sr = m68ki_init_exception();
+	m68ki_stack_frame_1000(REG_PPC, sr, EXCEPTION_BUS_ERROR);
+
+	m68ki_jump_vector(EXCEPTION_BUS_ERROR);
+	longjmp(m68ki_bus_error_jmp_buf, 1);
+}
+
+extern int cpu_log_enabled;
+
 /* Exception for A-Line instructions */
-INLINE void m68ki_exception_1010(void)
+static inline void m68ki_exception_1010(void)
 {
 	uint sr;
 #if M68K_LOG_1010_1111 == OPT_ON
@@ -1812,7 +1967,7 @@ INLINE void m68ki_exception_1010(void)
 }
 
 /* Exception for F-Line instructions */
-INLINE void m68ki_exception_1111(void)
+static inline void m68ki_exception_1111(void)
 {
 	uint sr;
 
@@ -1830,14 +1985,20 @@ INLINE void m68ki_exception_1111(void)
 	USE_CYCLES(CYC_EXCEPTION[EXCEPTION_1111] - CYC_INSTRUCTION[REG_IR]);
 }
 
+#if M68K_ILLG_HAS_CALLBACK == OPT_SPECIFY_HANDLER
+extern int m68ki_illg_callback(int);
+#endif
+
 /* Exception for illegal instructions */
-INLINE void m68ki_exception_illegal(void)
+static inline void m68ki_exception_illegal(void)
 {
 	uint sr;
 
 	M68K_DO_LOG((M68K_LOG_FILEHANDLE "%s at %08x: illegal instruction %04x (%s)\n",
 				 m68ki_cpu_names[CPU_TYPE], ADDRESS_68K(REG_PPC), REG_IR,
 				 m68ki_disassemble_quick(ADDRESS_68K(REG_PPC))));
+	if (m68ki_illg_callback(REG_IR))
+	    return;
 
 	sr = m68ki_init_exception();
 
@@ -1856,7 +2017,7 @@ INLINE void m68ki_exception_illegal(void)
 }
 
 /* Exception for format errror in RTE */
-INLINE void m68ki_exception_format_error(void)
+static inline void m68ki_exception_format_error(void)
 {
 	uint sr = m68ki_init_exception();
 	m68ki_stack_frame_0000(REG_PC, sr, EXCEPTION_FORMAT_ERROR);
@@ -1867,7 +2028,7 @@ INLINE void m68ki_exception_format_error(void)
 }
 
 /* Exception for address error */
-INLINE void m68ki_exception_address_error(void)
+static inline void m68ki_exception_address_error(void)
 {
 	uint sr = m68ki_init_exception();
 
@@ -1888,16 +2049,16 @@ m68k_read_memory_8(0x00ffff01);
 
 	m68ki_jump_vector(EXCEPTION_ADDRESS_ERROR);
 
-	/* Use up some clock cycles. Note that we don't need to undo the 
+	/* Use up some clock cycles. Note that we don't need to undo the
 	instruction's cycles here as we've longjmp:ed directly from the
 	instruction handler without passing the part of the excecute loop
 	that deducts instruction cycles */
-	USE_CYCLES(CYC_EXCEPTION[EXCEPTION_ADDRESS_ERROR]); 
+	USE_CYCLES(CYC_EXCEPTION[EXCEPTION_ADDRESS_ERROR]);
 }
 
 
 /* Service an interrupt request and start exception processing */
-void m68ki_exception_interrupt(uint int_level)
+static inline void m68ki_exception_interrupt(uint int_level)
 {
 	uint vector;
 	uint sr;
@@ -1960,7 +2121,7 @@ void m68ki_exception_interrupt(uint int_level)
 	m68ki_jump(new_pc);
 
 	/* Defer cycle counting until later */
-	CPU_INT_CYCLES += CYC_EXCEPTION[vector];
+	USE_CYCLES(CYC_EXCEPTION[vector]);
 
 #if !M68K_EMULATE_INT_ACK
 	/* Automatically clear IRQ if we are not using an acknowledge scheme */
@@ -1970,9 +2131,14 @@ void m68ki_exception_interrupt(uint int_level)
 
 
 /* ASG: Check for interrupts */
-INLINE void m68ki_check_interrupts(void)
+static inline void m68ki_check_interrupts(void)
 {
-	if(CPU_INT_LEVEL > FLAG_INT_MASK)
+	if(m68ki_cpu.nmi_pending)
+	{
+		m68ki_cpu.nmi_pending = FALSE;
+		m68ki_exception_interrupt(7);
+	}
+	else if(CPU_INT_LEVEL > FLAG_INT_MASK)
 		m68ki_exception_interrupt(CPU_INT_LEVEL>>8);
 }
 
@@ -1982,4 +2148,8 @@ INLINE void m68ki_check_interrupts(void)
 /* ============================== END OF FILE ============================= */
 /* ======================================================================== */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* M68KCPU__HEADER */
diff --git a/plat/linux68k/emu/musashi/m68kdasm.c b/plat/linux68k/emu/musashi/m68kdasm.c
index 146da471a..fbbde31c0 100755
--- a/plat/linux68k/emu/musashi/m68kdasm.c
+++ b/plat/linux68k/emu/musashi/m68kdasm.c
@@ -3,10 +3,10 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 3.32
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -38,6 +38,14 @@
 #include <string.h>
 #include "m68k.h"
 
+#ifndef uint32
+#define uint32 uint
+#endif
+
+#ifndef uint16
+#define uint16 unsigned short
+#endif
+
 #ifndef DECL_SPEC
 #define DECL_SPEC
 #endif
@@ -129,6 +137,15 @@
 #define EXT_OUTER_DISPLACEMENT_LONG(A)    (((A)&3) == 3 && ((A)&0x47) < 0x44)
 
 
+/* Opcode flags */
+#if M68K_COMPILE_FOR_MAME == OPT_ON
+#define SET_OPCODE_FLAGS(x)	g_opcode_type = x;
+#define COMBINE_OPCODE_FLAGS(x) ((x) | g_opcode_type | DASMFLAG_SUPPORTED)
+#else
+#define SET_OPCODE_FLAGS(x)
+#define COMBINE_OPCODE_FLAGS(X) (X)
+#endif
+
 
 /* ======================================================================== */
 /* =============================== PROTOTYPES ============================= */
@@ -147,6 +164,7 @@ uint  peek_imm_32(void);
 /* make signed integers 100% portably */
 static int make_int_8(int value);
 static int make_int_16(int value);
+static int make_int_32(int value);
 
 /* make a string of a hex value */
 static char* make_signed_hex_str_8(uint val);
@@ -201,20 +219,23 @@ static char g_helper_str[100]; /* string to hold helpful info */
 static uint g_cpu_pc;        /* program counter */
 static uint g_cpu_ir;        /* instruction register */
 static uint g_cpu_type;
+static uint g_opcode_type;
+static const unsigned char* g_rawop;
+static uint g_rawbasepc;
 
 /* used by ops like asr, ror, addq, etc */
-static uint g_3bit_qdata_table[8] = {8, 1, 2, 3, 4, 5, 6, 7};
+static const uint g_3bit_qdata_table[8] = {8, 1, 2, 3, 4, 5, 6, 7};
 
-static uint g_5bit_data_table[32] =
+static const uint g_5bit_data_table[32] =
 {
 	32,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
 };
 
-static char* g_cc[16] =
+static const char *const g_cc[16] =
 {"t", "f", "hi", "ls", "cc", "cs", "ne", "eq", "vc", "vs", "pl", "mi", "ge", "lt", "gt", "le"};
 
-static char* g_cpcc[64] =
+static const char *const g_cpcc[64] =
 {/* 000    001    010    011    100    101    110    111 */
 	  "f",  "eq", "ogt", "oge", "olt", "ole", "ogl",  "or", /* 000 */
 	 "un", "ueq", "ugt", "uge", "ult", "ule",  "ne",   "t", /* 001 */
@@ -226,6 +247,16 @@ static char* g_cpcc[64] =
 	  "?",   "?",   "?",   "?",   "?",   "?",   "?",   "?"  /* 111 */
 };
 
+static const char *const g_mmuregs[8] =
+{
+	"tc", "drp", "srp", "crp", "cal", "val", "sccr", "acr"
+};
+
+static const char *const g_mmucond[16] =
+{
+	"bs", "bc", "ls", "lc", "ss", "sc", "as", "ac",
+	"ws", "wc", "is", "ic", "gs", "gc", "cs", "cc"
+};
 
 /* ======================================================================== */
 /* =========================== UTILITY FUNCTIONS ========================== */
@@ -234,17 +265,56 @@ static char* g_cpcc[64] =
 #define LIMIT_CPU_TYPES(ALLOWED_CPU_TYPES)	\
 	if(!(g_cpu_type & ALLOWED_CPU_TYPES))	\
 	{										\
-		d68000_illegal();					\
+		if((g_cpu_ir & 0xf000) == 0xf000)	\
+			d68000_1111();					\
+		else d68000_illegal();				\
 		return;								\
 	}
 
-#define read_imm_8()  (m68k_read_disassembler_16(((g_cpu_pc+=2)-2)&g_address_mask)&0xff)
-#define read_imm_16() m68k_read_disassembler_16(((g_cpu_pc+=2)-2)&g_address_mask)
-#define read_imm_32() m68k_read_disassembler_32(((g_cpu_pc+=4)-4)&g_address_mask)
+static uint dasm_read_imm_8(uint advance)
+{
+	uint result;
+	if (g_rawop)
+		result = g_rawop[g_cpu_pc + 1 - g_rawbasepc];
+	else
+		result = m68k_read_disassembler_16(g_cpu_pc & g_address_mask) & 0xff;
+	g_cpu_pc += advance;
+	return result;
+}
 
-#define peek_imm_8()  (m68k_read_disassembler_16(g_cpu_pc & g_address_mask)&0xff)
-#define peek_imm_16() m68k_read_disassembler_16(g_cpu_pc & g_address_mask)
-#define peek_imm_32() m68k_read_disassembler_32(g_cpu_pc & g_address_mask)
+static uint dasm_read_imm_16(uint advance)
+{
+	uint result;
+	if (g_rawop)
+		result = (g_rawop[g_cpu_pc + 0 - g_rawbasepc] << 8) |
+		          g_rawop[g_cpu_pc + 1 - g_rawbasepc];
+	else
+		result = m68k_read_disassembler_16(g_cpu_pc & g_address_mask) & 0xffff;
+	g_cpu_pc += advance;
+	return result;
+}
+
+static uint dasm_read_imm_32(uint advance)
+{
+	uint result;
+	if (g_rawop)
+		result = (g_rawop[g_cpu_pc + 0 - g_rawbasepc] << 24) |
+		         (g_rawop[g_cpu_pc + 1 - g_rawbasepc] << 16) |
+		         (g_rawop[g_cpu_pc + 2 - g_rawbasepc] << 8) |
+		          g_rawop[g_cpu_pc + 3 - g_rawbasepc];
+	else
+		result = m68k_read_disassembler_32(g_cpu_pc & g_address_mask) & 0xffffffff;
+	g_cpu_pc += advance;
+	return result;
+}
+
+#define read_imm_8()  dasm_read_imm_8(2)
+#define read_imm_16() dasm_read_imm_16(2)
+#define read_imm_32() dasm_read_imm_32(4)
+
+#define peek_imm_8()  dasm_read_imm_8(0)
+#define peek_imm_16() dasm_read_imm_16(0)
+#define peek_imm_32() dasm_read_imm_32(0)
 
 /* Fake a split interface */
 #define get_ea_mode_str_8(instruction) get_ea_mode_str(instruction, 0)
@@ -259,6 +329,11 @@ static char* g_cpcc[64] =
 #define get_imm_str_u16() get_imm_str_u(1)
 #define get_imm_str_u32() get_imm_str_u(2)
 
+static int sext_7bit_int(int value)
+{
+	return (value & 0x40) ? (value | 0xffffff80) : (value & 0x7f);
+}
+
 
 /* 100% portable signed int generators */
 static int make_int_8(int value)
@@ -271,6 +346,10 @@ static int make_int_16(int value)
 	return (value & 0x8000) ? value | ~0xffff : value & 0xffff;
 }
 
+static int make_int_32(int value)
+{
+	return (value & 0x80000000) ? value | ~0xffffffff : value & 0xffffffff;
+}
 
 /* Get string representation of hex values */
 static char* make_signed_hex_str_8(uint val)
@@ -425,7 +504,14 @@ static char* get_ea_mode_str(uint instruction, uint size)
 					strcat(mode, "[");
 				if(base)
 				{
-					strcat(mode, make_signed_hex_str_16(base));
+					if (EXT_BASE_DISPLACEMENT_LONG(extension))
+					{
+						strcat(mode, make_signed_hex_str_32(base));
+					}
+					else
+					{
+						strcat(mode, make_signed_hex_str_16(base));
+					}
 					comma = 1;
 				}
 				if(*base_reg)
@@ -875,20 +961,20 @@ static void d68000_asl_ea(void)
 static void d68000_bcc_8(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "b%-2s     %x", g_cc[(g_cpu_ir>>8)&0xf], temp_pc + make_int_8(g_cpu_ir));
+	sprintf(g_dasm_str, "b%-2s     $%x", g_cc[(g_cpu_ir>>8)&0xf], temp_pc + make_int_8(g_cpu_ir));
 }
 
 static void d68000_bcc_16(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "b%-2s     %x", g_cc[(g_cpu_ir>>8)&0xf], temp_pc + make_int_16(read_imm_16()));
+	sprintf(g_dasm_str, "b%-2s     $%x", g_cc[(g_cpu_ir>>8)&0xf], temp_pc + make_int_16(read_imm_16()));
 }
 
 static void d68020_bcc_32(void)
 {
 	uint temp_pc = g_cpu_pc;
 	LIMIT_CPU_TYPES(M68020_PLUS);
-	sprintf(g_dasm_str, "b%-2s     %x; (2+)", g_cc[(g_cpu_ir>>8)&0xf], temp_pc + read_imm_32());
+	sprintf(g_dasm_str, "b%-2s     $%x; (2+)", g_cc[(g_cpu_ir>>8)&0xf], temp_pc + read_imm_32());
 }
 
 static void d68000_bchg_r(void)
@@ -1090,20 +1176,20 @@ static void d68020_bftst(void)
 static void d68000_bra_8(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "bra     %x", temp_pc + make_int_8(g_cpu_ir));
+	sprintf(g_dasm_str, "bra     $%x", temp_pc + make_int_8(g_cpu_ir));
 }
 
 static void d68000_bra_16(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "bra     %x", temp_pc + make_int_16(read_imm_16()));
+	sprintf(g_dasm_str, "bra     $%x", temp_pc + make_int_16(read_imm_16()));
 }
 
 static void d68020_bra_32(void)
 {
 	uint temp_pc = g_cpu_pc;
 	LIMIT_CPU_TYPES(M68020_PLUS);
-	sprintf(g_dasm_str, "bra     %x; (2+)", temp_pc + read_imm_32());
+	sprintf(g_dasm_str, "bra     $%x; (2+)", temp_pc + read_imm_32());
 }
 
 static void d68000_bset_r(void)
@@ -1120,20 +1206,23 @@ static void d68000_bset_s(void)
 static void d68000_bsr_8(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "bsr     %x", temp_pc + make_int_8(g_cpu_ir));
+	sprintf(g_dasm_str, "bsr     $%x", temp_pc + make_int_8(g_cpu_ir));
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_bsr_16(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "bsr     %x", temp_pc + make_int_16(read_imm_16()));
+	sprintf(g_dasm_str, "bsr     $%x", temp_pc + make_int_16(read_imm_16()));
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68020_bsr_32(void)
 {
 	uint temp_pc = g_cpu_pc;
 	LIMIT_CPU_TYPES(M68020_PLUS);
-	sprintf(g_dasm_str, "bsr     %x; (2+)", temp_pc + peek_imm_32());
+	sprintf(g_dasm_str, "bsr     $%x; (2+)", temp_pc + read_imm_32());
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_btst_r(void)
@@ -1211,12 +1300,14 @@ static void d68020_cas2_32(void)
 static void d68000_chk_16(void)
 {
 	sprintf(g_dasm_str, "chk.w   %s, D%d", get_ea_mode_str_16(g_cpu_ir), (g_cpu_ir>>9)&7);
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68020_chk_32(void)
 {
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	sprintf(g_dasm_str, "chk.l   %s, D%d; (2+)", get_ea_mode_str_32(g_cpu_ir), (g_cpu_ir>>9)&7);
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68020_chk2_cmp2_8(void)
@@ -1392,7 +1483,7 @@ static void d68020_cpbcc_16(void)
 	uint new_pc = g_cpu_pc;
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	extension = read_imm_16();
-	new_pc += make_int_16(peek_imm_16());
+	new_pc += make_int_16(read_imm_16());
 	sprintf(g_dasm_str, "%db%-4s  %s; %x (extension = %x) (2-3)", (g_cpu_ir>>9)&7, g_cpcc[g_cpu_ir&0x3f], get_imm_str_s16(), new_pc, extension);
 }
 
@@ -1402,7 +1493,7 @@ static void d68020_cpbcc_32(void)
 	uint new_pc = g_cpu_pc;
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	extension = read_imm_16();
-	new_pc += peek_imm_32();
+	new_pc += read_imm_32();
 	sprintf(g_dasm_str, "%db%-4s  %s; %x (extension = %x) (2-3)", (g_cpu_ir>>9)&7, g_cpcc[g_cpu_ir&0x3f], get_imm_str_s16(), new_pc, extension);
 }
 
@@ -1414,7 +1505,7 @@ static void d68020_cpdbcc(void)
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	extension1 = read_imm_16();
 	extension2 = read_imm_16();
-	new_pc += make_int_16(peek_imm_16());
+	new_pc += make_int_16(read_imm_16());
 	sprintf(g_dasm_str, "%ddb%-4s D%d,%s; %x (extension = %x) (2-3)", (g_cpu_ir>>9)&7, g_cpcc[extension1&0x3f], g_cpu_ir&7, get_imm_str_s16(), new_pc, extension2);
 }
 
@@ -1427,13 +1518,27 @@ static void d68020_cpgen(void)
 static void d68020_cprestore(void)
 {
 	LIMIT_CPU_TYPES(M68020_PLUS);
-	sprintf(g_dasm_str, "%drestore %s; (2-3)", (g_cpu_ir>>9)&7, get_ea_mode_str_8(g_cpu_ir));
+	if (((g_cpu_ir>>9)&7) == 1)
+	{
+		sprintf(g_dasm_str, "frestore %s", get_ea_mode_str_8(g_cpu_ir));
+	}
+	else
+	{
+		sprintf(g_dasm_str, "%drestore %s; (2-3)", (g_cpu_ir>>9)&7, get_ea_mode_str_8(g_cpu_ir));
+	}
 }
 
 static void d68020_cpsave(void)
 {
 	LIMIT_CPU_TYPES(M68020_PLUS);
-	sprintf(g_dasm_str, "%dsave   %s; (2-3)", (g_cpu_ir>>9)&7, get_ea_mode_str_8(g_cpu_ir));
+	if (((g_cpu_ir>>9)&7) == 1)
+	{
+		sprintf(g_dasm_str, "fsave   %s", get_ea_mode_str_8(g_cpu_ir));
+	}
+	else
+	{
+		sprintf(g_dasm_str, "%dsave   %s; (2-3)", (g_cpu_ir>>9)&7, get_ea_mode_str_8(g_cpu_ir));
+	}
 }
 
 static void d68020_cpscc(void)
@@ -1499,13 +1604,15 @@ static void d68040_cpush(void)
 static void d68000_dbra(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "dbra    D%d, %x", g_cpu_ir & 7, temp_pc + make_int_16(read_imm_16()));
+	sprintf(g_dasm_str, "dbra    D%d, $%x", g_cpu_ir & 7, temp_pc + make_int_16(read_imm_16()));
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_dbcc(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "db%-2s    D%d, %x", g_cc[(g_cpu_ir>>8)&0xf], g_cpu_ir & 7, temp_pc + make_int_16(read_imm_16()));
+	sprintf(g_dasm_str, "db%-2s    D%d, $%x", g_cc[(g_cpu_ir>>8)&0xf], g_cpu_ir & 7, temp_pc + make_int_16(read_imm_16()));
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_divs(void)
@@ -1606,6 +1713,220 @@ static void d68020_extb_32(void)
 	sprintf(g_dasm_str, "extb.l  D%d; (2+)", g_cpu_ir&7);
 }
 
+static void d68040_fpu(void)
+{
+	char float_data_format[8][3] =
+	{
+		".l", ".s", ".x", ".p", ".w", ".d", ".b", ".p"
+	};
+
+	char mnemonic[40];
+	uint32 w2, src, dst_reg;
+	LIMIT_CPU_TYPES(M68030_PLUS);
+	w2 = read_imm_16();
+
+	src = (w2 >> 10) & 0x7;
+	dst_reg = (w2 >> 7) & 0x7;
+
+	// special override for FMOVECR
+	if ((((w2 >> 13) & 0x7) == 2) && (((w2>>10)&0x7) == 7))
+	{
+		sprintf(g_dasm_str, "fmovecr   #$%0x, fp%d", (w2&0x7f), dst_reg);
+		return;
+	}
+
+	switch ((w2 >> 13) & 0x7)
+	{
+		case 0x0:
+		case 0x2:
+		{
+			switch(w2 & 0x7f)
+			{
+				case 0x00:	sprintf(mnemonic, "fmove"); break;
+				case 0x01:	sprintf(mnemonic, "fint"); break;
+				case 0x02:	sprintf(mnemonic, "fsinh"); break;
+				case 0x03:	sprintf(mnemonic, "fintrz"); break;
+				case 0x04:	sprintf(mnemonic, "fsqrt"); break;
+				case 0x06:	sprintf(mnemonic, "flognp1"); break;
+				case 0x08:	sprintf(mnemonic, "fetoxm1"); break;
+				case 0x09:	sprintf(mnemonic, "ftanh1"); break;
+				case 0x0a:	sprintf(mnemonic, "fatan"); break;
+				case 0x0c:	sprintf(mnemonic, "fasin"); break;
+				case 0x0d:	sprintf(mnemonic, "fatanh"); break;
+				case 0x0e:	sprintf(mnemonic, "fsin"); break;
+				case 0x0f:	sprintf(mnemonic, "ftan"); break;
+				case 0x10:	sprintf(mnemonic, "fetox"); break;
+				case 0x11:	sprintf(mnemonic, "ftwotox"); break;
+				case 0x12:	sprintf(mnemonic, "ftentox"); break;
+				case 0x14:	sprintf(mnemonic, "flogn"); break;
+				case 0x15:	sprintf(mnemonic, "flog10"); break;
+				case 0x16:	sprintf(mnemonic, "flog2"); break;
+				case 0x18:	sprintf(mnemonic, "fabs"); break;
+				case 0x19:	sprintf(mnemonic, "fcosh"); break;
+				case 0x1a:	sprintf(mnemonic, "fneg"); break;
+				case 0x1c:	sprintf(mnemonic, "facos"); break;
+				case 0x1d:	sprintf(mnemonic, "fcos"); break;
+				case 0x1e:	sprintf(mnemonic, "fgetexp"); break;
+				case 0x1f:	sprintf(mnemonic, "fgetman"); break;
+				case 0x20:	sprintf(mnemonic, "fdiv"); break;
+				case 0x21:	sprintf(mnemonic, "fmod"); break;
+				case 0x22:	sprintf(mnemonic, "fadd"); break;
+				case 0x23:	sprintf(mnemonic, "fmul"); break;
+				case 0x24:	sprintf(mnemonic, "fsgldiv"); break;
+				case 0x25:	sprintf(mnemonic, "frem"); break;
+				case 0x26:	sprintf(mnemonic, "fscale"); break;
+				case 0x27:	sprintf(mnemonic, "fsglmul"); break;
+				case 0x28:	sprintf(mnemonic, "fsub"); break;
+				case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37:
+							sprintf(mnemonic, "fsincos"); break;
+				case 0x38:	sprintf(mnemonic, "fcmp"); break;
+				case 0x3a:	sprintf(mnemonic, "ftst"); break;
+				case 0x41:	sprintf(mnemonic, "fssqrt"); break;
+				case 0x45:	sprintf(mnemonic, "fdsqrt"); break;
+				case 0x58:	sprintf(mnemonic, "fsabs"); break;
+				case 0x5a:	sprintf(mnemonic, "fsneg"); break;
+				case 0x5c:	sprintf(mnemonic, "fdabs"); break;
+				case 0x5e:	sprintf(mnemonic, "fdneg"); break;
+				case 0x60:	sprintf(mnemonic, "fsdiv"); break;
+				case 0x62:	sprintf(mnemonic, "fsadd"); break;
+				case 0x63:	sprintf(mnemonic, "fsmul"); break;
+				case 0x64:	sprintf(mnemonic, "fddiv"); break;
+				case 0x66:	sprintf(mnemonic, "fdadd"); break;
+				case 0x67:	sprintf(mnemonic, "fdmul"); break;
+				case 0x68:	sprintf(mnemonic, "fssub"); break;
+				case 0x6c:	sprintf(mnemonic, "fdsub"); break;
+
+				default:	sprintf(mnemonic, "FPU (?)"); break;
+			}
+
+			if (w2 & 0x4000)
+			{
+				sprintf(g_dasm_str, "%s%s   %s, FP%d", mnemonic, float_data_format[src], get_ea_mode_str_32(g_cpu_ir), dst_reg);
+			}
+			else
+			{
+				sprintf(g_dasm_str, "%s.x   FP%d, FP%d", mnemonic, src, dst_reg);
+			}
+			break;
+		}
+
+		case 0x3:
+		{
+			switch ((w2>>10)&7)
+			{
+				case 3:		// packed decimal w/fixed k-factor
+					sprintf(g_dasm_str, "fmove%s   FP%d, %s {#%d}", float_data_format[(w2>>10)&7], dst_reg, get_ea_mode_str_32(g_cpu_ir), sext_7bit_int(w2&0x7f));
+					break;
+
+				case 7:		// packed decimal w/dynamic k-factor (register)
+					sprintf(g_dasm_str, "fmove%s   FP%d, %s {D%d}", float_data_format[(w2>>10)&7], dst_reg, get_ea_mode_str_32(g_cpu_ir), (w2>>4)&7);
+					break;
+
+				default:
+					sprintf(g_dasm_str, "fmove%s   FP%d, %s", float_data_format[(w2>>10)&7], dst_reg, get_ea_mode_str_32(g_cpu_ir));
+					break;
+			}
+			break;
+		}
+
+		case 0x4:	// ea to control
+		{
+			sprintf(g_dasm_str, "fmovem.l   %s, ", get_ea_mode_str_32(g_cpu_ir));
+			if (w2 & 0x1000) strcat(g_dasm_str, "fpcr");
+			if (w2 & 0x0800) strcat(g_dasm_str, "/fpsr");
+			if (w2 & 0x0400) strcat(g_dasm_str, "/fpiar");
+			break;
+		}
+
+		case 0x5:	// control to ea
+		{
+			
+			strcpy(g_dasm_str, "fmovem.l   ");
+			if (w2 & 0x1000) strcat(g_dasm_str, "fpcr");
+			if (w2 & 0x0800) strcat(g_dasm_str, "/fpsr");
+			if (w2 & 0x0400) strcat(g_dasm_str, "/fpiar");
+			strcat(g_dasm_str, ", ");
+			strcat(g_dasm_str, get_ea_mode_str_32(g_cpu_ir));
+			break;
+		}
+
+		case 0x6:	// memory to FPU, list
+		{
+			char temp[32];
+
+			if ((w2>>11) & 1)	// dynamic register list
+			{
+				sprintf(g_dasm_str, "fmovem.x   %s, D%d", get_ea_mode_str_32(g_cpu_ir), (w2>>4)&7);
+			}
+			else	// static register list
+			{
+				int i;
+
+				sprintf(g_dasm_str, "fmovem.x   %s, ", get_ea_mode_str_32(g_cpu_ir));
+
+				for (i = 0; i < 8; i++)
+				{
+					if (w2 & (1<<i))
+					{
+						if ((w2>>12) & 1)	// postincrement or control
+						{
+							sprintf(temp, "FP%d ", 7-i);
+						}
+						else			// predecrement
+						{
+							sprintf(temp, "FP%d ", i);
+						}
+						strcat(g_dasm_str, temp);
+					}
+				}
+			}
+			break;
+		}
+
+		case 0x7:	// FPU to memory, list
+		{
+			char temp[32];
+
+			if ((w2>>11) & 1)	// dynamic register list
+			{
+				sprintf(g_dasm_str, "fmovem.x   D%d, %s", (w2>>4)&7, get_ea_mode_str_32(g_cpu_ir));
+			}
+			else	// static register list
+			{
+				int i;
+
+				sprintf(g_dasm_str, "fmovem.x   ");
+
+				for (i = 0; i < 8; i++)
+				{
+					if (w2 & (1<<i))
+					{
+						if ((w2>>12) & 1)	// postincrement or control
+						{
+							sprintf(temp, "FP%d ", 7-i);
+						}
+						else			// predecrement
+						{
+							sprintf(temp, "FP%d ", i);
+						}
+						strcat(g_dasm_str, temp);
+					}
+				}
+
+				strcat(g_dasm_str, ", ");
+				strcat(g_dasm_str, get_ea_mode_str_32(g_cpu_ir));
+			}
+			break;
+		}
+
+		default:
+		{
+			sprintf(g_dasm_str, "FPU (?) ");
+			break;
+		}
+	}
+}
+
 static void d68000_jmp(void)
 {
 	sprintf(g_dasm_str, "jmp     %s", get_ea_mode_str_32(g_cpu_ir));
@@ -1614,6 +1935,7 @@ static void d68000_jmp(void)
 static void d68000_jsr(void)
 {
 	sprintf(g_dasm_str, "jsr     %s", get_ea_mode_str_32(g_cpu_ir));
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_lea(void)
@@ -1840,7 +2162,7 @@ static void d68010_movec(void)
 			processor = "?";
 	}
 
-	if(BIT_1(g_cpu_ir))
+	if(BIT_0(g_cpu_ir))
 		sprintf(g_dasm_str, "movec %c%d, %s; (%s)", BIT_F(extension) ? 'A' : 'D', (extension>>12)&7, reg_name, processor);
 	else
 		sprintf(g_dasm_str, "movec %s, %c%d; (%s)", reg_name, BIT_F(extension) ? 'A' : 'D', (extension>>12)&7, processor);
@@ -2374,6 +2696,21 @@ static void d68000_pea(void)
 	sprintf(g_dasm_str, "pea     %s", get_ea_mode_str_32(g_cpu_ir));
 }
 
+// this is a 68040-specific form of PFLUSH
+static void d68040_pflush(void)
+{
+	LIMIT_CPU_TYPES(M68040_PLUS);
+
+	if (g_cpu_ir & 0x10)
+	{
+		sprintf(g_dasm_str, "pflusha%s", (g_cpu_ir & 8) ? "" : "n");
+	}
+	else
+	{
+		sprintf(g_dasm_str, "pflush%s(A%d)", (g_cpu_ir & 8) ? "" : "n", g_cpu_ir & 7);
+	}
+}
+
 static void d68000_reset(void)
 {
 	sprintf(g_dasm_str, "reset");
@@ -2524,27 +2861,32 @@ static void d68010_rtd(void)
 {
 	LIMIT_CPU_TYPES(M68010_PLUS);
 	sprintf(g_dasm_str, "rtd     %s; (1+)", get_imm_str_s16());
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OUT);
 }
 
 static void d68000_rte(void)
 {
 	sprintf(g_dasm_str, "rte");
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OUT);
 }
 
 static void d68020_rtm(void)
 {
 	LIMIT_CPU_TYPES(M68020_ONLY);
 	sprintf(g_dasm_str, "rtm     %c%d; (2+)", BIT_3(g_cpu_ir) ? 'A' : 'D', g_cpu_ir&7);
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OUT);
 }
 
 static void d68000_rtr(void)
 {
 	sprintf(g_dasm_str, "rtr");
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OUT);
 }
 
 static void d68000_rts(void)
 {
 	sprintf(g_dasm_str, "rts");
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OUT);
 }
 
 static void d68000_sbcd_rr(void)
@@ -2689,23 +3031,27 @@ static void d68020_trapcc_0(void)
 {
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	sprintf(g_dasm_str, "trap%-2s; (2+)", g_cc[(g_cpu_ir>>8)&0xf]);
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68020_trapcc_16(void)
 {
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	sprintf(g_dasm_str, "trap%-2s  %s; (2+)", g_cc[(g_cpu_ir>>8)&0xf], get_imm_str_u16());
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68020_trapcc_32(void)
 {
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	sprintf(g_dasm_str, "trap%-2s  %s; (2+)", g_cc[(g_cpu_ir>>8)&0xf], get_imm_str_u32());
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_trapv(void)
 {
 	sprintf(g_dasm_str, "trapv");
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_tst_8(void)
@@ -2807,6 +3153,137 @@ static void d68020_unpk_mm(void)
 }
 
 
+// PFLUSH:  001xxx0xxxxxxxxx
+// PLOAD:   001000x0000xxxxx
+// PVALID1: 0010100000000000
+// PVALID2: 0010110000000xxx
+// PMOVE 1: 010xxxx000000000
+// PMOVE 2: 011xxxx0000xxx00
+// PMOVE 3: 011xxxx000000000
+// PTEST:   100xxxxxxxxxxxxx
+// PFLUSHR:  1010000000000000
+static void d68851_p000(void)
+{
+	char* str;
+	uint modes = read_imm_16();
+
+	// do this after fetching the second PMOVE word so we properly get the 3rd if necessary
+	str = get_ea_mode_str_32(g_cpu_ir);
+
+	if ((modes & 0xfde0) == 0x2000)	// PLOAD
+	{
+		if (modes & 0x0200)
+		{
+	 		sprintf(g_dasm_str, "pload  #%d, %s", (modes>>10)&7, str);
+		}
+		else
+		{
+	 		sprintf(g_dasm_str, "pload  %s, #%d", str, (modes>>10)&7);
+		}
+		return;
+	}
+
+	if ((modes & 0xe200) == 0x2000)	// PFLUSH
+	{
+		sprintf(g_dasm_str, "pflushr %x, %x, %s", modes & 0x1f, (modes>>5)&0xf, str);
+		return;
+	}
+
+	if (modes == 0xa000)	// PFLUSHR
+	{
+		sprintf(g_dasm_str, "pflushr %s", str);
+	}
+
+	if (modes == 0x2800)	// PVALID (FORMAT 1)
+	{
+		sprintf(g_dasm_str, "pvalid VAL, %s", str);
+		return;
+	}
+
+	if ((modes & 0xfff8) == 0x2c00)	// PVALID (FORMAT 2)
+	{
+		sprintf(g_dasm_str, "pvalid A%d, %s", modes & 0xf, str);
+		return;
+	}
+
+	if ((modes & 0xe000) == 0x8000)	// PTEST
+	{
+		sprintf(g_dasm_str, "ptest #%d, %s", modes & 0x1f, str);
+		return;
+	}
+
+	switch ((modes>>13) & 0x7)
+	{
+		case 0:	// MC68030/040 form with FD bit
+		case 2:	// MC68881 form, FD never set
+			if (modes & 0x0100)
+			{
+				if (modes & 0x0200)
+				{
+			 		sprintf(g_dasm_str, "pmovefd  %s, %s", g_mmuregs[(modes>>10)&7], str);
+				}
+				else
+				{
+			 		sprintf(g_dasm_str, "pmovefd  %s, %s", str, g_mmuregs[(modes>>10)&7]);
+				}
+			}
+			else
+			{
+				if (modes & 0x0200)
+				{
+			 		sprintf(g_dasm_str, "pmove  %s, %s", g_mmuregs[(modes>>10)&7], str);
+				}
+				else
+				{
+			 		sprintf(g_dasm_str, "pmove  %s, %s", str, g_mmuregs[(modes>>10)&7]);
+				}
+			}
+			break;
+
+		case 3:	// MC68030 to/from status reg
+			if (modes & 0x0200)
+			{
+		 		sprintf(g_dasm_str, "pmove  mmusr, %s", str);
+			}
+			else
+			{
+		 		sprintf(g_dasm_str, "pmove  %s, mmusr", str);
+			}
+			break;
+
+		default:
+			sprintf(g_dasm_str, "pmove [unknown form] %s", str);
+			break;
+	}
+}
+
+static void d68851_pbcc16(void)
+{
+	uint32 temp_pc = g_cpu_pc;
+
+	sprintf(g_dasm_str, "pb%s %x", g_mmucond[g_cpu_ir&0xf], temp_pc + make_int_16(read_imm_16()));
+}
+
+static void d68851_pbcc32(void)
+{
+	uint32 temp_pc = g_cpu_pc;
+
+	sprintf(g_dasm_str, "pb%s %x", g_mmucond[g_cpu_ir&0xf], temp_pc + make_int_32(read_imm_32()));
+}
+
+static void d68851_pdbcc(void)
+{
+	uint32 temp_pc = g_cpu_pc;
+	uint16 modes = read_imm_16();
+
+	sprintf(g_dasm_str, "pb%s %x", g_mmucond[modes&0xf], temp_pc + make_int_16(read_imm_16()));
+}
+
+// PScc:  0000000000xxxxxx
+static void d68851_p001(void)
+{
+	sprintf(g_dasm_str, "MMU 001 group");
+}
 
 /* ======================================================================== */
 /* ======================= INSTRUCTION TABLE BUILDER ====================== */
@@ -2827,9 +3304,9 @@ static void d68020_unpk_mm(void)
   1 = pc idx
 */
 
-static opcode_struct g_opcode_info[] =
+static const opcode_struct g_opcode_info[] =
 {
-/*  opcode handler    mask    match   ea mask */
+/*  opcode handler             mask    match   ea mask */
 	{d68000_1010         , 0xf000, 0xa000, 0x000},
 	{d68000_1111         , 0xf000, 0xf000, 0x000},
 	{d68000_abcd_rr      , 0xf1f8, 0xc100, 0x000},
@@ -2967,6 +3444,7 @@ static opcode_struct g_opcode_info[] =
 	{d68020_extb_32      , 0xfff8, 0x49c0, 0x000},
 	{d68000_ext_16       , 0xfff8, 0x4880, 0x000},
 	{d68000_ext_32       , 0xfff8, 0x48c0, 0x000},
+	{d68040_fpu          , 0xffc0, 0xf200, 0x000},
 	{d68000_illegal      , 0xffff, 0x4afc, 0x000},
 	{d68000_jmp          , 0xffc0, 0x4ec0, 0x27b},
 	{d68000_jsr          , 0xffc0, 0x4e80, 0x27b},
@@ -3046,6 +3524,7 @@ static opcode_struct g_opcode_info[] =
 	{d68020_pack_rr      , 0xf1f8, 0x8140, 0x000},
 	{d68020_pack_mm      , 0xf1f8, 0x8148, 0x000},
 	{d68000_pea          , 0xffc0, 0x4840, 0x27b},
+	{d68040_pflush       , 0xffe0, 0xf500, 0x000},
 	{d68000_reset        , 0xffff, 0x4e70, 0x000},
 	{d68000_ror_s_8      , 0xf1f8, 0xe018, 0x000},
 	{d68000_ror_s_16     , 0xf1f8, 0xe058, 0x000},
@@ -3128,6 +3607,11 @@ static opcode_struct g_opcode_info[] =
 	{d68000_unlk         , 0xfff8, 0x4e58, 0x000},
 	{d68020_unpk_rr      , 0xf1f8, 0x8180, 0x000},
 	{d68020_unpk_mm      , 0xf1f8, 0x8188, 0x000},
+	{d68851_p000         , 0xffc0, 0xf000, 0x000},
+	{d68851_pbcc16       , 0xffc0, 0xf080, 0x000},
+	{d68851_pbcc32       , 0xffc0, 0xf0c0, 0x000},
+	{d68851_pdbcc        , 0xfff8, 0xf048, 0x000},
+	{d68851_p001         , 0xffc0, 0xf040, 0x000},
 	{0, 0, 0, 0}
 };
 
@@ -3200,19 +3684,17 @@ static void build_opcode_table(void)
 	uint i;
 	uint opcode;
 	opcode_struct* ostruct;
-	uint opcode_info_length = 0;
+	opcode_struct opcode_info[ARRAY_LENGTH(g_opcode_info)];
 
-	for(ostruct = g_opcode_info;ostruct->opcode_handler != 0;ostruct++)
-		opcode_info_length++;
-
-	qsort((void *)g_opcode_info, opcode_info_length, sizeof(g_opcode_info[0]), compare_nof_true_bits);
+	memcpy(opcode_info, g_opcode_info, sizeof(g_opcode_info));
+	qsort((void *)opcode_info, ARRAY_LENGTH(opcode_info)-1, sizeof(opcode_info[0]), compare_nof_true_bits);
 
 	for(i=0;i<0x10000;i++)
 	{
 		g_instruction_table[i] = d68000_illegal; /* default to illegal */
 		opcode = i;
 		/* search through opcode info for a match */
-		for(ostruct = g_opcode_info;ostruct->opcode_handler != 0;ostruct++)
+		for(ostruct = opcode_info;ostruct->opcode_handler != 0;ostruct++)
 		{
 			/* match opcode mask and allowed ea modes */
 			if((opcode & ostruct->mask) == ostruct->match)
@@ -3265,11 +3747,14 @@ unsigned int m68k_disassemble(char* str_buff, unsigned int pc, unsigned int cpu_
 			g_cpu_type = TYPE_68020;
 			g_address_mask = 0xffffffff;
 			break;
+		case M68K_CPU_TYPE_68EC030:
 		case M68K_CPU_TYPE_68030:
 			g_cpu_type = TYPE_68030;
 			g_address_mask = 0xffffffff;
 			break;
 		case M68K_CPU_TYPE_68040:
+		case M68K_CPU_TYPE_68EC040:
+		case M68K_CPU_TYPE_68LC040:
 			g_cpu_type = TYPE_68040;
 			g_address_mask = 0xffffffff;
 			break;
@@ -3280,9 +3765,10 @@ unsigned int m68k_disassemble(char* str_buff, unsigned int pc, unsigned int cpu_
 	g_cpu_pc = pc;
 	g_helper_str[0] = 0;
 	g_cpu_ir = read_imm_16();
+	g_opcode_type = 0;
 	g_instruction_table[g_cpu_ir]();
 	sprintf(str_buff, "%s%s", g_dasm_str, g_helper_str);
-	return g_cpu_pc - pc;
+	return COMBINE_OPCODE_FLAGS(g_cpu_pc - pc);
 }
 
 char* m68ki_disassemble_quick(unsigned int pc, unsigned int cpu_type)
@@ -3293,6 +3779,18 @@ char* m68ki_disassemble_quick(unsigned int pc, unsigned int cpu_type)
 	return buff;
 }
 
+unsigned int m68k_disassemble_raw(char* str_buff, unsigned int pc, const unsigned char* opdata, const unsigned char* argdata, unsigned int cpu_type)
+{
+	unsigned int result;
+	(void)argdata;
+
+	g_rawop = opdata;
+	g_rawbasepc = pc;
+	result = m68k_disassemble(str_buff, pc, cpu_type);
+	g_rawop = NULL;
+	return result;
+}
+
 /* Check if the instruction is a valid one */
 unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cpu_type)
 {
@@ -3323,6 +3821,7 @@ unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cp
 				return 0;
 			if(g_instruction_table[instruction] == d68010_rtd)
 				return 0;
+			// Fallthrough
 		case M68K_CPU_TYPE_68010:
 			if(g_instruction_table[instruction] == d68020_bcc_32)
 				return 0;
@@ -3444,9 +3943,11 @@ unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cp
 				return 0;
 			if(g_instruction_table[instruction] == d68020_unpk_mm)
 				return 0;
+			// Fallthrough
 		case M68K_CPU_TYPE_68EC020:
 		case M68K_CPU_TYPE_68020:
 		case M68K_CPU_TYPE_68030:
+		case M68K_CPU_TYPE_68EC030:
 			if(g_instruction_table[instruction] == d68040_cinv)
 				return 0;
 			if(g_instruction_table[instruction] == d68040_cpush)
@@ -3461,6 +3962,32 @@ unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cp
 				return 0;
 			if(g_instruction_table[instruction] == d68040_move16_al_ai)
 				return 0;
+			// Fallthrough
+		case M68K_CPU_TYPE_68040:
+		case M68K_CPU_TYPE_68EC040:
+		case M68K_CPU_TYPE_68LC040:
+			if(g_instruction_table[instruction] == d68020_cpbcc_16)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cpbcc_32)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cpdbcc)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cpgen)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cprestore)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cpsave)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cpscc)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cptrapcc_0)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cptrapcc_16)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cptrapcc_32)
+				return 0;
+			if(g_instruction_table[instruction] == d68040_pflush)
+				return 0;
 	}
 	if(cpu_type != M68K_CPU_TYPE_68020 && cpu_type != M68K_CPU_TYPE_68EC020 &&
 	  (g_instruction_table[instruction] == d68020_callm ||
@@ -3470,7 +3997,7 @@ unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cp
 	return 1;
 }
 
-
+// f028 2215 0008
 
 /* ======================================================================== */
 /* ============================== END OF FILE ============================= */
diff --git a/plat/linux68k/emu/musashi/m68kfpu.c b/plat/linux68k/emu/musashi/m68kfpu.c
new file mode 100644
index 000000000..46070f56c
--- /dev/null
+++ b/plat/linux68k/emu/musashi/m68kfpu.c
@@ -0,0 +1,1819 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+
+extern void exit(int);
+
+static void fatalerror(char *format, ...) {
+  va_list ap;
+  va_start(ap,format);
+  vfprintf(stderr,format,ap);  // JFF: fixed. Was using fprintf and arguments were wrong
+  va_end(ap);
+  exit(1);
+}
+
+#define FPCC_N			0x08000000
+#define FPCC_Z			0x04000000
+#define FPCC_I			0x02000000
+#define FPCC_NAN		0x01000000
+
+#define DOUBLE_INFINITY					(unsigned long long)(0x7ff0000000000000)
+#define DOUBLE_EXPONENT					(unsigned long long)(0x7ff0000000000000)
+#define DOUBLE_MANTISSA					(unsigned long long)(0x000fffffffffffff)
+
+extern flag floatx80_is_nan( floatx80 a );
+
+// masks for packed dwords, positive k-factor
+static uint32 pkmask2[18] =
+{
+ 0xffffffff, 0, 0xf0000000, 0xff000000, 0xfff00000, 0xffff0000,
+ 0xfffff000, 0xffffff00, 0xfffffff0, 0xffffffff,
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+ 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+static uint32 pkmask3[18] =
+{
+ 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0xf0000000, 0xff000000, 0xfff00000, 0xffff0000,
+ 0xfffff000, 0xffffff00, 0xfffffff0, 0xffffffff,
+};
+
+static inline double fx80_to_double(floatx80 fx)
+{
+  uint64 d;
+  double *foo;
+
+  foo = (double *)&d;
+
+  d = floatx80_to_float64(fx);
+
+  return *foo;
+}
+
+static inline floatx80 double_to_fx80(double in)
+{
+  uint64 *d;
+
+  d = (uint64 *)&in;
+
+  return float64_to_floatx80(*d);
+}
+
+static inline floatx80 load_extended_float80(uint32 ea)
+{
+  uint32 d1,d2;
+	uint16 d3;
+  floatx80 fp;
+
+  d3 = m68ki_read_16(ea);
+  d1 = m68ki_read_32(ea+4);
+  d2 = m68ki_read_32(ea+8);
+
+  fp.high = d3;
+  fp.low = ((uint64)d1<<32) | (d2 & 0xffffffff);
+
+  return fp;
+}
+
+static inline void store_extended_float80(uint32 ea, floatx80 fpr)
+{
+  m68ki_write_16(ea+0, fpr.high);
+  m68ki_write_16(ea+2, 0);
+  m68ki_write_32(ea+4, (fpr.low>>32)&0xffffffff);
+  m68ki_write_32(ea+8, fpr.low&0xffffffff);
+}
+
+static inline floatx80 load_pack_float80(uint32 ea)
+{
+  uint32 dw1, dw2, dw3;
+  floatx80 result;
+  double tmp;
+  char str[128], *ch;
+
+  dw1 = m68ki_read_32(ea);
+  dw2 = m68ki_read_32(ea+4);
+  dw3 = m68ki_read_32(ea+8);
+
+  ch = &str[0];
+  if (dw1 & 0x80000000)	// mantissa sign
+    {
+      *ch++ = '-';
+    }
+  *ch++ = (char)((dw1 & 0xf) + '0');
+  *ch++ = '.';
+  *ch++ = (char)(((dw2 >> 28) & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 24) & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 20) & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 16) & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 12) & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 8)  & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 4)  & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 0)  & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 28) & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 24) & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 20) & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 16) & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 12) & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 8)  & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 4)  & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 0)  & 0xf) + '0');
+  *ch++ = 'E';
+  if (dw1 & 0x40000000)	// exponent sign
+    {
+      *ch++ = '-';
+    }
+  *ch++ = (char)(((dw1 >> 24) & 0xf) + '0');
+  *ch++ = (char)(((dw1 >> 20) & 0xf) + '0');
+  *ch++ = (char)(((dw1 >> 16) & 0xf) + '0');
+  *ch = '\0';
+
+  sscanf(str, "%le", &tmp);
+
+  result = double_to_fx80(tmp);
+
+  return result;
+}
+
+static inline void store_pack_float80(uint32 ea, int k, floatx80 fpr)
+{
+  uint32 dw1, dw2, dw3;
+  char str[128], *ch;
+  int i, j, exp;
+
+  dw1 = dw2 = dw3 = 0;
+  ch = &str[0];
+
+  sprintf(str, "%.16e", fx80_to_double(fpr));
+
+  if (*ch == '-')
+    {
+      ch++;
+      dw1 = 0x80000000;
+    }
+
+  if (*ch == '+')
+    {
+      ch++;
+    }
+
+  dw1 |= (*ch++ - '0');
+
+  if (*ch == '.')
+    {
+      ch++;
+    }
+
+  // handle negative k-factor here
+  if ((k <= 0) && (k >= -13))
+    {
+      exp = 0;
+      for (i = 0; i < 3; i++)
+	{
+	  if (ch[18+i] >= '0' && ch[18+i] <= '9')
+	    {
+	      exp = (exp << 4) | (ch[18+i] - '0');
+	    }
+	}
+
+      if (ch[17] == '-')
+	{
+	  exp = -exp;
+	}
+
+      k = -k;
+      // last digit is (k + exponent - 1)
+      k += (exp - 1);
+
+      // round up the last significant mantissa digit
+      if (ch[k+1] >= '5')
+	{
+	  ch[k]++;
+	}
+
+      // zero out the rest of the mantissa digits
+      for (j = (k+1); j < 16; j++)
+	{
+	  ch[j] = '0';
+	}
+
+      // now zero out K to avoid tripping the positive K detection below
+      k = 0;
+    }
+
+  // crack 8 digits of the mantissa
+  for (i = 0; i < 8; i++)
+    {
+      dw2 <<= 4;
+      if (*ch >= '0' && *ch <= '9')
+	{
+	  dw2 |= *ch++ - '0';
+	}
+    }
+
+  // next 8 digits of the mantissa
+  for (i = 0; i < 8; i++)
+    {
+      dw3 <<= 4;
+      if (*ch >= '0' && *ch <= '9')
+	dw3 |= *ch++ - '0';
+    }
+
+  // handle masking if k is positive
+  if (k >= 1)
+    {
+      if (k <= 17)
+	{
+	  dw2 &= pkmask2[k];
+	  dw3 &= pkmask3[k];
+	}
+      else
+	{
+	  dw2 &= pkmask2[17];
+	  dw3 &= pkmask3[17];
+	  //			m68ki_cpu.fpcr |=  (need to set OPERR bit)
+	}
+    }
+
+  // finally, crack the exponent
+  if (*ch == 'e' || *ch == 'E')
+    {
+      ch++;
+      if (*ch == '-')
+	{
+	  ch++;
+	  dw1 |= 0x40000000;
+	}
+
+      if (*ch == '+')
+	{
+	  ch++;
+	}
+
+      j = 0;
+      for (i = 0; i < 3; i++)
+	{
+	  if (*ch >= '0' && *ch <= '9')
+	    {
+	      j = (j << 4) | (*ch++ - '0');
+	    }
+	}
+
+      dw1 |= (j << 16);
+    }
+
+  m68ki_write_32(ea, dw1);
+  m68ki_write_32(ea+4, dw2);
+  m68ki_write_32(ea+8, dw3);
+}
+
+static inline void SET_CONDITION_CODES(floatx80 reg)
+{
+  REG_FPSR &= ~(FPCC_N|FPCC_Z|FPCC_I|FPCC_NAN);
+
+  // sign flag
+  if (reg.high & 0x8000)
+    {
+      REG_FPSR |= FPCC_N;
+    }
+
+  // zero flag
+  if (((reg.high & 0x7fff) == 0) && ((reg.low<<1) == 0))
+    {
+      REG_FPSR |= FPCC_Z;
+    }
+
+  // infinity flag
+  if (((reg.high & 0x7fff) == 0x7fff) && ((reg.low<<1) == 0))
+    {
+      REG_FPSR |= FPCC_I;
+    }
+
+  // NaN flag
+  if (floatx80_is_nan(reg))
+    {
+      REG_FPSR |= FPCC_NAN;
+    }
+}
+
+static inline int TEST_CONDITION(int condition)
+{
+  int n = (REG_FPSR & FPCC_N) != 0;
+  int z = (REG_FPSR & FPCC_Z) != 0;
+  int nan = (REG_FPSR & FPCC_NAN) != 0;
+  int r = 0;
+  switch (condition)
+    {
+      case 0x10:
+      case 0x00:		return 0;					// False
+
+      case 0x11:
+      case 0x01:		return (z);					// Equal
+
+      case 0x12:
+      case 0x02:		return (!(nan || z || n));			// Greater Than
+
+      case 0x13:
+      case 0x03:		return (z || !(nan || n));			// Greater or Equal
+
+      case 0x14:
+      case 0x04:		return (n && !(nan || z));			// Less Than
+
+      case 0x15:
+      case 0x05:		return (z || (n && !nan));			// Less Than or Equal
+
+      case 0x16:
+      case 0x06:		return !nan && !z;
+
+      case 0x17:
+      case 0x07:		return !nan;
+
+      case 0x18:
+      case 0x08:		return nan;
+
+      case 0x19:
+      case 0x09:		return nan || z;
+
+      case 0x1a:
+      case 0x0a:		return (nan || !(n || z));			// Not Less Than or Equal
+
+      case 0x1b:
+      case 0x0b:		return (nan || z || !n);			// Not Less Than
+
+      case 0x1c:
+      case 0x0c:		return (nan || (n && !z));			// Not Greater or Equal Than
+
+      case 0x1d:
+      case 0x0d:		return (nan || z || n);				// Not Greater Than
+
+      case 0x1e:
+      case 0x0e:		return (!z);					// Not Equal
+
+      case 0x1f:
+      case 0x0f:		return 1;					// True
+
+      default:		fatalerror("M68kFPU: test_condition: unhandled condition %02X\n", condition);
+    }
+
+  return r;
+}
+
+static uint8 READ_EA_8(int ea)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 0:		// Dn
+	{
+	  return REG_D[reg];
+	}
+      case 2: 	// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  return m68ki_read_8(ea);
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_8();
+	  return m68ki_read_8(ea);
+	}
+      case 6:		// (An) + (Xn) + d8
+	{
+	  uint32 ea = EA_AY_IX_8();
+	  return m68ki_read_8(ea);
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 0:		// (xxx).W
+		{
+		  uint32 ea = (uint32)OPER_I_16();
+		  return m68ki_read_8(ea);
+		}
+	      case 1:		// (xxx).L
+		{
+		  uint32 d1 = OPER_I_16();
+		  uint32 d2 = OPER_I_16();
+		  uint32 ea = (d1 << 16) | d2;
+		  return m68ki_read_8(ea);
+		}
+	      case 4:		// #<data>
+		{
+		  return  OPER_I_8();
+		}
+	      default:	fatalerror("M68kFPU: READ_EA_8: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: READ_EA_8: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+    }
+
+  return 0;
+}
+
+static uint16 READ_EA_16(int ea)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 0:		// Dn
+	{
+			return (uint16)(REG_D[reg]);
+	}
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  return m68ki_read_16(ea);
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_16();
+	  return m68ki_read_16(ea);
+	}
+      case 6:		// (An) + (Xn) + d8
+	{
+	  uint32 ea = EA_AY_IX_16();
+	  return m68ki_read_16(ea);
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 0:		// (xxx).W
+		{
+		  uint32 ea = (uint32)OPER_I_16();
+		  return m68ki_read_16(ea);
+		}
+	      case 1:		// (xxx).L
+		{
+		  uint32 d1 = OPER_I_16();
+		  uint32 d2 = OPER_I_16();
+		  uint32 ea = (d1 << 16) | d2;
+		  return m68ki_read_16(ea);
+		}
+	      case 4:		// #<data>
+		{
+		  return OPER_I_16();
+		}
+
+	      default:	fatalerror("M68kFPU: READ_EA_16: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: READ_EA_16: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+    }
+
+  return 0;
+}
+
+static uint32 READ_EA_32(int ea)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 0:		// Dn
+	{
+	  return REG_D[reg];
+	}
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  return m68ki_read_32(ea);
+	}
+      case 3:		// (An)+
+	{
+	  uint32 ea = EA_AY_PI_32();
+	  return m68ki_read_32(ea);
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_32();
+	  return m68ki_read_32(ea);
+	}
+      case 6:		// (An) + (Xn) + d8
+	{
+	  uint32 ea = EA_AY_IX_32();
+	  return m68ki_read_32(ea);
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 0:		// (xxx).W
+		{
+		  uint32 ea = (uint32)OPER_I_16();
+		  return m68ki_read_32(ea);
+		}
+	      case 1:		// (xxx).L
+		{
+		  uint32 d1 = OPER_I_16();
+		  uint32 d2 = OPER_I_16();
+		  uint32 ea = (d1 << 16) | d2;
+		  return m68ki_read_32(ea);
+		}
+	      case 2:		// (d16, PC)
+		{
+		  uint32 ea = EA_PCDI_32();
+		  return m68ki_read_32(ea);
+		}
+	      case 4:		// #<data>
+		{
+		  return  OPER_I_32();
+		}
+	      default:	fatalerror("M68kFPU: READ_EA_32: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: READ_EA_32: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+    }
+  return 0;
+}
+
+static uint64 READ_EA_64(int ea)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+  uint32 h1, h2;
+
+  switch (mode)
+    {
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  h1 = m68ki_read_32(ea+0);
+	  h2 = m68ki_read_32(ea+4);
+	  return  (uint64)(h1) << 32 | (uint64)(h2);
+	}
+      case 3:		// (An)+
+	{
+	  uint32 ea = REG_A[reg];
+	  REG_A[reg] += 8;
+	  h1 = m68ki_read_32(ea+0);
+	  h2 = m68ki_read_32(ea+4);
+	  return  (uint64)(h1) << 32 | (uint64)(h2);
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_32();
+	  h1 = m68ki_read_32(ea+0);
+	  h2 = m68ki_read_32(ea+4);
+	  return  (uint64)(h1) << 32 | (uint64)(h2);
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 4:		// #<data>
+		{
+		  h1 = OPER_I_32();
+		  h2 = OPER_I_32();
+		  return  (uint64)(h1) << 32 | (uint64)(h2);
+		}
+	      case 2:		// (d16, PC)
+		{
+		  uint32 ea = EA_PCDI_32();
+		  h1 = m68ki_read_32(ea+0);
+		  h2 = m68ki_read_32(ea+4);
+		  return  (uint64)(h1) << 32 | (uint64)(h2);
+		}
+	      default:	fatalerror("M68kFPU: READ_EA_64: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: READ_EA_64: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+    }
+
+  return 0;
+}
+
+
+static floatx80 READ_EA_FPE(int mode, int reg, uint32 di_mode_ea)
+{
+  floatx80 fpr;
+
+  switch (mode)
+    {
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  fpr = load_extended_float80(ea);
+	  break;
+	}
+
+      case 3:		// (An)+
+	{
+	  uint32 ea = REG_A[reg];
+	  REG_A[reg] += 12;
+	  fpr = load_extended_float80(ea);
+	  break;
+	}
+      case 5:		// (d16, An)  (added by JFF)
+	{
+	  fpr = load_extended_float80(di_mode_ea);
+         
+	  break;
+
+	}
+      case 7:	// extended modes
+	{
+	  switch (reg)
+	    {
+	      case 2:	// (d16, PC)
+		{
+		  uint32 ea = EA_PCDI_32();
+		  fpr = load_extended_float80(ea);
+		}
+		break;
+
+	      case 3:	// (d16,PC,Dx.w)
+		{
+		  uint32 ea = EA_PCIX_32();
+		  fpr = load_extended_float80(ea);
+		}
+		break;
+	      case 4: // immediate (JFF)
+		{
+		  uint32 ea = REG_PC;
+		  fpr = load_extended_float80(ea);
+		  REG_PC += 12;
+		}
+		break;
+	      default:
+		fatalerror("M68kFPU: READ_EA_FPE: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
+		break;
+	    }
+	}
+	break;
+
+      default:	fatalerror("M68kFPU: READ_EA_FPE: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC); break;
+    }
+
+  return fpr;
+}
+
+static floatx80 READ_EA_PACK(int ea)
+{
+  floatx80 fpr;
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  fpr = load_pack_float80(ea);
+	  break;
+	}
+
+      case 3:		// (An)+
+	{
+	  uint32 ea = REG_A[reg];
+	  REG_A[reg] += 12;
+	  fpr = load_pack_float80(ea);
+	  break;
+	}
+
+      case 7:	// extended modes
+	{
+	  switch (reg)
+	    {
+	      case 3:	// (d16,PC,Dx.w)
+		{
+		  uint32 ea = EA_PCIX_32();
+		  fpr = load_pack_float80(ea);
+		}
+		break;
+
+	      default:
+		fatalerror("M68kFPU: READ_EA_PACK: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
+		break;
+	    }
+	}
+	break;
+
+      default:	fatalerror("M68kFPU: READ_EA_PACK: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC); break;
+    }
+
+  return fpr;
+}
+
+static void WRITE_EA_8(int ea, uint8 data)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 0:		// Dn
+	{
+	  REG_D[reg] = data;
+	  break;
+	}
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  m68ki_write_8(ea, data);
+	  break;
+	}
+      case 3:		// (An)+
+	{
+	  uint32 ea = EA_AY_PI_8();
+	  m68ki_write_8(ea, data);
+	  break;
+	}
+      case 4:		// -(An)
+	{
+	  uint32 ea = EA_AY_PD_8();
+	  m68ki_write_8(ea, data);
+	  break;
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_8();
+	  m68ki_write_8(ea, data);
+	  break;
+	}
+      case 6:		// (An) + (Xn) + d8
+	{
+	  uint32 ea = EA_AY_IX_8();
+	  m68ki_write_8(ea, data);
+	  break;
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 1:		// (xxx).B
+		{
+		  uint32 d1 = OPER_I_16();
+		  uint32 d2 = OPER_I_16();
+		  uint32 ea = (d1 << 16) | d2;
+		  m68ki_write_8(ea, data);
+		  break;
+		}
+	      case 2:		// (d16, PC)
+		{
+		  uint32 ea = EA_PCDI_16();
+		  m68ki_write_8(ea, data);
+		  break;
+		}
+	      default:	fatalerror("M68kFPU: WRITE_EA_8: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: WRITE_EA_8: unhandled mode %d, reg %d, data %08X at %08X\n", mode, reg, data, REG_PC);
+    }
+}
+
+static void WRITE_EA_16(int ea, uint16 data)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 0:		// Dn
+	{
+	  REG_D[reg] = data;
+	  break;
+	}
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  m68ki_write_16(ea, data);
+	  break;
+	}
+      case 3:		// (An)+
+	{
+	  uint32 ea = EA_AY_PI_16();
+	  m68ki_write_16(ea, data);
+	  break;
+	}
+      case 4:		// -(An)
+	{
+	  uint32 ea = EA_AY_PD_16();
+	  m68ki_write_16(ea, data);
+	  break;
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_16();
+	  m68ki_write_16(ea, data);
+	  break;
+	}
+      case 6:		// (An) + (Xn) + d8
+	{
+	  uint32 ea = EA_AY_IX_16();
+	  m68ki_write_16(ea, data);
+	  break;
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 1:		// (xxx).W
+		{
+		  uint32 d1 = OPER_I_16();
+		  uint32 d2 = OPER_I_16();
+		  uint32 ea = (d1 << 16) | d2;
+		  m68ki_write_16(ea, data);
+		  break;
+		}
+	      case 2:		// (d16, PC)
+		{
+		  uint32 ea = EA_PCDI_16();
+		  m68ki_write_16(ea, data);
+		  break;
+		}
+	      default:	fatalerror("M68kFPU: WRITE_EA_16: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: WRITE_EA_16: unhandled mode %d, reg %d, data %08X at %08X\n", mode, reg, data, REG_PC);
+    }
+}
+
+static void WRITE_EA_32(int ea, uint32 data)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 0:		// Dn
+	{
+	  REG_D[reg] = data;
+	  break;
+	}
+      case 1:		// An
+	{
+	  REG_A[reg] = data;
+	  break;
+	}
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  m68ki_write_32(ea, data);
+	  break;
+	}
+      case 3:		// (An)+
+	{
+	  uint32 ea = EA_AY_PI_32();
+	  m68ki_write_32(ea, data);
+	  break;
+	}
+      case 4:		// -(An)
+	{
+	  uint32 ea = EA_AY_PD_32();
+	  m68ki_write_32(ea, data);
+	  break;
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_32();
+	  m68ki_write_32(ea, data);
+	  break;
+	}
+      case 6:		// (An) + (Xn) + d8
+	{
+	  uint32 ea = EA_AY_IX_32();
+	  m68ki_write_32(ea, data);
+	  break;
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 1:		// (xxx).L
+		{
+		  uint32 d1 = OPER_I_16();
+		  uint32 d2 = OPER_I_16();
+		  uint32 ea = (d1 << 16) | d2;
+                  m68ki_write_32(ea, data);
+                  break;
+		}
+	      case 2:		// (d16, PC)
+		{
+		  uint32 ea = EA_PCDI_32();
+                  m68ki_write_32(ea, data);
+                  break;
+		}
+	      default:	fatalerror("M68kFPU: WRITE_EA_32: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: WRITE_EA_32: unhandled mode %d, reg %d, data %08X at %08X\n", mode, reg, data, REG_PC);
+    }
+}
+
+static void WRITE_EA_64(int ea, uint64 data)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  m68ki_write_32(ea, (uint32)(data >> 32));
+	  m68ki_write_32(ea+4, (uint32)(data));
+	  break;
+	}
+      case 4:		// -(An)
+	{
+	  uint32 ea;
+	  REG_A[reg] -= 8;
+	  ea = REG_A[reg];
+	  m68ki_write_32(ea+0, (uint32)(data >> 32));
+	  m68ki_write_32(ea+4, (uint32)(data));
+	  break;
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_32();
+	  m68ki_write_32(ea+0, (uint32)(data >> 32));
+	  m68ki_write_32(ea+4, (uint32)(data));
+	  break;
+        }
+
+
+      case 7:    // (TBB)
+        {
+          switch (reg)
+            {
+              case 1:		// (xxx).L
+                {
+                  uint32 d1 = OPER_I_16();
+                  uint32 d2 = OPER_I_16();
+                  uint32 ea = (d1 << 16) | d2;
+                  m68ki_write_32(ea+0, (uint32)(data >> 32));
+                  m68ki_write_32(ea+4, (uint32)(data));
+                  break;
+                }
+              case 2:		// (d16, PC)
+                {
+                  uint32 ea = EA_PCDI_32();
+                  m68ki_write_32(ea+0, (uint32)(data >> 32));
+                  m68ki_write_32(ea+4, (uint32)(data));
+                  break;
+                }
+              default:	fatalerror("M68kFPU: WRITE_EA_32: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+            }
+          break;
+        }
+
+
+
+
+      default:	fatalerror("M68kFPU: WRITE_EA_64: unhandled mode %d, reg %d, data %08X%08X at %08X\n", mode, reg, (uint32)(data >> 32), (uint32)(data), REG_PC);
+    }
+}
+
+static void WRITE_EA_FPE(int mode, int reg, floatx80 fpr, uint32 di_mode_ea)
+{
+
+
+  switch (mode)
+    {
+      case 2:		// (An)
+	{
+	  uint32 ea;
+	  ea = REG_A[reg];
+	  store_extended_float80(ea, fpr);
+	  break;
+	}
+
+      case 3:		// (An)+
+	{
+	  uint32 ea;
+	  ea = REG_A[reg];
+	  store_extended_float80(ea, fpr);
+	  REG_A[reg] += 12;
+	  break;
+	}
+
+      case 4:		// -(An)
+	{
+	  uint32 ea;
+	  REG_A[reg] -= 12;
+	  ea = REG_A[reg];
+	  store_extended_float80(ea, fpr);
+	  break;
+	}
+      case 5:		// (d16, An)  (added by JFF)
+	{
+	  // EA_AY_DI_32() should not be done here because fmovem would increase
+	  // PC each time, reading incorrect displacement & advancing PC too much
+	  // uint32 ea = EA_AY_DI_32();
+	  store_extended_float80(di_mode_ea, fpr);
+         
+	  break;
+
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      default:	fatalerror("M68kFPU: WRITE_EA_FPE: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: WRITE_EA_FPE: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
+    }
+}
+
+static void WRITE_EA_PACK(int ea, int k, floatx80 fpr)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 2:		// (An)
+	{
+	  uint32 ea;
+	  ea = REG_A[reg];
+	  store_pack_float80(ea, k, fpr);
+	  break;
+	}
+
+      case 3:		// (An)+
+	{
+	  uint32 ea;
+	  ea = REG_A[reg];
+	  store_pack_float80(ea, k, fpr);
+	  REG_A[reg] += 12;
+	  break;
+	}
+
+      case 4:		// -(An)
+	{
+	  uint32 ea;
+	  REG_A[reg] -= 12;
+	  ea = REG_A[reg];
+	  store_pack_float80(ea, k, fpr);
+	  break;
+	}
+
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      default:	fatalerror("M68kFPU: WRITE_EA_PACK: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
+	    }
+	}
+	break;
+      default:	fatalerror("M68kFPU: WRITE_EA_PACK: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
+    }
+}
+
+
+static void fpgen_rm_reg(uint16 w2)
+{
+  int ea = REG_IR & 0x3f;
+  int rm = (w2 >> 14) & 0x1;
+  int src = (w2 >> 10) & 0x7;
+  int dst = (w2 >>  7) & 0x7;
+  int opmode = w2 & 0x7f;
+  floatx80 source;
+
+  // fmovecr #$f, fp0	f200 5c0f
+
+  if (rm)
+    {
+      switch (src)
+	{
+	  case 0:		// Long-Word Integer
+	    {
+	      sint32 d = READ_EA_32(ea);
+	      source = int32_to_floatx80(d);
+	      break;
+	    }
+	  case 1:		// Single-precision Real
+	    {
+	      uint32 d = READ_EA_32(ea);
+	      source = float32_to_floatx80(d);
+	      break;
+	    }
+	  case 2:		// Extended-precision Real
+	    {
+	      int imode = (ea >> 3) & 0x7;
+	      int reg = (ea & 0x7);
+	      uint32 di_mode_ea = imode == 5 ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
+	      source = READ_EA_FPE(imode,reg,di_mode_ea);
+	      break;
+	    }
+	  case 3:		// Packed-decimal Real
+	    {
+	      source = READ_EA_PACK(ea);
+	      break;
+	    }
+	  case 4:		// Word Integer
+	    {
+	      sint16 d = READ_EA_16(ea);
+	      source = int32_to_floatx80((sint32)d);
+	      break;
+	    }
+	  case 5:		// Double-precision Real
+	    {
+	      uint64 d = READ_EA_64(ea);
+
+	      source = float64_to_floatx80(d);
+	      break;
+	    }
+	  case 6:		// Byte Integer
+	    {
+	      sint8 d = READ_EA_8(ea);
+	      source = int32_to_floatx80((sint32)d);
+	      break;
+	    }
+	  case 7:		// FMOVECR load from constant ROM
+	    {
+	      switch (w2 & 0x7f)
+		{
+		  case 0x0:	// Pi
+		    source.high = 0x4000;
+		    source.low = U64(0xc90fdaa22168c235);
+		    break;
+
+		  case 0xb:	// log10(2)
+		    source.high = 0x3ffd;
+		    source.low = U64(0x9a209a84fbcff798);
+		    break;
+
+		  case 0xc:	// e
+		    source.high = 0x4000;
+		    source.low = U64(0xadf85458a2bb4a9b);
+		    break;
+
+		  case 0xd:	// log2(e)
+		    source.high = 0x3fff;
+		    source.low = U64(0xb8aa3b295c17f0bc);
+		    break;
+
+		  case 0xe:	// log10(e)
+		    source.high = 0x3ffd;
+		    source.low = U64(0xde5bd8a937287195);
+		    break;
+
+		  case 0xf:	// 0.0
+		    source = int32_to_floatx80((sint32)0);
+		    break;
+
+		  case 0x30:	// ln(2)
+		    source.high = 0x3ffe;
+		    source.low = U64(0xb17217f7d1cf79ac);
+		    break;
+
+		  case 0x31:	// ln(10)
+		    source.high = 0x4000;
+		    source.low = U64(0x935d8dddaaa8ac17);
+		    break;
+
+		  case 0x32:	// 1 (or 100?  manuals are unclear, but 1 would make more sense)
+		    source = int32_to_floatx80((sint32)1);
+		    break;
+
+		  case 0x33:	// 10^1
+		    source = int32_to_floatx80((sint32)10);
+		    break;
+
+		  case 0x34:	// 10^2
+		    source = int32_to_floatx80((sint32)10*10);
+		    break;
+
+		  default:
+		    fatalerror("fmove_rm_reg: unknown constant ROM offset %x at %08x\n", w2&0x7f, REG_PC-4);
+		    break;
+		}
+
+	      // handle it right here, the usual opmode bits aren't valid in the FMOVECR case
+	      REG_FP[dst] = source;	      
+	      SET_CONDITION_CODES(REG_FP[dst]); // JFF when destination is a register, we HAVE to update FPCR
+	      USE_CYCLES(4);
+	      return;
+	    }
+	  default:	fatalerror("fmove_rm_reg: invalid source specifier %x at %08X\n", src, REG_PC-4);
+	}
+    }
+  else
+    {
+      source = REG_FP[src];
+    }
+
+
+
+  switch (opmode)
+    {
+      case 0x00:		// FMOVE
+	{
+	  REG_FP[dst] = source;
+	  SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
+	  USE_CYCLES(4);
+	  break;
+	}
+      case 0x01:		// Fsint
+	{
+	  sint32 temp;
+	  temp = floatx80_to_int32(source);
+	  REG_FP[dst] = int32_to_floatx80(temp);
+	  SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
+	  break;
+	}
+      case 0x03:		// FsintRZ
+	{
+	  sint32 temp;
+	  temp = floatx80_to_int32_round_to_zero(source);
+	  REG_FP[dst] = int32_to_floatx80(temp);
+	  SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
+	  break;
+	}
+      case 0x04:		// FSQRT
+	{
+	  REG_FP[dst] = floatx80_sqrt(source);
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(109);
+	  break;
+	}
+      case 0x18:		// FABS
+	{
+	  REG_FP[dst] = source;
+	  REG_FP[dst].high &= 0x7fff;
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(3);
+	  break;
+	}
+      case 0x1a:		// FNEG
+	{
+	  REG_FP[dst] = source;
+	  REG_FP[dst].high ^= 0x8000;
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(3);
+	  break;
+	}
+      case 0x1e:		// FGETEXP
+	{
+          sint16 temp;
+
+          temp = source.high;	// get the exponent
+          temp -= 0x3fff;	// take off the bias
+          REG_FP[dst] = double_to_fx80((double)temp);
+          SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(6);
+	}
+
+
+      case 0x1f:		// FGETMANT  (TBB)
+        {
+          REG_FP[dst] = source;
+          REG_FP[dst].high &= ~0x7fff;	// clear the bias, keep sign
+          REG_FP[dst].high |= 0x3fff;	// set new bias, 1.0 <= result < 2.0
+          SET_CONDITION_CODES(REG_FP[dst]);
+          //USE_CYCLES(6);
+        }
+
+
+      case 0x60:		// FSDIVS (JFF) (source has already been converted to floatx80)
+      case 0x20:		// FDIV
+	{
+	  REG_FP[dst] = floatx80_div(REG_FP[dst], source);
+	  SET_CONDITION_CODES(REG_FP[dst]); // JFF
+	  USE_CYCLES(43);
+	  break;
+	}
+      case 0x22:		// FADD
+	{
+	  REG_FP[dst] = floatx80_add(REG_FP[dst], source);
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(9);
+	  break;
+	}
+ 
+      case 0x63:		// FSMULS (JFF) (source has already been converted to floatx80)
+      case 0x23:		// FMUL
+	{
+	  REG_FP[dst] = floatx80_mul(REG_FP[dst], source);
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(11);  // maybe number of cycles is slightly inferior for FSMULS
+	  break;
+	}
+      case 0x25:		// FREM
+	{
+	  REG_FP[dst] = floatx80_rem(REG_FP[dst], source);
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(43);	// guess
+	  break;
+	}
+      case 0x28:		// FSUB
+	{
+	  REG_FP[dst] = floatx80_sub(REG_FP[dst], source);
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(9);
+	  break;
+	}
+      case 0x38:		// FCMP
+	{
+	  floatx80 res;
+	  res = floatx80_sub(REG_FP[dst], source);
+	  SET_CONDITION_CODES(res);
+	  USE_CYCLES(7);
+	  break;
+	}
+      case 0x3a:		// FTST
+	{
+	  floatx80 res;
+	  res = source;
+	  SET_CONDITION_CODES(res);
+	  USE_CYCLES(7);
+	  break;
+	}
+
+      default:	fatalerror("fpgen_rm_reg: unimplemented opmode %02X at %08X\n", opmode, REG_PC-4);
+    }
+}
+
+static void fmove_reg_mem(uint16 w2)
+{
+  int ea = REG_IR & 0x3f;
+  int src = (w2 >>  7) & 0x7;
+  int dst = (w2 >> 10) & 0x7;
+  int k = (w2 & 0x7f);
+
+  switch (dst)
+    {
+      case 0:		// Long-Word Integer
+	{
+	  sint32 d = (sint32)floatx80_to_int32(REG_FP[src]);
+	  WRITE_EA_32(ea, d);
+	  break;
+	}
+      case 1:		// Single-precision Real
+	{
+	  uint32 d = floatx80_to_float32(REG_FP[src]);
+	  WRITE_EA_32(ea, d);
+	  break;
+	}
+      case 2:		// Extended-precision Real
+	{
+	  int mode = (ea >> 3) & 0x7;
+	  int reg = (ea & 0x7);
+	  uint32 di_mode_ea = mode == 5 ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
+	    		      
+	  WRITE_EA_FPE(mode, reg, REG_FP[src], di_mode_ea);
+	  break;
+	}
+      case 3:		// Packed-decimal Real with Static K-factor
+	{
+	  // sign-extend k
+	  k = (k & 0x40) ? (k | 0xffffff80) : (k & 0x7f);
+	  WRITE_EA_PACK(ea, k, REG_FP[src]);
+	  break;
+	}
+      case 4:		// Word Integer
+	{
+	  WRITE_EA_16(ea, (sint16)floatx80_to_int32(REG_FP[src]));
+	  break;
+	}
+      case 5:		// Double-precision Real
+	{
+	  uint64 d;
+
+	  d = floatx80_to_float64(REG_FP[src]);
+
+	  WRITE_EA_64(ea, d);
+	  break;
+	}
+      case 6:		// Byte Integer
+	{
+	  WRITE_EA_8(ea, (sint8)floatx80_to_int32(REG_FP[src]));
+	  break;
+	}
+      case 7:		// Packed-decimal Real with Dynamic K-factor
+	{
+	  WRITE_EA_PACK(ea, REG_D[k>>4], REG_FP[src]);
+	  break;
+	}
+    }
+
+  USE_CYCLES(12);
+}
+
+static void fmove_fpcr(uint16 w2)
+{
+  int ea = REG_IR & 0x3f;
+  int dir = (w2 >> 13) & 0x1;
+  int reg = (w2 >> 10) & 0x7;
+
+  if (dir)	// From system control reg to <ea>
+    {
+      if (reg & 4) WRITE_EA_32(ea, REG_FPCR);
+      if (reg & 2) WRITE_EA_32(ea, REG_FPSR);
+      if (reg & 1) WRITE_EA_32(ea, REG_FPIAR);
+    }
+  else		// From <ea> to system control reg
+    {
+      if (reg & 4) 
+	{
+	  REG_FPCR = READ_EA_32(ea);
+	  // JFF: need to update rounding mode from softfloat module
+	  float_rounding_mode = (REG_FPCR >> 4) & 0x3;
+	  
+	}
+	  
+      if (reg & 2) REG_FPSR = READ_EA_32(ea);
+      if (reg & 1) REG_FPIAR = READ_EA_32(ea);
+    }
+
+  USE_CYCLES(10);
+}
+
+static void fmovem(uint16 w2)
+{
+  int i;
+  int ea = REG_IR & 0x3f;
+  int dir = (w2 >> 13) & 0x1;
+  int mode = (w2 >> 11) & 0x3;
+  int reglist = w2 & 0xff;
+
+  if (dir)	// From FP regs to mem
+    {
+      /* Mode field: Specifies the type of the register list and addressing mode.
+	 1 & 3 are unsupported right now
+	 1  Dynamic register list, predecrement addressing mode.
+	   3  Dynamic register list, postincrement or control addressing mode.
+       */
+      
+      switch (mode)
+	{
+	  case 2:		// (JFF): Static register list, postincrement or control addressing mode.     
+	    {
+	      int imode = (ea >> 3) & 0x7;
+	      int reg = (ea & 0x7);
+	      int di_mode = imode == 5;	      
+	      
+	      uint32 di_mode_ea = di_mode ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
+	    		      
+		      
+	      for (i=0; i < 8; i++)
+		{
+		  if (reglist & (1 << i))
+		    {
+		      WRITE_EA_FPE(imode,reg, REG_FP[7-i],di_mode_ea);
+		      USE_CYCLES(2);
+		      if (di_mode)
+			{
+			  di_mode_ea += 12;
+			}
+		      
+		    }
+		}
+	      break;
+	    }
+	  case 0:		// Static register list, predecrement addressing mode
+	    {
+	      int imode = (ea >> 3) & 0x7;
+	      int reg = (ea & 0x7);
+	      // the "di_mode_ea" parameter kludge is required here else WRITE_EA_FPE would have
+	      // to call EA_AY_DI_32() (that advances PC & reads displacement) each time
+	      // when the proper behaviour is 1) read once, 2) increment ea for each matching register
+	      // this forces to pre-read the mode (named "imode") so we can decide to read displacement, only once
+	      int di_mode = imode == 5;	      
+	      uint32 di_mode_ea =  di_mode ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
+
+	      for (i=0; i < 8; i++)
+		{
+		  if (reglist & (1 << i))
+		    {
+		      WRITE_EA_FPE(imode,reg, REG_FP[i],di_mode_ea);
+		      USE_CYCLES(2);
+		      if (di_mode)
+			{
+			  di_mode_ea += 12;
+			}
+		    }
+		}
+	      break;
+	    }
+
+	  default:	fatalerror("040fpu0: FMOVEM: mode %d unimplemented at %08X\n", mode, REG_PC-4);
+	}
+    }
+  else		// From mem to FP regs
+    {
+      switch (mode)
+	{
+	  case 2:		// Static register list, postincrement addressing mode
+	    {
+	      int imode = (ea >> 3) & 0x7;
+	      int reg = (ea & 0x7);
+	      int di_mode = imode == 5;	    
+	      uint32 di_mode_ea = di_mode ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
+	      
+	      for (i=0; i < 8; i++)
+		{
+		  if (reglist & (1 << i))
+		    {
+		      REG_FP[7-i] = READ_EA_FPE(imode,reg,di_mode_ea);
+		      USE_CYCLES(2);
+		      if (di_mode)
+			{
+			  di_mode_ea += 12;
+			}
+		      
+		    }
+		}
+	      break;
+	    }
+
+	  default:	fatalerror("040fpu0: FMOVEM: mode %d unimplemented at %08X\n", mode, REG_PC-4);
+	}
+    }
+}
+
+
+static void fscc()
+{
+  // added by JFF, this seems to work properly now 
+  int condition = OPER_I_16() & 0x3f;
+
+  int cc = TEST_CONDITION(condition);
+  
+  if ((REG_IR & 0x38) == 0)
+    {
+      // If the specified floating-point condition is true, sets the byte integer operand at
+      // the destination to TRUE (all ones); otherwise, sets the byte to FALSE (all zeros).
+      
+      REG_D[REG_IR & 7] = (REG_D[REG_IR & 7] & 0xFFFFFF00) | (cc ? 0xff : 0x00);
+    }
+  else
+    {
+      // unimplemented
+      fatalerror("040fpu0: fscc: memory not implemented at %08X\n", REG_PC-4);
+    }
+  USE_CYCLES(7);  // JFF unsure of the number of cycles!!
+}
+
+static void fbcc16(void)
+{
+  sint32 offset;
+  int condition = REG_IR & 0x3f;
+
+  offset = (sint16)(OPER_I_16());
+
+  // TODO: condition and jump!!!
+  if (TEST_CONDITION(condition))
+    {
+      m68ki_trace_t0();			   /* auto-disable (see m68kcpu.h) */
+      m68ki_branch_16(offset-2);
+    }
+
+  USE_CYCLES(7);
+}
+
+static void fbcc32(void)
+{
+  sint32 offset;
+  int condition = REG_IR & 0x3f;
+
+  offset = OPER_I_32();
+
+  // TODO: condition and jump!!!
+  if (TEST_CONDITION(condition))
+    {
+      m68ki_trace_t0();			   /* auto-disable (see m68kcpu.h) */
+      m68ki_branch_32(offset-4);
+    }
+
+  USE_CYCLES(7);
+}
+
+
+void m68040_fpu_op0()
+{
+  m68ki_cpu.fpu_just_reset = 0;
+
+  switch ((REG_IR >> 6) & 0x3)
+    {
+      case 0:
+	{
+   	uint16 w2 = OPER_I_16();
+	  switch ((w2 >> 13) & 0x7)
+	    {
+	      case 0x0:	// FPU ALU FP, FP
+	      case 0x2:	// FPU ALU ea, FP
+		{
+		  fpgen_rm_reg(w2);
+		  break;
+		}
+
+	      case 0x3:	// FMOVE FP, ea
+		{
+		  fmove_reg_mem(w2);
+		  break;
+		}
+
+	      case 0x4:	// FMOVEM ea, FPCR
+	      case 0x5:	// FMOVEM FPCR, ea
+		{
+		  fmove_fpcr(w2);
+		  break;
+		}
+
+	      case 0x6:	// FMOVEM ea, list
+	      case 0x7:	// FMOVEM list, ea
+		{
+		  fmovem(w2);
+		  break;
+		}
+
+	      default:	fatalerror("M68kFPU: unimplemented subop %d at %08X\n", (w2 >> 13) & 0x7, REG_PC-4);
+	    }
+	  break;
+	}
+
+      case 1:           // FScc (JFF)
+	{
+	  fscc();
+	  break;
+	}
+      case 2:		// FBcc disp16
+	{
+	  fbcc16();
+	  break;
+	}
+      case 3:		// FBcc disp32
+	{
+	  fbcc32();
+	  break;
+	}
+
+      default:	fatalerror("M68kFPU: unimplemented main op %d at %08X\n", (m68ki_cpu.ir >> 6) & 0x3,  REG_PC-4);
+    }
+}
+
+static void perform_fsave(uint32 addr, int inc)
+{
+  if (inc)
+    {
+      // 68881 IDLE, version 0x1f
+      m68ki_write_32(addr, 0x1f180000);
+      m68ki_write_32(addr+4, 0);
+      m68ki_write_32(addr+8, 0);
+      m68ki_write_32(addr+12, 0);
+      m68ki_write_32(addr+16, 0);
+      m68ki_write_32(addr+20, 0);
+      m68ki_write_32(addr+24, 0x70000000);
+    }
+  else
+    {
+      m68ki_write_32(addr, 0x70000000);
+      m68ki_write_32(addr-4, 0);
+      m68ki_write_32(addr-8, 0);
+      m68ki_write_32(addr-12, 0);
+      m68ki_write_32(addr-16, 0);
+      m68ki_write_32(addr-20, 0);
+      m68ki_write_32(addr-24, 0x1f180000);
+    }
+}
+
+// FRESTORE on a NULL frame reboots the FPU - all registers to NaN, the 3 status regs to 0
+static void do_frestore_null()
+{
+  int i;
+
+  REG_FPCR = 0;
+  REG_FPSR = 0;
+  REG_FPIAR = 0;
+  for (i = 0; i < 8; i++)
+    {
+      REG_FP[i].high = 0x7fff;
+      REG_FP[i].low = U64(0xffffffffffffffff);
+    }
+
+  // Mac IIci at 408458e6 wants an FSAVE of a just-restored NULL frame to also be NULL
+  // The PRM says it's possible to generate a NULL frame, but not how/when/why.  (need the 68881/68882 manual!)
+  m68ki_cpu.fpu_just_reset = 1;
+}
+
+void m68040_fpu_op1()
+{
+  int ea = REG_IR & 0x3f;
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+  uint32 addr, temp;
+
+  switch ((REG_IR >> 6) & 0x3)
+    {
+      case 0:		// FSAVE <ea>
+	{
+	  switch (mode)
+	    {
+	      case 3:	// (An)+
+		addr = EA_AY_PI_32();
+
+		if (m68ki_cpu.fpu_just_reset)
+		  {
+		    m68ki_write_32(addr, 0);
+		  }
+		else
+		  {
+		    // we normally generate an IDLE frame
+		    REG_A[reg] += 6*4;
+		    perform_fsave(addr, 1);
+		  }
+		break;
+
+	      case 4: // -(An)
+		addr = EA_AY_PD_32();
+
+		if (m68ki_cpu.fpu_just_reset)
+		  {
+		    m68ki_write_32(addr, 0);
+		  }
+		else
+		  {
+		    // we normally generate an IDLE frame
+		    REG_A[reg] -= 6*4;
+		    perform_fsave(addr, 0);
+		  }
+		break;
+
+	      default:
+		fatalerror("M68kFPU: FSAVE unhandled mode %d reg %d at %x\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+	break;
+
+      case 1:		// FRESTORE <ea>
+	{
+	  switch (mode)
+	    {
+	      case 2: // (An)
+		addr = REG_A[reg];
+		temp = m68ki_read_32(addr);
+
+		// check for NULL frame
+		if (temp & 0xff000000)
+		  {
+		    // we don't handle non-NULL frames and there's no pre/post inc/dec to do here
+		    m68ki_cpu.fpu_just_reset = 0;
+		  }
+		else
+		  {
+		    do_frestore_null();
+		  }
+		break;
+
+	      case 3:	// (An)+
+		addr = EA_AY_PI_32();
+		temp = m68ki_read_32(addr);
+
+		// check for NULL frame
+		if (temp & 0xff000000)
+		  {
+		    m68ki_cpu.fpu_just_reset = 0;
+
+		    // how about an IDLE frame?
+		    if ((temp & 0x00ff0000) == 0x00180000)
+		      {
+			REG_A[reg] += 6*4;
+		      } // check UNIMP
+		    else if ((temp & 0x00ff0000) == 0x00380000)
+		      {
+			REG_A[reg] += 14*4;
+		      } // check BUSY
+		    else if ((temp & 0x00ff0000) == 0x00b40000)
+		      {
+			REG_A[reg] += 45*4;
+		      }
+		  }
+		else
+		  {
+		    do_frestore_null();
+		  }
+		break;
+
+	      default:
+		fatalerror("M68kFPU: FRESTORE unhandled mode %d reg %d at %x\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+	break;
+
+      default:	fatalerror("m68040_fpu_op1: unimplemented op %d at %08X\n", (REG_IR >> 6) & 0x3, REG_PC-2);
+    }
+}
+
+
+
diff --git a/plat/linux68k/emu/musashi/m68kmake.c b/plat/linux68k/emu/musashi/m68kmake.c
index a3fb0d9d4..0b1d95155 100755
--- a/plat/linux68k/emu/musashi/m68kmake.c
+++ b/plat/linux68k/emu/musashi/m68kmake.c
@@ -3,10 +3,11 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 4.60
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
+ * FPU and MMU by R. Belmont.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -56,7 +57,7 @@
  */
 
 
-char* g_version = "3.3";
+static const char g_version[] = "4.60";
 
 /* ======================================================================== */
 /* =============================== INCLUDES =============================== */
@@ -77,7 +78,6 @@ char* g_version = "3.3";
 #define M68K_MAX_PATH 1024
 #define M68K_MAX_DIR  1024
 
-#define NUM_CPUS                          3	/* 000, 010, 020 */
 #define MAX_LINE_LENGTH                 200	/* length of 1 line */
 #define MAX_BODY_LENGTH                 300	/* Number of lines in 1 function */
 #define MAX_REPLACE_LENGTH               30	/* Max number of replace strings */
@@ -93,9 +93,6 @@ char* g_version = "3.3";
 #define FILENAME_INPUT      "m68k_in.c"
 #define FILENAME_PROTOTYPE  "m68kops.h"
 #define FILENAME_TABLE      "m68kops.c"
-#define FILENAME_OPS_AC     "m68kopac.c"
-#define FILENAME_OPS_DM     "m68kopdm.c"
-#define FILENAME_OPS_NZ     "m68kopnz.c"
 
 
 /* Identifier sequences recognized by this program */
@@ -135,9 +132,14 @@ char* g_version = "3.3";
 /* ============================== PROTOTYPES ============================== */
 /* ======================================================================== */
 
-#define CPU_TYPE_000 0
-#define CPU_TYPE_010 1
-#define CPU_TYPE_020 2
+enum {
+    CPU_TYPE_000=0,
+    CPU_TYPE_010,
+    CPU_TYPE_020,
+    CPU_TYPE_030,
+    CPU_TYPE_040,
+    NUM_CPUS
+};
 
 #define UNSPECIFIED "."
 #define UNSPECIFIED_CH '.'
@@ -185,7 +187,7 @@ typedef struct
 	char ea_allowed[EA_ALLOWED_LENGTH];   /* Effective addressing modes allowed */
 	char cpu_mode[NUM_CPUS];              /* User or supervisor mode */
 	char cpus[NUM_CPUS+1];                /* Allowed CPUs */
-	unsigned char cycles[NUM_CPUS];       /* cycles for 000, 010, 020 */
+	unsigned char cycles[NUM_CPUS];       /* cycles for 000, 010, 020, 030, 040 */
 } opcode_struct;
 
 
@@ -231,7 +233,6 @@ int extract_opcode_info(char* src, char* name, int* size, char* spec_proc, char*
 void add_replace_string(replace_struct* replace, char* search_str, char* replace_str);
 void write_body(FILE* filep, body_struct* body, replace_struct* replace);
 void get_base_name(char* base_name, opcode_struct* op);
-void write_prototype(FILE* filep, char* base_name);
 void write_function_name(FILE* filep, char* base_name);
 void add_opcode_output_table_entry(opcode_struct* op, char* name);
 static int DECL_SPEC compare_nof_true_bits(const void* aptr, const void* bptr);
@@ -241,7 +242,7 @@ void set_opcode_struct(opcode_struct* src, opcode_struct* dst, int ea_mode);
 void generate_opcode_handler(FILE* filep, body_struct* body, replace_struct* replace, opcode_struct* opinfo, int ea_mode);
 void generate_opcode_ea_variants(FILE* filep, body_struct* body, replace_struct* replace, opcode_struct* op);
 void generate_opcode_cc_variants(FILE* filep, body_struct* body, replace_struct* replace, opcode_struct* op_in, int offset);
-void process_opcode_handlers(void);
+void process_opcode_handlers(FILE* filep);
 void populate_table(void);
 void read_insert(char* insert);
 
@@ -258,9 +259,6 @@ char g_input_filename[M68K_MAX_PATH] = FILENAME_INPUT;
 FILE* g_input_file = NULL;
 FILE* g_prototype_file = NULL;
 FILE* g_table_file = NULL;
-FILE* g_ops_ac_file = NULL;
-FILE* g_ops_dm_file = NULL;
-FILE* g_ops_nz_file = NULL;
 
 int g_num_functions = 0;  /* Number of functions processed */
 int g_num_primitives = 0; /* Number of function primitives read */
@@ -272,7 +270,7 @@ opcode_struct g_opcode_input_table[MAX_OPCODE_INPUT_TABLE_LENGTH];
 opcode_struct g_opcode_output_table[MAX_OPCODE_OUTPUT_TABLE_LENGTH];
 int g_opcode_output_table_length = 0;
 
-ea_info_struct g_ea_info_table[13] =
+const ea_info_struct g_ea_info_table[13] =
 {/* fname    ea        mask  match */
 	{"",     "",       0x00, 0x00}, /* EA_MODE_NONE */
 	{"ai",   "AY_AI",  0x38, 0x10}, /* EA_MODE_AI   */
@@ -290,7 +288,7 @@ ea_info_struct g_ea_info_table[13] =
 };
 
 
-char* g_cc_table[16][2] =
+const char *const g_cc_table[16][2] =
 {
 	{ "t",  "T"}, /* 0000 */
 	{ "f",  "F"}, /* 0001 */
@@ -311,7 +309,7 @@ char* g_cc_table[16][2] =
 };
 
 /* size to index translator (0 -> 0, 8 and 16 -> 1, 32 -> 2) */
-int g_size_select_table[33] =
+const int g_size_select_table[33] =
 {
 	0,												/* unsized */
 	0, 0, 0, 0, 0, 0, 0, 1,							/*    8    */
@@ -320,25 +318,26 @@ int g_size_select_table[33] =
 };
 
 /* Extra cycles required for certain EA modes */
-int g_ea_cycle_table[13][NUM_CPUS][3] =
-{/*       000           010           020   */
-	{{ 0,  0,  0}, { 0,  0,  0}, { 0,  0,  0}}, /* EA_MODE_NONE */
-	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  4,  4}}, /* EA_MODE_AI   */
-	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  4,  4}}, /* EA_MODE_PI   */
-	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  4,  4}}, /* EA_MODE_PI7  */
-	{{ 0,  6, 10}, { 0,  6, 10}, { 0,  5,  5}}, /* EA_MODE_PD   */
-	{{ 0,  6, 10}, { 0,  6, 10}, { 0,  5,  5}}, /* EA_MODE_PD7  */
-	{{ 0,  8, 12}, { 0,  8, 12}, { 0,  5,  5}}, /* EA_MODE_DI   */
-	{{ 0, 10, 14}, { 0, 10, 14}, { 0,  7,  7}}, /* EA_MODE_IX   */
-	{{ 0,  8, 12}, { 0,  8, 12}, { 0,  4,  4}}, /* EA_MODE_AW   */
-	{{ 0, 12, 16}, { 0, 12, 16}, { 0,  4,  4}}, /* EA_MODE_AL   */
-	{{ 0,  8, 12}, { 0,  8, 12}, { 0,  5,  5}}, /* EA_MODE_PCDI */
-	{{ 0, 10, 14}, { 0, 10, 14}, { 0,  7,  7}}, /* EA_MODE_PCIX */
-	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  2,  4}}, /* EA_MODE_I    */
+/* TODO: correct timings for 030, 040 */
+const int g_ea_cycle_table[13][NUM_CPUS][3] =
+{/*       000           010           020           030           040  */
+	{{ 0,  0,  0}, { 0,  0,  0}, { 0,  0,  0}, { 0,  0,  0}, { 0,  0,  0}}, /* EA_MODE_NONE */
+	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  4,  4}, { 0,  4,  4}, { 0,  4,  4}}, /* EA_MODE_AI   */
+	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  4,  4}, { 0,  4,  4}, { 0,  4,  4}}, /* EA_MODE_PI   */
+	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  4,  4}, { 0,  4,  4}, { 0,  4,  4}}, /* EA_MODE_PI7  */
+	{{ 0,  6, 10}, { 0,  6, 10}, { 0,  5,  5}, { 0,  5,  5}, { 0,  5,  5}}, /* EA_MODE_PD   */
+	{{ 0,  6, 10}, { 0,  6, 10}, { 0,  5,  5}, { 0,  5,  5}, { 0,  5,  5}}, /* EA_MODE_PD7  */
+	{{ 0,  8, 12}, { 0,  8, 12}, { 0,  5,  5}, { 0,  5,  5}, { 0,  5,  5}}, /* EA_MODE_DI   */
+	{{ 0, 10, 14}, { 0, 10, 14}, { 0,  7,  7}, { 0,  7,  7}, { 0,  7,  7}}, /* EA_MODE_IX   */
+	{{ 0,  8, 12}, { 0,  8, 12}, { 0,  4,  4}, { 0,  4,  4}, { 0,  4,  4}}, /* EA_MODE_AW   */
+	{{ 0, 12, 16}, { 0, 12, 16}, { 0,  4,  4}, { 0,  4,  4}, { 0,  4,  4}}, /* EA_MODE_AL   */
+	{{ 0,  8, 12}, { 0,  8, 12}, { 0,  5,  5}, { 0,  5,  5}, { 0,  5,  5}}, /* EA_MODE_PCDI */
+	{{ 0, 10, 14}, { 0, 10, 14}, { 0,  7,  7}, { 0,  7,  7}, { 0,  7,  7}}, /* EA_MODE_PCIX */
+	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  2,  4}, { 0,  2,  4}, { 0,  2,  4}}, /* EA_MODE_I    */
 };
 
 /* Extra cycles for JMP instruction (000, 010) */
-int g_jmp_cycle_table[13] =
+const int g_jmp_cycle_table[13] =
 {
 	 0, /* EA_MODE_NONE */
 	 4, /* EA_MODE_AI   */
@@ -356,7 +355,7 @@ int g_jmp_cycle_table[13] =
 };
 
 /* Extra cycles for JSR instruction (000, 010) */
-int g_jsr_cycle_table[13] =
+const int g_jsr_cycle_table[13] =
 {
 	 0, /* EA_MODE_NONE */
 	 4, /* EA_MODE_AI   */
@@ -374,7 +373,7 @@ int g_jsr_cycle_table[13] =
 };
 
 /* Extra cycles for LEA instruction (000, 010) */
-int g_lea_cycle_table[13] =
+const int g_lea_cycle_table[13] =
 {
 	 0, /* EA_MODE_NONE */
 	 4, /* EA_MODE_AI   */
@@ -392,7 +391,7 @@ int g_lea_cycle_table[13] =
 };
 
 /* Extra cycles for PEA instruction (000, 010) */
-int g_pea_cycle_table[13] =
+const int g_pea_cycle_table[13] =
 {
 	 0, /* EA_MODE_NONE */
 	 6, /* EA_MODE_AI   */
@@ -409,8 +408,26 @@ int g_pea_cycle_table[13] =
 	 0, /* EA_MODE_I    */
 };
 
+/* Extra cycles for MOVEM instruction (000, 010) */
+const int g_movem_cycle_table[13] =
+{
+	 0, /* EA_MODE_NONE */
+	 0, /* EA_MODE_AI   */
+	 0, /* EA_MODE_PI   */
+	 0, /* EA_MODE_PI7  */
+	 0, /* EA_MODE_PD   */
+	 0, /* EA_MODE_PD7  */
+	 4, /* EA_MODE_DI   */
+	 6, /* EA_MODE_IX   */
+	 4, /* EA_MODE_AW   */
+	 8, /* EA_MODE_AL   */
+	 0, /* EA_MODE_PCDI */
+	 0, /* EA_MODE_PCIX */
+	 0, /* EA_MODE_I    */
+};
+
 /* Extra cycles for MOVES instruction (010) */
-int g_moves_cycle_table[13][3] =
+const int g_moves_cycle_table[13][3] =
 {
 	{ 0,  0,  0}, /* EA_MODE_NONE */
 	{ 0,  4,  6}, /* EA_MODE_AI   */
@@ -428,7 +445,7 @@ int g_moves_cycle_table[13][3] =
 };
 
 /* Extra cycles for CLR instruction (010) */
-int g_clr_cycle_table[13][3] =
+const int g_clr_cycle_table[13][3] =
 {
 	{ 0,  0,  0}, /* EA_MODE_NONE */
 	{ 0,  4,  6}, /* EA_MODE_AI   */
@@ -463,9 +480,6 @@ void error_exit(char* fmt, ...)
 
 	if(g_prototype_file) fclose(g_prototype_file);
 	if(g_table_file) fclose(g_table_file);
-	if(g_ops_ac_file) fclose(g_ops_ac_file);
-	if(g_ops_dm_file) fclose(g_ops_dm_file);
-	if(g_ops_nz_file) fclose(g_ops_nz_file);
 	if(g_input_file) fclose(g_input_file);
 
 	exit(EXIT_FAILURE);
@@ -482,9 +496,6 @@ void perror_exit(char* fmt, ...)
 
 	if(g_prototype_file) fclose(g_prototype_file);
 	if(g_table_file) fclose(g_table_file);
-	if(g_ops_ac_file) fclose(g_ops_ac_file);
-	if(g_ops_dm_file) fclose(g_ops_dm_file);
-	if(g_ops_nz_file) fclose(g_ops_nz_file);
 	if(g_input_file) fclose(g_input_file);
 
 	exit(EXIT_FAILURE);
@@ -586,7 +597,7 @@ int fgetline(char* buff, int nchars, FILE* file)
 	if(fgets(buff, nchars, file) == NULL)
 		return -1;
 	if(buff[0] == '\r')
-		memcpy(buff, buff + 1, nchars - 1);
+		memmove(buff, buff + 1, nchars - 1);
 
 	length = strlen(buff);
 	while(length && (buff[length-1] == '\r' || buff[length-1] == '\n'))
@@ -639,6 +650,8 @@ int get_oper_cycles(opcode_struct* op, int ea_mode, int cpu_type)
 			return op->cycles[cpu_type] + g_lea_cycle_table[ea_mode];
 		if(strcmp(op->name, "pea") == 0)
 			return op->cycles[cpu_type] + g_pea_cycle_table[ea_mode];
+		if(strcmp(op->name, "movem") == 0)
+			return op->cycles[cpu_type] + g_movem_cycle_table[ea_mode];
 	}
 	return op->cycles[cpu_type] + g_ea_cycle_table[ea_mode][cpu_type][size];
 }
@@ -649,7 +662,7 @@ opcode_struct* find_opcode(char* name, int size, char* spec_proc, char* spec_ea)
 	opcode_struct* op;
 
 
-	for(op = g_opcode_input_table;op->name != NULL;op++)
+	for(op = g_opcode_input_table;op < &g_opcode_input_table[MAX_OPCODE_INPUT_TABLE_LENGTH];op++)
 	{
 		if(	strcmp(name, op->name) == 0 &&
 			(size == op->size) &&
@@ -665,7 +678,7 @@ opcode_struct* find_illegal_opcode(void)
 {
 	opcode_struct* op;
 
-	for(op = g_opcode_input_table;op->name != NULL;op++)
+	for(op = g_opcode_input_table;op < &g_opcode_input_table[MAX_OPCODE_INPUT_TABLE_LENGTH];op++)
 	{
 		if(strcmp(op->name, "illegal") == 0)
 			return op;
@@ -750,7 +763,7 @@ void write_body(FILE* filep, body_struct* body, replace_struct* replace)
 			}
 			/* Found a directive with no matching replace string */
 			if(!found)
-				error_exit("Unknown " ID_BASE " directive");
+				error_exit("Unknown " ID_BASE " directive [%s]", output);
 		}
 		fprintf(filep, "%s\n", output);
 	}
@@ -769,16 +782,10 @@ void get_base_name(char* base_name, opcode_struct* op)
 		sprintf(base_name+strlen(base_name), "_%s", op->spec_ea);
 }
 
-/* Write the prototype of an opcode handler function */
-void write_prototype(FILE* filep, char* base_name)
-{
-	fprintf(filep, "void %s(void);\n", base_name);
-}
-
 /* Write the name of an opcode handler function */
 void write_function_name(FILE* filep, char* base_name)
 {
-	fprintf(filep, "void %s(void)\n", base_name);
+	fprintf(filep, "static void %s(void)\n", base_name);
 }
 
 void add_opcode_output_table_entry(opcode_struct* op, char* name)
@@ -861,7 +868,6 @@ void generate_opcode_handler(FILE* filep, body_struct* body, replace_struct* rep
 	/* Set the opcode structure and write the tables, prototypes, etc */
 	set_opcode_struct(opinfo, op, ea_mode);
 	get_base_name(str, op);
-	write_prototype(g_prototype_file, str);
 	add_opcode_output_table_entry(op, str);
 	write_function_name(filep, str);
 
@@ -980,10 +986,9 @@ void generate_opcode_cc_variants(FILE* filep, body_struct* body, replace_struct*
 }
 
 /* Process the opcode handlers section of the input file */
-void process_opcode_handlers(void)
+void process_opcode_handlers(FILE* filep)
 {
 	FILE* input_file = g_input_file;
-	FILE* output_file;
 	char func_name[MAX_LINE_LENGTH+1];
 	char oper_name[MAX_LINE_LENGTH+1];
 	int  oper_size;
@@ -993,9 +998,6 @@ void process_opcode_handlers(void)
 	replace_struct* replace = malloc(sizeof(replace_struct));
 	body_struct* body = malloc(sizeof(body_struct));
 
-
-	output_file = g_ops_ac_file;
-
 	for(;;)
 	{
 		/* Find the first line of the function */
@@ -1038,23 +1040,17 @@ void process_opcode_handlers(void)
 		if(opinfo == NULL)
 			error_exit("Unable to find matching table entry for %s", func_name);
 
-        /* Change output files if we pass 'c' or 'n' */
-		if(output_file == g_ops_ac_file && oper_name[0] > 'c')
-			output_file = g_ops_dm_file;
-		else if(output_file == g_ops_dm_file && oper_name[0] > 'm')
-			output_file = g_ops_nz_file;
-
 		replace->length = 0;
 
 		/* Generate opcode variants */
 		if(strcmp(opinfo->name, "bcc") == 0 || strcmp(opinfo->name, "scc") == 0)
-			generate_opcode_cc_variants(output_file, body, replace, opinfo, 1);
+			generate_opcode_cc_variants(filep, body, replace, opinfo, 1);
 		else if(strcmp(opinfo->name, "dbcc") == 0)
-			generate_opcode_cc_variants(output_file, body, replace, opinfo, 2);
+			generate_opcode_cc_variants(filep, body, replace, opinfo, 2);
 		else if(strcmp(opinfo->name, "trapcc") == 0)
-			generate_opcode_cc_variants(output_file, body, replace, opinfo, 4);
+			generate_opcode_cc_variants(filep, body, replace, opinfo, 4);
 		else
-			generate_opcode_ea_variants(output_file, body, replace, opinfo);
+			generate_opcode_ea_variants(filep, body, replace, opinfo);
 	}
 
 	free(replace);
@@ -1077,13 +1073,13 @@ void populate_table(void)
 	/* Find the start of the table */
 	while(strcmp(buff, ID_TABLE_START) != 0)
 		if(fgetline(buff, MAX_LINE_LENGTH, g_input_file) < 0)
-			error_exit("Premature EOF while reading table");
+			error_exit("(table_start) Premature EOF while reading table");
 
 	/* Process the entire table */
 	for(op = g_opcode_input_table;;op++)
 	{
 		if(fgetline(buff, MAX_LINE_LENGTH, g_input_file) < 0)
-			error_exit("Premature EOF while reading table");
+			error_exit("(inline) Premature EOF while reading table");
 		if(strlen(buff) == 0)
 			continue;
 		/* We finish when we find an input separator */
@@ -1221,13 +1217,15 @@ int main(int argc, char **argv)
 {
 	/* File stuff */
 	char output_path[M68K_MAX_DIR] = "";
-	char filename[M68K_MAX_PATH];
+        char filename[M68K_MAX_PATH*2];
 	/* Section identifier */
 	char section_id[MAX_LINE_LENGTH+1];
 	/* Inserts */
 	char temp_insert[MAX_INSERT_LENGTH+1];
 	char prototype_footer_insert[MAX_INSERT_LENGTH+1];
+	char table_header_insert[MAX_INSERT_LENGTH+1];
 	char table_footer_insert[MAX_INSERT_LENGTH+1];
+	char ophandler_header_insert[MAX_INSERT_LENGTH+1];
 	char ophandler_footer_insert[MAX_INSERT_LENGTH+1];
 	/* Flags if we've processed certain parts already */
 	int prototype_header_read = 0;
@@ -1239,8 +1237,8 @@ int main(int argc, char **argv)
 	int table_body_read = 0;
 	int ophandler_body_read = 0;
 
-	printf("\n\t\tMusashi v%s 68000, 68010, 68EC020, 68020 emulator\n", g_version);
-	printf("\t\tCopyright 1998-2000 Karl Stenerud (karl@mame.net)\n\n");
+	printf("\n\tMusashi v%s 68000, 68008, 68010, 68EC020, 68020, 68EC030, 68030, 68EC040, 68040 emulator\n", g_version);
+	printf("\t\tCopyright Karl Stenerud (kstenerud@gmail.com)\n\n");
 
 	/* Check if output path and source for the input file are given */
     if(argc > 1)
@@ -1266,18 +1264,6 @@ int main(int argc, char **argv)
 	if((g_table_file = fopen(filename, "wt")) == NULL)
 		perror_exit("Unable to create table file (%s)\n", filename);
 
-	sprintf(filename, "%s%s", output_path, FILENAME_OPS_AC);
-	if((g_ops_ac_file = fopen(filename, "wt")) == NULL)
-		perror_exit("Unable to create ops ac file (%s)\n", filename);
-
-	sprintf(filename, "%s%s", output_path, FILENAME_OPS_DM);
-	if((g_ops_dm_file = fopen(filename, "wt")) == NULL)
-		perror_exit("Unable to create ops dm file (%s)\n", filename);
-
-	sprintf(filename, "%s%s", output_path, FILENAME_OPS_NZ);
-	if((g_ops_nz_file = fopen(filename, "wt")) == NULL)
-		perror_exit("Unable to create ops nz file (%s)\n", filename);
-
 	if((g_input_file=fopen(g_input_filename, "rt")) == NULL)
 		perror_exit("can't open %s for input", g_input_filename);
 
@@ -1305,18 +1291,14 @@ int main(int argc, char **argv)
 		{
 			if(table_header_read)
 				error_exit("Duplicate table header");
-			read_insert(temp_insert);
-			fprintf(g_table_file, "%s", temp_insert);
+			read_insert(table_header_insert);
 			table_header_read = 1;
 		}
 		else if(strcmp(section_id, ID_OPHANDLER_HEADER) == 0)
 		{
 			if(ophandler_header_read)
 				error_exit("Duplicate opcode handler header");
-			read_insert(temp_insert);
-			fprintf(g_ops_ac_file, "%s\n\n", temp_insert);
-			fprintf(g_ops_dm_file, "%s\n\n", temp_insert);
-			fprintf(g_ops_nz_file, "%s\n\n", temp_insert);
+			read_insert(ophandler_header_insert);
 			ophandler_header_read = 1;
 		}
 		else if(strcmp(section_id, ID_PROTOTYPE_FOOTER) == 0)
@@ -1369,7 +1351,9 @@ int main(int argc, char **argv)
 			if(ophandler_body_read)
 				error_exit("Duplicate opcode handler section");
 
-			process_opcode_handlers();
+			fprintf(g_table_file, "%s\n\n", ophandler_header_insert);
+			process_opcode_handlers(g_table_file);
+			fprintf(g_table_file, "%s\n\n", ophandler_footer_insert);
 
 			ophandler_body_read = 1;
 		}
@@ -1393,13 +1377,11 @@ int main(int argc, char **argv)
 			if(!ophandler_body_read)
 				error_exit("Missing opcode handler body");
 
+			fprintf(g_table_file, "%s\n\n", table_header_insert);
 			print_opcode_output_table(g_table_file);
+			fprintf(g_table_file, "%s\n\n", table_footer_insert);
 
 			fprintf(g_prototype_file, "%s\n\n", prototype_footer_insert);
-			fprintf(g_table_file, "%s\n\n", table_footer_insert);
-			fprintf(g_ops_ac_file, "%s\n\n", ophandler_footer_insert);
-			fprintf(g_ops_dm_file, "%s\n\n", ophandler_footer_insert);
-			fprintf(g_ops_nz_file, "%s\n\n", ophandler_footer_insert);
 
 			break;
 		}
@@ -1412,9 +1394,6 @@ int main(int argc, char **argv)
 	/* Close all files and exit */
 	fclose(g_prototype_file);
 	fclose(g_table_file);
-	fclose(g_ops_ac_file);
-	fclose(g_ops_dm_file);
-	fclose(g_ops_nz_file);
 	fclose(g_input_file);
 
 	printf("Generated %d opcode handlers from %d primitives\n", g_num_functions, g_num_primitives);
diff --git a/plat/linux68k/emu/musashi/m68kmmu.h b/plat/linux68k/emu/musashi/m68kmmu.h
new file mode 100644
index 000000000..faba371e7
--- /dev/null
+++ b/plat/linux68k/emu/musashi/m68kmmu.h
@@ -0,0 +1,321 @@
+/*
+    m68kmmu.h - PMMU implementation for 68851/68030/68040
+
+    By R. Belmont
+
+    Copyright Nicola Salmoria and the MAME Team.
+    Visit http://mamedev.org for licensing and usage restrictions.
+*/
+
+/*
+	pmmu_translate_addr: perform 68851/68030-style PMMU address translation
+*/
+uint pmmu_translate_addr(uint addr_in)
+{
+	uint32 addr_out, tbl_entry = 0, tbl_entry2, tamode = 0, tbmode = 0, tcmode = 0;
+	uint root_aptr, root_limit, tofs, is, abits, bbits, cbits;
+	uint resolved, tptr, shift;
+
+	resolved = 0;
+	addr_out = addr_in;
+
+	// if SRP is enabled and we're in supervisor mode, use it
+	if ((m68ki_cpu.mmu_tc & 0x02000000) && (m68ki_get_sr() & 0x2000))
+	{
+		root_aptr = m68ki_cpu.mmu_srp_aptr;
+		root_limit = m68ki_cpu.mmu_srp_limit;
+	}
+	else	// else use the CRP
+	{
+		root_aptr = m68ki_cpu.mmu_crp_aptr;
+		root_limit = m68ki_cpu.mmu_crp_limit;
+	}
+
+	// get initial shift (# of top bits to ignore)
+	is = (m68ki_cpu.mmu_tc>>16) & 0xf;
+	abits = (m68ki_cpu.mmu_tc>>12)&0xf;
+	bbits = (m68ki_cpu.mmu_tc>>8)&0xf;
+	cbits = (m68ki_cpu.mmu_tc>>4)&0xf;
+
+//	fprintf(stderr,"PMMU: tcr %08x limit %08x aptr %08x is %x abits %d bbits %d cbits %d\n", m68ki_cpu.mmu_tc, root_limit, root_aptr, is, abits, bbits, cbits);
+
+	// get table A offset
+	tofs = (addr_in<<is)>>(32-abits);
+
+	// find out what format table A is
+	switch (root_limit & 3)
+	{
+		case 0:	// invalid, should cause MMU exception
+		case 1:	// page descriptor, should cause direct mapping
+			fatalerror("680x0 PMMU: Unhandled root mode\n");
+			break;
+
+		case 2:	// valid 4 byte descriptors
+			tofs *= 4;
+//			fprintf(stderr,"PMMU: reading table A entry at %08x\n", tofs + (root_aptr & 0xfffffffc));
+			tbl_entry = m68k_read_memory_32( tofs + (root_aptr & 0xfffffffc));
+			tamode = tbl_entry & 3;
+//			fprintf(stderr,"PMMU: addr %08x entry %08x mode %x tofs %x\n", addr_in, tbl_entry, tamode, tofs);
+			break;
+
+		case 3: // valid 8 byte descriptors
+			tofs *= 8;
+//			fprintf(stderr,"PMMU: reading table A entries at %08x\n", tofs + (root_aptr & 0xfffffffc));
+			tbl_entry2 = m68k_read_memory_32( tofs + (root_aptr & 0xfffffffc));
+			tbl_entry = m68k_read_memory_32( tofs + (root_aptr & 0xfffffffc)+4);
+			tamode = tbl_entry2 & 3;
+//			fprintf(stderr,"PMMU: addr %08x entry %08x entry2 %08x mode %x tofs %x\n", addr_in, tbl_entry, tbl_entry2, tamode, tofs);
+			break;
+	}
+
+	// get table B offset and pointer
+	tofs = (addr_in<<(is+abits))>>(32-bbits);
+	tptr = tbl_entry & 0xfffffff0;
+
+	// find out what format table B is, if any
+	switch (tamode)
+	{
+		case 0: // invalid, should cause MMU exception
+			fatalerror("680x0 PMMU: Unhandled Table A mode %d (addr_in %08x)\n", tamode, addr_in);
+			break;
+
+		case 2: // 4-byte table B descriptor
+			tofs *= 4;
+//			fprintf(stderr,"PMMU: reading table B entry at %08x\n", tofs + tptr);
+			tbl_entry = m68k_read_memory_32( tofs + tptr);
+			tbmode = tbl_entry & 3;
+//			fprintf(stderr,"PMMU: addr %08x entry %08x mode %x tofs %x\n", addr_in, tbl_entry, tbmode, tofs);
+			break;
+
+		case 3: // 8-byte table B descriptor
+			tofs *= 8;
+//			fprintf(stderr,"PMMU: reading table B entries at %08x\n", tofs + tptr);
+			tbl_entry2 = m68k_read_memory_32( tofs + tptr);
+			tbl_entry = m68k_read_memory_32( tofs + tptr + 4);
+			tbmode = tbl_entry2 & 3;
+//			fprintf(stderr,"PMMU: addr %08x entry %08x entry2 %08x mode %x tofs %x\n", addr_in, tbl_entry, tbl_entry2, tbmode, tofs);
+			break;
+
+		case 1:	// early termination descriptor
+			tbl_entry &= 0xffffff00;
+
+			shift = is+abits;
+			addr_out = ((addr_in<<shift)>>shift) + tbl_entry;
+			resolved = 1;
+			break;
+	}
+
+	// if table A wasn't early-out, continue to process table B
+	if (!resolved)
+	{
+		// get table C offset and pointer
+		tofs = (addr_in<<(is+abits+bbits))>>(32-cbits);
+		tptr = tbl_entry & 0xfffffff0;
+
+		switch (tbmode)
+		{
+			case 0:	// invalid, should cause MMU exception
+				fatalerror("680x0 PMMU: Unhandled Table B mode %d (addr_in %08x PC %x)\n", tbmode, addr_in, REG_PC);
+				break;
+
+			case 2: // 4-byte table C descriptor
+				tofs *= 4;
+//				fprintf(stderr,"PMMU: reading table C entry at %08x\n", tofs + tptr);
+				tbl_entry = m68k_read_memory_32(tofs + tptr);
+				tcmode = tbl_entry & 3;
+//				fprintf(stderr,"PMMU: addr %08x entry %08x mode %x tofs %x\n", addr_in, tbl_entry, tbmode, tofs);
+				break;
+
+			case 3: // 8-byte table C descriptor
+				tofs *= 8;
+//				fprintf(stderr,"PMMU: reading table C entries at %08x\n", tofs + tptr);
+				tbl_entry2 = m68k_read_memory_32(tofs + tptr);
+				tbl_entry = m68k_read_memory_32(tofs + tptr + 4);
+				tcmode = tbl_entry2 & 3;
+//				fprintf(stderr,"PMMU: addr %08x entry %08x entry2 %08x mode %x tofs %x\n", addr_in, tbl_entry, tbl_entry2, tbmode, tofs);
+				break;
+
+			case 1: // termination descriptor
+				tbl_entry &= 0xffffff00;
+
+				shift = is+abits+bbits;
+				addr_out = ((addr_in<<shift)>>shift) + tbl_entry;
+				resolved = 1;
+				break;
+		}
+	}
+
+	if (!resolved)
+	{
+		switch (tcmode)
+		{
+			case 0:	// invalid, should cause MMU exception
+			case 2: // 4-byte ??? descriptor
+			case 3: // 8-byte ??? descriptor
+				fatalerror("680x0 PMMU: Unhandled Table B mode %d (addr_in %08x PC %x)\n", tbmode, addr_in, REG_PC);
+				break;
+
+			case 1: // termination descriptor
+				tbl_entry &= 0xffffff00;
+
+				shift = is+abits+bbits+cbits;
+				addr_out = ((addr_in<<shift)>>shift) + tbl_entry;
+				resolved = 1;
+				break;
+		}
+	}
+
+
+//	fprintf(stderr,"PMMU: [%08x] => [%08x]\n", addr_in, addr_out);
+
+	return addr_out;
+}
+
+/*
+
+	m68881_mmu_ops: COP 0 MMU opcode handling
+
+*/
+
+void m68881_mmu_ops()
+{
+	uint16 modes;
+	uint32 ea = m68ki_cpu.ir & 0x3f;
+	uint64 temp64;
+
+	// catch the 2 "weird" encodings up front (PBcc)
+	if ((m68ki_cpu.ir & 0xffc0) == 0xf0c0)
+	{
+		fprintf(stderr,"680x0: unhandled PBcc\n");
+		return;
+	}
+	else if ((m68ki_cpu.ir & 0xffc0) == 0xf080)
+	{
+		fprintf(stderr,"680x0: unhandled PBcc\n");
+		return;
+	}
+	else	// the rest are 1111000xxxXXXXXX where xxx is the instruction family
+	{
+		switch ((m68ki_cpu.ir>>9) & 0x7)
+		{
+			case 0:
+				modes = OPER_I_16();
+
+				if ((modes & 0xfde0) == 0x2000)	// PLOAD
+				{
+					fprintf(stderr,"680x0: unhandled PLOAD\n");
+					return;
+				}
+				else if ((modes & 0xe200) == 0x2000)	// PFLUSH
+				{
+					fprintf(stderr,"680x0: unhandled PFLUSH PC=%x\n", REG_PC);
+					return;
+				}
+				else if (modes == 0xa000)	// PFLUSHR
+				{
+					fprintf(stderr,"680x0: unhandled PFLUSHR\n");
+					return;
+				}
+				else if (modes == 0x2800)	// PVALID (FORMAT 1)
+				{
+					fprintf(stderr,"680x0: unhandled PVALID1\n");
+					return;
+				}
+				else if ((modes & 0xfff8) == 0x2c00)	// PVALID (FORMAT 2)
+				{
+					fprintf(stderr,"680x0: unhandled PVALID2\n");
+					return;
+				}
+				else if ((modes & 0xe000) == 0x8000)	// PTEST
+				{
+					fprintf(stderr,"680x0: unhandled PTEST\n");
+					return;
+				}
+				else
+				{
+					switch ((modes>>13) & 0x7)
+					{
+						case 0:	// MC68030/040 form with FD bit
+						case 2:	// MC68881 form, FD never set
+							if (modes & 0x200)
+							{
+							 	switch ((modes>>10) & 7)
+								{
+									case 0:	// translation control register
+										WRITE_EA_32(ea, m68ki_cpu.mmu_tc);
+										break;
+
+									case 2: // supervisor root pointer
+										WRITE_EA_64(ea, (uint64)m68ki_cpu.mmu_srp_limit<<32 | (uint64)m68ki_cpu.mmu_srp_aptr);
+										break;
+
+									case 3: // CPU root pointer
+										WRITE_EA_64(ea, (uint64)m68ki_cpu.mmu_crp_limit<<32 | (uint64)m68ki_cpu.mmu_crp_aptr);
+										break;
+
+									default:
+										fprintf(stderr,"680x0: PMOVE from unknown MMU register %x, PC %x\n", (modes>>10) & 7, REG_PC);
+										break;
+								}
+							}
+							else
+							{
+							 	switch ((modes>>10) & 7)
+								{
+									case 0:	// translation control register
+										m68ki_cpu.mmu_tc = READ_EA_32(ea);
+
+										if (m68ki_cpu.mmu_tc & 0x80000000)
+										{
+											m68ki_cpu.pmmu_enabled = 1;
+										}
+										else
+										{
+											m68ki_cpu.pmmu_enabled = 0;
+										}
+										break;
+
+									case 2:	// supervisor root pointer
+										temp64 = READ_EA_64(ea);
+										m68ki_cpu.mmu_srp_limit = (temp64>>32) & 0xffffffff;
+										m68ki_cpu.mmu_srp_aptr = temp64 & 0xffffffff;
+										break;
+
+									case 3:	// CPU root pointer
+										temp64 = READ_EA_64(ea);
+										m68ki_cpu.mmu_crp_limit = (temp64>>32) & 0xffffffff;
+										m68ki_cpu.mmu_crp_aptr = temp64 & 0xffffffff;
+										break;
+
+									default:
+										fprintf(stderr,"680x0: PMOVE to unknown MMU register %x, PC %x\n", (modes>>10) & 7, REG_PC);
+										break;
+								}
+							}
+							break;
+
+						case 3:	// MC68030 to/from status reg
+							if (modes & 0x200)
+							{
+								WRITE_EA_32(ea, m68ki_cpu.mmu_sr);
+							}
+							else
+							{
+								m68ki_cpu.mmu_sr = READ_EA_32(ea);
+							}
+							break;
+
+						default:
+							fprintf(stderr,"680x0: unknown PMOVE mode %x (modes %04x) (PC %x)\n", (modes>>13) & 0x7, modes, REG_PC);
+							break;
+					}
+				}
+				break;
+
+			default:
+				fprintf(stderr,"680x0: unknown PMMU instruction group %d\n", (m68ki_cpu.ir>>9) & 0x7);
+				break;
+		}
+	}
+}
+
diff --git a/plat/linux68k/emu/musashi/readme.txt b/plat/linux68k/emu/musashi/readme.txt
index adf993398..e9d878c9c 100755
--- a/plat/linux68k/emu/musashi/readme.txt
+++ b/plat/linux68k/emu/musashi/readme.txt
@@ -1,7 +1,7 @@
                                     MUSASHI
                                     =======
 
-                                  Version 3.4
+                                  Version 4.10
 
              A portable Motorola M680x0 processor emulation engine.
             Copyright 1998-2002 Karl Stenerud.  All rights reserved.
@@ -11,8 +11,9 @@
 INTRODUCTION:
 ------------
 
-Musashi is a Motorola 68000, 68010, 68EC020, and 68020 emulator written in C.
-This emulator was written with two goals in mind: portability and speed.
+Musashi is a Motorola 68000, 68010, 68EC020, 68020, 68EC030, 68030, 68EC040 and
+68040 emulator written in C.  This emulator was written with two goals in mind:
+portability and speed.
 
 The emulator is written to ANSI C89 specifications.  It also uses inline
 functions, which are C9X compliant.
@@ -146,11 +147,23 @@ To enable separate immediate reads:
     unsigned int  m68k_read_immediate_16(unsigned int address);
     unsigned int  m68k_read_immediate_32(unsigned int address);
 
+    Now you also have the pcrelative stuff:
+    unsigned int  m68k_read_pcrelative_8(unsigned int address);
+    unsigned int  m68k_read_pcrelative_16(unsigned int address);
+    unsigned int  m68k_read_pcrelative_32(unsigned int address);
+
 - If you need to know the current PC (for banking and such), set
   M68K_MONITOR_PC to OPT_SPECIFY_HANDLER, and set M68K_SET_PC_CALLBACK(A) to
   your routine.
 
+- In the unlikely case where you need to emulate some PMMU in the immediate
+  reads and/or pcrealtive stuff, you'll need to explicitely call the
+  translation address mechanism from your user functions this way :
 
+    if (PMMU_ENABLED)
+        address = pmmu_translate_addr(address);
+
+  (this is handled automatically by normal memory accesses).
 
 ADDRESS SPACES:
 --------------
@@ -212,9 +225,12 @@ To set the CPU type you want to use:
     M68K_CPU_TYPE_68000,
     M68K_CPU_TYPE_68010,
     M68K_CPU_TYPE_68EC020,
-    M68K_CPU_TYPE_68020
-
-
+    M68K_CPU_TYPE_68020,
+    M68K_CPU_TYPE_68EC030,
+    M68K_CPU_TYPE_68030,
+    M68K_CPU_TYPE_68EC040,
+    M68K_CPU_TYPE_68040,
+    M68K_CPU_TYPE_SCC68070 (which is a 68010 with a 32 bit data bus).
 
 CLOCK FREQUENCY:
 ---------------
@@ -302,4 +318,25 @@ of the CPU.
 EXAMPLE:
 -------
 
-The subdir example contains a full example (currently DOS only).
+The subdir example contains a full example (currently linux & Dos only).
+
+Compilation
+-----------
+
+You can use the default Makefile in Musashi's directory, it works like this :
+1st build m68kmake, which will build m68kops.c and m68kops.h based on the
+contents of m68k_in.c.
+Then compile m68kcpu.o and m68kops.o. Add m68kdasm.o if you want the
+disassemble functions. When linking this to your project you will need libm
+for the fpu emulation of the 68040.
+
+Using some custom m68kconf.h outside Musashi's directory
+--------------------------------------------------------
+
+It can be useful to keep an untouched musashi directory in a project (from
+git for example) and maintain a separate m68kconf.h specific to the
+project. For this, pass -DMUSASHI_CNF="mycustomconfig.h" to gcc (or whatever
+compiler you use). Notice that if you use an unix shell (or make which uses
+the shell to launch its commands), then you need to escape the quotes like
+this : -DMUSASHI_CNF=\"mycustomconfig.h\"
+
diff --git a/plat/linux68k/emu/musashi/softfloat/README.txt b/plat/linux68k/emu/musashi/softfloat/README.txt
new file mode 100644
index 000000000..9500d25e8
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/README.txt
@@ -0,0 +1,78 @@
+MAME note: this package is derived from the following original SoftFloat 
+package and has been "re-packaged" to work with MAME's conventions and
+build system.  The source files come from bits64/ and bits64/templates
+in the original distribution as MAME requires a compiler with a 64-bit
+integer type.
+
+
+Package Overview for SoftFloat Release 2b
+
+John R. Hauser
+2002 May 27
+
+
+----------------------------------------------------------------------------
+Overview
+
+SoftFloat is a software implementation of floating-point that conforms to
+the IEC/IEEE Standard for Binary Floating-Point Arithmetic.  SoftFloat is
+distributed in the form of C source code.  Compiling the SoftFloat sources
+generates two things:
+
+-- A SoftFloat object file (typically `softfloat.o') containing the complete
+   set of IEC/IEEE floating-point routines.
+
+-- A `timesoftfloat' program for evaluating the speed of the SoftFloat
+   routines.  (The SoftFloat module is linked into this program.)
+
+The SoftFloat package is documented in four text files:
+
+   SoftFloat.txt          Documentation for using the SoftFloat functions.
+   SoftFloat-source.txt   Documentation for compiling SoftFloat.
+   SoftFloat-history.txt  History of major changes to SoftFloat.
+   timesoftfloat.txt      Documentation for using `timesoftfloat'.
+
+Other files in the package comprise the source code for SoftFloat.
+
+Please be aware that some work is involved in porting this software to other
+targets.  It is not just a matter of getting `make' to complete without
+error messages.  I would have written the code that way if I could, but
+there are fundamental differences between systems that can't be hidden.
+You should not attempt to compile SoftFloat without first reading both
+`SoftFloat.txt' and `SoftFloat-source.txt'.
+
+
+----------------------------------------------------------------------------
+Legal Notice
+
+SoftFloat was written by me, John R. Hauser.  This work was made possible in
+part by the International Computer Science Institute, located at Suite 600,
+1947 Center Street, Berkeley, California 94704.  Funding was partially
+provided by the National Science Foundation under grant MIP-9311980.  The
+original version of this code was written as part of a project to build
+a fixed-point vector processor in collaboration with the University of
+California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL
+LOSSES, COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO
+FURTHERMORE EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER
+SCIENCE INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES,
+COSTS, OR OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE
+SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, provided
+that the minimal documentation requirements stated in the source code are
+satisfied.
+
+
+----------------------------------------------------------------------------
+Contact Information
+
+At the time of this writing, the most up-to-date information about
+SoftFloat and the latest release can be found at the Web page `http://
+www.cs.berkeley.edu/~jhauser/arithmetic/SoftFloat.html'.
+
+
diff --git a/plat/linux68k/emu/musashi/softfloat/mamesf.h b/plat/linux68k/emu/musashi/softfloat/mamesf.h
new file mode 100644
index 000000000..c41950394
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/mamesf.h
@@ -0,0 +1,61 @@
+/*----------------------------------------------------------------------------
+| One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined.
+*----------------------------------------------------------------------------*/
+#ifdef LSB_FIRST
+#define LITTLEENDIAN
+#else
+#define BIGENDIAN
+#endif
+
+/*----------------------------------------------------------------------------
+| The macro `BITS64' can be defined to indicate that 64-bit integer types are
+| supported by the compiler.
+*----------------------------------------------------------------------------*/
+#define BITS64
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines the most convenient type that holds
+| integers of at least as many bits as specified.  For example, `uint8' should
+| be the most convenient type that can hold unsigned integers of as many as
+| 8 bits.  The `flag' type must be able to hold either a 0 or 1.  For most
+| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+| to the same as `int'.
+*----------------------------------------------------------------------------*/
+
+typedef sint8 flag;
+typedef sint8 int8;
+typedef sint16 int16;
+typedef sint32 int32;
+typedef sint64 int64;
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines a type that holds integers
+| of _exactly_ the number of bits specified.  For instance, for most
+| implementation of C, `bits16' and `sbits16' should be `typedef'ed to
+| `unsigned short int' and `signed short int' (or `short int'), respectively.
+*----------------------------------------------------------------------------*/
+typedef uint8 bits8;
+typedef sint8 sbits8;
+typedef uint16 bits16;
+typedef sint16 sbits16;
+typedef uint32 bits32;
+typedef sint32 sbits32;
+typedef uint64 bits64;
+typedef sint64 sbits64;
+
+/*----------------------------------------------------------------------------
+| The `LIT64' macro takes as its argument a textual integer literal and
+| if necessary ``marks'' the literal as having a 64-bit integer type.
+| For example, the GNU C Compiler (`gcc') requires that 64-bit literals be
+| appended with the letters `LL' standing for `long long', which is `gcc's
+| name for the 64-bit integer type.  Some compilers may allow `LIT64' to be
+| defined as the identity macro:  `#define LIT64( a ) a'.
+*----------------------------------------------------------------------------*/
+#define LIT64( a ) a##ULL
+
+/*----------------------------------------------------------------------------
+| The macro `INLINE' can be used before functions that should be inlined.  If
+| a compiler does not support explicit inlining, this macro should be defined
+| to be `static'.
+*----------------------------------------------------------------------------*/
+// MAME defines INLINE
diff --git a/plat/linux68k/emu/musashi/softfloat/milieu.h b/plat/linux68k/emu/musashi/softfloat/milieu.h
new file mode 100644
index 000000000..10687b755
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/milieu.h
@@ -0,0 +1,42 @@
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Include common integer types and flags.
+*----------------------------------------------------------------------------*/
+#include "mamesf.h"
+
+/*----------------------------------------------------------------------------
+| Symbolic Boolean literals.
+*----------------------------------------------------------------------------*/
+#define FALSE 0
+#define TRUE 1
diff --git a/plat/linux68k/emu/musashi/softfloat/softfloat-macros b/plat/linux68k/emu/musashi/softfloat/softfloat-macros
new file mode 100644
index 000000000..5de9031de
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/softfloat-macros
@@ -0,0 +1,732 @@
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 32, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+static inline void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
+{
+    bits32 z;
+
+    if ( count == 0 ) {
+        z = a;
+    }
+    else if ( count < 32 ) {
+        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
+    }
+    else {
+        z = ( a != 0 );
+    }
+    *zPtr = z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 64, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+static inline void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
+{
+    bits64 z;
+
+    if ( count == 0 ) {
+        z = a;
+    }
+    else if ( count < 64 ) {
+        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
+    }
+    else {
+        z = ( a != 0 );
+    }
+    *zPtr = z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+| _plus_ the number of bits given in `count'.  The shifted result is at most
+| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
+| bits shifted off form a second 64-bit result as follows:  The _last_ bit
+| shifted off is the most-significant bit of the extra result, and the other
+| 63 bits of the extra result are all zero if and only if _all_but_the_last_
+| bits shifted off were all zero.  This extra result is stored in the location
+| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0' and `a1' are considered to form
+| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
+| point value is shifted right by the number of bits given in `count', and
+| the integer part of the result is returned at the location pointed to by
+| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
+| described above, and is returned at the location pointed to by `z1Ptr'.)
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shift64ExtraRightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1 != 0 );
+        z0 = a0>>count;
+    }
+    else {
+        if ( count == 64 ) {
+            z1 = a0 | ( a1 != 0 );
+        }
+        else {
+            z1 = ( ( a0 | a1 ) != 0 );
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' can be arbitrarily large; in particular, if `count' is greater
+| than 128, the result will be 0.  The result is broken into two 64-bit pieces
+| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shift128Right(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count );
+        z0 = a0>>count;
+    }
+    else {
+        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  If any nonzero bits are shifted off, they
+| are ``jammed'' into the least significant bit of the result by setting the
+| least significant bit to 1.  The value of `count' can be arbitrarily large;
+| in particular, if `count' is greater than 128, the result will be either
+| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+| nonzero.  The result is broken into two 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shift128RightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
+        z0 = a0>>count;
+    }
+    else {
+        if ( count == 64 ) {
+            z1 = a0 | ( a1 != 0 );
+        }
+        else if ( count < 128 ) {
+            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
+        }
+        else {
+            z1 = ( ( a0 | a1 ) != 0 );
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+| by 64 _plus_ the number of bits given in `count'.  The shifted result is
+| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+| off form a third 64-bit result as follows:  The _last_ bit shifted off is
+| the most-significant bit of the extra result, and the other 63 bits of the
+| extra result are all zero if and only if _all_but_the_last_ bits shifted off
+| were all zero.  This extra result is stored in the location pointed to by
+| `z2Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0', `a1', and `a2' are considered
+| to form a fixed-point value with binary point between `a1' and `a2'.  This
+| fixed-point value is shifted right by the number of bits given in `count',
+| and the integer part of the result is returned at the locations pointed to
+| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+| corrupted as described above, and is returned at the location pointed to by
+| `z2Ptr'.)
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shift128ExtraRightJamming(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z2 = a2;
+        z1 = a1;
+        z0 = a0;
+    }
+    else {
+        if ( count < 64 ) {
+            z2 = a1<<negCount;
+            z1 = ( a0<<negCount ) | ( a1>>count );
+            z0 = a0>>count;
+        }
+        else {
+            if ( count == 64 ) {
+                z2 = a1;
+                z1 = a0;
+            }
+            else {
+                a2 |= a1;
+                if ( count < 128 ) {
+                    z2 = a0<<negCount;
+                    z1 = a0>>( count & 63 );
+                }
+                else {
+                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
+                    z1 = 0;
+                }
+            }
+            z0 = 0;
+        }
+        z2 |= ( a2 != 0 );
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' must be less than 64.  The result is broken into two 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shortShift128Left(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+    *z1Ptr = a1<<count;
+    *z0Ptr =
+        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
+| by the number of bits given in `count'.  Any bits shifted off are lost.
+| The value of `count' must be less than 64.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shortShift192Left(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 negCount;
+
+    z2 = a2<<count;
+    z1 = a1<<count;
+    z0 = a0<<count;
+    if ( 0 < count ) {
+        negCount = ( ( - count ) & 63 );
+        z1 |= a2>>negCount;
+        z0 |= a1>>negCount;
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
+| any carry out is lost.  The result is broken into two 64-bit pieces which
+| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ add128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z1;
+
+    z1 = a1 + b1;
+    *z1Ptr = z1;
+    *z0Ptr = a0 + b0 + ( z1 < a1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+| modulo 2^192, so any carry out is lost.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ add192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    uint8 carry0, carry1;
+
+    z2 = a2 + b2;
+    carry1 = ( z2 < a2 );
+    z1 = a1 + b1;
+    carry0 = ( z1 < a1 );
+    z0 = a0 + b0;
+    z1 += carry1;
+    z0 += ( z1 < carry1 );
+    z0 += carry0;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
+| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+| `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ sub128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+    *z1Ptr = a1 - b1;
+    *z0Ptr = a0 - b0 - ( a1 < b1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
+| result is broken into three 64-bit pieces which are stored at the locations
+| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ sub192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    uint8 borrow0, borrow1;
+
+    z2 = a2 - b2;
+    borrow1 = ( a2 < b2 );
+    z1 = a1 - b1;
+    borrow0 = ( a1 < b1 );
+    z0 = a0 - b0;
+    z0 -= ( z1 < borrow1 );
+    z1 -= borrow1;
+    z0 -= borrow0;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
+| into two 64-bit pieces which are stored at the locations pointed to by
+| `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits32 aHigh, aLow, bHigh, bLow;
+    bits64 z0, zMiddleA, zMiddleB, z1;
+
+    aLow = a;
+    aHigh = a>>32;
+    bLow = b;
+    bHigh = b>>32;
+    z1 = ( (bits64) aLow ) * bLow;
+    zMiddleA = ( (bits64) aLow ) * bHigh;
+    zMiddleB = ( (bits64) aHigh ) * bLow;
+    z0 = ( (bits64) aHigh ) * bHigh;
+    zMiddleA += zMiddleB;
+    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
+    zMiddleA <<= 32;
+    z1 += zMiddleA;
+    z0 += ( z1 < zMiddleA );
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
+| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+| `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ mul128By64To192(
+     bits64 a0,
+     bits64 a1,
+     bits64 b,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2, more1;
+
+    mul64To128( a1, b, &z1, &z2 );
+    mul64To128( a0, b, &z0, &more1 );
+    add128( z0, more1, 0, z1, &z0, &z1 );
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
+| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+| product.  The product is broken into four 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ mul128To256(
+     bits64 a0,
+     bits64 a1,
+     bits64 b0,
+     bits64 b1,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr,
+     bits64 *z3Ptr
+ )
+{
+    bits64 z0, z1, z2, z3;
+    bits64 more1, more2;
+
+    mul64To128( a1, b1, &z2, &z3 );
+    mul64To128( a1, b0, &z1, &more2 );
+    add128( z1, more2, 0, z2, &z1, &z2 );
+    mul64To128( a0, b0, &z0, &more1 );
+    add128( z0, more1, 0, z1, &z0, &z1 );
+    mul64To128( a0, b1, &more1, &more2 );
+    add128( more1, more2, 0, z2, &more1, &z2 );
+    add128( z0, z1, 0, more1, &z0, &z1 );
+    *z3Ptr = z3;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the 64-bit integer quotient obtained by dividing
+| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
+| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
+| toward zero, the approximation returned lies between q and q + 2 inclusive.
+| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+| unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
+{
+    bits64 b0, b1;
+    bits64 rem0, rem1, term0, term1;
+    bits64 z;
+
+    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
+    b0 = b>>32;
+    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
+    mul64To128( b, z, &term0, &term1 );
+    sub128( a0, a1, term0, term1, &rem0, &rem1 );
+    while ( ( (sbits64) rem0 ) < 0 ) {
+        z -= LIT64( 0x100000000 );
+        b1 = b<<32;
+        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
+    }
+    rem0 = ( rem0<<32 ) | ( rem1>>32 );
+    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the square root of the 32-bit significand given
+| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+| `aExp' (the least significant bit) is 1, the integer returned approximates
+| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+| case, the approximation returned lies strictly within +/-2 of the exact
+| value.
+*----------------------------------------------------------------------------*/
+
+static inline bits32 estimateSqrt32( int16 aExp, bits32 a )
+{
+    static const bits16 sqrtOddAdjustments[] = {
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const bits16 sqrtEvenAdjustments[] = {
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    int8 index;
+    bits32 z;
+
+    index = ( a>>27 ) & 15;
+    if ( aExp & 1 ) {
+        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
+        z = ( ( a / z )<<14 ) + ( z<<15 );
+        a >>= 1;
+    }
+    else {
+        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
+        z = a / z + z;
+        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
+        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
+    }
+    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 32 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros32( bits32 a )
+{
+    static const int8 countLeadingZerosHigh[] = {
+        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+    int8 shiftCount;
+
+    shiftCount = 0;
+    if ( a < 0x10000 ) {
+        shiftCount += 16;
+        a <<= 16;
+    }
+    if ( a < 0x1000000 ) {
+        shiftCount += 8;
+        a <<= 8;
+    }
+    shiftCount += countLeadingZerosHigh[ a>>24 ];
+    return shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 64 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros64( bits64 a )
+{
+    int8 shiftCount;
+
+    shiftCount = 0;
+    if ( a < ( (bits64) 1 )<<32 ) {
+        shiftCount += 32;
+    }
+    else {
+        a >>= 32;
+    }
+    shiftCount += countLeadingZeros32( a );
+    return shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 == b0 ) && ( a1 == b1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+| returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
+| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 != b0 ) || ( a1 != b1 );
+
+}
+
+/*-----------------------------------------------------------------------------
+| Changes the sign of the extended double-precision floating-point value 'a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static inline floatx80 floatx80_chs(floatx80 reg)
+{
+    reg.high ^= 0x8000;
+    return reg;
+}
+
diff --git a/plat/linux68k/emu/musashi/softfloat/softfloat-specialize b/plat/linux68k/emu/musashi/softfloat/softfloat-specialize
new file mode 100644
index 000000000..cc97273b8
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/softfloat-specialize
@@ -0,0 +1,470 @@
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Underflow tininess-detection mode, statically initialized to default value.
+| (The declaration in `softfloat.h' must match the `int8' type here.)
+*----------------------------------------------------------------------------*/
+int8 float_detect_tininess = float_tininess_after_rounding;
+
+/*----------------------------------------------------------------------------
+| Raises the exceptions specified by `flags'.  Floating-point traps can be
+| defined here if desired.  It is currently not possible for such a trap to
+| substitute a result value.  If traps are not implemented, this routine
+| should be simply `float_exception_flags |= flags;'.
+*----------------------------------------------------------------------------*/
+
+void float_raise( int8 flags )
+{
+
+    float_exception_flags |= flags;
+
+}
+
+/*----------------------------------------------------------------------------
+| Internal canonical NaN format.
+*----------------------------------------------------------------------------*/
+typedef struct {
+    flag sign;
+    bits64 high, low;
+} commonNaNT;
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+#define float32_default_nan 0xFFFFFFFF
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float32_is_nan( float32 a )
+{
+
+    return ( 0xFF000000 < (bits32) ( a<<1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float32_is_signaling_nan( float32 a )
+{
+
+    return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float32ToCommonNaN( float32 a )
+{
+    commonNaNT z;
+
+    if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a>>31;
+    z.low = 0;
+    z.high = ( (bits64) a )<<41;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the single-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float32 commonNaNToFloat32( commonNaNT a )
+{
+
+    return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float32 propagateFloat32NaN( float32 a, float32 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float32_is_nan( a );
+    aIsSignalingNaN = float32_is_signaling_nan( a );
+    bIsNaN = float32_is_nan( b );
+    bIsSignalingNaN = float32_is_signaling_nan( b );
+    a |= 0x00400000;
+    b |= 0x00400000;
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float64_is_nan( float64 a )
+{
+
+    return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float64_is_signaling_nan( float64 a )
+{
+
+    return
+           ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
+        && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float64ToCommonNaN( float64 a )
+{
+    commonNaNT z;
+
+    if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a>>63;
+    z.low = 0;
+    z.high = a<<12;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the double-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float64 commonNaNToFloat64( commonNaNT a )
+{
+
+    return
+          ( ( (bits64) a.sign )<<63 )
+        | LIT64( 0x7FF8000000000000 )
+        | ( a.high>>12 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float64 propagateFloat64NaN( float64 a, float64 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float64_is_nan( a );
+    aIsSignalingNaN = float64_is_signaling_nan( a );
+    bIsNaN = float64_is_nan( b );
+    bIsSignalingNaN = float64_is_signaling_nan( b );
+    a |= LIT64( 0x0008000000000000 );
+    b |= LIT64( 0x0008000000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.  The
+| `high' and `low' values hold the most- and least-significant bits,
+| respectively.
+*----------------------------------------------------------------------------*/
+#define floatx80_default_nan_high 0xFFFF
+#define floatx80_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_is_nan( floatx80 a )
+{
+
+    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_is_signaling_nan( floatx80 a )
+{
+    bits64 aLow;
+
+    aLow = a.low & ~ LIT64( 0x4000000000000000 );
+    return
+           ( ( a.high & 0x7FFF ) == 0x7FFF )
+        && (bits64) ( aLow<<1 )
+        && ( a.low == aLow );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
+| invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT floatx80ToCommonNaN( floatx80 a )
+{
+    commonNaNT z;
+
+    if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a.high>>15;
+    z.low = 0;
+    z.high = a.low<<1;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the extended
+| double-precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static floatx80 commonNaNToFloatx80( commonNaNT a )
+{
+    floatx80 z;
+
+    z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
+    z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = floatx80_is_nan( a );
+    aIsSignalingNaN = floatx80_is_signaling_nan( a );
+    bIsNaN = floatx80_is_nan( b );
+    bIsSignalingNaN = floatx80_is_signaling_nan( b );
+    a.low |= LIT64( 0xC000000000000000 );
+    b.low |= LIT64( 0xC000000000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#define EXP_BIAS 0x3FFF
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloatx80Frac( floatx80 a )
+{
+
+    return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int32 extractFloatx80Exp( floatx80 a )
+{
+
+    return a.high & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloatx80Sign( floatx80 a )
+{
+
+    return a.high>>15;
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.  The `high' and
+| `low' values hold the most- and least-significant bits, respectively.
+*----------------------------------------------------------------------------*/
+#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF )
+#define float128_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float128_is_nan( float128 a )
+{
+
+    return
+           ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
+        && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float128_is_signaling_nan( float128 a )
+{
+
+    return
+           ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
+        && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float128ToCommonNaN( float128 a )
+{
+    commonNaNT z;
+
+    if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a.high>>63;
+    shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the quadruple-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float128 commonNaNToFloat128( commonNaNT a )
+{
+    float128 z;
+
+    shift128Right( a.high, a.low, 16, &z.high, &z.low );
+    z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float128 propagateFloat128NaN( float128 a, float128 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float128_is_nan( a );
+    aIsSignalingNaN = float128_is_signaling_nan( a );
+    bIsNaN = float128_is_nan( b );
+    bIsSignalingNaN = float128_is_signaling_nan( b );
+    a.high |= LIT64( 0x0000800000000000 );
+    b.high |= LIT64( 0x0000800000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#endif
+
diff --git a/plat/linux68k/emu/musashi/softfloat/softfloat.c b/plat/linux68k/emu/musashi/softfloat/softfloat.c
new file mode 100644
index 000000000..e2bad5963
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/softfloat.c
@@ -0,0 +1,4940 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#include "../m68kcpu.h" // which includes softfloat.h after defining the basic types
+
+/*----------------------------------------------------------------------------
+| Floating-point rounding mode, extended double-precision rounding precision,
+| and exception flags.
+*----------------------------------------------------------------------------*/
+int8 float_exception_flags = 0;
+#ifdef FLOATX80
+int8 floatx80_rounding_precision = 80;
+#endif
+
+int8 float_rounding_mode = float_round_nearest_even;
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine:  (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output.  These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize"
+
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+| and 7, and returns the properly rounded 32-bit integer corresponding to the
+| input.  If `zSign' is 1, the input is negated before being converted to an
+| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
+| is simply rounded to an integer, with the inexact exception raised if the
+| input cannot be represented exactly as an integer.  However, if the fixed-
+| point input is too large, the invalid exception is raised and the largest
+| positive or negative integer is returned.
+*----------------------------------------------------------------------------*/
+
+static int32 roundAndPackInt32( flag zSign, bits64 absZ )
+{
+	int8 roundingMode;
+	flag roundNearestEven;
+	int8 roundIncrement, roundBits;
+	int32 z;
+
+	roundingMode = float_rounding_mode;
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	roundIncrement = 0x40;
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			roundIncrement = 0;
+		}
+		else {
+			roundIncrement = 0x7F;
+			if ( zSign ) {
+				if ( roundingMode == float_round_up ) roundIncrement = 0;
+			}
+			else {
+				if ( roundingMode == float_round_down ) roundIncrement = 0;
+			}
+		}
+	}
+	roundBits = absZ & 0x7F;
+	absZ = ( absZ + roundIncrement )>>7;
+	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+	z = absZ;
+	if ( zSign ) z = - z;
+	if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
+		float_raise( float_flag_invalid );
+		return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+	}
+	if ( roundBits ) float_exception_flags |= float_flag_inexact;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the fixed-point input is simply rounded to an integer, with
+| the inexact exception raised if the input cannot be represented exactly as
+| an integer.  However, if the fixed-point input is too large, the invalid
+| exception is raised and the largest positive or negative integer is
+| returned.
+*----------------------------------------------------------------------------*/
+
+static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
+{
+	int8 roundingMode;
+	flag roundNearestEven, increment;
+	int64 z;
+
+	roundingMode = float_rounding_mode;
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	increment = ( (sbits64) absZ1 < 0 );
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			increment = 0;
+		}
+		else {
+			if ( zSign ) {
+				increment = ( roundingMode == float_round_down ) && absZ1;
+			}
+			else {
+				increment = ( roundingMode == float_round_up ) && absZ1;
+			}
+		}
+	}
+	if ( increment ) {
+		++absZ0;
+		if ( absZ0 == 0 ) goto overflow;
+		absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
+	}
+	z = absZ0;
+	if ( zSign ) z = - z;
+	if ( z && ( ( z < 0 ) ^ zSign ) ) {
+	overflow:
+		float_raise( float_flag_invalid );
+		return
+				zSign ? (sbits64) LIT64( 0x8000000000000000 )
+                     : (sbits64)( 0x7FFFFFFFFFFFFFFF );
+	}
+	if ( absZ1 ) float_exception_flags |= float_flag_inexact;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits32 extractFloat32Frac( float32 a )
+{
+	return a & 0x007FFFFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int16 extractFloat32Exp( float32 a )
+{
+	return ( a>>23 ) & 0xFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloat32Sign( float32 a )
+{
+	return a>>31;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+	normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
+{
+	int8 shiftCount;
+
+	shiftCount = countLeadingZeros32( aSig ) - 8;
+	*zSigPtr = aSig<<shiftCount;
+	*zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| single-precision floating-point value, returning the result.  After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result.  This means that any integer portion of `zSig'
+| will be added into the exponent.  Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+static inline float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+	return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+	int8 roundingMode;
+	flag roundNearestEven;
+	int8 roundIncrement, roundBits;
+	flag isTiny;
+
+	roundingMode = float_rounding_mode;
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	roundIncrement = 0x40;
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			roundIncrement = 0;
+		}
+		else {
+			roundIncrement = 0x7F;
+			if ( zSign ) {
+				if ( roundingMode == float_round_up ) roundIncrement = 0;
+			}
+			else {
+				if ( roundingMode == float_round_down ) roundIncrement = 0;
+			}
+		}
+	}
+	roundBits = zSig & 0x7F;
+	if ( 0xFD <= (bits16) zExp ) {
+		if (    ( 0xFD < zExp )
+				|| (    ( zExp == 0xFD )
+					&& ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
+			) {
+			float_raise( float_flag_overflow | float_flag_inexact );
+			return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
+		}
+		if ( zExp < 0 ) {
+			isTiny =
+					( float_detect_tininess == float_tininess_before_rounding )
+				|| ( zExp < -1 )
+				|| ( zSig + roundIncrement < 0x80000000 );
+			shift32RightJamming( zSig, - zExp, &zSig );
+			zExp = 0;
+			roundBits = zSig & 0x7F;
+			if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+		}
+	}
+	if ( roundBits ) float_exception_flags |= float_flag_inexact;
+	zSig = ( zSig + roundIncrement )>>7;
+	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+	if ( zSig == 0 ) zExp = 0;
+	return packFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float32
+	normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+	int8 shiftCount;
+
+	shiftCount = countLeadingZeros32( zSig ) - 1;
+	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloat64Frac( float64 a )
+{
+	return a & LIT64( 0x000FFFFFFFFFFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int16 extractFloat64Exp( float64 a )
+{
+	return ( a>>52 ) & 0x7FF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloat64Sign( float64 a )
+{
+	return a>>63;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+	normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
+{
+	int8 shiftCount;
+
+	shiftCount = countLeadingZeros64( aSig ) - 11;
+	*zSigPtr = aSig<<shiftCount;
+	*zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| double-precision floating-point value, returning the result.  After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result.  This means that any integer portion of `zSig'
+| will be added into the exponent.  Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+static inline float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+	return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+	int8 roundingMode;
+	flag roundNearestEven;
+	int16 roundIncrement, roundBits;
+	flag isTiny;
+
+	roundingMode = float_rounding_mode;
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	roundIncrement = 0x200;
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			roundIncrement = 0;
+		}
+		else {
+			roundIncrement = 0x3FF;
+			if ( zSign ) {
+				if ( roundingMode == float_round_up ) roundIncrement = 0;
+			}
+			else {
+				if ( roundingMode == float_round_down ) roundIncrement = 0;
+			}
+		}
+	}
+	roundBits = zSig & 0x3FF;
+	if ( 0x7FD <= (bits16) zExp ) {
+		if (    ( 0x7FD < zExp )
+				|| (    ( zExp == 0x7FD )
+					&& ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
+			) {
+			float_raise( float_flag_overflow | float_flag_inexact );
+			return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
+		}
+		if ( zExp < 0 ) {
+			isTiny =
+					( float_detect_tininess == float_tininess_before_rounding )
+				|| ( zExp < -1 )
+				|| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
+			shift64RightJamming( zSig, - zExp, &zSig );
+			zExp = 0;
+			roundBits = zSig & 0x3FF;
+			if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+		}
+	}
+	if ( roundBits ) float_exception_flags |= float_flag_inexact;
+	zSig = ( zSig + roundIncrement )>>10;
+	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
+	if ( zSig == 0 ) zExp = 0;
+	return packFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float64
+	normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+	int8 shiftCount;
+
+	shiftCount = countLeadingZeros64( zSig ) - 1;
+	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+	normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
+{
+	int8 shiftCount;
+
+	shiftCount = countLeadingZeros64( aSig );
+	*zSigPtr = aSig<<shiftCount;
+	*zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively.  Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+// roundAndPackFloatx80 is now also used in fyl2x.c
+
+/* static */ floatx80
+	roundAndPackFloatx80(
+		int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+	)
+{
+	int8 roundingMode;
+	flag roundNearestEven, increment, isTiny;
+	int64 roundIncrement, roundMask, roundBits;
+
+	roundingMode = float_rounding_mode;
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	if ( roundingPrecision == 80 ) goto precision80;
+	if ( roundingPrecision == 64 ) {
+		roundIncrement = LIT64( 0x0000000000000400 );
+		roundMask = LIT64( 0x00000000000007FF );
+	}
+	else if ( roundingPrecision == 32 ) {
+		roundIncrement = LIT64( 0x0000008000000000 );
+		roundMask = LIT64( 0x000000FFFFFFFFFF );
+	}
+	else {
+		goto precision80;
+	}
+	zSig0 |= ( zSig1 != 0 );
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			roundIncrement = 0;
+		}
+		else {
+			roundIncrement = roundMask;
+			if ( zSign ) {
+				if ( roundingMode == float_round_up ) roundIncrement = 0;
+			}
+			else {
+				if ( roundingMode == float_round_down ) roundIncrement = 0;
+			}
+		}
+	}
+	roundBits = zSig0 & roundMask;
+	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+		if (    ( 0x7FFE < zExp )
+				|| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
+			) {
+			goto overflow;
+		}
+		if ( zExp <= 0 ) {
+			isTiny =
+					( float_detect_tininess == float_tininess_before_rounding )
+				|| ( zExp < 0 )
+				|| ( zSig0 <= zSig0 + roundIncrement );
+			shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
+			zExp = 0;
+			roundBits = zSig0 & roundMask;
+			if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+			if ( roundBits ) float_exception_flags |= float_flag_inexact;
+			zSig0 += roundIncrement;
+			if ( (sbits64) zSig0 < 0 ) zExp = 1;
+			roundIncrement = roundMask + 1;
+			if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+				roundMask |= roundIncrement;
+			}
+			zSig0 &= ~ roundMask;
+			return packFloatx80( zSign, zExp, zSig0 );
+		}
+	}
+	if ( roundBits ) float_exception_flags |= float_flag_inexact;
+	zSig0 += roundIncrement;
+	if ( zSig0 < roundIncrement ) {
+		++zExp;
+                zSig0 = (sbits64)( 0x8000000000000000 );
+	}
+	roundIncrement = roundMask + 1;
+	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+		roundMask |= roundIncrement;
+	}
+	zSig0 &= ~ roundMask;
+	if ( zSig0 == 0 ) zExp = 0;
+	return packFloatx80( zSign, zExp, zSig0 );
+	precision80:
+	increment = ( (sbits64) zSig1 < 0 );
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			increment = 0;
+		}
+		else {
+			if ( zSign ) {
+				increment = ( roundingMode == float_round_down ) && zSig1;
+			}
+			else {
+				increment = ( roundingMode == float_round_up ) && zSig1;
+			}
+		}
+	}
+	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+		if (    ( 0x7FFE < zExp )
+				|| (    ( zExp == 0x7FFE )
+					&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
+					&& increment
+				)
+			) {
+			roundMask = 0;
+	overflow:
+			float_raise( float_flag_overflow | float_flag_inexact );
+			if (    ( roundingMode == float_round_to_zero )
+					|| ( zSign && ( roundingMode == float_round_up ) )
+					|| ( ! zSign && ( roundingMode == float_round_down ) )
+				) {
+				return packFloatx80( zSign, 0x7FFE, ~ roundMask );
+			}
+			return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+		}
+		if ( zExp <= 0 ) {
+			isTiny =
+					( float_detect_tininess == float_tininess_before_rounding )
+				|| ( zExp < 0 )
+				|| ! increment
+				|| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
+			shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
+			zExp = 0;
+			if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
+			if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+			if ( roundNearestEven ) {
+				increment = ( (sbits64) zSig1 < 0 );
+			}
+			else {
+				if ( zSign ) {
+					increment = ( roundingMode == float_round_down ) && zSig1;
+				}
+				else {
+					increment = ( roundingMode == float_round_up ) && zSig1;
+				}
+			}
+			if ( increment ) {
+				++zSig0;
+				zSig0 &=
+					~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+				if ( (sbits64) zSig0 < 0 ) zExp = 1;
+			}
+			return packFloatx80( zSign, zExp, zSig0 );
+		}
+	}
+	if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+	if ( increment ) {
+		++zSig0;
+		if ( zSig0 == 0 ) {
+			++zExp;
+			zSig0 = LIT64( 0x8000000000000000 );
+		}
+		else {
+			zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+		}
+	}
+	else {
+		if ( zSig0 == 0 ) zExp = 0;
+	}
+	return packFloatx80( zSign, zExp, zSig0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+static floatx80
+	normalizeRoundAndPackFloatx80(
+		int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+	)
+{
+	int8 shiftCount;
+
+	if ( zSig0 == 0 ) {
+		zSig0 = zSig1;
+		zSig1 = 0;
+		zExp -= 64;
+	}
+	shiftCount = countLeadingZeros64( zSig0 );
+	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+	zExp -= shiftCount;
+	return
+		roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the least-significant 64 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloat128Frac1( float128 a )
+{
+	return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the most-significant 48 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloat128Frac0( float128 a )
+{
+	return a.high & LIT64( 0x0000FFFFFFFFFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the quadruple-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int32 extractFloat128Exp( float128 a )
+{
+	return ( a.high>>48 ) & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the quadruple-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloat128Sign( float128 a )
+{
+	return a.high>>63;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal quadruple-precision floating-point value
+| represented by the denormalized significand formed by the concatenation of
+| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
+| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
+| significand are stored at the location pointed to by `zSig0Ptr', and the
+| least significant 64 bits of the normalized significand are stored at the
+| location pointed to by `zSig1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static void
+	normalizeFloat128Subnormal(
+		bits64 aSig0,
+		bits64 aSig1,
+		int32 *zExpPtr,
+		bits64 *zSig0Ptr,
+		bits64 *zSig1Ptr
+	)
+{
+	int8 shiftCount;
+
+	if ( aSig0 == 0 ) {
+		shiftCount = countLeadingZeros64( aSig1 ) - 15;
+		if ( shiftCount < 0 ) {
+			*zSig0Ptr = aSig1>>( - shiftCount );
+			*zSig1Ptr = aSig1<<( shiftCount & 63 );
+		}
+		else {
+			*zSig0Ptr = aSig1<<shiftCount;
+			*zSig1Ptr = 0;
+		}
+		*zExpPtr = - shiftCount - 63;
+	}
+	else {
+		shiftCount = countLeadingZeros64( aSig0 ) - 15;
+		shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
+		*zExpPtr = 1 - shiftCount;
+	}
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int32_to_float32( int32 a )
+{
+	flag zSign;
+
+	if ( a == 0 ) return 0;
+	if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
+	zSign = ( a < 0 );
+	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int32_to_float64( int32 a )
+{
+	flag zSign;
+	uint32 absA;
+	int8 shiftCount;
+	bits64 zSig;
+
+	if ( a == 0 ) return 0;
+	zSign = ( a < 0 );
+	absA = zSign ? - a : a;
+	shiftCount = countLeadingZeros32( absA ) + 21;
+	zSig = absA;
+	return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int32_to_floatx80( int32 a )
+{
+	flag zSign;
+	uint32 absA;
+	int8 shiftCount;
+	bits64 zSig;
+
+	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+	zSign = ( a < 0 );
+	absA = zSign ? - a : a;
+	shiftCount = countLeadingZeros32( absA ) + 32;
+	zSig = absA;
+	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a' to
+| the quadruple-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int32_to_float128( int32 a )
+{
+	flag zSign;
+	uint32 absA;
+	int8 shiftCount;
+	bits64 zSig0;
+
+	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+	zSign = ( a < 0 );
+	absA = zSign ? - a : a;
+	shiftCount = countLeadingZeros32( absA ) + 17;
+	zSig0 = absA;
+	return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int64_to_float32( int64 a )
+{
+	flag zSign;
+	uint64 absA;
+	int8 shiftCount;
+//    bits32 zSig;
+
+	if ( a == 0 ) return 0;
+	zSign = ( a < 0 );
+	absA = zSign ? - a : a;
+	shiftCount = countLeadingZeros64( absA ) - 40;
+	if ( 0 <= shiftCount ) {
+		return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
+	}
+	else {
+		shiftCount += 7;
+		if ( shiftCount < 0 ) {
+			shift64RightJamming( absA, - shiftCount, &absA );
+		}
+		else {
+			absA <<= shiftCount;
+		}
+		return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int64_to_float64( int64 a )
+{
+	flag zSign;
+
+	if ( a == 0 ) return 0;
+	if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
+		return packFloat64( 1, 0x43E, 0 );
+	}
+	zSign = ( a < 0 );
+	return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int64_to_floatx80( int64 a )
+{
+	flag zSign;
+	uint64 absA;
+	int8 shiftCount;
+
+	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+	zSign = ( a < 0 );
+	absA = zSign ? - a : a;
+	shiftCount = countLeadingZeros64( absA );
+	return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a' to
+| the quadruple-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int64_to_float128( int64 a )
+{
+	flag zSign;
+	uint64 absA;
+	int8 shiftCount;
+	int32 zExp;
+	bits64 zSig0, zSig1;
+
+	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+	zSign = ( a < 0 );
+	absA = zSign ? - a : a;
+	shiftCount = countLeadingZeros64( absA ) + 49;
+	zExp = 0x406E - shiftCount;
+	if ( 64 <= shiftCount ) {
+		zSig1 = 0;
+		zSig0 = absA;
+		shiftCount -= 64;
+	}
+	else {
+		zSig1 = absA;
+		zSig0 = 0;
+	}
+	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+	return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float32_to_int32( float32 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits32 aSig;
+	bits64 aSig64;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
+	if ( aExp ) aSig |= 0x00800000;
+	shiftCount = 0xAF - aExp;
+	aSig64 = aSig;
+	aSig64 <<= 32;
+	if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
+	return roundAndPackInt32( aSign, aSig64 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float32_to_int32_round_to_zero( float32 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits32 aSig;
+	int32 z;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	shiftCount = aExp - 0x9E;
+	if ( 0 <= shiftCount ) {
+		if ( a != 0xCF000000 ) {
+			float_raise( float_flag_invalid );
+			if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
+		}
+		return (sbits32) 0x80000000;
+	}
+	else if ( aExp <= 0x7E ) {
+		if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+		return 0;
+	}
+	aSig = ( aSig | 0x00800000 )<<8;
+	z = aSig>>( - shiftCount );
+	if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	if ( aSign ) z = - z;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float32_to_int64( float32 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits32 aSig;
+	bits64 aSig64, aSigExtra;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	shiftCount = 0xBE - aExp;
+	if ( shiftCount < 0 ) {
+		float_raise( float_flag_invalid );
+		if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+			return LIT64( 0x7FFFFFFFFFFFFFFF );
+		}
+		return (sbits64) LIT64( 0x8000000000000000 );
+	}
+	if ( aExp ) aSig |= 0x00800000;
+	aSig64 = aSig;
+	aSig64 <<= 40;
+	shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
+	return roundAndPackInt64( aSign, aSig64, aSigExtra );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.  If
+| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float32_to_int64_round_to_zero( float32 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits32 aSig;
+	bits64 aSig64;
+	int64 z;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	shiftCount = aExp - 0xBE;
+	if ( 0 <= shiftCount ) {
+		if ( a != 0xDF000000 ) {
+			float_raise( float_flag_invalid );
+			if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+				return LIT64( 0x7FFFFFFFFFFFFFFF );
+			}
+		}
+		return (sbits64) LIT64( 0x8000000000000000 );
+	}
+	else if ( aExp <= 0x7E ) {
+		if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+		return 0;
+	}
+	aSig64 = aSig | 0x00800000;
+	aSig64 <<= 40;
+	z = aSig64>>( - shiftCount );
+	if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	if ( aSign ) z = - z;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the double-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float32_to_float64( float32 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits32 aSig;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	if ( aExp == 0xFF ) {
+		if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
+		return packFloat64( aSign, 0x7FF, 0 );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+		--aExp;
+	}
+	return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float32_to_floatx80( float32 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits32 aSig;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	if ( aExp == 0xFF ) {
+		if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
+		return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+	}
+	aSig |= 0x00800000;
+	return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the double-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float32_to_float128( float32 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits32 aSig;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	if ( aExp == 0xFF ) {
+		if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
+		return packFloat128( aSign, 0x7FFF, 0, 0 );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+		--aExp;
+	}
+	return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the single-precision floating-point value `a' to an integer, and
+| returns the result as a single-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_round_to_int( float32 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits32 lastBitMask, roundBitsMask;
+	int8 roundingMode;
+	float32 z;
+
+	aExp = extractFloat32Exp( a );
+	if ( 0x96 <= aExp ) {
+		if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
+			return propagateFloat32NaN( a, a );
+		}
+		return a;
+	}
+	if ( aExp <= 0x7E ) {
+		if ( (bits32) ( a<<1 ) == 0 ) return a;
+		float_exception_flags |= float_flag_inexact;
+		aSign = extractFloat32Sign( a );
+		switch ( float_rounding_mode ) {
+			case float_round_nearest_even:
+			if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
+				return packFloat32( aSign, 0x7F, 0 );
+			}
+			break;
+			case float_round_down:
+			return aSign ? 0xBF800000 : 0;
+			case float_round_up:
+			return aSign ? 0x80000000 : 0x3F800000;
+		}
+		return packFloat32( aSign, 0, 0 );
+	}
+	lastBitMask = 1;
+	lastBitMask <<= 0x96 - aExp;
+	roundBitsMask = lastBitMask - 1;
+	z = a;
+	roundingMode = float_rounding_mode;
+	if ( roundingMode == float_round_nearest_even ) {
+		z += lastBitMask>>1;
+		if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+	}
+	else if ( roundingMode != float_round_to_zero ) {
+		if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+			z += roundBitsMask;
+		}
+	}
+	z &= ~ roundBitsMask;
+	if ( z != a ) float_exception_flags |= float_flag_inexact;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the single-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+	int16 aExp, bExp, zExp;
+	bits32 aSig, bSig, zSig;
+	int16 expDiff;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	bSig = extractFloat32Frac( b );
+	bExp = extractFloat32Exp( b );
+	expDiff = aExp - bExp;
+	aSig <<= 6;
+	bSig <<= 6;
+	if ( 0 < expDiff ) {
+		if ( aExp == 0xFF ) {
+			if ( aSig ) return propagateFloat32NaN( a, b );
+			return a;
+		}
+		if ( bExp == 0 ) {
+			--expDiff;
+		}
+		else {
+			bSig |= 0x20000000;
+		}
+		shift32RightJamming( bSig, expDiff, &bSig );
+		zExp = aExp;
+	}
+	else if ( expDiff < 0 ) {
+		if ( bExp == 0xFF ) {
+			if ( bSig ) return propagateFloat32NaN( a, b );
+			return packFloat32( zSign, 0xFF, 0 );
+		}
+		if ( aExp == 0 ) {
+			++expDiff;
+		}
+		else {
+			aSig |= 0x20000000;
+		}
+		shift32RightJamming( aSig, - expDiff, &aSig );
+		zExp = bExp;
+	}
+	else {
+		if ( aExp == 0xFF ) {
+			if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+			return a;
+		}
+		if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
+		zSig = 0x40000000 + aSig + bSig;
+		zExp = aExp;
+		goto roundAndPack;
+	}
+	aSig |= 0x20000000;
+	zSig = ( aSig + bSig )<<1;
+	--zExp;
+	if ( (sbits32) zSig < 0 ) {
+		zSig = aSig + bSig;
+		++zExp;
+	}
+	roundAndPack:
+	return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the single-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+	int16 aExp, bExp, zExp;
+	bits32 aSig, bSig, zSig;
+	int16 expDiff;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	bSig = extractFloat32Frac( b );
+	bExp = extractFloat32Exp( b );
+	expDiff = aExp - bExp;
+	aSig <<= 7;
+	bSig <<= 7;
+	if ( 0 < expDiff ) goto aExpBigger;
+	if ( expDiff < 0 ) goto bExpBigger;
+	if ( aExp == 0xFF ) {
+		if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+		float_raise( float_flag_invalid );
+		return float32_default_nan;
+	}
+	if ( aExp == 0 ) {
+		aExp = 1;
+		bExp = 1;
+	}
+	if ( bSig < aSig ) goto aBigger;
+	if ( aSig < bSig ) goto bBigger;
+	return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
+	bExpBigger:
+	if ( bExp == 0xFF ) {
+		if ( bSig ) return propagateFloat32NaN( a, b );
+		return packFloat32( zSign ^ 1, 0xFF, 0 );
+	}
+	if ( aExp == 0 ) {
+		++expDiff;
+	}
+	else {
+		aSig |= 0x40000000;
+	}
+	shift32RightJamming( aSig, - expDiff, &aSig );
+	bSig |= 0x40000000;
+	bBigger:
+	zSig = bSig - aSig;
+	zExp = bExp;
+	zSign ^= 1;
+	goto normalizeRoundAndPack;
+	aExpBigger:
+	if ( aExp == 0xFF ) {
+		if ( aSig ) return propagateFloat32NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		--expDiff;
+	}
+	else {
+		bSig |= 0x40000000;
+	}
+	shift32RightJamming( bSig, expDiff, &bSig );
+	aSig |= 0x40000000;
+	aBigger:
+	zSig = aSig - bSig;
+	zExp = aExp;
+	normalizeRoundAndPack:
+	--zExp;
+	return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the single-precision floating-point values `a'
+| and `b'.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_add( float32 a, float32 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat32Sign( a );
+	bSign = extractFloat32Sign( b );
+	if ( aSign == bSign ) {
+		return addFloat32Sigs( a, b, aSign );
+	}
+	else {
+		return subFloat32Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the single-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sub( float32 a, float32 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat32Sign( a );
+	bSign = extractFloat32Sign( b );
+	if ( aSign == bSign ) {
+		return subFloat32Sigs( a, b, aSign );
+	}
+	else {
+		return addFloat32Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the single-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_mul( float32 a, float32 b )
+{
+	flag aSign, bSign, zSign;
+	int16 aExp, bExp, zExp;
+	bits32 aSig, bSig;
+	bits64 zSig64;
+	bits32 zSig;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	bSig = extractFloat32Frac( b );
+	bExp = extractFloat32Exp( b );
+	bSign = extractFloat32Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0xFF ) {
+		if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+			return propagateFloat32NaN( a, b );
+		}
+		if ( ( bExp | bSig ) == 0 ) {
+			float_raise( float_flag_invalid );
+			return float32_default_nan;
+		}
+		return packFloat32( zSign, 0xFF, 0 );
+	}
+	if ( bExp == 0xFF ) {
+		if ( bSig ) return propagateFloat32NaN( a, b );
+		if ( ( aExp | aSig ) == 0 ) {
+			float_raise( float_flag_invalid );
+			return float32_default_nan;
+		}
+		return packFloat32( zSign, 0xFF, 0 );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
+		normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+	}
+	zExp = aExp + bExp - 0x7F;
+	aSig = ( aSig | 0x00800000 )<<7;
+	bSig = ( bSig | 0x00800000 )<<8;
+	shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
+	zSig = zSig64;
+	if ( 0 <= (sbits32) ( zSig<<1 ) ) {
+		zSig <<= 1;
+		--zExp;
+	}
+	return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the single-precision floating-point value `a'
+| by the corresponding value `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_div( float32 a, float32 b )
+{
+	flag aSign, bSign, zSign;
+	int16 aExp, bExp, zExp;
+	bits32 aSig, bSig, zSig;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	bSig = extractFloat32Frac( b );
+	bExp = extractFloat32Exp( b );
+	bSign = extractFloat32Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0xFF ) {
+		if ( aSig ) return propagateFloat32NaN( a, b );
+		if ( bExp == 0xFF ) {
+			if ( bSig ) return propagateFloat32NaN( a, b );
+			float_raise( float_flag_invalid );
+			return float32_default_nan;
+		}
+		return packFloat32( zSign, 0xFF, 0 );
+	}
+	if ( bExp == 0xFF ) {
+		if ( bSig ) return propagateFloat32NaN( a, b );
+		return packFloat32( zSign, 0, 0 );
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) {
+			if ( ( aExp | aSig ) == 0 ) {
+				float_raise( float_flag_invalid );
+				return float32_default_nan;
+			}
+			float_raise( float_flag_divbyzero );
+			return packFloat32( zSign, 0xFF, 0 );
+		}
+		normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+	}
+	zExp = aExp - bExp + 0x7D;
+	aSig = ( aSig | 0x00800000 )<<7;
+	bSig = ( bSig | 0x00800000 )<<8;
+	if ( bSig <= ( aSig + aSig ) ) {
+		aSig >>= 1;
+		++zExp;
+	}
+	zSig = ( ( (bits64) aSig )<<32 ) / bSig;
+	if ( ( zSig & 0x3F ) == 0 ) {
+		zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
+	}
+	return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the single-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_rem( float32 a, float32 b )
+{
+	flag aSign, zSign;
+	int16 aExp, bExp, expDiff;
+	bits32 aSig, bSig;
+	bits32 q;
+	bits64 aSig64, bSig64, q64;
+	bits32 alternateASig;
+	sbits32 sigMean;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	bSig = extractFloat32Frac( b );
+	bExp = extractFloat32Exp( b );
+//    bSign = extractFloat32Sign( b );
+	if ( aExp == 0xFF ) {
+		if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+			return propagateFloat32NaN( a, b );
+		}
+		float_raise( float_flag_invalid );
+		return float32_default_nan;
+	}
+	if ( bExp == 0xFF ) {
+		if ( bSig ) return propagateFloat32NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) {
+			float_raise( float_flag_invalid );
+			return float32_default_nan;
+		}
+		normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return a;
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+	}
+	expDiff = aExp - bExp;
+	aSig |= 0x00800000;
+	bSig |= 0x00800000;
+	if ( expDiff < 32 ) {
+		aSig <<= 8;
+		bSig <<= 8;
+		if ( expDiff < 0 ) {
+			if ( expDiff < -1 ) return a;
+			aSig >>= 1;
+		}
+		q = ( bSig <= aSig );
+		if ( q ) aSig -= bSig;
+		if ( 0 < expDiff ) {
+			q = ( ( (bits64) aSig )<<32 ) / bSig;
+			q >>= 32 - expDiff;
+			bSig >>= 2;
+			aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+		}
+		else {
+			aSig >>= 2;
+			bSig >>= 2;
+		}
+	}
+	else {
+		if ( bSig <= aSig ) aSig -= bSig;
+		aSig64 = ( (bits64) aSig )<<40;
+		bSig64 = ( (bits64) bSig )<<40;
+		expDiff -= 64;
+		while ( 0 < expDiff ) {
+			q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+			q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+			aSig64 = - ( ( bSig * q64 )<<38 );
+			expDiff -= 62;
+		}
+		expDiff += 64;
+		q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+		q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+		q = q64>>( 64 - expDiff );
+		bSig <<= 6;
+		aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
+	}
+	do {
+		alternateASig = aSig;
+		++q;
+		aSig -= bSig;
+	} while ( 0 <= (sbits32) aSig );
+	sigMean = aSig + alternateASig;
+	if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+		aSig = alternateASig;
+	}
+	zSign = ( (sbits32) aSig < 0 );
+	if ( zSign ) aSig = - aSig;
+	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the single-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sqrt( float32 a )
+{
+	flag aSign;
+	int16 aExp, zExp;
+	bits32 aSig, zSig;
+	bits64 rem, term;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	if ( aExp == 0xFF ) {
+		if ( aSig ) return propagateFloat32NaN( a, 0 );
+		if ( ! aSign ) return a;
+		float_raise( float_flag_invalid );
+		return float32_default_nan;
+	}
+	if ( aSign ) {
+		if ( ( aExp | aSig ) == 0 ) return a;
+		float_raise( float_flag_invalid );
+		return float32_default_nan;
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return 0;
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+	}
+	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
+	aSig = ( aSig | 0x00800000 )<<8;
+	zSig = estimateSqrt32( aExp, aSig ) + 2;
+	if ( ( zSig & 0x7F ) <= 5 ) {
+		if ( zSig < 2 ) {
+			zSig = 0x7FFFFFFF;
+			goto roundAndPack;
+		}
+		aSig >>= aExp & 1;
+		term = ( (bits64) zSig ) * zSig;
+		rem = ( ( (bits64) aSig )<<32 ) - term;
+		while ( (sbits64) rem < 0 ) {
+			--zSig;
+			rem += ( ( (bits64) zSig )<<1 ) | 1;
+		}
+		zSig |= ( rem != 0 );
+	}
+	shift32RightJamming( zSig, 1, &zSig );
+	roundAndPack:
+	return roundAndPackFloat32( 0, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_eq( float32 a, float32 b )
+{
+	if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+			|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+		) {
+		if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_le( float32 a, float32 b )
+{
+	flag aSign, bSign;
+
+	if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+			|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloat32Sign( a );
+	bSign = extractFloat32Sign( b );
+	if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
+	return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_lt( float32 a, float32 b )
+{
+	flag aSign, bSign;
+
+	if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+			|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloat32Sign( a );
+	bSign = extractFloat32Sign( b );
+	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
+	return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_eq_signaling( float32 a, float32 b )
+{
+	if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+			|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_le_quiet( float32 a, float32 b )
+{
+	flag aSign, bSign;
+//    int16 aExp, bExp;
+
+	if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+			|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+		) {
+		if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloat32Sign( a );
+	bSign = extractFloat32Sign( b );
+	if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
+	return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_lt_quiet( float32 a, float32 b )
+{
+	flag aSign, bSign;
+
+	if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+			|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+		) {
+		if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloat32Sign( a );
+	bSign = extractFloat32Sign( b );
+	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
+	return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float64_to_int32( float64 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits64 aSig;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+	if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+	shiftCount = 0x42C - aExp;
+	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
+	return roundAndPackInt32( aSign, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float64_to_int32_round_to_zero( float64 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits64 aSig, savedASig;
+	int32 z;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( 0x41E < aExp ) {
+		if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+		goto invalid;
+	}
+	else if ( aExp < 0x3FF ) {
+		if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+		return 0;
+	}
+	aSig |= LIT64( 0x0010000000000000 );
+	shiftCount = 0x433 - aExp;
+	savedASig = aSig;
+	aSig >>= shiftCount;
+	z = aSig;
+	if ( aSign ) z = - z;
+	if ( ( z < 0 ) ^ aSign ) {
+	invalid:
+		float_raise( float_flag_invalid );
+		return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+	}
+	if ( ( aSig<<shiftCount ) != savedASig ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float64_to_int64( float64 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits64 aSig, aSigExtra;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+	shiftCount = 0x433 - aExp;
+	if ( shiftCount <= 0 ) {
+		if ( 0x43E < aExp ) {
+			float_raise( float_flag_invalid );
+			if (    ! aSign
+					|| (    ( aExp == 0x7FF )
+						&& ( aSig != LIT64( 0x0010000000000000 ) ) )
+				) {
+				return LIT64( 0x7FFFFFFFFFFFFFFF );
+			}
+			return (sbits64) LIT64( 0x8000000000000000 );
+		}
+		aSigExtra = 0;
+		aSig <<= - shiftCount;
+	}
+	else {
+		shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+	}
+	return roundAndPackInt64( aSign, aSig, aSigExtra );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float64_to_int64_round_to_zero( float64 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits64 aSig;
+	int64 z;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+	shiftCount = aExp - 0x433;
+	if ( 0 <= shiftCount ) {
+		if ( 0x43E <= aExp ) {
+			if ( a != LIT64( 0xC3E0000000000000 ) ) {
+				float_raise( float_flag_invalid );
+				if (    ! aSign
+						|| (    ( aExp == 0x7FF )
+							&& ( aSig != LIT64( 0x0010000000000000 ) ) )
+					) {
+					return LIT64( 0x7FFFFFFFFFFFFFFF );
+				}
+			}
+			return (sbits64) LIT64( 0x8000000000000000 );
+		}
+		z = aSig<<shiftCount;
+	}
+	else {
+		if ( aExp < 0x3FE ) {
+			if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+			return 0;
+		}
+		z = aSig>>( - shiftCount );
+		if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
+			float_exception_flags |= float_flag_inexact;
+		}
+	}
+	if ( aSign ) z = - z;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the single-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float64_to_float32( float64 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits64 aSig;
+	bits32 zSig;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( aExp == 0x7FF ) {
+		if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
+		return packFloat32( aSign, 0xFF, 0 );
+	}
+	shift64RightJamming( aSig, 22, &aSig );
+	zSig = aSig;
+	if ( aExp || zSig ) {
+		zSig |= 0x40000000;
+		aExp -= 0x381;
+	}
+	return roundAndPackFloat32( aSign, aExp, zSig );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float64_to_floatx80( float64 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits64 aSig;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( aExp == 0x7FF ) {
+		if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
+		return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+	}
+	return
+		packFloatx80(
+			aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the quadruple-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float64_to_float128( float64 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits64 aSig, zSig0, zSig1;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( aExp == 0x7FF ) {
+		if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
+		return packFloat128( aSign, 0x7FFF, 0, 0 );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+		--aExp;
+	}
+	shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
+	return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the double-precision floating-point value `a' to an integer, and
+| returns the result as a double-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_round_to_int( float64 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits64 lastBitMask, roundBitsMask;
+	int8 roundingMode;
+	float64 z;
+
+	aExp = extractFloat64Exp( a );
+	if ( 0x433 <= aExp ) {
+		if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
+			return propagateFloat64NaN( a, a );
+		}
+		return a;
+	}
+	if ( aExp < 0x3FF ) {
+		if ( (bits64) ( a<<1 ) == 0 ) return a;
+		float_exception_flags |= float_flag_inexact;
+		aSign = extractFloat64Sign( a );
+		switch ( float_rounding_mode ) {
+			case float_round_nearest_even:
+			if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
+				return packFloat64( aSign, 0x3FF, 0 );
+			}
+			break;
+			case float_round_down:
+			return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
+			case float_round_up:
+			return
+			aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
+		}
+		return packFloat64( aSign, 0, 0 );
+	}
+	lastBitMask = 1;
+	lastBitMask <<= 0x433 - aExp;
+	roundBitsMask = lastBitMask - 1;
+	z = a;
+	roundingMode = float_rounding_mode;
+	if ( roundingMode == float_round_nearest_even ) {
+		z += lastBitMask>>1;
+		if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+	}
+	else if ( roundingMode != float_round_to_zero ) {
+		if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+			z += roundBitsMask;
+		}
+	}
+	z &= ~ roundBitsMask;
+	if ( z != a ) float_exception_flags |= float_flag_inexact;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the double-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
+{
+	int16 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig;
+	int16 expDiff;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	bSig = extractFloat64Frac( b );
+	bExp = extractFloat64Exp( b );
+	expDiff = aExp - bExp;
+	aSig <<= 9;
+	bSig <<= 9;
+	if ( 0 < expDiff ) {
+		if ( aExp == 0x7FF ) {
+			if ( aSig ) return propagateFloat64NaN( a, b );
+			return a;
+		}
+		if ( bExp == 0 ) {
+			--expDiff;
+		}
+		else {
+			bSig |= LIT64( 0x2000000000000000 );
+		}
+		shift64RightJamming( bSig, expDiff, &bSig );
+		zExp = aExp;
+	}
+	else if ( expDiff < 0 ) {
+		if ( bExp == 0x7FF ) {
+			if ( bSig ) return propagateFloat64NaN( a, b );
+			return packFloat64( zSign, 0x7FF, 0 );
+		}
+		if ( aExp == 0 ) {
+			++expDiff;
+		}
+		else {
+			aSig |= LIT64( 0x2000000000000000 );
+		}
+		shift64RightJamming( aSig, - expDiff, &aSig );
+		zExp = bExp;
+	}
+	else {
+		if ( aExp == 0x7FF ) {
+			if ( aSig | bSig ) return propagateFloat64NaN( a, b );
+			return a;
+		}
+		if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
+		zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
+		zExp = aExp;
+		goto roundAndPack;
+	}
+	aSig |= LIT64( 0x2000000000000000 );
+	zSig = ( aSig + bSig )<<1;
+	--zExp;
+	if ( (sbits64) zSig < 0 ) {
+		zSig = aSig + bSig;
+		++zExp;
+	}
+	roundAndPack:
+	return roundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
+{
+	int16 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig;
+	int16 expDiff;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	bSig = extractFloat64Frac( b );
+	bExp = extractFloat64Exp( b );
+	expDiff = aExp - bExp;
+	aSig <<= 10;
+	bSig <<= 10;
+	if ( 0 < expDiff ) goto aExpBigger;
+	if ( expDiff < 0 ) goto bExpBigger;
+	if ( aExp == 0x7FF ) {
+		if ( aSig | bSig ) return propagateFloat64NaN( a, b );
+		float_raise( float_flag_invalid );
+		return float64_default_nan;
+	}
+	if ( aExp == 0 ) {
+		aExp = 1;
+		bExp = 1;
+	}
+	if ( bSig < aSig ) goto aBigger;
+	if ( aSig < bSig ) goto bBigger;
+	return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
+	bExpBigger:
+	if ( bExp == 0x7FF ) {
+		if ( bSig ) return propagateFloat64NaN( a, b );
+		return packFloat64( zSign ^ 1, 0x7FF, 0 );
+	}
+	if ( aExp == 0 ) {
+		++expDiff;
+	}
+	else {
+		aSig |= LIT64( 0x4000000000000000 );
+	}
+	shift64RightJamming( aSig, - expDiff, &aSig );
+	bSig |= LIT64( 0x4000000000000000 );
+	bBigger:
+	zSig = bSig - aSig;
+	zExp = bExp;
+	zSign ^= 1;
+	goto normalizeRoundAndPack;
+	aExpBigger:
+	if ( aExp == 0x7FF ) {
+		if ( aSig ) return propagateFloat64NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		--expDiff;
+	}
+	else {
+		bSig |= LIT64( 0x4000000000000000 );
+	}
+	shift64RightJamming( bSig, expDiff, &bSig );
+	aSig |= LIT64( 0x4000000000000000 );
+	aBigger:
+	zSig = aSig - bSig;
+	zExp = aExp;
+	normalizeRoundAndPack:
+	--zExp;
+	return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the double-precision floating-point values `a'
+| and `b'.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_add( float64 a, float64 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat64Sign( a );
+	bSign = extractFloat64Sign( b );
+	if ( aSign == bSign ) {
+		return addFloat64Sigs( a, b, aSign );
+	}
+	else {
+		return subFloat64Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the double-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sub( float64 a, float64 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat64Sign( a );
+	bSign = extractFloat64Sign( b );
+	if ( aSign == bSign ) {
+		return subFloat64Sigs( a, b, aSign );
+	}
+	else {
+		return addFloat64Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the double-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_mul( float64 a, float64 b )
+{
+	flag aSign, bSign, zSign;
+	int16 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig0, zSig1;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	bSig = extractFloat64Frac( b );
+	bExp = extractFloat64Exp( b );
+	bSign = extractFloat64Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0x7FF ) {
+		if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+			return propagateFloat64NaN( a, b );
+		}
+		if ( ( bExp | bSig ) == 0 ) {
+			float_raise( float_flag_invalid );
+			return float64_default_nan;
+		}
+		return packFloat64( zSign, 0x7FF, 0 );
+	}
+	if ( bExp == 0x7FF ) {
+		if ( bSig ) return propagateFloat64NaN( a, b );
+		if ( ( aExp | aSig ) == 0 ) {
+			float_raise( float_flag_invalid );
+			return float64_default_nan;
+		}
+		return packFloat64( zSign, 0x7FF, 0 );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
+		normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+	}
+	zExp = aExp + bExp - 0x3FF;
+	aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+	bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+	mul64To128( aSig, bSig, &zSig0, &zSig1 );
+	zSig0 |= ( zSig1 != 0 );
+	if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
+		zSig0 <<= 1;
+		--zExp;
+	}
+	return roundAndPackFloat64( zSign, zExp, zSig0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the double-precision floating-point value `a'
+| by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_div( float64 a, float64 b )
+{
+	flag aSign, bSign, zSign;
+	int16 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig;
+	bits64 rem0, rem1;
+	bits64 term0, term1;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	bSig = extractFloat64Frac( b );
+	bExp = extractFloat64Exp( b );
+	bSign = extractFloat64Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0x7FF ) {
+		if ( aSig ) return propagateFloat64NaN( a, b );
+		if ( bExp == 0x7FF ) {
+			if ( bSig ) return propagateFloat64NaN( a, b );
+			float_raise( float_flag_invalid );
+			return float64_default_nan;
+		}
+		return packFloat64( zSign, 0x7FF, 0 );
+	}
+	if ( bExp == 0x7FF ) {
+		if ( bSig ) return propagateFloat64NaN( a, b );
+		return packFloat64( zSign, 0, 0 );
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) {
+			if ( ( aExp | aSig ) == 0 ) {
+				float_raise( float_flag_invalid );
+				return float64_default_nan;
+			}
+			float_raise( float_flag_divbyzero );
+			return packFloat64( zSign, 0x7FF, 0 );
+		}
+		normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+	}
+	zExp = aExp - bExp + 0x3FD;
+	aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+	bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+	if ( bSig <= ( aSig + aSig ) ) {
+		aSig >>= 1;
+		++zExp;
+	}
+	zSig = estimateDiv128To64( aSig, 0, bSig );
+	if ( ( zSig & 0x1FF ) <= 2 ) {
+		mul64To128( bSig, zSig, &term0, &term1 );
+		sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+		while ( (sbits64) rem0 < 0 ) {
+			--zSig;
+			add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+		}
+		zSig |= ( rem1 != 0 );
+	}
+	return roundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the double-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_rem( float64 a, float64 b )
+{
+	flag aSign, zSign;
+	int16 aExp, bExp, expDiff;
+	bits64 aSig, bSig;
+	bits64 q, alternateASig;
+	sbits64 sigMean;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	bSig = extractFloat64Frac( b );
+	bExp = extractFloat64Exp( b );
+//    bSign = extractFloat64Sign( b );
+	if ( aExp == 0x7FF ) {
+		if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+			return propagateFloat64NaN( a, b );
+		}
+		float_raise( float_flag_invalid );
+		return float64_default_nan;
+	}
+	if ( bExp == 0x7FF ) {
+		if ( bSig ) return propagateFloat64NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) {
+			float_raise( float_flag_invalid );
+			return float64_default_nan;
+		}
+		normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return a;
+		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+	}
+	expDiff = aExp - bExp;
+	aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
+	bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+	if ( expDiff < 0 ) {
+		if ( expDiff < -1 ) return a;
+		aSig >>= 1;
+	}
+	q = ( bSig <= aSig );
+	if ( q ) aSig -= bSig;
+	expDiff -= 64;
+	while ( 0 < expDiff ) {
+		q = estimateDiv128To64( aSig, 0, bSig );
+		q = ( 2 < q ) ? q - 2 : 0;
+		aSig = - ( ( bSig>>2 ) * q );
+		expDiff -= 62;
+	}
+	expDiff += 64;
+	if ( 0 < expDiff ) {
+		q = estimateDiv128To64( aSig, 0, bSig );
+		q = ( 2 < q ) ? q - 2 : 0;
+		q >>= 64 - expDiff;
+		bSig >>= 2;
+		aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+	}
+	else {
+		aSig >>= 2;
+		bSig >>= 2;
+	}
+	do {
+		alternateASig = aSig;
+		++q;
+		aSig -= bSig;
+	} while ( 0 <= (sbits64) aSig );
+	sigMean = aSig + alternateASig;
+	if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+		aSig = alternateASig;
+	}
+	zSign = ( (sbits64) aSig < 0 );
+	if ( zSign ) aSig = - aSig;
+	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the double-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sqrt( float64 a )
+{
+	flag aSign;
+	int16 aExp, zExp;
+	bits64 aSig, zSig, doubleZSig;
+	bits64 rem0, rem1, term0, term1;
+//    float64 z;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( aExp == 0x7FF ) {
+		if ( aSig ) return propagateFloat64NaN( a, a );
+		if ( ! aSign ) return a;
+		float_raise( float_flag_invalid );
+		return float64_default_nan;
+	}
+	if ( aSign ) {
+		if ( ( aExp | aSig ) == 0 ) return a;
+		float_raise( float_flag_invalid );
+		return float64_default_nan;
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return 0;
+		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+	}
+	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
+	aSig |= LIT64( 0x0010000000000000 );
+	zSig = estimateSqrt32( aExp, aSig>>21 );
+	aSig <<= 9 - ( aExp & 1 );
+	zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
+	if ( ( zSig & 0x1FF ) <= 5 ) {
+		doubleZSig = zSig<<1;
+		mul64To128( zSig, zSig, &term0, &term1 );
+		sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+		while ( (sbits64) rem0 < 0 ) {
+			--zSig;
+			doubleZSig -= 2;
+			add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
+		}
+		zSig |= ( ( rem0 | rem1 ) != 0 );
+	}
+	return roundAndPackFloat64( 0, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_eq( float64 a, float64 b )
+{
+	if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+			|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+		) {
+		if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_le( float64 a, float64 b )
+{
+	flag aSign, bSign;
+
+	if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+			|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloat64Sign( a );
+	bSign = extractFloat64Sign( b );
+	if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
+	return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_lt( float64 a, float64 b )
+{
+	flag aSign, bSign;
+
+	if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+			|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloat64Sign( a );
+	bSign = extractFloat64Sign( b );
+	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
+	return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise.  The invalid exception is raised
+| if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_eq_signaling( float64 a, float64 b )
+{
+	if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+			|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_le_quiet( float64 a, float64 b )
+{
+	flag aSign, bSign;
+//    int16 aExp, bExp;
+
+	if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+			|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+		) {
+		if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloat64Sign( a );
+	bSign = extractFloat64Sign( b );
+	if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
+	return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_lt_quiet( float64 a, float64 b )
+{
+	flag aSign, bSign;
+
+	if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+			|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+		) {
+		if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloat64Sign( a );
+	bSign = extractFloat64Sign( b );
+	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
+	return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode.  If `a' is a NaN, the
+| largest positive integer is returned.  Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32( floatx80 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+	shiftCount = 0x4037 - aExp;
+	if ( shiftCount <= 0 ) shiftCount = 1;
+	shift64RightJamming( aSig, shiftCount, &aSig );
+	return roundAndPackInt32( aSign, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32_round_to_zero( floatx80 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig, savedASig;
+	int32 z;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	if ( 0x401E < aExp ) {
+		if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+		goto invalid;
+	}
+	else if ( aExp < 0x3FFF ) {
+		if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+		return 0;
+	}
+	shiftCount = 0x403E - aExp;
+	savedASig = aSig;
+	aSig >>= shiftCount;
+	z = aSig;
+	if ( aSign ) z = - z;
+	if ( ( z < 0 ) ^ aSign ) {
+	invalid:
+		float_raise( float_flag_invalid );
+		return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+	}
+	if ( ( aSig<<shiftCount ) != savedASig ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode.  If `a' is a NaN,
+| the largest positive integer is returned.  Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64( floatx80 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig, aSigExtra;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	shiftCount = 0x403E - aExp;
+	if ( shiftCount <= 0 ) {
+		if ( shiftCount ) {
+			float_raise( float_flag_invalid );
+			if (    ! aSign
+					|| (    ( aExp == 0x7FFF )
+						&& ( aSig != LIT64( 0x8000000000000000 ) ) )
+				) {
+				return LIT64( 0x7FFFFFFFFFFFFFFF );
+			}
+			return (sbits64) LIT64( 0x8000000000000000 );
+		}
+		aSigExtra = 0;
+	}
+	else {
+		shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+	}
+	return roundAndPackInt64( aSign, aSig, aSigExtra );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64_round_to_zero( floatx80 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig;
+	int64 z;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	shiftCount = aExp - 0x403E;
+	if ( 0 <= shiftCount ) {
+		aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
+		if ( ( a.high != 0xC03E ) || aSig ) {
+			float_raise( float_flag_invalid );
+			if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
+				return LIT64( 0x7FFFFFFFFFFFFFFF );
+			}
+		}
+		return (sbits64) LIT64( 0x8000000000000000 );
+	}
+	else if ( aExp < 0x3FFF ) {
+		if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+		return 0;
+	}
+	z = aSig>>( - shiftCount );
+	if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	if ( aSign ) z = - z;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the single-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 floatx80_to_float32( floatx80 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 aSig;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( (bits64) ( aSig<<1 ) ) {
+			return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
+		}
+		return packFloat32( aSign, 0xFF, 0 );
+	}
+	shift64RightJamming( aSig, 33, &aSig );
+	if ( aExp || aSig ) aExp -= 0x3F81;
+	return roundAndPackFloat32( aSign, aExp, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 floatx80_to_float64( floatx80 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 aSig, zSig;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( (bits64) ( aSig<<1 ) ) {
+			return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
+		}
+		return packFloat64( aSign, 0x7FF, 0 );
+	}
+	shift64RightJamming( aSig, 1, &zSig );
+	if ( aExp || aSig ) aExp -= 0x3C01;
+	return roundAndPackFloat64( aSign, aExp, zSig );
+
+}
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the quadruple-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 floatx80_to_float128( floatx80 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits64 aSig, zSig0, zSig1;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
+		return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
+	}
+	shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
+	return packFloat128( aSign, aExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the extended double-precision floating-point value `a' to an integer,
+| and returns the result as an extended quadruple-precision floating-point
+| value.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_round_to_int( floatx80 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 lastBitMask, roundBitsMask;
+	int8 roundingMode;
+	floatx80 z;
+
+	aExp = extractFloatx80Exp( a );
+	if ( 0x403E <= aExp ) {
+		if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
+			return propagateFloatx80NaN( a, a );
+		}
+		return a;
+	}
+	if ( aExp < 0x3FFF ) {
+		if (    ( aExp == 0 )
+				&& ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+			return a;
+		}
+		float_exception_flags |= float_flag_inexact;
+		aSign = extractFloatx80Sign( a );
+		switch ( float_rounding_mode ) {
+			case float_round_nearest_even:
+			if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
+				) {
+				return
+					packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
+			}
+			break;
+			case float_round_down:
+			return
+					aSign ?
+						packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
+				: packFloatx80( 0, 0, 0 );
+			case float_round_up:
+			return
+					aSign ? packFloatx80( 1, 0, 0 )
+				: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
+		}
+		return packFloatx80( aSign, 0, 0 );
+	}
+	lastBitMask = 1;
+	lastBitMask <<= 0x403E - aExp;
+	roundBitsMask = lastBitMask - 1;
+	z = a;
+	roundingMode = float_rounding_mode;
+	if ( roundingMode == float_round_nearest_even ) {
+		z.low += lastBitMask>>1;
+		if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+	}
+	else if ( roundingMode != float_round_to_zero ) {
+		if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+			z.low += roundBitsMask;
+		}
+	}
+	z.low &= ~ roundBitsMask;
+	if ( z.low == 0 ) {
+		++z.high;
+		z.low = LIT64( 0x8000000000000000 );
+	}
+	if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the extended double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
+| negated before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+	int32 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig0, zSig1;
+	int32 expDiff;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	bSig = extractFloatx80Frac( b );
+	bExp = extractFloatx80Exp( b );
+	expDiff = aExp - bExp;
+	if ( 0 < expDiff ) {
+		if ( aExp == 0x7FFF ) {
+			if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+			return a;
+		}
+		if ( bExp == 0 ) --expDiff;
+		shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+		zExp = aExp;
+	}
+	else if ( expDiff < 0 ) {
+		if ( bExp == 0x7FFF ) {
+			if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+			return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+		}
+		if ( aExp == 0 ) ++expDiff;
+		shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+		zExp = bExp;
+	}
+	else {
+		if ( aExp == 0x7FFF ) {
+			if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+				return propagateFloatx80NaN( a, b );
+			}
+			return a;
+		}
+		zSig1 = 0;
+		zSig0 = aSig + bSig;
+		if ( aExp == 0 ) {
+			normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+			goto roundAndPack;
+		}
+		zExp = aExp;
+		goto shiftRight1;
+	}
+	zSig0 = aSig + bSig;
+	if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
+	shiftRight1:
+	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
+	zSig0 |= LIT64( 0x8000000000000000 );
+	++zExp;
+	roundAndPack:
+	return
+		roundAndPackFloatx80(
+			floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the extended
+| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+	int32 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig0, zSig1;
+	int32 expDiff;
+	floatx80 z;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	bSig = extractFloatx80Frac( b );
+	bExp = extractFloatx80Exp( b );
+	expDiff = aExp - bExp;
+	if ( 0 < expDiff ) goto aExpBigger;
+	if ( expDiff < 0 ) goto bExpBigger;
+	if ( aExp == 0x7FFF ) {
+		if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+			return propagateFloatx80NaN( a, b );
+		}
+		float_raise( float_flag_invalid );
+		z.low = floatx80_default_nan_low;
+		z.high = floatx80_default_nan_high;
+		return z;
+	}
+	if ( aExp == 0 ) {
+		aExp = 1;
+		bExp = 1;
+	}
+	zSig1 = 0;
+	if ( bSig < aSig ) goto aBigger;
+	if ( aSig < bSig ) goto bBigger;
+	return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
+	bExpBigger:
+	if ( bExp == 0x7FFF ) {
+		if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+		return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( aExp == 0 ) ++expDiff;
+	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+	bBigger:
+	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
+	zExp = bExp;
+	zSign ^= 1;
+	goto normalizeRoundAndPack;
+	aExpBigger:
+	if ( aExp == 0x7FFF ) {
+		if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) --expDiff;
+	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+	aBigger:
+	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
+	zExp = aExp;
+	normalizeRoundAndPack:
+	return
+		normalizeRoundAndPackFloatx80(
+			floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the extended double-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_add( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloatx80Sign( a );
+	bSign = extractFloatx80Sign( b );
+	if ( aSign == bSign ) {
+		return addFloatx80Sigs( a, b, aSign );
+	}
+	else {
+		return subFloatx80Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sub( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloatx80Sign( a );
+	bSign = extractFloatx80Sign( b );
+	if ( aSign == bSign ) {
+		return subFloatx80Sigs( a, b, aSign );
+	}
+	else {
+		return addFloatx80Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_mul( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign, zSign;
+	int32 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig0, zSig1;
+	floatx80 z;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	bSig = extractFloatx80Frac( b );
+	bExp = extractFloatx80Exp( b );
+	bSign = extractFloatx80Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0x7FFF ) {
+		if (    (bits64) ( aSig<<1 )
+				|| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+			return propagateFloatx80NaN( a, b );
+		}
+		if ( ( bExp | bSig ) == 0 ) goto invalid;
+		return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( bExp == 0x7FFF ) {
+		if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+		if ( ( aExp | aSig ) == 0 ) {
+	invalid:
+			float_raise( float_flag_invalid );
+			z.low = floatx80_default_nan_low;
+			z.high = floatx80_default_nan_high;
+			return z;
+		}
+		return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+		normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
+		normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+	}
+	zExp = aExp + bExp - 0x3FFE;
+	mul64To128( aSig, bSig, &zSig0, &zSig1 );
+	if ( 0 < (sbits64) zSig0 ) {
+		shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
+		--zExp;
+	}
+	return
+		roundAndPackFloatx80(
+			floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the extended double-precision floating-point
+| value `a' by the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_div( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign, zSign;
+	int32 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig0, zSig1;
+	bits64 rem0, rem1, rem2, term0, term1, term2;
+	floatx80 z;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	bSig = extractFloatx80Frac( b );
+	bExp = extractFloatx80Exp( b );
+	bSign = extractFloatx80Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0x7FFF ) {
+		if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+		if ( bExp == 0x7FFF ) {
+			if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+			goto invalid;
+		}
+		return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( bExp == 0x7FFF ) {
+		if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+		return packFloatx80( zSign, 0, 0 );
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) {
+			if ( ( aExp | aSig ) == 0 ) {
+	invalid:
+				float_raise( float_flag_invalid );
+				z.low = floatx80_default_nan_low;
+				z.high = floatx80_default_nan_high;
+				return z;
+			}
+			float_raise( float_flag_divbyzero );
+			return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+		}
+		normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+		normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+	}
+	zExp = aExp - bExp + 0x3FFE;
+	rem1 = 0;
+	if ( bSig <= aSig ) {
+		shift128Right( aSig, 0, 1, &aSig, &rem1 );
+		++zExp;
+	}
+	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
+	mul64To128( bSig, zSig0, &term0, &term1 );
+	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
+	while ( (sbits64) rem0 < 0 ) {
+		--zSig0;
+		add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+	}
+	zSig1 = estimateDiv128To64( rem1, 0, bSig );
+	if ( (bits64) ( zSig1<<1 ) <= 8 ) {
+		mul64To128( bSig, zSig1, &term1, &term2 );
+		sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+		while ( (sbits64) rem1 < 0 ) {
+			--zSig1;
+			add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+		}
+		zSig1 |= ( ( rem1 | rem2 ) != 0 );
+	}
+	return
+		roundAndPackFloatx80(
+			floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_rem( floatx80 a, floatx80 b )
+{
+	flag aSign, zSign;
+	int32 aExp, bExp, expDiff;
+	bits64 aSig0, aSig1, bSig;
+	bits64 q, term0, term1, alternateASig0, alternateASig1;
+	floatx80 z;
+
+	aSig0 = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	bSig = extractFloatx80Frac( b );
+	bExp = extractFloatx80Exp( b );
+//    bSign = extractFloatx80Sign( b );
+	if ( aExp == 0x7FFF ) {
+		if (    (bits64) ( aSig0<<1 )
+				|| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+			return propagateFloatx80NaN( a, b );
+		}
+		goto invalid;
+	}
+	if ( bExp == 0x7FFF ) {
+		if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) {
+	invalid:
+			float_raise( float_flag_invalid );
+			z.low = floatx80_default_nan_low;
+			z.high = floatx80_default_nan_high;
+			return z;
+		}
+		normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+	}
+	if ( aExp == 0 ) {
+		if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
+		normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+	}
+	bSig |= LIT64( 0x8000000000000000 );
+	zSign = aSign;
+	expDiff = aExp - bExp;
+	aSig1 = 0;
+	if ( expDiff < 0 ) {
+		if ( expDiff < -1 ) return a;
+		shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
+		expDiff = 0;
+	}
+	q = ( bSig <= aSig0 );
+	if ( q ) aSig0 -= bSig;
+	expDiff -= 64;
+	while ( 0 < expDiff ) {
+		q = estimateDiv128To64( aSig0, aSig1, bSig );
+		q = ( 2 < q ) ? q - 2 : 0;
+		mul64To128( bSig, q, &term0, &term1 );
+		sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+		shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+		expDiff -= 62;
+	}
+	expDiff += 64;
+	if ( 0 < expDiff ) {
+		q = estimateDiv128To64( aSig0, aSig1, bSig );
+		q = ( 2 < q ) ? q - 2 : 0;
+		q >>= 64 - expDiff;
+		mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
+		sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+		shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
+		while ( le128( term0, term1, aSig0, aSig1 ) ) {
+			++q;
+			sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+		}
+	}
+	else {
+		term1 = 0;
+		term0 = bSig;
+	}
+	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
+	if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+			|| (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+				&& ( q & 1 ) )
+		) {
+		aSig0 = alternateASig0;
+		aSig1 = alternateASig1;
+		zSign = ! zSign;
+	}
+	return
+		normalizeRoundAndPackFloatx80(
+			80, zSign, bExp + expDiff, aSig0, aSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the extended double-precision floating-point
+| value `a'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sqrt( floatx80 a )
+{
+	flag aSign;
+	int32 aExp, zExp;
+	bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
+	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+	floatx80 z;
+
+	aSig0 = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
+		if ( ! aSign ) return a;
+		goto invalid;
+	}
+	if ( aSign ) {
+		if ( ( aExp | aSig0 ) == 0 ) return a;
+	invalid:
+		float_raise( float_flag_invalid );
+		z.low = floatx80_default_nan_low;
+		z.high = floatx80_default_nan_high;
+		return z;
+	}
+	if ( aExp == 0 ) {
+		if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
+		normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+	}
+	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
+	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
+	shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
+	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+	doubleZSig0 = zSig0<<1;
+	mul64To128( zSig0, zSig0, &term0, &term1 );
+	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+	while ( (sbits64) rem0 < 0 ) {
+		--zSig0;
+		doubleZSig0 -= 2;
+		add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+	}
+	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+	if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
+		if ( zSig1 == 0 ) zSig1 = 1;
+		mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+		sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+		mul64To128( zSig1, zSig1, &term2, &term3 );
+		sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+		while ( (sbits64) rem1 < 0 ) {
+			--zSig1;
+			shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+			term3 |= 1;
+			term2 |= doubleZSig0;
+			add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+		}
+		zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+	}
+	shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
+	zSig0 |= doubleZSig0;
+	return
+		roundAndPackFloatx80(
+			floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| equal to the corresponding value `b', and 0 otherwise.  The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_eq( floatx80 a, floatx80 b )
+{
+	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
+			|| (    ( extractFloatx80Exp( b ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
+		) {
+		if (    floatx80_is_signaling_nan( a )
+				|| floatx80_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	return
+			( a.low == b.low )
+		&& (    ( a.high == b.high )
+				|| (    ( a.low == 0 )
+					&& ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+			);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than or equal to the corresponding value `b', and 0 otherwise.  The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_le( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
+			|| (    ( extractFloatx80Exp( b ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloatx80Sign( a );
+	bSign = extractFloatx80Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			|| (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					== 0 );
+	}
+	return
+			aSign ? le128( b.high, b.low, a.high, a.low )
+		: le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_lt( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
+			|| (    ( extractFloatx80Exp( b ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloatx80Sign( a );
+	bSign = extractFloatx80Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			&& (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					!= 0 );
+	}
+	return
+			aSign ? lt128( b.high, b.low, a.high, a.low )
+		: lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is equal
+| to the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_eq_signaling( floatx80 a, floatx80 b )
+{
+	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
+			|| (    ( extractFloatx80Exp( b ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	return
+			( a.low == b.low )
+		&& (    ( a.high == b.high )
+				|| (    ( a.low == 0 )
+					&& ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+			);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
+| do not cause an exception.  Otherwise, the comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_le_quiet( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
+			|| (    ( extractFloatx80Exp( b ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
+		) {
+		if (    floatx80_is_signaling_nan( a )
+				|| floatx80_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloatx80Sign( a );
+	bSign = extractFloatx80Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			|| (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					== 0 );
+	}
+	return
+			aSign ? le128( b.high, b.low, a.high, a.low )
+		: le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
+| an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_lt_quiet( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
+			|| (    ( extractFloatx80Exp( b ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
+		) {
+		if (    floatx80_is_signaling_nan( a )
+				|| floatx80_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloatx80Sign( a );
+	bSign = extractFloatx80Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			&& (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					!= 0 );
+	}
+	return
+			aSign ? lt128( b.high, b.low, a.high, a.low )
+		: lt128( a.high, a.low, b.high, b.low );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32( float128 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig0, aSig1;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
+	if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+	aSig0 |= ( aSig1 != 0 );
+	shiftCount = 0x4028 - aExp;
+	if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
+	return roundAndPackInt32( aSign, aSig0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.  If
+| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32_round_to_zero( float128 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig0, aSig1, savedASig;
+	int32 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	aSig0 |= ( aSig1 != 0 );
+	if ( 0x401E < aExp ) {
+		if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
+		goto invalid;
+	}
+	else if ( aExp < 0x3FFF ) {
+		if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact;
+		return 0;
+	}
+	aSig0 |= LIT64( 0x0001000000000000 );
+	shiftCount = 0x402F - aExp;
+	savedASig = aSig0;
+	aSig0 >>= shiftCount;
+	z = aSig0;
+	if ( aSign ) z = - z;
+	if ( ( z < 0 ) ^ aSign ) {
+	invalid:
+		float_raise( float_flag_invalid );
+		return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+	}
+	if ( ( aSig0<<shiftCount ) != savedASig ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64( float128 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig0, aSig1;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+	shiftCount = 0x402F - aExp;
+	if ( shiftCount <= 0 ) {
+		if ( 0x403E < aExp ) {
+			float_raise( float_flag_invalid );
+			if (    ! aSign
+					|| (    ( aExp == 0x7FFF )
+						&& ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
+					)
+				) {
+				return LIT64( 0x7FFFFFFFFFFFFFFF );
+			}
+			return (sbits64) LIT64( 0x8000000000000000 );
+		}
+		shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
+	}
+	else {
+		shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
+	}
+	return roundAndPackInt64( aSign, aSig0, aSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64_round_to_zero( float128 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig0, aSig1;
+	int64 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+	shiftCount = aExp - 0x402F;
+	if ( 0 < shiftCount ) {
+		if ( 0x403E <= aExp ) {
+			aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
+			if (    ( a.high == LIT64( 0xC03E000000000000 ) )
+					&& ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
+				if ( aSig1 ) float_exception_flags |= float_flag_inexact;
+			}
+			else {
+				float_raise( float_flag_invalid );
+				if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
+					return LIT64( 0x7FFFFFFFFFFFFFFF );
+				}
+			}
+			return (sbits64) LIT64( 0x8000000000000000 );
+		}
+		z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
+		if ( (bits64) ( aSig1<<shiftCount ) ) {
+			float_exception_flags |= float_flag_inexact;
+		}
+	}
+	else {
+		if ( aExp < 0x3FFF ) {
+			if ( aExp | aSig0 | aSig1 ) {
+				float_exception_flags |= float_flag_inexact;
+			}
+			return 0;
+		}
+		z = aSig0>>( - shiftCount );
+		if (    aSig1
+				|| ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
+			float_exception_flags |= float_flag_inexact;
+		}
+	}
+	if ( aSign ) z = - z;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the single-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float128_to_float32( float128 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 aSig0, aSig1;
+	bits32 zSig;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 ) {
+			return commonNaNToFloat32( float128ToCommonNaN( a ) );
+		}
+		return packFloat32( aSign, 0xFF, 0 );
+	}
+	aSig0 |= ( aSig1 != 0 );
+	shift64RightJamming( aSig0, 18, &aSig0 );
+	zSig = aSig0;
+	if ( aExp || zSig ) {
+		zSig |= 0x40000000;
+		aExp -= 0x3F81;
+	}
+	return roundAndPackFloat32( aSign, aExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float128_to_float64( float128 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 aSig0, aSig1;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 ) {
+			return commonNaNToFloat64( float128ToCommonNaN( a ) );
+		}
+		return packFloat64( aSign, 0x7FF, 0 );
+	}
+	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+	aSig0 |= ( aSig1 != 0 );
+	if ( aExp || aSig0 ) {
+		aSig0 |= LIT64( 0x4000000000000000 );
+		aExp -= 0x3C01;
+	}
+	return roundAndPackFloat64( aSign, aExp, aSig0 );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the extended double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float128_to_floatx80( float128 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 aSig0, aSig1;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 ) {
+			return commonNaNToFloatx80( float128ToCommonNaN( a ) );
+		}
+		return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( aExp == 0 ) {
+		if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
+		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+	}
+	else {
+		aSig0 |= LIT64( 0x0001000000000000 );
+	}
+	shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
+	return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the quadruple-precision floating-point value `a' to an integer, and
+| returns the result as a quadruple-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_round_to_int( float128 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 lastBitMask, roundBitsMask;
+	int8 roundingMode;
+	float128 z;
+
+	aExp = extractFloat128Exp( a );
+	if ( 0x402F <= aExp ) {
+		if ( 0x406F <= aExp ) {
+			if (    ( aExp == 0x7FFF )
+					&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
+				) {
+				return propagateFloat128NaN( a, a );
+			}
+			return a;
+		}
+		lastBitMask = 1;
+		lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
+		roundBitsMask = lastBitMask - 1;
+		z = a;
+		roundingMode = float_rounding_mode;
+		if ( roundingMode == float_round_nearest_even ) {
+			if ( lastBitMask ) {
+				add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
+				if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+			}
+			else {
+				if ( (sbits64) z.low < 0 ) {
+					++z.high;
+					if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
+				}
+			}
+		}
+		else if ( roundingMode != float_round_to_zero ) {
+			if (   extractFloat128Sign( z )
+					^ ( roundingMode == float_round_up ) ) {
+				add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
+			}
+		}
+		z.low &= ~ roundBitsMask;
+	}
+	else {
+		if ( aExp < 0x3FFF ) {
+			if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
+			float_exception_flags |= float_flag_inexact;
+			aSign = extractFloat128Sign( a );
+			switch ( float_rounding_mode ) {
+				case float_round_nearest_even:
+				if (    ( aExp == 0x3FFE )
+						&& (   extractFloat128Frac0( a )
+							| extractFloat128Frac1( a ) )
+					) {
+					return packFloat128( aSign, 0x3FFF, 0, 0 );
+				}
+				break;
+				case float_round_down:
+				return
+						aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
+					: packFloat128( 0, 0, 0, 0 );
+				case float_round_up:
+				return
+						aSign ? packFloat128( 1, 0, 0, 0 )
+					: packFloat128( 0, 0x3FFF, 0, 0 );
+			}
+			return packFloat128( aSign, 0, 0, 0 );
+		}
+		lastBitMask = 1;
+		lastBitMask <<= 0x402F - aExp;
+		roundBitsMask = lastBitMask - 1;
+		z.low = 0;
+		z.high = a.high;
+		roundingMode = float_rounding_mode;
+		if ( roundingMode == float_round_nearest_even ) {
+			z.high += lastBitMask>>1;
+			if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
+				z.high &= ~ lastBitMask;
+			}
+		}
+		else if ( roundingMode != float_round_to_zero ) {
+			if (   extractFloat128Sign( z )
+					^ ( roundingMode == float_round_up ) ) {
+				z.high |= ( a.low != 0 );
+				z.high += roundBitsMask;
+			}
+		}
+		z.high &= ~ roundBitsMask;
+	}
+	if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the quadruple-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
+{
+	int32 aExp, bExp, zExp;
+	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+	int32 expDiff;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	bSig1 = extractFloat128Frac1( b );
+	bSig0 = extractFloat128Frac0( b );
+	bExp = extractFloat128Exp( b );
+	expDiff = aExp - bExp;
+	if ( 0 < expDiff ) {
+		if ( aExp == 0x7FFF ) {
+			if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+			return a;
+		}
+		if ( bExp == 0 ) {
+			--expDiff;
+		}
+		else {
+			bSig0 |= LIT64( 0x0001000000000000 );
+		}
+		shift128ExtraRightJamming(
+			bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
+		zExp = aExp;
+	}
+	else if ( expDiff < 0 ) {
+		if ( bExp == 0x7FFF ) {
+			if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+			return packFloat128( zSign, 0x7FFF, 0, 0 );
+		}
+		if ( aExp == 0 ) {
+			++expDiff;
+		}
+		else {
+			aSig0 |= LIT64( 0x0001000000000000 );
+		}
+		shift128ExtraRightJamming(
+			aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
+		zExp = bExp;
+	}
+	else {
+		if ( aExp == 0x7FFF ) {
+			if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+				return propagateFloat128NaN( a, b );
+			}
+			return a;
+		}
+		add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+		if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
+		zSig2 = 0;
+		zSig0 |= LIT64( 0x0002000000000000 );
+		zExp = aExp;
+		goto shiftRight1;
+	}
+	aSig0 |= LIT64( 0x0001000000000000 );
+	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+	--zExp;
+	if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
+	++zExp;
+	shiftRight1:
+	shift128ExtraRightJamming(
+		zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+	roundAndPack:
+	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the quadruple-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
+{
+	int32 aExp, bExp, zExp;
+	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+	int32 expDiff;
+	float128 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	bSig1 = extractFloat128Frac1( b );
+	bSig0 = extractFloat128Frac0( b );
+	bExp = extractFloat128Exp( b );
+	expDiff = aExp - bExp;
+	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+	shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
+	if ( 0 < expDiff ) goto aExpBigger;
+	if ( expDiff < 0 ) goto bExpBigger;
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+			return propagateFloat128NaN( a, b );
+		}
+		float_raise( float_flag_invalid );
+		z.low = float128_default_nan_low;
+		z.high = float128_default_nan_high;
+		return z;
+	}
+	if ( aExp == 0 ) {
+		aExp = 1;
+		bExp = 1;
+	}
+	if ( bSig0 < aSig0 ) goto aBigger;
+	if ( aSig0 < bSig0 ) goto bBigger;
+	if ( bSig1 < aSig1 ) goto aBigger;
+	if ( aSig1 < bSig1 ) goto bBigger;
+	return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
+	bExpBigger:
+	if ( bExp == 0x7FFF ) {
+		if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+		return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
+	}
+	if ( aExp == 0 ) {
+		++expDiff;
+	}
+	else {
+		aSig0 |= LIT64( 0x4000000000000000 );
+	}
+	shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+	bSig0 |= LIT64( 0x4000000000000000 );
+	bBigger:
+	sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
+	zExp = bExp;
+	zSign ^= 1;
+	goto normalizeRoundAndPack;
+	aExpBigger:
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		--expDiff;
+	}
+	else {
+		bSig0 |= LIT64( 0x4000000000000000 );
+	}
+	shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
+	aSig0 |= LIT64( 0x4000000000000000 );
+	aBigger:
+	sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+	zExp = aExp;
+	normalizeRoundAndPack:
+	--zExp;
+	return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the quadruple-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_add( float128 a, float128 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat128Sign( a );
+	bSign = extractFloat128Sign( b );
+	if ( aSign == bSign ) {
+		return addFloat128Sigs( a, b, aSign );
+	}
+	else {
+		return subFloat128Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sub( float128 a, float128 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat128Sign( a );
+	bSign = extractFloat128Sign( b );
+	if ( aSign == bSign ) {
+		return subFloat128Sigs( a, b, aSign );
+	}
+	else {
+		return addFloat128Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_mul( float128 a, float128 b )
+{
+	flag aSign, bSign, zSign;
+	int32 aExp, bExp, zExp;
+	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+	float128 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	bSig1 = extractFloat128Frac1( b );
+	bSig0 = extractFloat128Frac0( b );
+	bExp = extractFloat128Exp( b );
+	bSign = extractFloat128Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0x7FFF ) {
+		if (    ( aSig0 | aSig1 )
+				|| ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+			return propagateFloat128NaN( a, b );
+		}
+		if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
+		return packFloat128( zSign, 0x7FFF, 0, 0 );
+	}
+	if ( bExp == 0x7FFF ) {
+		if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+		if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+	invalid:
+			float_raise( float_flag_invalid );
+			z.low = float128_default_nan_low;
+			z.high = float128_default_nan_high;
+			return z;
+		}
+		return packFloat128( zSign, 0x7FFF, 0, 0 );
+	}
+	if ( aExp == 0 ) {
+		if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+	}
+	if ( bExp == 0 ) {
+		if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+		normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+	}
+	zExp = aExp + bExp - 0x4000;
+	aSig0 |= LIT64( 0x0001000000000000 );
+	shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
+	mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
+	add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
+	zSig2 |= ( zSig3 != 0 );
+	if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
+		shift128ExtraRightJamming(
+			zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+		++zExp;
+	}
+	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the quadruple-precision floating-point value
+| `a' by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_div( float128 a, float128 b )
+{
+	flag aSign, bSign, zSign;
+	int32 aExp, bExp, zExp;
+	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+	float128 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	bSig1 = extractFloat128Frac1( b );
+	bSig0 = extractFloat128Frac0( b );
+	bExp = extractFloat128Exp( b );
+	bSign = extractFloat128Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+		if ( bExp == 0x7FFF ) {
+			if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+			goto invalid;
+		}
+		return packFloat128( zSign, 0x7FFF, 0, 0 );
+	}
+	if ( bExp == 0x7FFF ) {
+		if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+		return packFloat128( zSign, 0, 0, 0 );
+	}
+	if ( bExp == 0 ) {
+		if ( ( bSig0 | bSig1 ) == 0 ) {
+			if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+	invalid:
+				float_raise( float_flag_invalid );
+				z.low = float128_default_nan_low;
+				z.high = float128_default_nan_high;
+				return z;
+			}
+			float_raise( float_flag_divbyzero );
+			return packFloat128( zSign, 0x7FFF, 0, 0 );
+		}
+		normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+	}
+	if ( aExp == 0 ) {
+		if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+	}
+	zExp = aExp - bExp + 0x3FFD;
+	shortShift128Left(
+		aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
+	shortShift128Left(
+		bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
+		shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
+		++zExp;
+	}
+	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
+	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
+	sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
+	while ( (sbits64) rem0 < 0 ) {
+		--zSig0;
+		add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
+	}
+	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
+	if ( ( zSig1 & 0x3FFF ) <= 4 ) {
+		mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
+		sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
+		while ( (sbits64) rem1 < 0 ) {
+			--zSig1;
+			add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
+		}
+		zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+	}
+	shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
+	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the quadruple-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_rem( float128 a, float128 b )
+{
+	flag aSign, zSign;
+	int32 aExp, bExp, expDiff;
+	bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
+	bits64 allZero, alternateASig0, alternateASig1, sigMean1;
+	sbits64 sigMean0;
+	float128 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	bSig1 = extractFloat128Frac1( b );
+	bSig0 = extractFloat128Frac0( b );
+	bExp = extractFloat128Exp( b );
+//    bSign = extractFloat128Sign( b );
+	if ( aExp == 0x7FFF ) {
+		if (    ( aSig0 | aSig1 )
+				|| ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+			return propagateFloat128NaN( a, b );
+		}
+		goto invalid;
+	}
+	if ( bExp == 0x7FFF ) {
+		if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		if ( ( bSig0 | bSig1 ) == 0 ) {
+	invalid:
+			float_raise( float_flag_invalid );
+			z.low = float128_default_nan_low;
+			z.high = float128_default_nan_high;
+			return z;
+		}
+		normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+	}
+	if ( aExp == 0 ) {
+		if ( ( aSig0 | aSig1 ) == 0 ) return a;
+		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+	}
+	expDiff = aExp - bExp;
+	if ( expDiff < -1 ) return a;
+	shortShift128Left(
+		aSig0 | LIT64( 0x0001000000000000 ),
+		aSig1,
+		15 - ( expDiff < 0 ),
+		&aSig0,
+		&aSig1
+	);
+	shortShift128Left(
+		bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+	q = le128( bSig0, bSig1, aSig0, aSig1 );
+	if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+	expDiff -= 64;
+	while ( 0 < expDiff ) {
+		q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+		q = ( 4 < q ) ? q - 4 : 0;
+		mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+		shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
+		shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
+		sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
+		expDiff -= 61;
+	}
+	if ( -64 < expDiff ) {
+		q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+		q = ( 4 < q ) ? q - 4 : 0;
+		q >>= - expDiff;
+		shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+		expDiff += 52;
+		if ( expDiff < 0 ) {
+			shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+		}
+		else {
+			shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
+		}
+		mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+		sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
+	}
+	else {
+		shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
+		shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+	}
+	do {
+		alternateASig0 = aSig0;
+		alternateASig1 = aSig1;
+		++q;
+		sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+	} while ( 0 <= (sbits64) aSig0 );
+	add128(
+		aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
+	if (    ( sigMean0 < 0 )
+			|| ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
+		aSig0 = alternateASig0;
+		aSig1 = alternateASig1;
+	}
+	zSign = ( (sbits64) aSig0 < 0 );
+	if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
+	return
+		normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the quadruple-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sqrt( float128 a )
+{
+	flag aSign;
+	int32 aExp, zExp;
+	bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
+	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+	float128 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
+		if ( ! aSign ) return a;
+		goto invalid;
+	}
+	if ( aSign ) {
+		if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
+	invalid:
+		float_raise( float_flag_invalid );
+		z.low = float128_default_nan_low;
+		z.high = float128_default_nan_high;
+		return z;
+	}
+	if ( aExp == 0 ) {
+		if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
+		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+	}
+	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
+	aSig0 |= LIT64( 0x0001000000000000 );
+	zSig0 = estimateSqrt32( aExp, aSig0>>17 );
+	shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
+	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+	doubleZSig0 = zSig0<<1;
+	mul64To128( zSig0, zSig0, &term0, &term1 );
+	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+	while ( (sbits64) rem0 < 0 ) {
+		--zSig0;
+		doubleZSig0 -= 2;
+		add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+	}
+	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+	if ( ( zSig1 & 0x1FFF ) <= 5 ) {
+		if ( zSig1 == 0 ) zSig1 = 1;
+		mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+		sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+		mul64To128( zSig1, zSig1, &term2, &term3 );
+		sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+		while ( (sbits64) rem1 < 0 ) {
+			--zSig1;
+			shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+			term3 |= 1;
+			term2 |= doubleZSig0;
+			add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+		}
+		zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+	}
+	shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
+	return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_eq( float128 a, float128 b )
+{
+	if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+				&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+			|| (    ( extractFloat128Exp( b ) == 0x7FFF )
+				&& ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+		) {
+		if (    float128_is_signaling_nan( a )
+				|| float128_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	return
+			( a.low == b.low )
+		&& (    ( a.high == b.high )
+				|| (    ( a.low == 0 )
+					&& ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
+			);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_le( float128 a, float128 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+				&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+			|| (    ( extractFloat128Exp( b ) == 0x7FFF )
+				&& ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloat128Sign( a );
+	bSign = extractFloat128Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			|| (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					== 0 );
+	}
+	return
+			aSign ? le128( b.high, b.low, a.high, a.low )
+		: le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_lt( float128 a, float128 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+				&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+			|| (    ( extractFloat128Exp( b ) == 0x7FFF )
+				&& ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloat128Sign( a );
+	bSign = extractFloat128Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			&& (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					!= 0 );
+	}
+	return
+			aSign ? lt128( b.high, b.low, a.high, a.low )
+		: lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_eq_signaling( float128 a, float128 b )
+{
+	if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+				&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+			|| (    ( extractFloat128Exp( b ) == 0x7FFF )
+				&& ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	return
+			( a.low == b.low )
+		&& (    ( a.high == b.high )
+				|| (    ( a.low == 0 )
+					&& ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
+			);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_le_quiet( float128 a, float128 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+				&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+			|| (    ( extractFloat128Exp( b ) == 0x7FFF )
+				&& ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+		) {
+		if (    float128_is_signaling_nan( a )
+				|| float128_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloat128Sign( a );
+	bSign = extractFloat128Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			|| (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					== 0 );
+	}
+	return
+			aSign ? le128( b.high, b.low, a.high, a.low )
+		: le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_lt_quiet( float128 a, float128 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+				&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+			|| (    ( extractFloat128Exp( b ) == 0x7FFF )
+				&& ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+		) {
+		if (    float128_is_signaling_nan( a )
+				|| float128_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloat128Sign( a );
+	bSign = extractFloat128Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			&& (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					!= 0 );
+	}
+	return
+			aSign ? lt128( b.high, b.low, a.high, a.low )
+		: lt128( a.high, a.low, b.high, b.low );
+
+}
+
+#endif
diff --git a/plat/linux68k/emu/musashi/softfloat/softfloat.h b/plat/linux68k/emu/musashi/softfloat/softfloat.h
new file mode 100644
index 000000000..92135e663
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/softfloat.h
@@ -0,0 +1,460 @@
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| The macro `FLOATX80' must be defined to enable the extended double-precision
+| floating-point format `floatx80'.  If this macro is not defined, the
+| `floatx80' type will not be defined, and none of the functions that either
+| input or output the `floatx80' type will be defined.  The same applies to
+| the `FLOAT128' macro and the quadruple-precision format `float128'.
+*----------------------------------------------------------------------------*/
+#define FLOATX80
+#define FLOAT128
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+typedef bits32 float32;
+typedef bits64 float64;
+#ifdef FLOATX80
+typedef struct {
+	bits16 high;
+	bits64 low;
+} floatx80;
+#endif
+#ifdef FLOAT128
+typedef struct {
+	bits64 high, low;
+} float128;
+#endif
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations.  (Can be specialized to target if
+| desired.)
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros"
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point underflow tininess-detection mode.
+*----------------------------------------------------------------------------*/
+extern int8 float_detect_tininess;
+enum {
+	float_tininess_after_rounding  = 0,
+	float_tininess_before_rounding = 1
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+extern int8 float_rounding_mode;
+enum {
+	float_round_nearest_even = 0,
+	float_round_to_zero      = 1,
+	float_round_down         = 2,
+	float_round_up           = 3
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point exception flags.
+*----------------------------------------------------------------------------*/
+extern int8 float_exception_flags;
+enum {
+	float_flag_invalid = 0x01, float_flag_denormal = 0x02, float_flag_divbyzero = 0x04, float_flag_overflow = 0x08,
+	float_flag_underflow = 0x10, float_flag_inexact = 0x20
+};
+
+/*----------------------------------------------------------------------------
+| Routine to raise any or all of the software IEC/IEEE floating-point
+| exception flags.
+*----------------------------------------------------------------------------*/
+void float_raise( int8 );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32( int32 );
+float64 int32_to_float64( int32 );
+#ifdef FLOATX80
+floatx80 int32_to_floatx80( int32 );
+#endif
+#ifdef FLOAT128
+float128 int32_to_float128( int32 );
+#endif
+float32 int64_to_float32( int64 );
+float64 int64_to_float64( int64 );
+#ifdef FLOATX80
+floatx80 int64_to_floatx80( int64 );
+#endif
+#ifdef FLOAT128
+float128 int64_to_float128( int64 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float32_to_int32( float32 );
+int32 float32_to_int32_round_to_zero( float32 );
+int64 float32_to_int64( float32 );
+int64 float32_to_int64_round_to_zero( float32 );
+float64 float32_to_float64( float32 );
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32 );
+#endif
+#ifdef FLOAT128
+float128 float32_to_float128( float32 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 );
+float32 float32_add( float32, float32 );
+float32 float32_sub( float32, float32 );
+float32 float32_mul( float32, float32 );
+float32 float32_div( float32, float32 );
+float32 float32_rem( float32, float32 );
+float32 float32_sqrt( float32 );
+flag float32_eq( float32, float32 );
+flag float32_le( float32, float32 );
+flag float32_lt( float32, float32 );
+flag float32_eq_signaling( float32, float32 );
+flag float32_le_quiet( float32, float32 );
+flag float32_lt_quiet( float32, float32 );
+flag float32_is_signaling_nan( float32 );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float64_to_int32( float64 );
+int32 float64_to_int32_round_to_zero( float64 );
+int64 float64_to_int64( float64 );
+int64 float64_to_int64_round_to_zero( float64 );
+float32 float64_to_float32( float64 );
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 );
+#endif
+#ifdef FLOAT128
+float128 float64_to_float128( float64 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int( float64 );
+float64 float64_add( float64, float64 );
+float64 float64_sub( float64, float64 );
+float64 float64_mul( float64, float64 );
+float64 float64_div( float64, float64 );
+float64 float64_rem( float64, float64 );
+float64 float64_sqrt( float64 );
+flag float64_eq( float64, float64 );
+flag float64_le( float64, float64 );
+flag float64_lt( float64, float64 );
+flag float64_eq_signaling( float64, float64 );
+flag float64_le_quiet( float64, float64 );
+flag float64_lt_quiet( float64, float64 );
+flag float64_is_signaling_nan( float64 );
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 floatx80_to_int32( floatx80 );
+int32 floatx80_to_int32_round_to_zero( floatx80 );
+int64 floatx80_to_int64( floatx80 );
+int64 floatx80_to_int64_round_to_zero( floatx80 );
+float32 floatx80_to_float32( floatx80 );
+float64 floatx80_to_float64( floatx80 );
+#ifdef FLOAT128
+float128 floatx80_to_float128( floatx80 );
+#endif
+floatx80 floatx80_scale(floatx80 a, floatx80 b);
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
+| extended double-precision floating-point value, returning the result.
+*----------------------------------------------------------------------------*/
+
+static inline floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
+{
+	floatx80 z;
+
+	z.low = zSig;
+	z.high = ( ( (bits16) zSign )<<15 ) + zExp;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision rounding precision.  Valid
+| values are 32, 64, and 80.
+*----------------------------------------------------------------------------*/
+extern int8 floatx80_rounding_precision;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 );
+floatx80 floatx80_add( floatx80, floatx80 );
+floatx80 floatx80_sub( floatx80, floatx80 );
+floatx80 floatx80_mul( floatx80, floatx80 );
+floatx80 floatx80_div( floatx80, floatx80 );
+floatx80 floatx80_rem( floatx80, floatx80 );
+floatx80 floatx80_sqrt( floatx80 );
+flag floatx80_eq( floatx80, floatx80 );
+flag floatx80_le( floatx80, floatx80 );
+flag floatx80_lt( floatx80, floatx80 );
+flag floatx80_eq_signaling( floatx80, floatx80 );
+flag floatx80_le_quiet( floatx80, floatx80 );
+flag floatx80_lt_quiet( floatx80, floatx80 );
+flag floatx80_is_signaling_nan( floatx80 );
+
+/* int floatx80_fsin(floatx80 &a);
+int floatx80_fcos(floatx80 &a);
+int floatx80_ftan(floatx80 &a); */
+
+floatx80 floatx80_flognp1(floatx80 a);
+floatx80 floatx80_flogn(floatx80 a);
+floatx80 floatx80_flog2(floatx80 a);
+floatx80 floatx80_flog10(floatx80 a);
+
+// roundAndPackFloatx80 used to be in softfloat-round-pack, is now in softfloat.c
+floatx80 roundAndPackFloatx80(int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1);
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float128_to_int32( float128 );
+int32 float128_to_int32_round_to_zero( float128 );
+int64 float128_to_int64( float128 );
+int64 float128_to_int64_round_to_zero( float128 );
+float32 float128_to_float32( float128 );
+float64 float128_to_float64( float128 );
+#ifdef FLOATX80
+floatx80 float128_to_floatx80( float128 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_round_to_int( float128 );
+float128 float128_add( float128, float128 );
+float128 float128_sub( float128, float128 );
+float128 float128_mul( float128, float128 );
+float128 float128_div( float128, float128 );
+float128 float128_rem( float128, float128 );
+float128 float128_sqrt( float128 );
+flag float128_eq( float128, float128 );
+flag float128_le( float128, float128 );
+flag float128_lt( float128, float128 );
+flag float128_eq_signaling( float128, float128 );
+flag float128_le_quiet( float128, float128 );
+flag float128_lt_quiet( float128, float128 );
+flag float128_is_signaling_nan( float128 );
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', the exponent `zExp', and the significand formed
+| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
+| floating-point value, returning the result.  After being shifted into the
+| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
+| added together to form the most significant 32 bits of the result.  This
+| means that any integer portion of `zSig0' will be added into the exponent.
+| Since a properly normalized significand will have an integer portion equal
+| to 1, the `zExp' input should be 1 less than the desired result exponent
+| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+static inline float128
+	packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+	float128 z;
+
+	z.low = zSig1;
+	z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent.  The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static inline float128
+	roundAndPackFloat128(
+		flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
+{
+	int8 roundingMode;
+	flag roundNearestEven, increment, isTiny;
+
+	roundingMode = float_rounding_mode;
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	increment = ( (sbits64) zSig2 < 0 );
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			increment = 0;
+		}
+		else {
+			if ( zSign ) {
+				increment = ( roundingMode == float_round_down ) && zSig2;
+			}
+			else {
+				increment = ( roundingMode == float_round_up ) && zSig2;
+			}
+		}
+	}
+	if ( 0x7FFD <= (bits32) zExp ) {
+		if (    ( 0x7FFD < zExp )
+				|| (    ( zExp == 0x7FFD )
+					&& eq128(
+							LIT64( 0x0001FFFFFFFFFFFF ),
+							LIT64( 0xFFFFFFFFFFFFFFFF ),
+							zSig0,
+							zSig1
+						)
+					&& increment
+				)
+			) {
+			float_raise( float_flag_overflow | float_flag_inexact );
+			if (    ( roundingMode == float_round_to_zero )
+					|| ( zSign && ( roundingMode == float_round_up ) )
+					|| ( ! zSign && ( roundingMode == float_round_down ) )
+				) {
+				return
+					packFloat128(
+						zSign,
+						0x7FFE,
+						LIT64( 0x0000FFFFFFFFFFFF ),
+						LIT64( 0xFFFFFFFFFFFFFFFF )
+					);
+			}
+			return packFloat128( zSign, 0x7FFF, 0, 0 );
+		}
+		if ( zExp < 0 ) {
+			isTiny =
+					( float_detect_tininess == float_tininess_before_rounding )
+				|| ( zExp < -1 )
+				|| ! increment
+				|| lt128(
+						zSig0,
+						zSig1,
+						LIT64( 0x0001FFFFFFFFFFFF ),
+						LIT64( 0xFFFFFFFFFFFFFFFF )
+					);
+			shift128ExtraRightJamming(
+				zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
+			zExp = 0;
+			if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
+			if ( roundNearestEven ) {
+				increment = ( (sbits64) zSig2 < 0 );
+			}
+			else {
+				if ( zSign ) {
+					increment = ( roundingMode == float_round_down ) && zSig2;
+				}
+				else {
+					increment = ( roundingMode == float_round_up ) && zSig2;
+				}
+			}
+		}
+	}
+	if ( zSig2 ) float_exception_flags |= float_flag_inexact;
+	if ( increment ) {
+		add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
+		zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
+	}
+	else {
+		if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
+	}
+	return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand formed by the concatenation of `zSig0' and `zSig1', and
+| returns the proper quadruple-precision floating-point value corresponding
+| to the abstract input.  This routine is just like `roundAndPackFloat128'
+| except that the input significand has fewer bits and does not have to be
+| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+| point exponent.
+*----------------------------------------------------------------------------*/
+
+static inline float128
+	normalizeRoundAndPackFloat128(
+		flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+	int8 shiftCount;
+	bits64 zSig2;
+
+	if ( zSig0 == 0 ) {
+		zSig0 = zSig1;
+		zSig1 = 0;
+		zExp -= 64;
+	}
+	shiftCount = countLeadingZeros64( zSig0 ) - 15;
+	if ( 0 <= shiftCount ) {
+		zSig2 = 0;
+		shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+	}
+	else {
+		shift128ExtraRightJamming(
+			zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
+	}
+	zExp -= shiftCount;
+	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+#endif
diff --git a/plat/linux68k/emu/sim.c b/plat/linux68k/emu/sim.c
index eb7794897..e78e5c437 100755
--- a/plat/linux68k/emu/sim.c
+++ b/plat/linux68k/emu/sim.c
@@ -6,6 +6,9 @@
 #include <time.h>
 #include <unistd.h>
 #include <errno.h>
+
+#include <fcntl.h>
+
 #include "sim.h"
 #include "m68k.h"
 
@@ -72,7 +75,7 @@ void exit_error(char* fmt, ...)
 	fprintf(stderr, "\n");
 	pc = m68k_get_reg(NULL, M68K_REG_PPC);
 	m68k_disassemble(buff, pc, M68K_CPU_TYPE_68020);
-	fprintf(stderr, "At %04x: %s\n", pc, buff);
+        fprintf(stderr, "At %04x: %s\n", pc, buff);
 
 	exit(EXIT_FAILURE);
 }
@@ -90,7 +93,7 @@ uint32_t cpu_read_long(uint32_t address)
 	switch (address)
 	{
 		case 0x00: return INIT_SP;
-		case 0x04: return entrypoint;
+                case 0x04: return entrypoint;
 		case 0x80: emulated_syscall(); return 0x10000;
 		case 0x10000: return 0x4e734e73; /* rte; rte */
 		case 0x10004: return 0;
@@ -170,10 +173,12 @@ void disassemble_program()
 	char buff2[100];
 
 	pc = cpu_read_long_dasm(4);
+        printf("entry point is %0x\n", entrypoint);
+        printf("pc is %0x\n", pc);
 
-	while(pc <= 0x16e)
+        while(pc <= entrypoint + 0x16e)
 	{
-		instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
+                instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68020);
 		make_hex(buff2, pc, instr_size);
 		printf("%03x: %-20s: %s\n", pc, buff2, buff);
 		pc += instr_size;
@@ -181,8 +186,9 @@ void disassemble_program()
 	fflush(stdout);
 }
 
-void cpu_instr_callback()
+void cpu_instr_callback(int apc)
 {
+        (void)apc;
 	uint32_t pc = m68k_get_reg(NULL, M68K_REG_PC);
 	if (pc == 0xc)
 		exit_error("address exception");
@@ -219,6 +225,10 @@ void cpu_instr_callback()
 #endif
 }
 
+/**
+ * translate simulated linux syscall to native call on host
+ * see https://www.lurklurk.org/syscalls.html for m68k syscall numbers
+ **/
 static void emulated_syscall(void)
 {
 	int s = m68k_get_reg(NULL, M68K_REG_D0);
@@ -227,16 +237,57 @@ static void emulated_syscall(void)
 		case 1: /* exit */
 			exit(m68k_get_reg(NULL, M68K_REG_D1));
 			
-		case 4: /* write */
+                case 3: /* read */
 		{
 			uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
 			uint32_t ptr = m68k_get_reg(NULL, M68K_REG_D2);
-			uint32_t len = m68k_get_reg(NULL, M68K_REG_D3);
-			m68k_set_reg(M68K_REG_D0, write(fd, g_ram + transform_address(ptr), len));
+                        uint32_t count = m68k_get_reg(NULL, M68K_REG_D3);
+                        m68k_set_reg(M68K_REG_D0, read(fd, g_ram + transform_address(ptr), count));
 			break;
 		}
 
-		case 45: /* brk */
+                case 4: /* write */
+                {
+                        uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
+                        uint32_t ptr = m68k_get_reg(NULL, M68K_REG_D2);
+                        uint32_t len = m68k_get_reg(NULL, M68K_REG_D3);
+                        m68k_set_reg(M68K_REG_D0, write(fd, g_ram + transform_address(ptr), len));
+                        break;
+                }
+
+                case 5: /* open */
+                {
+                        uint32_t pathname = m68k_get_reg(NULL, M68K_REG_D1);
+                        uint32_t flags = m68k_get_reg(NULL, M68K_REG_D2);
+                        uint32_t mode = m68k_get_reg(NULL, M68K_REG_D3);
+                        m68k_set_reg(M68K_REG_D0, open(g_ram + transform_address(pathname), flags, mode));
+                        break;
+                }
+
+                case 6: /* close */
+                {
+                        uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
+                        m68k_set_reg(M68K_REG_D0, close(fd));
+                        break;
+                }
+
+                case 10: /* unlink */
+                {
+                        uint32_t pathname = m68k_get_reg(NULL, M68K_REG_D1);
+                        m68k_set_reg(M68K_REG_D0, unlink(g_ram + transform_address(pathname)));
+                        break;
+                }
+
+                case 19: /* lseek */
+                {
+                        uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
+                        uint32_t offset = m68k_get_reg(NULL, M68K_REG_D2);
+                        uint32_t whence = m68k_get_reg(NULL, M68K_REG_D3);
+                        m68k_set_reg(M68K_REG_D0, lseek(fd, offset, whence));
+                        break;
+                }
+
+                case 45: /* brk */
 		{
 			uint32_t newpos = m68k_get_reg(NULL, M68K_REG_D1);
 			if (newpos == 0)
@@ -251,8 +302,8 @@ static void emulated_syscall(void)
 			break;
 		}
 
-		case 20: /* getpid */
-		case 48: /* signal */
+                case 20: /* getpid */
+                case 48: /* signal */
 		case 54: /* ioctl */
 		case 78: /* gettimeofday */
 			m68k_set_reg(M68K_REG_D0, 0);
@@ -280,9 +331,9 @@ static void load_program(FILE* fd)
 	if (fread(g_ram, 1, phentsize, fd) != phentsize)
 		exit_error("couldn't read program header");
 
-	uint32_t offset = READ_LONG(g_ram, 0x04);
-	uint32_t vaddr = READ_LONG(g_ram, 0x08);
-	uint32_t filesz = READ_LONG(g_ram, 0x10);
+        uint32_t offset = READ_LONG(g_ram, 0x04);
+        uint32_t vaddr = READ_LONG(g_ram, 0x08);
+        uint32_t filesz = READ_LONG(g_ram, 0x10);
 	uint32_t memsz = READ_LONG(g_ram, 0x14);
 	brkbase = brkpos = vaddr + memsz;
 
@@ -310,11 +361,15 @@ int main(int argc, char* argv[])
 
 	load_program(fhandle);
 
-//	disassemble_program();
+        //disassemble_program();
 
-	m68k_set_cpu_type(M68K_CPU_TYPE_68020);
-	m68k_init();
-	m68k_pulse_reset();
+        //printf("now at line %d\n", __LINE__);
+        m68k_set_cpu_type(M68K_CPU_TYPE_68040);
+        //printf("now at line %d\n", __LINE__);
+        m68k_init();
+        //printf("initialising core\n");
+        m68k_pulse_reset();
+        //printf("now at line %d\n", __LINE__);
 
 	/* On entry, the Linux stack looks like this.
 	 * 
@@ -337,11 +392,13 @@ int main(int argc, char* argv[])
 		unsigned long argv = sp;
 		cpu_write_long(sp -= 4, argv);
 		cpu_write_long(sp -= 4, 0);
-		m68k_set_reg(M68K_REG_SP, sp);
+                m68k_set_reg(M68K_REG_SP, sp);   /* init sp is also addr 0 */
 	}
 
-	for (;;)
-		m68k_execute(100000);
+        //printf("running program ");
+        for (;;) {
+             m68k_execute(100000);
+        }
 
 	return 0;
 }
diff --git a/plat/linux68k/emu/sim.h b/plat/linux68k/emu/sim.h
index 75db0cba1..f0f9b0b61 100755
--- a/plat/linux68k/emu/sim.h
+++ b/plat/linux68k/emu/sim.h
@@ -10,6 +10,6 @@ void cpu_write_long(unsigned int address, unsigned int value);
 void cpu_pulse_reset(void);
 void cpu_set_fc(unsigned int fc);
 int  cpu_irq_ack(int level);
-void cpu_instr_callback();
+void cpu_instr_callback(int pc);
 
 #endif /* SIM__HEADER */

From e5da3227d95d43c4b0716b410cb0691a15cde567 Mon Sep 17 00:00:00 2001
From: tevorbl <38593695+tevorbl@users.noreply.github.com>
Date: Sat, 30 May 2020 13:37:04 +0100
Subject: [PATCH 2/5] fpu bug fix for fgetmant op

- inserted missing break
- use temp variable instead of manipulating dest register

fgetexp changed to directly convert int to floatx80
---
 plat/linux68k/emu/musashi/m68kfpu.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/plat/linux68k/emu/musashi/m68kfpu.c b/plat/linux68k/emu/musashi/m68kfpu.c
index 46070f56c..8d46acb2d 100644
--- a/plat/linux68k/emu/musashi/m68kfpu.c
+++ b/plat/linux68k/emu/musashi/m68kfpu.c
@@ -1259,27 +1259,30 @@ static void fpgen_rm_reg(uint16 w2)
 	  REG_FP[dst].high ^= 0x8000;
 	  SET_CONDITION_CODES(REG_FP[dst]);
 	  USE_CYCLES(3);
-	  break;
-	}
+          break;
+        }
       case 0x1e:		// FGETEXP
 	{
           sint16 temp;
 
           temp = source.high;	// get the exponent
           temp -= 0x3fff;	// take off the bias
-          REG_FP[dst] = double_to_fx80((double)temp);
+          REG_FP[dst] = int32_to_floatx80((int32)temp);
           SET_CONDITION_CODES(REG_FP[dst]);
 	  USE_CYCLES(6);
-	}
+          break;
+        }
 
 
       case 0x1f:		// FGETMANT  (TBB)
         {
-          REG_FP[dst] = source;
-          REG_FP[dst].high &= ~0x7fff;	// clear the bias, keep sign
-          REG_FP[dst].high |= 0x3fff;	// set new bias, 1.0 <= result < 2.0
+          floatx80 temp = source;
+          temp.high &= ~0x7fff;	// clear the bias, keep sign
+          temp.high |= 0x3fff;	// set new bias, 1.0 <= result < 2.0
+          REG_FP[dst] = temp;
           SET_CONDITION_CODES(REG_FP[dst]);
-          //USE_CYCLES(6);
+          USE_CYCLES(6);
+          break;
         }
 
 
@@ -1611,8 +1614,8 @@ void m68040_fpu_op0()
 	    {
 	      case 0x0:	// FPU ALU FP, FP
 	      case 0x2:	// FPU ALU ea, FP
-		{
-		  fpgen_rm_reg(w2);
+                {
+                  fpgen_rm_reg(w2);
 		  break;
 		}
 

From 04fe0aa53ef449dd854949a0e503c018f32e32cd Mon Sep 17 00:00:00 2001
From: tevorbl <38593695+tevorbl@users.noreply.github.com>
Date: Mon, 1 Jun 2020 20:53:29 +0100
Subject: [PATCH 3/5] fixed floats in printf in linux59k platform

problem was implementation of fint & fintrz floating point ops in m68kfpu.c
Both these ops truncated the integer to 32 bits instead of leaving it as an extended precision floating point number

Changed the implementation to use 64 bit ints instead of 32 bit ints.
---
 plat/linux68k/emu/musashi/m68kfpu.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/plat/linux68k/emu/musashi/m68kfpu.c b/plat/linux68k/emu/musashi/m68kfpu.c
index 8d46acb2d..13c0a8174 100644
--- a/plat/linux68k/emu/musashi/m68kfpu.c
+++ b/plat/linux68k/emu/musashi/m68kfpu.c
@@ -967,7 +967,7 @@ static void WRITE_EA_64(int ea, uint64 data)
                   m68ki_write_32(ea+4, (uint32)(data));
                   break;
                 }
-              default:	fatalerror("M68kFPU: WRITE_EA_32: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+              default:	fatalerror("M68kFPU: WRITE_EA_64: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
             }
           break;
         }
@@ -1224,18 +1224,18 @@ static void fpgen_rm_reg(uint16 w2)
 	}
       case 0x01:		// Fsint
 	{
-	  sint32 temp;
-	  temp = floatx80_to_int32(source);
-	  REG_FP[dst] = int32_to_floatx80(temp);
-	  SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
-	  break;
-	}
+          sint64 temp;
+          temp = floatx80_to_int64(source);
+          REG_FP[dst] = int64_to_floatx80(temp);
+          SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
+          break;
+        }
       case 0x03:		// FsintRZ
-	{
-	  sint32 temp;
-	  temp = floatx80_to_int32_round_to_zero(source);
-	  REG_FP[dst] = int32_to_floatx80(temp);
-	  SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
+        {
+          sint64 temp;
+          temp = floatx80_to_int64_round_to_zero(source);
+          REG_FP[dst] = int64_to_floatx80(temp);
+          SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
 	  break;
 	}
       case 0x04:		// FSQRT

From 6fe335b9e98ce48c1ef6608007ef406f4038c10a Mon Sep 17 00:00:00 2001
From: tevorbl <38593695+tevorbl@users.noreply.github.com>
Date: Tue, 2 Jun 2020 13:00:03 +0100
Subject: [PATCH 4/5] another fix for printf(float) on m68k platform

bug caused by this instruction:

  fmove.l fp0,d0

problem was caused by a conflict between the fpu emulator (softfloat) and the compiler.

the emulator implemented this as a purely arithmetic move & conversion,
but the compiler assumed that the result could be interpreted as a logical (ie unsigned) conversion.
rightly or wrongly.


for example, if fp0 contained the value 2576980377.0  which is unsigned integer -1717987328
the emulator would treat this as an integer overflow and move 0x7fffffff (INT_MAX) into d0.

The complier on the other hand would assume that d0 contained 2576980377 (the unsigned value).

I don't know which is correct, but this is my fix for the time being.
---
 plat/linux68k/emu/musashi/softfloat/softfloat.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/plat/linux68k/emu/musashi/softfloat/softfloat.c b/plat/linux68k/emu/musashi/softfloat/softfloat.c
index e2bad5963..4669b08a9 100644
--- a/plat/linux68k/emu/musashi/softfloat/softfloat.c
+++ b/plat/linux68k/emu/musashi/softfloat/softfloat.c
@@ -95,7 +95,7 @@ static int32 roundAndPackInt32( flag zSign, bits64 absZ )
 	if ( zSign ) z = - z;
 	if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
 		float_raise( float_flag_invalid );
-		return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+                return z /* zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF*/;
 	}
 	if ( roundBits ) float_exception_flags |= float_flag_inexact;
 	return z;
@@ -146,9 +146,9 @@ static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
 	if ( z && ( ( z < 0 ) ^ zSign ) ) {
 	overflow:
 		float_raise( float_flag_invalid );
-		return
-				zSign ? (sbits64) LIT64( 0x8000000000000000 )
-                     : (sbits64)( 0x7FFFFFFFFFFFFFFF );
+                return z
+                        /*	zSign ? (sbits64) LIT64( 0x8000000000000000 )
+                     : (sbits64)( 0x7FFFFFFFFFFFFFFF )*/;
 	}
 	if ( absZ1 ) float_exception_flags |= float_flag_inexact;
 	return z;

From cd36c3526b2a6ffdb9d39600648fda67bed9e5a6 Mon Sep 17 00:00:00 2001
From: tevorbl <38593695+tevorbl@users.noreply.github.com>
Date: Sat, 13 Jun 2020 14:33:14 +0100
Subject: [PATCH 5/5] minor formatting changes

sim.c:
remove stray tracing statements
make formatting consistently leading tabs

Makefile:
restore ACK_TEMP_DIR & CC to default settings
---
 Makefile                |   4 +-
 plat/linux68k/emu/sim.c | 134 +++++++++++++++++++---------------------
 2 files changed, 66 insertions(+), 72 deletions(-)

diff --git a/Makefile b/Makefile
index 616f2651b..d8fa04026 100644
--- a/Makefile
+++ b/Makefile
@@ -9,8 +9,7 @@ DEFAULT_PLATFORM = pc86
 
 # Where should the ACK put its temporary files?
 
-#ACK_TEMP_DIR = /tmp
-ACK_TEMP_DIR = tmp
+ACK_TEMP_DIR = /tmp
 
 # Where is the ACK going to be installed, eventually? If you don't want to
 # install it and just want to run the ACK from the build directory
@@ -32,7 +31,6 @@ LDFLAGS =
 
 AR = ar
 CC = gcc
-#CC = clang
 
 # Which build system to use; use 'ninja' or 'make' (in lower case). Leave
 # blank to autodetect.
diff --git a/plat/linux68k/emu/sim.c b/plat/linux68k/emu/sim.c
index e78e5c437..2b1bd775a 100755
--- a/plat/linux68k/emu/sim.c
+++ b/plat/linux68k/emu/sim.c
@@ -75,7 +75,7 @@ void exit_error(char* fmt, ...)
 	fprintf(stderr, "\n");
 	pc = m68k_get_reg(NULL, M68K_REG_PPC);
 	m68k_disassemble(buff, pc, M68K_CPU_TYPE_68020);
-        fprintf(stderr, "At %04x: %s\n", pc, buff);
+	fprintf(stderr, "At %04x: %s\n", pc, buff);
 
 	exit(EXIT_FAILURE);
 }
@@ -93,7 +93,7 @@ uint32_t cpu_read_long(uint32_t address)
 	switch (address)
 	{
 		case 0x00: return INIT_SP;
-                case 0x04: return entrypoint;
+		case 0x04: return entrypoint;
 		case 0x80: emulated_syscall(); return 0x10000;
 		case 0x10000: return 0x4e734e73; /* rte; rte */
 		case 0x10004: return 0;
@@ -173,12 +173,13 @@ void disassemble_program()
 	char buff2[100];
 
 	pc = cpu_read_long_dasm(4);
-        printf("entry point is %0x\n", entrypoint);
-        printf("pc is %0x\n", pc);
+	printf("entry point is %0x\n", entrypoint);
+	printf("pc is %0x\n", pc);
 
-        while(pc <= entrypoint + 0x16e)
+
+	while(pc <= entrypoint + 0x16e)
 	{
-                instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68020);
+		instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68020);
 		make_hex(buff2, pc, instr_size);
 		printf("%03x: %-20s: %s\n", pc, buff2, buff);
 		pc += instr_size;
@@ -236,58 +237,58 @@ static void emulated_syscall(void)
 	{
 		case 1: /* exit */
 			exit(m68k_get_reg(NULL, M68K_REG_D1));
-			
-                case 3: /* read */
+
+		case 3: /* read */
 		{
 			uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
 			uint32_t ptr = m68k_get_reg(NULL, M68K_REG_D2);
-                        uint32_t count = m68k_get_reg(NULL, M68K_REG_D3);
-                        m68k_set_reg(M68K_REG_D0, read(fd, g_ram + transform_address(ptr), count));
+			uint32_t count = m68k_get_reg(NULL, M68K_REG_D3);
+			m68k_set_reg(M68K_REG_D0, read(fd, g_ram + transform_address(ptr), count));
 			break;
 		}
 
-                case 4: /* write */
-                {
-                        uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
-                        uint32_t ptr = m68k_get_reg(NULL, M68K_REG_D2);
-                        uint32_t len = m68k_get_reg(NULL, M68K_REG_D3);
-                        m68k_set_reg(M68K_REG_D0, write(fd, g_ram + transform_address(ptr), len));
-                        break;
-                }
+		case 4: /* write */
+		{
+			uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
+			uint32_t ptr = m68k_get_reg(NULL, M68K_REG_D2);
+			uint32_t len = m68k_get_reg(NULL, M68K_REG_D3);
+			m68k_set_reg(M68K_REG_D0, write(fd, g_ram + transform_address(ptr), len));
+			break;
+		}
 
-                case 5: /* open */
-                {
-                        uint32_t pathname = m68k_get_reg(NULL, M68K_REG_D1);
-                        uint32_t flags = m68k_get_reg(NULL, M68K_REG_D2);
-                        uint32_t mode = m68k_get_reg(NULL, M68K_REG_D3);
-                        m68k_set_reg(M68K_REG_D0, open(g_ram + transform_address(pathname), flags, mode));
-                        break;
-                }
+		case 5: /* open */
+		{
+			uint32_t pathname = m68k_get_reg(NULL, M68K_REG_D1);
+			uint32_t flags = m68k_get_reg(NULL, M68K_REG_D2);
+			uint32_t mode = m68k_get_reg(NULL, M68K_REG_D3);
+			m68k_set_reg(M68K_REG_D0, open(g_ram + transform_address(pathname), flags, mode));
+			break;
+		}
 
-                case 6: /* close */
-                {
-                        uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
-                        m68k_set_reg(M68K_REG_D0, close(fd));
-                        break;
-                }
+		case 6: /* close */
+		{
+			uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
+			m68k_set_reg(M68K_REG_D0, close(fd));
+			break;
+		}
 
-                case 10: /* unlink */
-                {
-                        uint32_t pathname = m68k_get_reg(NULL, M68K_REG_D1);
-                        m68k_set_reg(M68K_REG_D0, unlink(g_ram + transform_address(pathname)));
-                        break;
-                }
+		case 10: /* unlink */
+		{
+			uint32_t pathname = m68k_get_reg(NULL, M68K_REG_D1);
+			m68k_set_reg(M68K_REG_D0, unlink(g_ram + transform_address(pathname)));
+			break;
+		}
 
-                case 19: /* lseek */
-                {
-                        uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
-                        uint32_t offset = m68k_get_reg(NULL, M68K_REG_D2);
-                        uint32_t whence = m68k_get_reg(NULL, M68K_REG_D3);
-                        m68k_set_reg(M68K_REG_D0, lseek(fd, offset, whence));
-                        break;
-                }
+		case 19: /* lseek */
+		{
+			uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
+			uint32_t offset = m68k_get_reg(NULL, M68K_REG_D2);
+			uint32_t whence = m68k_get_reg(NULL, M68K_REG_D3);
+			m68k_set_reg(M68K_REG_D0, lseek(fd, offset, whence));
+			break;
+		}
 
-                case 45: /* brk */
+		case 45: /* brk */
 		{
 			uint32_t newpos = m68k_get_reg(NULL, M68K_REG_D1);
 			if (newpos == 0)
@@ -302,8 +303,8 @@ static void emulated_syscall(void)
 			break;
 		}
 
-                case 20: /* getpid */
-                case 48: /* signal */
+		case 20: /* getpid */
+		case 48: /* signal */
 		case 54: /* ioctl */
 		case 78: /* gettimeofday */
 			m68k_set_reg(M68K_REG_D0, 0);
@@ -331,9 +332,9 @@ static void load_program(FILE* fd)
 	if (fread(g_ram, 1, phentsize, fd) != phentsize)
 		exit_error("couldn't read program header");
 
-        uint32_t offset = READ_LONG(g_ram, 0x04);
-        uint32_t vaddr = READ_LONG(g_ram, 0x08);
-        uint32_t filesz = READ_LONG(g_ram, 0x10);
+	uint32_t offset = READ_LONG(g_ram, 0x04);
+	uint32_t vaddr = READ_LONG(g_ram, 0x08);
+	uint32_t filesz = READ_LONG(g_ram, 0x10);
 	uint32_t memsz = READ_LONG(g_ram, 0x14);
 	brkbase = brkpos = vaddr + memsz;
 
@@ -361,23 +362,19 @@ int main(int argc, char* argv[])
 
 	load_program(fhandle);
 
-        //disassemble_program();
+	//disassemble_program();
 
-        //printf("now at line %d\n", __LINE__);
-        m68k_set_cpu_type(M68K_CPU_TYPE_68040);
-        //printf("now at line %d\n", __LINE__);
-        m68k_init();
-        //printf("initialising core\n");
-        m68k_pulse_reset();
-        //printf("now at line %d\n", __LINE__);
+	m68k_set_cpu_type(M68K_CPU_TYPE_68040);
+	m68k_init();
+	m68k_pulse_reset();
 
 	/* On entry, the Linux stack looks like this.
 	 * 
 	 * sp+..           NULL
-     * sp+8+(4*argc)   env (X quads)
-     * sp+4+(4*argc)   NULL
-     * sp+4            argv (argc quads)
-     * sp              argc
+	 * sp+8+(4*argc)   env (X quads)
+	 * sp+4+(4*argc)   NULL
+	 * sp+4            argv (argc quads)
+	 * sp              argc
 	 *
 	 * We'll set it up with a bodgy stack frame with argc=0 just to keep the
 	 * startup code happy.
@@ -392,13 +389,12 @@ int main(int argc, char* argv[])
 		unsigned long argv = sp;
 		cpu_write_long(sp -= 4, argv);
 		cpu_write_long(sp -= 4, 0);
-                m68k_set_reg(M68K_REG_SP, sp);   /* init sp is also addr 0 */
+		m68k_set_reg(M68K_REG_SP, sp);   /* init sp is also addr 0 */
 	}
 
-        //printf("running program ");
-        for (;;) {
-             m68k_execute(100000);
-        }
+	for (;;) {
+		m68k_execute(100000);
+	}
 
 	return 0;
 }