diff --git a/Makefile b/Makefile
index d8fa04026..616f2651b 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,8 @@ DEFAULT_PLATFORM = pc86
 
 # Where should the ACK put its temporary files?
 
-ACK_TEMP_DIR = /tmp
+#ACK_TEMP_DIR = /tmp
+ACK_TEMP_DIR = tmp
 
 # Where is the ACK going to be installed, eventually? If you don't want to
 # install it and just want to run the ACK from the build directory
@@ -31,6 +32,7 @@ LDFLAGS =
 
 AR = ar
 CC = gcc
+#CC = clang
 
 # Which build system to use; use 'ninja' or 'make' (in lower case). Leave
 # blank to autodetect.
diff --git a/plat/linux68k/emu/build.lua b/plat/linux68k/emu/build.lua
index 3fe02056a..6579ef929 100644
--- a/plat/linux68k/emu/build.lua
+++ b/plat/linux68k/emu/build.lua
@@ -11,15 +11,15 @@ normalrule {
 		"+m68kmake",
 		"./musashi/m68k_in.c",
 		"./musashi/m68kcpu.h",
+		"./musashi/m68kmmu.h",
 		"./m68kconf.h",
 		"./musashi/m68kcpu.c",
+		"./musashi/m68kfpu.c",
 		"./musashi/m68kdasm.c",
 		"./musashi/m68k.h",
+		"./musashi/softfloat",
 	},
 	outleaves = {
-		"m68kopac.c",
-		"m68kopdm.c",
-		"m68kopnz.c",
 		"m68kops.c",
 		"m68kops.h",
 		"m68kcpu.h",
@@ -29,7 +29,7 @@ normalrule {
 		"m68k.h",
 	},
 	commands = {
-		"cp %{ins[2]} %{ins[3]} %{ins[4]} %{ins[5]} %{ins[6]} %{ins[7]} %{dir}",
+		"cp -R %{ins[2]} %{ins[3]} %{ins[4]} %{ins[5]} %{ins[6]} %{ins[7]} %{ins[8]} %{ins[9]} %{ins[10]} %{dir}",
 		"cd %{dir} && %{ins[1]}"
 	}
 }
@@ -50,9 +50,9 @@ cprogram {
 	srcs = {
 		"./sim.c",
 		matching(filenamesof("+m68k_engine"), "%.c$"),
+		"./musashi/softfloat/softfloat.c",
 	},
 	deps = {
 		"+headers",
 	}
 }
-
diff --git a/plat/linux68k/emu/m68kconf.h b/plat/linux68k/emu/m68kconf.h
index c5c9b1aef..023cf006b 100755
--- a/plat/linux68k/emu/m68kconf.h
+++ b/plat/linux68k/emu/m68kconf.h
@@ -3,20 +3,28 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 3.32
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
- * This code may be freely used for non-commercial purposes as long as this
- * copyright notice remains unaltered in the source code and any binary files
- * containing this code in compiled form.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
  *
- * All other lisencing terms must be negotiated with the author
- * (Karl Stenerud).
- *
- * The latest version of this code can be obtained at:
- * http://kstenerud.cjb.net
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
  */
 
 
@@ -59,6 +67,8 @@
 #define M68K_EMULATE_010            OPT_ON
 #define M68K_EMULATE_EC020          OPT_ON
 #define M68K_EMULATE_020            OPT_ON
+#define M68K_EMULATE_030            OPT_ON
+#define M68K_EMULATE_040            OPT_ON
 
 
 /* If ON, the CPU will call m68k_read_immediate_xx() for immediate addressing
@@ -101,6 +111,35 @@
 #define M68K_EMULATE_RESET          OPT_OFF
 #define M68K_RESET_CALLBACK()       cpu_pulse_reset()
 
+/* If ON, CPU will call the callback when it encounters a cmpi.l #v, dn
+ * instruction.
+ */
+#define M68K_CMPILD_HAS_CALLBACK     OPT_OFF
+#define M68K_CMPILD_CALLBACK(v,r)    your_cmpild_handler_function(v,r)
+
+
+/* If ON, CPU will call the callback when it encounters a rte
+ * instruction.
+ */
+#define M68K_RTE_HAS_CALLBACK       OPT_OFF
+#define M68K_RTE_CALLBACK()         your_rte_handler_function()
+
+/* If ON, CPU will call the callback when it encounters a tas
+ * instruction.
+ */
+#define M68K_TAS_HAS_CALLBACK       OPT_OFF
+#define M68K_TAS_CALLBACK()         your_tas_handler_function()
+
+/* If ON, CPU will call the callback when it encounters an illegal instruction
+ * passing the opcode as argument. If the callback returns 1, then it's considered
+ * as a normal instruction, and the illegal exception in canceled. If it returns 0,
+ * the exception occurs normally.
+ * The callback looks like int callback(int opcode)
+ * You should put OPT_SPECIFY_HANDLER here if you cant to use it, otherwise it will
+ * use a dummy default handler and you'll have to call m68k_set_illg_instr_callback explicitely
+ */
+#define M68K_ILLG_HAS_CALLBACK	    OPT_OFF
+#define M68K_ILLG_CALLBACK(opcode)  op_illg(opcode)
 
 /* If ON, CPU will call the set fc callback on every memory access to
  * differentiate between user/supervisor, program/data access like a real
@@ -111,7 +150,6 @@
 #define M68K_EMULATE_FC             OPT_OFF
 #define M68K_SET_FC_CALLBACK(A)     cpu_set_fc(A)
 
-
 /* If ON, CPU will call the pc changed callback when it changes the PC by a
  * large value.  This allows host programs to be nicer when it comes to
  * fetching immediate data and instructions on a banked memory system.
@@ -124,11 +162,11 @@
  * instruction.
  */
 #define M68K_INSTRUCTION_HOOK       OPT_SPECIFY_HANDLER
-#define M68K_INSTRUCTION_CALLBACK() cpu_instr_callback()
+#define M68K_INSTRUCTION_CALLBACK(pc) cpu_instr_callback(pc)
 
 
 /* If ON, the CPU will emulate the 4-byte prefetch queue of a real 68000 */
-#define M68K_EMULATE_PREFETCH       OPT_ON
+#define M68K_EMULATE_PREFETCH       OPT_OFF /*xxx*/
 
 
 /* If ON, the CPU will generate address error exceptions if it tries to
@@ -146,6 +184,9 @@
 #define M68K_LOG_1010_1111          OPT_OFF
 #define M68K_LOG_FILEHANDLE         stderr
 
+/* Emulate PMMU : if you enable this, there will be a test to see if the current chip has some enabled pmmu added to every memory access,
+ * so enable this only if it's useful */
+#define M68K_EMULATE_PMMU   OPT_OFF
 
 /* ----------------------------- COMPATIBILITY ---------------------------- */
 
@@ -157,19 +198,9 @@
 /* If ON, the enulation core will use 64-bit integers to speed up some
  * operations.
 */
-#define M68K_USE_64_BIT  OPT_OFF
+#define M68K_USE_64_BIT  OPT_ON
 
 
-/* Set to your compiler's static inline keyword to enable it, or
- * set it to blank to disable it.
- * If you define INLINE in the makefile, it will override this value.
- * NOTE: not enabling inline functions will SEVERELY slow down emulation.
- */
-#ifndef INLINE
-//#define INLINE static __inline__
-#define INLINE static
-#endif /* INLINE */
-
 #endif /* M68K_COMPILE_FOR_MAME */
 
 #include "sim.h"
@@ -185,7 +216,6 @@
 #define m68k_write_memory_16(A, V) cpu_write_word(A, V)
 #define m68k_write_memory_32(A, V) cpu_write_long(A, V)
 
-
 /* ======================================================================== */
 /* ============================== END OF FILE ============================= */
 /* ======================================================================== */
diff --git a/plat/linux68k/emu/musashi/example/makefile b/plat/linux68k/emu/musashi/example/Makefile
old mode 100755
new mode 100644
similarity index 67%
rename from plat/linux68k/emu/musashi/example/makefile
rename to plat/linux68k/emu/musashi/example/Makefile
index d5c0bd431..961581ffa
--- a/plat/linux68k/emu/musashi/example/makefile
+++ b/plat/linux68k/emu/musashi/example/Makefile
@@ -1,42 +1,42 @@
-EXENAME          = sim
-
-OSD_DOS          = osd_dos.c
-
-OSDFILES         = $(OSD_DOS)
-MAINFILES        = sim.c
-MUSASHIFILES     = m68kcpu.c m68kdasm.c
-MUSASHIGENCFILES = m68kops.c m68kopac.c m68kopdm.c m68kopnz.c
-MUSASHIGENHFILES = m68kops.h
-MUSASHIGENERATOR = m68kmake
-
-EXE = .exe
-EXEPATH = .\\
-# EXE =
-# EXEPATH = ./
-
-.CFILES   = $(MAINFILES) $(OSDFILES) $(MUSASHIFILES) $(MUSASHIGENCFILES)
-.OFILES   = $(.CFILES:%.c=%.o)
-
-CC        = gcc
-WARNINGS  = -Wall -pedantic
-CFLAGS    = $(WARNINGS)
-LFLAGS    = $(WARNINGS)
-
-TARGET = $(EXENAME)$(EXE)
-
-DELETEFILES = $(MUSASHIGENCFILES) $(MUSASHIGENHFILES) $(.OFILES) $(TARGET) $(MUSASHIGENERATOR)$(EXE)
-
-
-all: $(TARGET)
-
-clean:
-	rm -f $(DELETEFILES)
-
-$(TARGET): $(MUSASHIGENHFILES) $(.OFILES) makefile
-	$(CC) -o $@ $(.OFILES) $(LFLAGS)
-
-$(MUSASHIGENCFILES) $(MUSASHIGENHFILES): $(MUSASHIGENERATOR)$(EXE)
-	$(EXEPATH)$(MUSASHIGENERATOR)$(EXE)
-
-$(MUSASHIGENERATOR)$(EXE):  $(MUSASHIGENERATOR).c
-	$(CC) -o  $(MUSASHIGENERATOR)$(EXE)  $(MUSASHIGENERATOR).c
+EXENAME          = sim
+
+OSD_DOS          = osd_dos.c
+
+OSDFILES         = osd_linux.c # $(OSD_DOS)
+MAINFILES        = sim.c
+MUSASHIFILES     = m68kcpu.c m68kdasm.c softfloat/softfloat.c
+MUSASHIGENCFILES = m68kops.c
+MUSASHIGENHFILES = m68kops.h
+MUSASHIGENERATOR = m68kmake
+
+# EXE = .exe
+# EXEPATH = .\\
+EXE =
+EXEPATH = ./
+
+.CFILES   = $(MAINFILES) $(OSDFILES) $(MUSASHIFILES) $(MUSASHIGENCFILES)
+.OFILES   = $(.CFILES:%.c=%.o)
+
+CC        = gcc
+WARNINGS  = -Wall -Wextra -pedantic
+CFLAGS    = $(WARNINGS)
+LFLAGS    = $(WARNINGS)
+
+TARGET = $(EXENAME)$(EXE)
+
+DELETEFILES = $(MUSASHIGENCFILES) $(MUSASHIGENHFILES) $(.OFILES) $(TARGET) $(MUSASHIGENERATOR)$(EXE)
+
+
+all: $(TARGET)
+
+clean:
+	rm -f $(DELETEFILES)
+
+$(TARGET): $(MUSASHIGENHFILES) $(.OFILES) Makefile
+	$(CC) -o $@ $(.OFILES) $(LFLAGS) -lm
+
+$(MUSASHIGENCFILES) $(MUSASHIGENHFILES): $(MUSASHIGENERATOR)$(EXE)
+	$(EXEPATH)$(MUSASHIGENERATOR)$(EXE)
+
+$(MUSASHIGENERATOR)$(EXE):  $(MUSASHIGENERATOR).c
+	$(CC) -o  $(MUSASHIGENERATOR)$(EXE)  $(MUSASHIGENERATOR).c
diff --git a/plat/linux68k/emu/musashi/example/example.txt b/plat/linux68k/emu/musashi/example/example.txt
index d701fe61f..fc679f6ee 100755
--- a/plat/linux68k/emu/musashi/example/example.txt
+++ b/plat/linux68k/emu/musashi/example/example.txt
@@ -1,290 +1,290 @@
-EXAMPLE:
--------
-As an example, I'll build an imaginary hardware platform.
-
-
-The system is fairly simple, comprising a 68000, an input device, an output
-device, a non-maskable-interrupt device, and an interrupt controller.
-
-
-The input device receives input from the user and asserts its interrupt
-request line until its value is read.  Reading from the input device's
-memory-mapped port will both clear its interrupt request and read an ASCII
-representation (8 bits) of what the user entered.
-
-The output device reads value when it is selected through its memory-mapped
-port and outputs it to a display.  The value it reads will be interpreted as
-an ASCII value and output to the display. The output device is fairly slow
-(it can only process 1 byte per second), and so it asserts its interrupt
-request line when it is ready to receive a byte.  Writing to the output device
-sends a byte to it.  If the output device is not ready, the write is ignored.
-Reading from the output device returns 0 and clears its interrupt request line
-until another byte is written to it and 1 second elapses.
-
-The non-maskable-interrupt (NMI) device, as can be surmised from the name,
-generates a non-maskable-interrupt.  This is connected to some kind of external
-switch that the user can push to generate a NMI.
-
-Since there are 3 devices interrupting the CPU, an interrupt controller is
-needed.  The interrupt controller takes 7 inputs and encodes the highest
-priority asserted line on the 3 output pins.  the input device is wired to IN2
-and the output device is wired to IN1 on the controller.  The NMI device is
-wired to IN7 and all the other inputs are wired low.
-
-The bus is also connected to a 1K ROM and a 256 byte RAM.
-Beware: This platform places ROM and RAM in the same address range and uses
-        the FC pins to select the correct address space!
-        (You didn't expect me to make it easy, did you? =)
-
-
-
-Here is the schematic in all its ASCII splendour:
--------------------------------------------------
-
-     NMI     TIED
-    SWITCH   LOW
-      |       |
-      | +-+-+-+
-      | | | | | +------------------------------------------------+
-      | | | | | | +------------------------------------+         |
-      | | | | | | |                                    |         |
-     +-------------+                                   |         |
-     |7 6 5 4 3 2 1|                                   |         |
-     |             |                                   |         |
-     | INT CONTRLR |                                   |         |
-     |             |                                   |         |
-     |i i i        |                                   |         |
-     |2 1 0        |                                   |         |
-     +-------------+                                   |         |
-      | | |                                            |         |
-      | | |     +--------------------------------+--+  |         |
-      o o o     |                                |  |  |         |
-    +--------------+  +-------+  +----------+  +---------+  +----------+
-    | I I I     a  |  |       |  |          |  | r  a  i |  |    i     |
-    | 2 1 0    23  |  |       |  |          |  | e  c    |  |          |
-    |              |  |       |  |          |  | a  k    |  |          |
-    |              |  |       |  |          |  | d       |  |          |
-    |              |  |       |  |          |  |         |  |          |
-    |    M68000    |  |  ROM  |  |   RAM    |  |   IN    |  |   OUT    |
-    |              |  |       |  |          |  |         |  |          |
-    |            a9|--|a9     |--|          |--|         |--|          |
-    |            a8|--|a8     |--|          |--|         |--|          |
-    |            a7|--|a7     |--|a7        |--|         |--|          |
-    |            a6|--|a6     |--|a6        |--|         |--|          |
-    |            a5|--|a5     |--|a5        |--|         |--|          |
-    |            a4|--|a4     |--|a4        |--|         |--|          |
-    |            a3|--|a3     |--|a3        |--|         |--|          |
-    |            a2|--|a2     |--|a2        |--|         |--|          |
-    |            a1|--|a1     |--|a1        |--|         |--|          |
-    |            a0|--|a0     |--|a0        |--|         |--|          |
-    |              |  |       |  |          |  |         |  |          |
-    |           d15|--|d15    |--|d15       |--|         |--|          |
-    |           d14|--|d14    |--|d14       |--|         |--|          |
-    |           d13|--|d13    |--|d13       |--|         |--|          |
-    |           d12|--|d12    |--|d12       |--|         |--|          |
-    |           d11|--|d11    |--|d11       |--|         |--|          |
-    |           d10|--|d10    |--|d10       |--|         |--|          |
-    |            d9|--|d9     |--|d9        |--|         |--|          |
-    |            d8|--|d8     |--|d8        |--|         |--|          |
-    |            d7|--|d7     |--|d7        |--|d7       |--|d7        |
-    |            d6|--|d6     |--|d6        |--|d6       |--|d6        |
-    |            d5|--|d5     |--|d5        |--|d5       |--|d5        |
-    |            d4|--|d4     |--|d4        |--|d4       |--|d4        |
-    |            d3|--|d3     |--|d3        |--|d3       |--|d3        |
-    |            d2|--|d2     |--|d2        |--|d2       |--|d2        |
-    |            d1|--|d1     |--|d1        |--|d1       |--|d1  w     |
-    |            d0|--|d0     |--|d0        |--|d0       |--|d0  r     |
-    |              |  |       |  |          |  |         |  |    i   a |
-    | a      F F F |  |       |  |          |  |         |  |    t   c |
-    |22  rW  2 1 0 |  |  cs   |  | cs   rW  |  |         |  |    e   k |
-    +--------------+  +-------+  +----------+  +---------+  +----------+
-      |   |  | | |        |         |    |                       |   |
-      |   |  | | |        |         |    |                       |   |
-      |   |  | | |    +-------+  +-----+ |                     +---+ |
-      |   |  | | |    |  IC1  |  | IC2 | |                     |AND| |
-      |   |  | | |    |a b c d|  |a b c| |                     +---+ |
-      |   |  | | |    +-------+  +-----+ |                      | |  |
-      |   |  | | |     | | | |    | | |  |                      | +--+
-      |   |  | | |     | | | |    | | |  |                      | |
-      |   |  | | |     | | | |    | | |  |                      | |
-      |   |  | | |     | | | |    | | |  |                      | |
-      |   |  | | +-----)-)-+-)----)-)-+  |                      | |
-      |   |  | +-------)-+---)----)-+    |                      | |
-      |   |  +---------+-----)----+      |                      | |
-      |   |                  |           |                      | |
-      |   +------------------+-----------+----------------------+ |
-      |                                                           |
-      +-----------------------------------------------------------+
-
-IC1: output=1 if a=0 and b=1 and c=0 and d=0
-IC2: output=1 if a=0 and b=0 and c=1
-
-
-
-Here is the listing for program.bin:
------------------------------------
-
-                        INPUT_ADDRESS   equ $800000
-                        OUTPUT_ADDRESS  equ $400000
-                        CIRCULAR_BUFFER equ $c0
-                        CAN_OUTPUT      equ $d0
-                        STACK_AREA      equ $100
-                        
-                        vector_table:
-00000000 0000 0100      	dc.l STACK_AREA				*  0: SP
-00000004 0000 00c0      	dc.l init					*  1: PC
-00000008 0000 0148      	dc.l unhandled_exception	*  2: bus error
-0000000c 0000 0148      	dc.l unhandled_exception	*  3: address error
-00000010 0000 0148      	dc.l unhandled_exception	*  4: illegal instruction
-00000014 0000 0148      	dc.l unhandled_exception	*  5: zero divide
-00000018 0000 0148      	dc.l unhandled_exception	*  6: chk
-0000001c 0000 0148      	dc.l unhandled_exception	*  7: trapv
-00000020 0000 0148      	dc.l unhandled_exception	*  8: privilege violation
-00000024 0000 0148      	dc.l unhandled_exception	*  9: trace
-00000028 0000 0148      	dc.l unhandled_exception	* 10: 1010
-0000002c 0000 0148      	dc.l unhandled_exception	* 11: 1111
-00000030 0000 0148      	dc.l unhandled_exception	* 12: -
-00000034 0000 0148      	dc.l unhandled_exception	* 13: -
-00000038 0000 0148      	dc.l unhandled_exception	* 14: -
-0000003c 0000 0148      	dc.l unhandled_exception	* 15: uninitialized interrupt
-00000040 0000 0148      	dc.l unhandled_exception	* 16: -
-00000044 0000 0148      	dc.l unhandled_exception	* 17: -
-00000048 0000 0148      	dc.l unhandled_exception	* 18: -
-0000004c 0000 0148      	dc.l unhandled_exception	* 19: -
-00000050 0000 0148      	dc.l unhandled_exception	* 20: -
-00000054 0000 0148      	dc.l unhandled_exception	* 21: -
-00000058 0000 0148      	dc.l unhandled_exception	* 22: -
-0000005c 0000 0148      	dc.l unhandled_exception	* 23: -
-00000060 0000 0148      	dc.l unhandled_exception	* 24: spurious interrupt
-00000064 0000 0136      	dc.l output_ready			* 25: l1 irq
-00000068 0000 010e      	dc.l input_ready			* 26: l2 irq
-0000006c 0000 0148      	dc.l unhandled_exception	* 27: l3 irq
-00000070 0000 0148      	dc.l unhandled_exception	* 28: l4 irq
-00000074 0000 0148      	dc.l unhandled_exception	* 29: l5 irq
-00000078 0000 0148      	dc.l unhandled_exception	* 30: l6 irq
-0000007c 0000 014e      	dc.l nmi					* 31: l7 irq
-00000080 0000 0148      	dc.l unhandled_exception	* 32: trap 0
-00000084 0000 0148      	dc.l unhandled_exception	* 33: trap 1
-00000088 0000 0148      	dc.l unhandled_exception	* 34: trap 2
-0000008c 0000 0148      	dc.l unhandled_exception	* 35: trap 3
-00000090 0000 0148      	dc.l unhandled_exception	* 36: trap 4
-00000094 0000 0148      	dc.l unhandled_exception	* 37: trap 5
-00000098 0000 0148      	dc.l unhandled_exception	* 38: trap 6
-0000009c 0000 0148      	dc.l unhandled_exception	* 39: trap 7
-000000a0 0000 0148      	dc.l unhandled_exception	* 40: trap 8
-000000a4 0000 0148      	dc.l unhandled_exception	* 41: trap 9
-000000a8 0000 0148      	dc.l unhandled_exception	* 42: trap 10
-000000ac 0000 0148      	dc.l unhandled_exception	* 43: trap 11
-000000b0 0000 0148      	dc.l unhandled_exception	* 44: trap 12
-000000b4 0000 0148      	dc.l unhandled_exception	* 45: trap 13
-000000b8 0000 0148      	dc.l unhandled_exception	* 46: trap 14
-000000bc 0000 0148      	dc.l unhandled_exception	* 47: trap 15
-                        * This is the end of the useful part of the table.
-                        * We will now do the Capcom thing and put code starting at $c0.
-                        
-                        init:
-                        * Copy the exception vector table to RAM.
-000000c0 227c 0000 0000 	move.l  #0, a1						* a1 is RAM index
-000000c6 303c 002f      	move.w  #47, d0						* d0 is counter (48 vectors)
-000000ca 41fa 0006      	lea.l   (copy_table,PC), a0			* a0 is scratch
-000000ce 2208           	move.l  a0, d1						* d1 is ROM index
-000000d0 4481           	neg.l   d1
-                        copy_table:
-000000d2 22fb 18fe      	dc.l    $22fb18fe					* stoopid as68k generates 020 code here
-                        *	move.l  (copy_table,PC,d1.l), (a1)+
-000000d6 5841           	addq    #4, d1
-000000d8 51c8 fff8      	dbf     d0, copy_table
-                        
-                        main_init:
-                        * Initialize main program
-000000dc 11fc 0000 00d0 	move.b  #0, CAN_OUTPUT
-000000e2 4df8 00c0      	lea.l   CIRCULAR_BUFFER, a6
-000000e6 7c00           	moveq   #0, d6						* output buffer ptr
-000000e8 7e00           	moveq   #0, d7						* input buffer ptr
-000000ea 027c f8ff      	andi    #$f8ff, SR					* clear interrupt mask
-                        main:
-                        * Main program
-000000ee 4a38 00d0      	tst.b   CAN_OUTPUT					* can we output?
-000000f2 67fa           	beq     main
-000000f4 be06           	cmp.b   d6, d7						* is there data?
-000000f6 67f6           	beq     main
-000000f8 11fc 0000 00d0 	move.b  #0, CAN_OUTPUT
-000000fe 13f6 6000 0040 	move.b  (0,a6,d6.w), OUTPUT_ADDRESS	* write data
-         0000
-00000106 5246           	addq    #1, d6
-00000108 0206 000f      	andi.b  #15, d6						* update circular buffer
-0000010c 60e0           	bra     main
-                        
-                        
-                        input_ready:
-0000010e 2f00           	move.l  d0, -(a7)
-00000110 2f01           	move.l  d1, -(a7)
-00000112 1239 0080 0000 	move.b  INPUT_ADDRESS, d1			* read data
-00000118 1007           	move.b  d7, d0						* check if buffer full
-0000011a 5240           	addq    #1, d0
-0000011c 0200 000f      	andi.b  #15, d0
-00000120 bc00           	cmp.b   d0, d6
-00000122 6700 000c      	beq     input_ready_quit			* throw away if full
-00000126 1d81 7000      	move.b  d1, (0,a6,d7.w)				* store the data
-0000012a 5247           	addq    #1, d7
-0000012c 0207 000f      	andi.b  #15, d7						* update circular buffer
-                        input_ready_quit:
-00000130 221f           	move.l  (a7)+, d1
-00000132 201f           	move.l  (a7)+, d0
-00000134 4e73           	rte
-                        
-                        output_ready:
-00000136 2f00           	move.l  d0, -(a7)
-00000138 11fc 0001 00d0 	move.b  #1, CAN_OUTPUT
-0000013e 1039 0040 0000 	move.b  OUTPUT_ADDRESS, d0			* acknowledge the interrupt
-00000144 201f           	move.l  (a7)+, d0
-00000146 4e73           	rte
-                        
-                        unhandled_exception:
-00000148 4e72 2700      	stop	#$2700						* wait for NMI
-0000014c 60fa           	bra     unhandled_exception			* shouldn't get here
-                        
-                        nmi:
-                        * perform a soft reset
-0000014e 46fc 2700      	move    #$2700, SR					* set status register
-00000152 2e7a feac      	move.l  (vector_table,PC), a7		* reset stack pointer
-00000156 4e70           	reset								* reset peripherals
-00000158 4efa feaa      	jmp     (vector_table+4,PC)			* reset program counter
-                        
-                        END
-
-
-
-Compiling the example host environment:
---------------------------------------
-
-I've only put in an os-dependant portion for dos/windows, so you'll either
-have to compile for that system or make your own osd code based on osd_dos.c
-and modify the makefile accordingly.
-
-I compiled this example using the compiler from mingw (www.mingw.org) but you
-could also use djgpp (www.delorie.com).
-
-- Copy the m68k files to a directory.  Then extract the files from example.zip to
-  the same directory, overwriting m68kconf.h.  program.bin is the actual 68000
-  program you will be running.
-- Make your own osd_get_key() in the same fashion as in osd_dos.c if you're not
-  compiling for dos/windows.
-- Type make
-- Perform the necessary animal sacrifices.
-- Type sim program.bin
-
-
-Keys:
-	ESC           - quits the simulator
-	~             - generates an NMI interrupt
-	Any other key - Genearate input for the input device
-
-
-Note: I've cheated a bit in the emulation.  There is no speed control
-      to set the speed the CPU runs at; it simply runs as fast as your
-      processor can run it.
-      To add speed control, you will need a high-precision timestamp
-      function (like the RDTSC instruction for newer Pentium CPUs)
-      and a bit of arithmetic to make the cycles argument for m68k_execute().
-      I'll leave that as an excercise to the reader.
+EXAMPLE:
+-------
+As an example, I'll build an imaginary hardware platform.
+
+
+The system is fairly simple, comprising a 68000, an input device, an output
+device, a non-maskable-interrupt device, and an interrupt controller.
+
+
+The input device receives input from the user and asserts its interrupt
+request line until its value is read.  Reading from the input device's
+memory-mapped port will both clear its interrupt request and read an ASCII
+representation (8 bits) of what the user entered.
+
+The output device reads value when it is selected through its memory-mapped
+port and outputs it to a display.  The value it reads will be interpreted as
+an ASCII value and output to the display. The output device is fairly slow
+(it can only process 1 byte per second), and so it asserts its interrupt
+request line when it is ready to receive a byte.  Writing to the output device
+sends a byte to it.  If the output device is not ready, the write is ignored.
+Reading from the output device returns 0 and clears its interrupt request line
+until another byte is written to it and 1 second elapses.
+
+The non-maskable-interrupt (NMI) device, as can be surmised from the name,
+generates a non-maskable-interrupt.  This is connected to some kind of external
+switch that the user can push to generate a NMI.
+
+Since there are 3 devices interrupting the CPU, an interrupt controller is
+needed.  The interrupt controller takes 7 inputs and encodes the highest
+priority asserted line on the 3 output pins.  the input device is wired to IN2
+and the output device is wired to IN1 on the controller.  The NMI device is
+wired to IN7 and all the other inputs are wired low.
+
+The bus is also connected to a 1K ROM and a 256 byte RAM.
+Beware: This platform places ROM and RAM in the same address range and uses
+        the FC pins to select the correct address space!
+        (You didn't expect me to make it easy, did you? =)
+
+
+
+Here is the schematic in all its ASCII splendour:
+-------------------------------------------------
+
+     NMI     TIED
+    SWITCH   LOW
+      |       |
+      | +-+-+-+
+      | | | | | +------------------------------------------------+
+      | | | | | | +------------------------------------+         |
+      | | | | | | |                                    |         |
+     +-------------+                                   |         |
+     |7 6 5 4 3 2 1|                                   |         |
+     |             |                                   |         |
+     | INT CONTRLR |                                   |         |
+     |             |                                   |         |
+     |i i i        |                                   |         |
+     |2 1 0        |                                   |         |
+     +-------------+                                   |         |
+      | | |                                            |         |
+      | | |     +--------------------------------+--+  |         |
+      o o o     |                                |  |  |         |
+    +--------------+  +-------+  +----------+  +---------+  +----------+
+    | I I I     a  |  |       |  |          |  | r  a  i |  |    i     |
+    | 2 1 0    23  |  |       |  |          |  | e  c    |  |          |
+    |              |  |       |  |          |  | a  k    |  |          |
+    |              |  |       |  |          |  | d       |  |          |
+    |              |  |       |  |          |  |         |  |          |
+    |    M68000    |  |  ROM  |  |   RAM    |  |   IN    |  |   OUT    |
+    |              |  |       |  |          |  |         |  |          |
+    |            a9|--|a9     |--|          |--|         |--|          |
+    |            a8|--|a8     |--|          |--|         |--|          |
+    |            a7|--|a7     |--|a7        |--|         |--|          |
+    |            a6|--|a6     |--|a6        |--|         |--|          |
+    |            a5|--|a5     |--|a5        |--|         |--|          |
+    |            a4|--|a4     |--|a4        |--|         |--|          |
+    |            a3|--|a3     |--|a3        |--|         |--|          |
+    |            a2|--|a2     |--|a2        |--|         |--|          |
+    |            a1|--|a1     |--|a1        |--|         |--|          |
+    |            a0|--|a0     |--|a0        |--|         |--|          |
+    |              |  |       |  |          |  |         |  |          |
+    |           d15|--|d15    |--|d15       |--|         |--|          |
+    |           d14|--|d14    |--|d14       |--|         |--|          |
+    |           d13|--|d13    |--|d13       |--|         |--|          |
+    |           d12|--|d12    |--|d12       |--|         |--|          |
+    |           d11|--|d11    |--|d11       |--|         |--|          |
+    |           d10|--|d10    |--|d10       |--|         |--|          |
+    |            d9|--|d9     |--|d9        |--|         |--|          |
+    |            d8|--|d8     |--|d8        |--|         |--|          |
+    |            d7|--|d7     |--|d7        |--|d7       |--|d7        |
+    |            d6|--|d6     |--|d6        |--|d6       |--|d6        |
+    |            d5|--|d5     |--|d5        |--|d5       |--|d5        |
+    |            d4|--|d4     |--|d4        |--|d4       |--|d4        |
+    |            d3|--|d3     |--|d3        |--|d3       |--|d3        |
+    |            d2|--|d2     |--|d2        |--|d2       |--|d2        |
+    |            d1|--|d1     |--|d1        |--|d1       |--|d1  w     |
+    |            d0|--|d0     |--|d0        |--|d0       |--|d0  r     |
+    |              |  |       |  |          |  |         |  |    i   a |
+    | a      F F F |  |       |  |          |  |         |  |    t   c |
+    |22  rW  2 1 0 |  |  cs   |  | cs   rW  |  |         |  |    e   k |
+    +--------------+  +-------+  +----------+  +---------+  +----------+
+      |   |  | | |        |         |    |                       |   |
+      |   |  | | |        |         |    |                       |   |
+      |   |  | | |    +-------+  +-----+ |                     +---+ |
+      |   |  | | |    |  IC1  |  | IC2 | |                     |AND| |
+      |   |  | | |    |a b c d|  |a b c| |                     +---+ |
+      |   |  | | |    +-------+  +-----+ |                      | |  |
+      |   |  | | |     | | | |    | | |  |                      | +--+
+      |   |  | | |     | | | |    | | |  |                      | |
+      |   |  | | |     | | | |    | | |  |                      | |
+      |   |  | | |     | | | |    | | |  |                      | |
+      |   |  | | +-----)-)-+-)----)-)-+  |                      | |
+      |   |  | +-------)-+---)----)-+    |                      | |
+      |   |  +---------+-----)----+      |                      | |
+      |   |                  |           |                      | |
+      |   +------------------+-----------+----------------------+ |
+      |                                                           |
+      +-----------------------------------------------------------+
+
+IC1: output=1 if a=0 and b=1 and c=0 and d=0
+IC2: output=1 if a=0 and b=0 and c=1
+
+
+
+Here is the listing for program.bin:
+-----------------------------------
+
+                        INPUT_ADDRESS   equ $800000
+                        OUTPUT_ADDRESS  equ $400000
+                        CIRCULAR_BUFFER equ $c0
+                        CAN_OUTPUT      equ $d0
+                        STACK_AREA      equ $100
+                        
+                        vector_table:
+00000000 0000 0100      	dc.l STACK_AREA				*  0: SP
+00000004 0000 00c0      	dc.l init					*  1: PC
+00000008 0000 0148      	dc.l unhandled_exception	*  2: bus error
+0000000c 0000 0148      	dc.l unhandled_exception	*  3: address error
+00000010 0000 0148      	dc.l unhandled_exception	*  4: illegal instruction
+00000014 0000 0148      	dc.l unhandled_exception	*  5: zero divide
+00000018 0000 0148      	dc.l unhandled_exception	*  6: chk
+0000001c 0000 0148      	dc.l unhandled_exception	*  7: trapv
+00000020 0000 0148      	dc.l unhandled_exception	*  8: privilege violation
+00000024 0000 0148      	dc.l unhandled_exception	*  9: trace
+00000028 0000 0148      	dc.l unhandled_exception	* 10: 1010
+0000002c 0000 0148      	dc.l unhandled_exception	* 11: 1111
+00000030 0000 0148      	dc.l unhandled_exception	* 12: -
+00000034 0000 0148      	dc.l unhandled_exception	* 13: -
+00000038 0000 0148      	dc.l unhandled_exception	* 14: -
+0000003c 0000 0148      	dc.l unhandled_exception	* 15: uninitialized interrupt
+00000040 0000 0148      	dc.l unhandled_exception	* 16: -
+00000044 0000 0148      	dc.l unhandled_exception	* 17: -
+00000048 0000 0148      	dc.l unhandled_exception	* 18: -
+0000004c 0000 0148      	dc.l unhandled_exception	* 19: -
+00000050 0000 0148      	dc.l unhandled_exception	* 20: -
+00000054 0000 0148      	dc.l unhandled_exception	* 21: -
+00000058 0000 0148      	dc.l unhandled_exception	* 22: -
+0000005c 0000 0148      	dc.l unhandled_exception	* 23: -
+00000060 0000 0148      	dc.l unhandled_exception	* 24: spurious interrupt
+00000064 0000 0136      	dc.l output_ready			* 25: l1 irq
+00000068 0000 010e      	dc.l input_ready			* 26: l2 irq
+0000006c 0000 0148      	dc.l unhandled_exception	* 27: l3 irq
+00000070 0000 0148      	dc.l unhandled_exception	* 28: l4 irq
+00000074 0000 0148      	dc.l unhandled_exception	* 29: l5 irq
+00000078 0000 0148      	dc.l unhandled_exception	* 30: l6 irq
+0000007c 0000 014e      	dc.l nmi					* 31: l7 irq
+00000080 0000 0148      	dc.l unhandled_exception	* 32: trap 0
+00000084 0000 0148      	dc.l unhandled_exception	* 33: trap 1
+00000088 0000 0148      	dc.l unhandled_exception	* 34: trap 2
+0000008c 0000 0148      	dc.l unhandled_exception	* 35: trap 3
+00000090 0000 0148      	dc.l unhandled_exception	* 36: trap 4
+00000094 0000 0148      	dc.l unhandled_exception	* 37: trap 5
+00000098 0000 0148      	dc.l unhandled_exception	* 38: trap 6
+0000009c 0000 0148      	dc.l unhandled_exception	* 39: trap 7
+000000a0 0000 0148      	dc.l unhandled_exception	* 40: trap 8
+000000a4 0000 0148      	dc.l unhandled_exception	* 41: trap 9
+000000a8 0000 0148      	dc.l unhandled_exception	* 42: trap 10
+000000ac 0000 0148      	dc.l unhandled_exception	* 43: trap 11
+000000b0 0000 0148      	dc.l unhandled_exception	* 44: trap 12
+000000b4 0000 0148      	dc.l unhandled_exception	* 45: trap 13
+000000b8 0000 0148      	dc.l unhandled_exception	* 46: trap 14
+000000bc 0000 0148      	dc.l unhandled_exception	* 47: trap 15
+                        * This is the end of the useful part of the table.
+                        * We will now do the Capcom thing and put code starting at $c0.
+                        
+                        init:
+                        * Copy the exception vector table to RAM.
+000000c0 227c 0000 0000 	move.l  #0, a1						* a1 is RAM index
+000000c6 303c 002f      	move.w  #47, d0						* d0 is counter (48 vectors)
+000000ca 41fa 0006      	lea.l   (copy_table,PC), a0			* a0 is scratch
+000000ce 2208           	move.l  a0, d1						* d1 is ROM index
+000000d0 4481           	neg.l   d1
+                        copy_table:
+000000d2 22fb 18fe      	dc.l    $22fb18fe					* stoopid as68k generates 020 code here
+                        *	move.l  (copy_table,PC,d1.l), (a1)+
+000000d6 5841           	addq    #4, d1
+000000d8 51c8 fff8      	dbf     d0, copy_table
+                        
+                        main_init:
+                        * Initialize main program
+000000dc 11fc 0000 00d0 	move.b  #0, CAN_OUTPUT
+000000e2 4df8 00c0      	lea.l   CIRCULAR_BUFFER, a6
+000000e6 7c00           	moveq   #0, d6						* output buffer ptr
+000000e8 7e00           	moveq   #0, d7						* input buffer ptr
+000000ea 027c f8ff      	andi    #$f8ff, SR					* clear interrupt mask
+                        main:
+                        * Main program
+000000ee 4a38 00d0      	tst.b   CAN_OUTPUT					* can we output?
+000000f2 67fa           	beq     main
+000000f4 be06           	cmp.b   d6, d7						* is there data?
+000000f6 67f6           	beq     main
+000000f8 11fc 0000 00d0 	move.b  #0, CAN_OUTPUT
+000000fe 13f6 6000 0040 	move.b  (0,a6,d6.w), OUTPUT_ADDRESS	* write data
+         0000
+00000106 5246           	addq    #1, d6
+00000108 0206 000f      	andi.b  #15, d6						* update circular buffer
+0000010c 60e0           	bra     main
+                        
+                        
+                        input_ready:
+0000010e 2f00           	move.l  d0, -(a7)
+00000110 2f01           	move.l  d1, -(a7)
+00000112 1239 0080 0000 	move.b  INPUT_ADDRESS, d1			* read data
+00000118 1007           	move.b  d7, d0						* check if buffer full
+0000011a 5240           	addq    #1, d0
+0000011c 0200 000f      	andi.b  #15, d0
+00000120 bc00           	cmp.b   d0, d6
+00000122 6700 000c      	beq     input_ready_quit			* throw away if full
+00000126 1d81 7000      	move.b  d1, (0,a6,d7.w)				* store the data
+0000012a 5247           	addq    #1, d7
+0000012c 0207 000f      	andi.b  #15, d7						* update circular buffer
+                        input_ready_quit:
+00000130 221f           	move.l  (a7)+, d1
+00000132 201f           	move.l  (a7)+, d0
+00000134 4e73           	rte
+                        
+                        output_ready:
+00000136 2f00           	move.l  d0, -(a7)
+00000138 11fc 0001 00d0 	move.b  #1, CAN_OUTPUT
+0000013e 1039 0040 0000 	move.b  OUTPUT_ADDRESS, d0			* acknowledge the interrupt
+00000144 201f           	move.l  (a7)+, d0
+00000146 4e73           	rte
+                        
+                        unhandled_exception:
+00000148 4e72 2700      	stop	#$2700						* wait for NMI
+0000014c 60fa           	bra     unhandled_exception			* shouldn't get here
+                        
+                        nmi:
+                        * perform a soft reset
+0000014e 46fc 2700      	move    #$2700, SR					* set status register
+00000152 2e7a feac      	move.l  (vector_table,PC), a7		* reset stack pointer
+00000156 4e70           	reset								* reset peripherals
+00000158 4efa feaa      	jmp     (vector_table+4,PC)			* reset program counter
+                        
+                        END
+
+
+
+Compiling the example host environment:
+--------------------------------------
+
+I've only put in an os-dependant portion for dos/windows, so you'll either
+have to compile for that system or make your own osd code based on osd_dos.c
+and modify the makefile accordingly.
+
+I compiled this example using the compiler from mingw (www.mingw.org) but you
+could also use djgpp (www.delorie.com).
+
+- Copy the m68k files to a directory.  Then extract the files from example.zip to
+  the same directory, overwriting m68kconf.h.  program.bin is the actual 68000
+  program you will be running.
+- Make your own osd_get_key() in the same fashion as in osd_dos.c if you're not
+  compiling for dos/windows.
+- Type make
+- Perform the necessary animal sacrifices.
+- Type sim program.bin
+
+
+Keys:
+	ESC           - quits the simulator
+	~             - generates an NMI interrupt
+	Any other key - Genearate input for the input device
+
+
+Note: I've cheated a bit in the emulation.  There is no speed control
+      to set the speed the CPU runs at; it simply runs as fast as your
+      processor can run it.
+      To add speed control, you will need a high-precision timestamp
+      function (like the RDTSC instruction for newer Pentium CPUs)
+      and a bit of arithmetic to make the cycles argument for m68k_execute().
+      I'll leave that as an excercise to the reader.
diff --git a/plat/linux68k/emu/musashi/example/m68k.h b/plat/linux68k/emu/musashi/example/m68k.h
new file mode 120000
index 000000000..1ca6bd3d8
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68k.h
@@ -0,0 +1 @@
+../m68k.h
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68k_in.c b/plat/linux68k/emu/musashi/example/m68k_in.c
new file mode 120000
index 000000000..faeffd9a1
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68k_in.c
@@ -0,0 +1 @@
+../m68k_in.c
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68kconf.h b/plat/linux68k/emu/musashi/example/m68kconf.h
index 05cc6ade4..248979218 100755
--- a/plat/linux68k/emu/musashi/example/m68kconf.h
+++ b/plat/linux68k/emu/musashi/example/m68kconf.h
@@ -1,192 +1,220 @@
-/* ======================================================================== */
-/* ========================= LICENSING & COPYRIGHT ======================== */
-/* ======================================================================== */
-/*
- *                                  MUSASHI
- *                                Version 3.4
- *
- * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
- *
- * This code may be freely used for non-commercial purposes as long as this
- * copyright notice remains unaltered in the source code and any binary files
- * containing this code in compiled form.
- *
- * All other lisencing terms must be negotiated with the author
- * (Karl Stenerud).
- *
- * The latest version of this code can be obtained at:
- * http://kstenerud.cjb.net
- */
-
-
-
-#ifndef M68KCONF__HEADER
-#define M68KCONF__HEADER
-
-
-/* Configuration switches.
- * Use OPT_SPECIFY_HANDLER for configuration options that allow callbacks.
- * OPT_SPECIFY_HANDLER causes the core to link directly to the function
- * or macro you specify, rather than using callback functions whose pointer
- * must be passed in using m68k_set_xxx_callback().
- */
-#define OPT_OFF             0
-#define OPT_ON              1
-#define OPT_SPECIFY_HANDLER 2
-
-
-/* ======================================================================== */
-/* ============================== MAME STUFF ============================== */
-/* ======================================================================== */
-
-/* If you're compiling this for MAME, only change M68K_COMPILE_FOR_MAME
- * to OPT_ON and use m68kmame.h to configure the 68k core.
- */
-#ifndef M68K_COMPILE_FOR_MAME
-#define M68K_COMPILE_FOR_MAME      OPT_OFF
-#endif /* M68K_COMPILE_FOR_MAME */
-
-
-#if M68K_COMPILE_FOR_MAME == OPT_OFF
-
-
-/* ======================================================================== */
-/* ============================= CONFIGURATION ============================ */
-/* ======================================================================== */
-
-/* Turn ON if you want to use the following M68K variants */
-#define M68K_EMULATE_010            OPT_ON
-#define M68K_EMULATE_EC020          OPT_ON
-#define M68K_EMULATE_020            OPT_ON
-
-
-/* If ON, the CPU will call m68k_read_immediate_xx() for immediate addressing
- * and m68k_read_pcrelative_xx() for PC-relative addressing.
- * If off, all read requests from the CPU will be redirected to m68k_read_xx()
- */
-#define M68K_SEPARATE_READS         OPT_OFF
-
-/* If ON, the CPU will call m68k_write_32_pd() when it executes move.l with a
- * predecrement destination EA mode instead of m68k_write_32().
- * To simulate real 68k behavior, m68k_write_32_pd() must first write the high
- * word to [address+2], and then write the low word to [address].
- */
-#define M68K_SIMULATE_PD_WRITES     OPT_OFF
-
-/* If ON, CPU will call the interrupt acknowledge callback when it services an
- * interrupt.
- * If off, all interrupts will be autovectored and all interrupt requests will
- * auto-clear when the interrupt is serviced.
- */
-#define M68K_EMULATE_INT_ACK        OPT_SPECIFY_HANDLER
-#define M68K_INT_ACK_CALLBACK(A)    cpu_irq_ack(A)
-
-
-/* If ON, CPU will call the breakpoint acknowledge callback when it encounters
- * a breakpoint instruction and it is running a 68010+.
- */
-#define M68K_EMULATE_BKPT_ACK       OPT_OFF
-#define M68K_BKPT_ACK_CALLBACK()    your_bkpt_ack_handler_function()
-
-
-/* If ON, the CPU will monitor the trace flags and take trace exceptions
- */
-#define M68K_EMULATE_TRACE          OPT_OFF
-
-
-/* If ON, CPU will call the output reset callback when it encounters a reset
- * instruction.
- */
-#define M68K_EMULATE_RESET          OPT_SPECIFY_HANDLER
-#define M68K_RESET_CALLBACK()       cpu_pulse_reset()
-
-
-/* If ON, CPU will call the set fc callback on every memory access to
- * differentiate between user/supervisor, program/data access like a real
- * 68000 would.  This should be enabled and the callback should be set if you
- * want to properly emulate the m68010 or higher. (moves uses function codes
- * to read/write data from different address spaces)
- */
-#define M68K_EMULATE_FC             OPT_SPECIFY_HANDLER
-#define M68K_SET_FC_CALLBACK(A)     cpu_set_fc(A)
-
-
-/* If ON, CPU will call the pc changed callback when it changes the PC by a
- * large value.  This allows host programs to be nicer when it comes to
- * fetching immediate data and instructions on a banked memory system.
- */
-#define M68K_MONITOR_PC             OPT_OFF
-#define M68K_SET_PC_CALLBACK(A)     your_pc_changed_handler_function(A)
-
-
-/* If ON, CPU will call the instruction hook callback before every
- * instruction.
- */
-#define M68K_INSTRUCTION_HOOK       OPT_SPECIFY_HANDLER
-#define M68K_INSTRUCTION_CALLBACK() cpu_instr_callback()
-
-
-/* If ON, the CPU will emulate the 4-byte prefetch queue of a real 68000 */
-#define M68K_EMULATE_PREFETCH       OPT_ON
-
-
-/* If ON, the CPU will generate address error exceptions if it tries to
- * access a word or longword at an odd address.
- * NOTE: This is only emulated properly for 68000 mode.
- */
-#define M68K_EMULATE_ADDRESS_ERROR  OPT_ON
-
-
-/* Turn ON to enable logging of illegal instruction calls.
- * M68K_LOG_FILEHANDLE must be #defined to a stdio file stream.
- * Turn on M68K_LOG_1010_1111 to log all 1010 and 1111 calls.
- */
-#define M68K_LOG_ENABLE             OPT_OFF
-#define M68K_LOG_1010_1111          OPT_OFF
-#define M68K_LOG_FILEHANDLE         some_file_handle
-
-
-/* ----------------------------- COMPATIBILITY ---------------------------- */
-
-/* The following options set optimizations that violate the current ANSI
- * standard, but will be compliant under the forthcoming C9X standard.
- */
-
-
-/* If ON, the enulation core will use 64-bit integers to speed up some
- * operations.
-*/
-#define M68K_USE_64_BIT  OPT_OFF
-
-
-/* Set to your compiler's static inline keyword to enable it, or
- * set it to blank to disable it.
- * If you define INLINE in the makefile, it will override this value.
- * NOTE: not enabling inline functions will SEVERELY slow down emulation.
- */
-#ifndef INLINE
-#define INLINE static __inline__
-#endif /* INLINE */
-
-#endif /* M68K_COMPILE_FOR_MAME */
-
-#include "sim.h"
-
-#define m68k_read_memory_8(A) cpu_read_byte(A)
-#define m68k_read_memory_16(A) cpu_read_word(A)
-#define m68k_read_memory_32(A) cpu_read_long(A)
-
-#define m68k_read_disassembler_16(A) cpu_read_word_dasm(A)
-#define m68k_read_disassembler_32(A) cpu_read_long_dasm(A)
-
-#define m68k_write_memory_8(A, V) cpu_write_byte(A, V)
-#define m68k_write_memory_16(A, V) cpu_write_word(A, V)
-#define m68k_write_memory_32(A, V) cpu_write_long(A, V)
-
-
-/* ======================================================================== */
-/* ============================== END OF FILE ============================= */
-/* ======================================================================== */
-
-#endif /* M68KCONF__HEADER */
+/* ======================================================================== */
+/* ========================= LICENSING & COPYRIGHT ======================== */
+/* ======================================================================== */
+/*
+ *                                  MUSASHI
+ *                                Version 3.32
+ *
+ * A portable Motorola M680x0 processor emulation engine.
+ * Copyright Karl Stenerud.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+
+
+#ifndef M68KCONF__HEADER
+#define M68KCONF__HEADER
+
+
+/* Configuration switches.
+ * Use OPT_SPECIFY_HANDLER for configuration options that allow callbacks.
+ * OPT_SPECIFY_HANDLER causes the core to link directly to the function
+ * or macro you specify, rather than using callback functions whose pointer
+ * must be passed in using m68k_set_xxx_callback().
+ */
+#define OPT_OFF             0
+#define OPT_ON              1
+#define OPT_SPECIFY_HANDLER 2
+
+
+/* ======================================================================== */
+/* ============================== MAME STUFF ============================== */
+/* ======================================================================== */
+
+/* If you're compiling this for MAME, only change M68K_COMPILE_FOR_MAME
+ * to OPT_ON and use m68kmame.h to configure the 68k core.
+ */
+#ifndef M68K_COMPILE_FOR_MAME
+#define M68K_COMPILE_FOR_MAME      OPT_OFF
+#endif /* M68K_COMPILE_FOR_MAME */
+
+
+#if M68K_COMPILE_FOR_MAME == OPT_OFF
+
+
+/* ======================================================================== */
+/* ============================= CONFIGURATION ============================ */
+/* ======================================================================== */
+
+/* Turn ON if you want to use the following M68K variants */
+#define M68K_EMULATE_010            OPT_ON
+#define M68K_EMULATE_EC020          OPT_ON
+#define M68K_EMULATE_020            OPT_ON
+#define M68K_EMULATE_040            OPT_ON
+
+
+/* If ON, the CPU will call m68k_read_immediate_xx() for immediate addressing
+ * and m68k_read_pcrelative_xx() for PC-relative addressing.
+ * If off, all read requests from the CPU will be redirected to m68k_read_xx()
+ */
+#define M68K_SEPARATE_READS         OPT_OFF
+
+/* If ON, the CPU will call m68k_write_32_pd() when it executes move.l with a
+ * predecrement destination EA mode instead of m68k_write_32().
+ * To simulate real 68k behavior, m68k_write_32_pd() must first write the high
+ * word to [address+2], and then write the low word to [address].
+ */
+#define M68K_SIMULATE_PD_WRITES     OPT_OFF
+
+/* If ON, CPU will call the interrupt acknowledge callback when it services an
+ * interrupt.
+ * If off, all interrupts will be autovectored and all interrupt requests will
+ * auto-clear when the interrupt is serviced.
+ */
+#define M68K_EMULATE_INT_ACK        OPT_SPECIFY_HANDLER
+#define M68K_INT_ACK_CALLBACK(A)    cpu_irq_ack(A)
+
+
+/* If ON, CPU will call the breakpoint acknowledge callback when it encounters
+ * a breakpoint instruction and it is running a 68010+.
+ */
+#define M68K_EMULATE_BKPT_ACK       OPT_OFF
+#define M68K_BKPT_ACK_CALLBACK()    your_bkpt_ack_handler_function()
+
+
+/* If ON, the CPU will monitor the trace flags and take trace exceptions
+ */
+#define M68K_EMULATE_TRACE          OPT_OFF
+
+
+/* If ON, CPU will call the output reset callback when it encounters a reset
+ * instruction.
+ */
+#define M68K_EMULATE_RESET          OPT_SPECIFY_HANDLER
+#define M68K_RESET_CALLBACK()       cpu_pulse_reset()
+
+/* If ON, CPU will call the callback when it encounters a cmpi.l #v, dn
+ * instruction.
+ */
+#define M68K_CMPILD_HAS_CALLBACK     OPT_OFF
+#define M68K_CMPILD_CALLBACK(v,r)    your_cmpild_handler_function(v,r)
+
+
+/* If ON, CPU will call the callback when it encounters a rte
+ * instruction.
+ */
+#define M68K_RTE_HAS_CALLBACK       OPT_OFF
+#define M68K_RTE_CALLBACK()         your_rte_handler_function()
+
+/* If ON, CPU will call the callback when it encounters a tas
+ * instruction.
+ */
+#define M68K_TAS_HAS_CALLBACK       OPT_OFF
+#define M68K_TAS_CALLBACK()         your_tas_handler_function()
+
+/* If ON, CPU will call the callback when it encounters an illegal instruction
+ * passing the opcode as argument. If the callback returns 1, then it's considered
+ * as a normal instruction, and the illegal exception in canceled. If it returns 0,
+ * the exception occurs normally.
+ * The callback looks like int callback(int opcode)
+ * You should put OPT_SPECIFY_HANDLER here if you cant to use it, otherwise it will
+ * use a dummy default handler and you'll have to call m68k_set_illg_instr_callback explicitely
+ */
+#define M68K_ILLG_HAS_CALLBACK	    OPT_OFF
+#define M68K_ILLG_CALLBACK(opcode)  op_illg(opcode)
+
+/* If ON, CPU will call the set fc callback on every memory access to
+ * differentiate between user/supervisor, program/data access like a real
+ * 68000 would.  This should be enabled and the callback should be set if you
+ * want to properly emulate the m68010 or higher. (moves uses function codes
+ * to read/write data from different address spaces)
+ */
+#define M68K_EMULATE_FC             OPT_SPECIFY_HANDLER
+#define M68K_SET_FC_CALLBACK(A)     cpu_set_fc(A)
+
+/* If ON, CPU will call the pc changed callback when it changes the PC by a
+ * large value.  This allows host programs to be nicer when it comes to
+ * fetching immediate data and instructions on a banked memory system.
+ */
+#define M68K_MONITOR_PC             OPT_OFF
+#define M68K_SET_PC_CALLBACK(A)     your_pc_changed_handler_function(A)
+
+
+/* If ON, CPU will call the instruction hook callback before every
+ * instruction.
+ */
+#define M68K_INSTRUCTION_HOOK       OPT_SPECIFY_HANDLER
+#define M68K_INSTRUCTION_CALLBACK(pc) cpu_instr_callback(pc)
+
+
+/* If ON, the CPU will emulate the 4-byte prefetch queue of a real 68000 */
+#define M68K_EMULATE_PREFETCH       OPT_ON
+
+
+/* If ON, the CPU will generate address error exceptions if it tries to
+ * access a word or longword at an odd address.
+ * NOTE: This is only emulated properly for 68000 mode.
+ */
+#define M68K_EMULATE_ADDRESS_ERROR  OPT_ON
+
+
+/* Turn ON to enable logging of illegal instruction calls.
+ * M68K_LOG_FILEHANDLE must be #defined to a stdio file stream.
+ * Turn on M68K_LOG_1010_1111 to log all 1010 and 1111 calls.
+ */
+#define M68K_LOG_ENABLE             OPT_OFF
+#define M68K_LOG_1010_1111          OPT_OFF
+#define M68K_LOG_FILEHANDLE         some_file_handle
+
+
+/* ----------------------------- COMPATIBILITY ---------------------------- */
+
+/* The following options set optimizations that violate the current ANSI
+ * standard, but will be compliant under the forthcoming C9X standard.
+ */
+
+
+/* If ON, the enulation core will use 64-bit integers to speed up some
+ * operations.
+*/
+#define M68K_USE_64_BIT  OPT_ON
+
+
+#include "sim.h"
+
+#define m68k_read_memory_8(A) cpu_read_byte(A)
+#define m68k_read_memory_16(A) cpu_read_word(A)
+#define m68k_read_memory_32(A) cpu_read_long(A)
+
+#define m68k_read_disassembler_16(A) cpu_read_word_dasm(A)
+#define m68k_read_disassembler_32(A) cpu_read_long_dasm(A)
+
+#define m68k_write_memory_8(A, V) cpu_write_byte(A, V)
+#define m68k_write_memory_16(A, V) cpu_write_word(A, V)
+#define m68k_write_memory_32(A, V) cpu_write_long(A, V)
+
+
+#endif /* M68K_COMPILE_FOR_MAME */
+
+/* ======================================================================== */
+/* ============================== END OF FILE ============================= */
+/* ======================================================================== */
+
+#endif /* M68KCONF__HEADER */
diff --git a/plat/linux68k/emu/musashi/example/m68kcpu.c b/plat/linux68k/emu/musashi/example/m68kcpu.c
new file mode 120000
index 000000000..b07102e80
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68kcpu.c
@@ -0,0 +1 @@
+../m68kcpu.c
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68kcpu.h b/plat/linux68k/emu/musashi/example/m68kcpu.h
new file mode 120000
index 000000000..bfe2ed2e4
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68kcpu.h
@@ -0,0 +1 @@
+../m68kcpu.h
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68kdasm.c b/plat/linux68k/emu/musashi/example/m68kdasm.c
new file mode 120000
index 000000000..c17dfc31c
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68kdasm.c
@@ -0,0 +1 @@
+../m68kdasm.c
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68kfpu.c b/plat/linux68k/emu/musashi/example/m68kfpu.c
new file mode 120000
index 000000000..ee8a01028
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68kfpu.c
@@ -0,0 +1 @@
+../m68kfpu.c
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68kmake.c b/plat/linux68k/emu/musashi/example/m68kmake.c
new file mode 120000
index 000000000..81816fbc0
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68kmake.c
@@ -0,0 +1 @@
+../m68kmake.c
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/m68kmmu.h b/plat/linux68k/emu/musashi/example/m68kmmu.h
new file mode 120000
index 000000000..032b33f31
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/m68kmmu.h
@@ -0,0 +1 @@
+../m68kmmu.h
\ No newline at end of file
diff --git a/plat/linux68k/emu/musashi/example/osd.h b/plat/linux68k/emu/musashi/example/osd.h
index 5825095a3..1ac9e0bd6 100755
--- a/plat/linux68k/emu/musashi/example/osd.h
+++ b/plat/linux68k/emu/musashi/example/osd.h
@@ -1,6 +1,6 @@
-#ifndef HEADER__OSD
-#define HEADER__OSD
-
-int osd_get_char(void);
-
-#endif /* HEADER__OSD */
+#ifndef HEADER__OSD
+#define HEADER__OSD
+
+int osd_get_char(void);
+
+#endif /* HEADER__OSD */
diff --git a/plat/linux68k/emu/musashi/example/osd_dos.c b/plat/linux68k/emu/musashi/example/osd_dos.c
index 38ab69791..46fe3a69c 100755
--- a/plat/linux68k/emu/musashi/example/osd_dos.c
+++ b/plat/linux68k/emu/musashi/example/osd_dos.c
@@ -1,16 +1,16 @@
-#include "osd.h"
-
-/* OS-dependant code to get a character from the user.
- * This function must not block, and must either return an ASCII code or -1.
- */
-#include <conio.h>
-int osd_get_char(void)
-{
-	int ch = -1;
-	if(kbhit())
-	{
-		while(kbhit())
-			ch = getch();
-	}
-	return ch;
-}
+#include "osd.h"
+
+/* OS-dependant code to get a character from the user.
+ * This function must not block, and must either return an ASCII code or -1.
+ */
+#include <conio.h>
+int osd_get_char(void)
+{
+	int ch = -1;
+	if(kbhit())
+	{
+		while(kbhit())
+			ch = getch();
+	}
+	return ch;
+}
diff --git a/plat/linux68k/emu/musashi/example/osd_linux.c b/plat/linux68k/emu/musashi/example/osd_linux.c
new file mode 100644
index 000000000..0fbe789dc
--- /dev/null
+++ b/plat/linux68k/emu/musashi/example/osd_linux.c
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <termios.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/time.h>
+
+void changemode(int dir)
+{
+  static struct termios oldt, newt;
+
+  if ( dir == 1 )
+  {
+    tcgetattr( STDIN_FILENO, &oldt);
+    newt = oldt;
+    newt.c_lflag &= ~( ICANON | ECHO );
+    tcsetattr( STDIN_FILENO, TCSANOW, &newt);
+  }
+  else
+    tcsetattr( STDIN_FILENO, TCSANOW, &oldt);
+}
+
+int kbhit (void)
+{
+  struct timeval tv;
+  fd_set rdfs;
+
+  tv.tv_sec = 0;
+  tv.tv_usec = 0;
+
+  FD_ZERO(&rdfs);
+  FD_SET (STDIN_FILENO, &rdfs);
+
+  select(STDIN_FILENO+1, &rdfs, NULL, NULL, &tv);
+  return FD_ISSET(STDIN_FILENO, &rdfs);
+
+}
+
+int osd_get_char() {
+    changemode(1);
+    int ch = -1;
+    while(kbhit())
+	ch = getchar();
+    changemode(0);
+    return ch;
+}
+
diff --git a/plat/linux68k/emu/musashi/example/sim.c b/plat/linux68k/emu/musashi/example/sim.c
index 1e5555cd0..173045ef6 100755
--- a/plat/linux68k/emu/musashi/example/sim.c
+++ b/plat/linux68k/emu/musashi/example/sim.c
@@ -1,561 +1,563 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <time.h>
-#include "sim.h"
-#include "m68k.h"
-#include "osd.h"
-
-void disassemble_program();
-
-/* Memory-mapped IO ports */
-#define INPUT_ADDRESS 0x800000
-#define OUTPUT_ADDRESS 0x400000
-
-/* IRQ connections */
-#define IRQ_NMI_DEVICE 7
-#define IRQ_INPUT_DEVICE 2
-#define IRQ_OUTPUT_DEVICE 1
-
-/* Time between characters sent to output device (seconds) */
-#define OUTPUT_DEVICE_PERIOD 1
-
-/* ROM and RAM sizes */
-#define MAX_ROM 0xfff
-#define MAX_RAM 0xff
-
-
-/* Read/write macros */
-#define READ_BYTE(BASE, ADDR) (BASE)[ADDR]
-#define READ_WORD(BASE, ADDR) (((BASE)[ADDR]<<8) |			\
-							  (BASE)[(ADDR)+1])
-#define READ_LONG(BASE, ADDR) (((BASE)[ADDR]<<24) |			\
-							  ((BASE)[(ADDR)+1]<<16) |		\
-							  ((BASE)[(ADDR)+2]<<8) |		\
-							  (BASE)[(ADDR)+3])
-
-#define WRITE_BYTE(BASE, ADDR, VAL) (BASE)[ADDR] = (VAL)&0xff
-#define WRITE_WORD(BASE, ADDR, VAL) (BASE)[ADDR] = ((VAL)>>8) & 0xff;		\
-									(BASE)[(ADDR)+1] = (VAL)&0xff
-#define WRITE_LONG(BASE, ADDR, VAL) (BASE)[ADDR] = ((VAL)>>24) & 0xff;		\
-									(BASE)[(ADDR)+1] = ((VAL)>>16)&0xff;	\
-									(BASE)[(ADDR)+2] = ((VAL)>>8)&0xff;		\
-									(BASE)[(ADDR)+3] = (VAL)&0xff
-
-
-/* Prototypes */
-void exit_error(char* fmt, ...);
-
-unsigned int cpu_read_byte(unsigned int address);
-unsigned int cpu_read_word(unsigned int address);
-unsigned int cpu_read_long(unsigned int address);
-void cpu_write_byte(unsigned int address, unsigned int value);
-void cpu_write_word(unsigned int address, unsigned int value);
-void cpu_write_long(unsigned int address, unsigned int value);
-void cpu_pulse_reset(void);
-void cpu_set_fc(unsigned int fc);
-int cpu_irq_ack(int level);
-
-void nmi_device_reset(void);
-void nmi_device_update(void);
-int nmi_device_ack(void);
-
-void input_device_reset(void);
-void input_device_update(void);
-int input_device_ack(void);
-unsigned int input_device_read(void);
-void input_device_write(unsigned int value);
-
-void output_device_reset(void);
-void output_device_update(void);
-int output_device_ack(void);
-unsigned int output_device_read(void);
-void output_device_write(unsigned int value);
-
-void int_controller_set(unsigned int value);
-void int_controller_clear(unsigned int value);
-
-void get_user_input(void);
-
-
-/* Data */
-unsigned int g_quit = 0;                        /* 1 if we want to quit */
-unsigned int g_nmi = 0;                         /* 1 if nmi pending */
-
-int          g_input_device_value = -1;         /* Current value in input device */
-
-unsigned int g_output_device_ready = 0;         /* 1 if output device is ready */
-time_t       g_output_device_last_output;       /* Time of last char output */
-
-unsigned int g_int_controller_pending = 0;      /* list of pending interrupts */
-unsigned int g_int_controller_highest_int = 0;  /* Highest pending interrupt */
-
-unsigned char g_rom[MAX_ROM+1];                 /* ROM */
-unsigned char g_ram[MAX_RAM+1];                 /* RAM */
-unsigned int  g_fc;                             /* Current function code from CPU */
-
-
-/* Exit with an error message.  Use printf syntax. */
-void exit_error(char* fmt, ...)
-{
-	static int guard_val = 0;
-	char buff[100];
-	unsigned int pc;
-	va_list args;
-
-	if(guard_val)
-		return;
-	else
-		guard_val = 1;
-
-	va_start(args, fmt);
-	vfprintf(stderr, fmt, args);
-	va_end(args);
-	fprintf(stderr, "\n");
-	pc = m68k_get_reg(NULL, M68K_REG_PPC);
-	m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
-	fprintf(stderr, "At %04x: %s\n", pc, buff);
-
-	exit(EXIT_FAILURE);
-}
-
-
-/* Read data from RAM, ROM, or a device */
-unsigned int cpu_read_byte(unsigned int address)
-{
-	if(g_fc & 2)	/* Program */
-	{
-		if(address > MAX_ROM)
-			exit_error("Attempted to read byte from ROM address %08x", address);
-		return READ_BYTE(g_rom, address);
-	}
-
-	/* Otherwise it's data space */
-	switch(address)
-	{
-		case INPUT_ADDRESS:
-			return input_device_read();
-		case OUTPUT_ADDRESS:
-			return output_device_read();
-		default:
-			break;
-	}
-	if(address > MAX_RAM)
-		exit_error("Attempted to read byte from RAM address %08x", address);
-		return READ_BYTE(g_ram, address);
-}
-
-unsigned int cpu_read_word(unsigned int address)
-{
-	if(g_fc & 2)	/* Program */
-	{
-		if(address > MAX_ROM)
-			exit_error("Attempted to read word from ROM address %08x", address);
-		return READ_WORD(g_rom, address);
-	}
-
-	/* Otherwise it's data space */
-	switch(address)
-	{
-		case INPUT_ADDRESS:
-			return input_device_read();
-		case OUTPUT_ADDRESS:
-			return output_device_read();
-		default:
-			break;
-	}
-	if(address > MAX_RAM)
-		exit_error("Attempted to read word from RAM address %08x", address);
-		return READ_WORD(g_ram, address);
-}
-
-unsigned int cpu_read_long(unsigned int address)
-{
-	if(g_fc & 2)	/* Program */
-	{
-		if(address > MAX_ROM)
-			exit_error("Attempted to read long from ROM address %08x", address);
-		return READ_LONG(g_rom, address);
-	}
-
-	/* Otherwise it's data space */
-	switch(address)
-	{
-		case INPUT_ADDRESS:
-			return input_device_read();
-		case OUTPUT_ADDRESS:
-			return output_device_read();
-		default:
-			break;
-	}
-	if(address > MAX_RAM)
-		exit_error("Attempted to read long from RAM address %08x", address);
-		return READ_LONG(g_ram, address);
-}
-
-
-unsigned int cpu_read_word_dasm(unsigned int address)
-{
-	if(address > MAX_ROM)
-		exit_error("Disassembler attempted to read word from ROM address %08x", address);
-	return READ_WORD(g_rom, address);
-}
-
-unsigned int cpu_read_long_dasm(unsigned int address)
-{
-	if(address > MAX_ROM)
-		exit_error("Dasm attempted to read long from ROM address %08x", address);
-	return READ_LONG(g_rom, address);
-}
-
-
-/* Write data to RAM or a device */
-void cpu_write_byte(unsigned int address, unsigned int value)
-{
-	if(g_fc & 2)	/* Program */
-		exit_error("Attempted to write %02x to ROM address %08x", value&0xff, address);
-
-	/* Otherwise it's data space */
-	switch(address)
-	{
-		case INPUT_ADDRESS:
-			input_device_write(value&0xff);
-			return;
-		case OUTPUT_ADDRESS:
-			output_device_write(value&0xff);
-			return;
-		default:
-			break;
-	}
-	if(address > MAX_RAM)
-		exit_error("Attempted to write %02x to RAM address %08x", value&0xff, address);
-	WRITE_BYTE(g_ram, address, value);
-}
-
-void cpu_write_word(unsigned int address, unsigned int value)
-{
-	if(g_fc & 2)	/* Program */
-		exit_error("Attempted to write %04x to ROM address %08x", value&0xffff, address);
-
-	/* Otherwise it's data space */
-	switch(address)
-	{
-		case INPUT_ADDRESS:
-			input_device_write(value&0xffff);
-			return;
-		case OUTPUT_ADDRESS:
-			output_device_write(value&0xffff);
-			return;
-		default:
-			break;
-	}
-	if(address > MAX_RAM)
-		exit_error("Attempted to write %04x to RAM address %08x", value&0xffff, address);
-	WRITE_WORD(g_ram, address, value);
-}
-
-void cpu_write_long(unsigned int address, unsigned int value)
-{
-	if(g_fc & 2)	/* Program */
-		exit_error("Attempted to write %08x to ROM address %08x", value, address);
-
-	/* Otherwise it's data space */
-	switch(address)
-	{
-		case INPUT_ADDRESS:
-			input_device_write(value);
-			return;
-		case OUTPUT_ADDRESS:
-			output_device_write(value);
-			return;
-		default:
-			break;
-	}
-	if(address > MAX_RAM)
-		exit_error("Attempted to write %08x to RAM address %08x", value, address);
-	WRITE_LONG(g_ram, address, value);
-}
-
-/* Called when the CPU pulses the RESET line */
-void cpu_pulse_reset(void)
-{
-	nmi_device_reset();
-	output_device_reset();
-	input_device_reset();
-}
-
-/* Called when the CPU changes the function code pins */
-void cpu_set_fc(unsigned int fc)
-{
-	g_fc = fc;
-}
-
-/* Called when the CPU acknowledges an interrupt */
-int cpu_irq_ack(int level)
-{
-	switch(level)
-	{
-		case IRQ_NMI_DEVICE:
-			return nmi_device_ack();
-		case IRQ_INPUT_DEVICE:
-			return input_device_ack();
-		case IRQ_OUTPUT_DEVICE:
-			return output_device_ack();
-	}
-	return M68K_INT_ACK_SPURIOUS;
-}
-
-
-
-
-/* Implementation for the NMI device */
-void nmi_device_reset(void)
-{
-	g_nmi = 0;
-}
-
-void nmi_device_update(void)
-{
-	if(g_nmi)
-	{
-		g_nmi = 0;
-		int_controller_set(IRQ_NMI_DEVICE);
-	}
-}
-
-int nmi_device_ack(void)
-{
-	printf("\nNMI\n");fflush(stdout);
-	int_controller_clear(IRQ_NMI_DEVICE);
-	return M68K_INT_ACK_AUTOVECTOR;
-}
-
-
-/* Implementation for the input device */
-void input_device_reset(void)
-{
-	g_input_device_value = -1;
-	int_controller_clear(IRQ_INPUT_DEVICE);
-}
-
-void input_device_update(void)
-{
-	if(g_input_device_value >= 0)
-		int_controller_set(IRQ_INPUT_DEVICE);
-}
-
-int input_device_ack(void)
-{
-	return M68K_INT_ACK_AUTOVECTOR;
-}
-
-unsigned int input_device_read(void)
-{
-	int value = g_input_device_value > 0 ? g_input_device_value : 0;
-	int_controller_clear(IRQ_INPUT_DEVICE);
-	g_input_device_value = -1;
-	return value;
-}
-
-void input_device_write(unsigned int value)
-{
-}
-
-
-/* Implementation for the output device */
-void output_device_reset(void)
-{
-	g_output_device_last_output = time(NULL);
-	g_output_device_ready = 0;
-	int_controller_clear(IRQ_OUTPUT_DEVICE);
-}
-
-void output_device_update(void)
-{
-	if(!g_output_device_ready)
-	{
-		if((time(NULL) - g_output_device_last_output) >= OUTPUT_DEVICE_PERIOD)
-		{
-			g_output_device_ready = 1;
-			int_controller_set(IRQ_OUTPUT_DEVICE);
-		}
-	}
-}
-
-int output_device_ack(void)
-{
-	return M68K_INT_ACK_AUTOVECTOR;
-}
-
-unsigned int output_device_read(void)
-{
-	int_controller_clear(IRQ_OUTPUT_DEVICE);
-	return 0;
-}
-
-void output_device_write(unsigned int value)
-{
-	char ch;
-	if(g_output_device_ready)
-	{
-		ch = value & 0xff;
-		printf("%c", ch);
-		g_output_device_last_output = time(NULL);
-		g_output_device_ready = 0;
-		int_controller_clear(IRQ_OUTPUT_DEVICE);
-	}
-}
-
-
-/* Implementation for the interrupt controller */
-void int_controller_set(unsigned int value)
-{
-	unsigned int old_pending = g_int_controller_pending;
-
-	g_int_controller_pending |= (1<<value);
-
-	if(old_pending != g_int_controller_pending && value > g_int_controller_highest_int)
-	{
-		g_int_controller_highest_int = value;
-		m68k_set_irq(g_int_controller_highest_int);
-	}
-}
-
-void int_controller_clear(unsigned int value)
-{
-	g_int_controller_pending &= ~(1<<value);
-
-	for(g_int_controller_highest_int = 7;g_int_controller_highest_int > 0;g_int_controller_highest_int--)
-		if(g_int_controller_pending & (1<<g_int_controller_highest_int))
-			break;
-
-	m68k_set_irq(g_int_controller_highest_int);
-}
-
-
-/* Parse user input and update any devices that need user input */
-void get_user_input(void)
-{
-	static int last_ch = -1;
-	int ch = osd_get_char();
-
-	if(ch >= 0)
-	{
-		switch(ch)
-		{
-			case 0x1b:
-				g_quit = 1;
-				break;
-			case '~':
-				if(last_ch != ch)
-					g_nmi = 1;
-				break;
-			default:
-				g_input_device_value = ch;
-		}
-	}
-	last_ch = ch;
-}
-
-/* Disassembler */
-void make_hex(char* buff, unsigned int pc, unsigned int length)
-{
-	char* ptr = buff;
-
-	for(;length>0;length -= 2)
-	{
-		sprintf(ptr, "%04x", cpu_read_word_dasm(pc));
-		pc += 2;
-		ptr += 4;
-		if(length > 2)
-			*ptr++ = ' ';
-	}
-}
-
-void disassemble_program()
-{
-	unsigned int pc;
-	unsigned int instr_size;
-	char buff[100];
-	char buff2[100];
-
-	pc = cpu_read_long_dasm(4);
-
-	while(pc <= 0x16e)
-	{
-		instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
-		make_hex(buff2, pc, instr_size);
-		printf("%03x: %-20s: %s\n", pc, buff2, buff);
-		pc += instr_size;
-	}
-	fflush(stdout);
-}
-
-void cpu_instr_callback()
-{
-/* The following code would print out instructions as they are executed */
-/*
-	static char buff[100];
-	static char buff2[100];
-	static unsigned int pc;
-	static unsigned int instr_size;
-
-	pc = m68k_get_reg(NULL, M68K_REG_PC);
-	instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
-	make_hex(buff2, pc, instr_size);
-	printf("E %03x: %-20s: %s\n", pc, buff2, buff);
-	fflush(stdout);
-*/
-}
-
-
-
-/* The main loop */
-int main(int argc, char* argv[])
-{
-	FILE* fhandle;
-
-	if(argc != 2)
-	{
-		printf("Usage: sim <program file>\n");
-		exit(-1);
-	}
-
-	if((fhandle = fopen(argv[1], "rb")) == NULL)
-		exit_error("Unable to open %s", argv[1]);
-
-	if(fread(g_rom, 1, MAX_ROM+1, fhandle) <= 0)
-		exit_error("Error reading %s", argv[1]);
-
-//	disassemble_program();
-
-	m68k_init();
-	m68k_set_cpu_type(M68K_CPU_TYPE_68000);
-	m68k_pulse_reset();
-	input_device_reset();
-	output_device_reset();
-	nmi_device_reset();
-
-	g_quit = 0;
-	while(!g_quit)
-	{
-		// Our loop requires some interleaving to allow us to update the
-		// input, output, and nmi devices.
-
-		get_user_input();
-
-		// Values to execute determine the interleave rate.
-		// Smaller values allow for more accurate interleaving with multiple
-		// devices/CPUs but is more processor intensive.
-		// 100000 is usually a good value to start at, then work from there.
-
-		// Note that I am not emulating the correct clock speed!
-		m68k_execute(100000);
-		output_device_update();
-		input_device_update();
-		nmi_device_update();
-	}
-
-	return 0;
-}
-
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <time.h>
+#include "sim.h"
+#include "m68k.h"
+#include "osd.h"
+
+void disassemble_program();
+
+/* Memory-mapped IO ports */
+#define INPUT_ADDRESS 0x800000
+#define OUTPUT_ADDRESS 0x400000
+
+/* IRQ connections */
+#define IRQ_NMI_DEVICE 7
+#define IRQ_INPUT_DEVICE 2
+#define IRQ_OUTPUT_DEVICE 1
+
+/* Time between characters sent to output device (seconds) */
+#define OUTPUT_DEVICE_PERIOD 1
+
+/* ROM and RAM sizes */
+#define MAX_ROM 0xfff
+#define MAX_RAM 0xff
+
+
+/* Read/write macros */
+#define READ_BYTE(BASE, ADDR) (BASE)[ADDR]
+#define READ_WORD(BASE, ADDR) (((BASE)[ADDR]<<8) |			\
+							  (BASE)[(ADDR)+1])
+#define READ_LONG(BASE, ADDR) (((BASE)[ADDR]<<24) |			\
+							  ((BASE)[(ADDR)+1]<<16) |		\
+							  ((BASE)[(ADDR)+2]<<8) |		\
+							  (BASE)[(ADDR)+3])
+
+#define WRITE_BYTE(BASE, ADDR, VAL) (BASE)[ADDR] = (VAL)&0xff
+#define WRITE_WORD(BASE, ADDR, VAL) (BASE)[ADDR] = ((VAL)>>8) & 0xff;		\
+									(BASE)[(ADDR)+1] = (VAL)&0xff
+#define WRITE_LONG(BASE, ADDR, VAL) (BASE)[ADDR] = ((VAL)>>24) & 0xff;		\
+									(BASE)[(ADDR)+1] = ((VAL)>>16)&0xff;	\
+									(BASE)[(ADDR)+2] = ((VAL)>>8)&0xff;		\
+									(BASE)[(ADDR)+3] = (VAL)&0xff
+
+
+/* Prototypes */
+void exit_error(char* fmt, ...);
+
+unsigned int cpu_read_byte(unsigned int address);
+unsigned int cpu_read_word(unsigned int address);
+unsigned int cpu_read_long(unsigned int address);
+void cpu_write_byte(unsigned int address, unsigned int value);
+void cpu_write_word(unsigned int address, unsigned int value);
+void cpu_write_long(unsigned int address, unsigned int value);
+void cpu_pulse_reset(void);
+void cpu_set_fc(unsigned int fc);
+int cpu_irq_ack(int level);
+
+void nmi_device_reset(void);
+void nmi_device_update(void);
+int nmi_device_ack(void);
+
+void input_device_reset(void);
+void input_device_update(void);
+int input_device_ack(void);
+unsigned int input_device_read(void);
+void input_device_write(unsigned int value);
+
+void output_device_reset(void);
+void output_device_update(void);
+int output_device_ack(void);
+unsigned int output_device_read(void);
+void output_device_write(unsigned int value);
+
+void int_controller_set(unsigned int value);
+void int_controller_clear(unsigned int value);
+
+void get_user_input(void);
+
+
+/* Data */
+unsigned int g_quit = 0;                        /* 1 if we want to quit */
+unsigned int g_nmi = 0;                         /* 1 if nmi pending */
+
+int          g_input_device_value = -1;         /* Current value in input device */
+
+unsigned int g_output_device_ready = 0;         /* 1 if output device is ready */
+time_t       g_output_device_last_output;       /* Time of last char output */
+
+unsigned int g_int_controller_pending = 0;      /* list of pending interrupts */
+unsigned int g_int_controller_highest_int = 0;  /* Highest pending interrupt */
+
+unsigned char g_rom[MAX_ROM+1];                 /* ROM */
+unsigned char g_ram[MAX_RAM+1];                 /* RAM */
+unsigned int  g_fc;                             /* Current function code from CPU */
+
+
+/* Exit with an error message.  Use printf syntax. */
+void exit_error(char* fmt, ...)
+{
+	static int guard_val = 0;
+	char buff[100];
+	unsigned int pc;
+	va_list args;
+
+	if(guard_val)
+		return;
+	else
+		guard_val = 1;
+
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	fprintf(stderr, "\n");
+	pc = m68k_get_reg(NULL, M68K_REG_PPC);
+	m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
+	fprintf(stderr, "At %04x: %s\n", pc, buff);
+
+	exit(EXIT_FAILURE);
+}
+
+
+/* Read data from RAM, ROM, or a device */
+unsigned int cpu_read_byte(unsigned int address)
+{
+	if(g_fc & 2)	/* Program */
+	{
+		if(address > MAX_ROM)
+			exit_error("Attempted to read byte from ROM address %08x", address);
+		return READ_BYTE(g_rom, address);
+	}
+
+	/* Otherwise it's data space */
+	switch(address)
+	{
+		case INPUT_ADDRESS:
+			return input_device_read();
+		case OUTPUT_ADDRESS:
+			return output_device_read();
+		default:
+			break;
+	}
+	if(address > MAX_RAM)
+		exit_error("Attempted to read byte from RAM address %08x", address);
+	return READ_BYTE(g_ram, address);
+}
+
+unsigned int cpu_read_word(unsigned int address)
+{
+	if(g_fc & 2)	/* Program */
+	{
+		if(address > MAX_ROM)
+			exit_error("Attempted to read word from ROM address %08x", address);
+		return READ_WORD(g_rom, address);
+	}
+
+	/* Otherwise it's data space */
+	switch(address)
+	{
+		case INPUT_ADDRESS:
+			return input_device_read();
+		case OUTPUT_ADDRESS:
+			return output_device_read();
+		default:
+			break;
+	}
+	if(address > MAX_RAM)
+		exit_error("Attempted to read word from RAM address %08x", address);
+	return READ_WORD(g_ram, address);
+}
+
+unsigned int cpu_read_long(unsigned int address)
+{
+	if(g_fc & 2)	/* Program */
+	{
+		if(address > MAX_ROM)
+			exit_error("Attempted to read long from ROM address %08x", address);
+		return READ_LONG(g_rom, address);
+	}
+
+	/* Otherwise it's data space */
+	switch(address)
+	{
+		case INPUT_ADDRESS:
+			return input_device_read();
+		case OUTPUT_ADDRESS:
+			return output_device_read();
+		default:
+			break;
+	}
+	if(address > MAX_RAM)
+		exit_error("Attempted to read long from RAM address %08x", address);
+	return READ_LONG(g_ram, address);
+}
+
+
+unsigned int cpu_read_word_dasm(unsigned int address)
+{
+	if(address > MAX_ROM)
+		exit_error("Disassembler attempted to read word from ROM address %08x", address);
+	return READ_WORD(g_rom, address);
+}
+
+unsigned int cpu_read_long_dasm(unsigned int address)
+{
+	if(address > MAX_ROM)
+		exit_error("Dasm attempted to read long from ROM address %08x", address);
+	return READ_LONG(g_rom, address);
+}
+
+
+/* Write data to RAM or a device */
+void cpu_write_byte(unsigned int address, unsigned int value)
+{
+	if(g_fc & 2)	/* Program */
+		exit_error("Attempted to write %02x to ROM address %08x", value&0xff, address);
+
+	/* Otherwise it's data space */
+	switch(address)
+	{
+		case INPUT_ADDRESS:
+			input_device_write(value&0xff);
+			return;
+		case OUTPUT_ADDRESS:
+			output_device_write(value&0xff);
+			return;
+		default:
+			break;
+	}
+	if(address > MAX_RAM)
+		exit_error("Attempted to write %02x to RAM address %08x", value&0xff, address);
+	WRITE_BYTE(g_ram, address, value);
+}
+
+void cpu_write_word(unsigned int address, unsigned int value)
+{
+	if(g_fc & 2)	/* Program */
+		exit_error("Attempted to write %04x to ROM address %08x", value&0xffff, address);
+
+	/* Otherwise it's data space */
+	switch(address)
+	{
+		case INPUT_ADDRESS:
+			input_device_write(value&0xffff);
+			return;
+		case OUTPUT_ADDRESS:
+			output_device_write(value&0xffff);
+			return;
+		default:
+			break;
+	}
+	if(address > MAX_RAM)
+		exit_error("Attempted to write %04x to RAM address %08x", value&0xffff, address);
+	WRITE_WORD(g_ram, address, value);
+}
+
+void cpu_write_long(unsigned int address, unsigned int value)
+{
+	if(g_fc & 2)	/* Program */
+		exit_error("Attempted to write %08x to ROM address %08x", value, address);
+
+	/* Otherwise it's data space */
+	switch(address)
+	{
+		case INPUT_ADDRESS:
+			input_device_write(value);
+			return;
+		case OUTPUT_ADDRESS:
+			output_device_write(value);
+			return;
+		default:
+			break;
+	}
+	if(address > MAX_RAM)
+		exit_error("Attempted to write %08x to RAM address %08x", value, address);
+	WRITE_LONG(g_ram, address, value);
+}
+
+/* Called when the CPU pulses the RESET line */
+void cpu_pulse_reset(void)
+{
+	nmi_device_reset();
+	output_device_reset();
+	input_device_reset();
+}
+
+/* Called when the CPU changes the function code pins */
+void cpu_set_fc(unsigned int fc)
+{
+	g_fc = fc;
+}
+
+/* Called when the CPU acknowledges an interrupt */
+int cpu_irq_ack(int level)
+{
+	switch(level)
+	{
+		case IRQ_NMI_DEVICE:
+			return nmi_device_ack();
+		case IRQ_INPUT_DEVICE:
+			return input_device_ack();
+		case IRQ_OUTPUT_DEVICE:
+			return output_device_ack();
+	}
+	return M68K_INT_ACK_SPURIOUS;
+}
+
+
+
+
+/* Implementation for the NMI device */
+void nmi_device_reset(void)
+{
+	g_nmi = 0;
+}
+
+void nmi_device_update(void)
+{
+	if(g_nmi)
+	{
+		g_nmi = 0;
+		int_controller_set(IRQ_NMI_DEVICE);
+	}
+}
+
+int nmi_device_ack(void)
+{
+	printf("\nNMI\n");fflush(stdout);
+	int_controller_clear(IRQ_NMI_DEVICE);
+	return M68K_INT_ACK_AUTOVECTOR;
+}
+
+
+/* Implementation for the input device */
+void input_device_reset(void)
+{
+	g_input_device_value = -1;
+	int_controller_clear(IRQ_INPUT_DEVICE);
+}
+
+void input_device_update(void)
+{
+	if(g_input_device_value >= 0)
+		int_controller_set(IRQ_INPUT_DEVICE);
+}
+
+int input_device_ack(void)
+{
+	return M68K_INT_ACK_AUTOVECTOR;
+}
+
+unsigned int input_device_read(void)
+{
+	int value = g_input_device_value > 0 ? g_input_device_value : 0;
+	int_controller_clear(IRQ_INPUT_DEVICE);
+	g_input_device_value = -1;
+	return value;
+}
+
+void input_device_write(unsigned int value)
+{
+	(void)value;
+}
+
+
+/* Implementation for the output device */
+void output_device_reset(void)
+{
+	g_output_device_last_output = time(NULL);
+	g_output_device_ready = 0;
+	int_controller_clear(IRQ_OUTPUT_DEVICE);
+}
+
+void output_device_update(void)
+{
+	if(!g_output_device_ready)
+	{
+		if((time(NULL) - g_output_device_last_output) >= OUTPUT_DEVICE_PERIOD)
+		{
+			g_output_device_ready = 1;
+			int_controller_set(IRQ_OUTPUT_DEVICE);
+		}
+	}
+}
+
+int output_device_ack(void)
+{
+	return M68K_INT_ACK_AUTOVECTOR;
+}
+
+unsigned int output_device_read(void)
+{
+	int_controller_clear(IRQ_OUTPUT_DEVICE);
+	return 0;
+}
+
+void output_device_write(unsigned int value)
+{
+	char ch;
+	if(g_output_device_ready)
+	{
+		ch = value & 0xff;
+		printf("%c", ch);
+		g_output_device_last_output = time(NULL);
+		g_output_device_ready = 0;
+		int_controller_clear(IRQ_OUTPUT_DEVICE);
+	}
+}
+
+
+/* Implementation for the interrupt controller */
+void int_controller_set(unsigned int value)
+{
+	unsigned int old_pending = g_int_controller_pending;
+
+	g_int_controller_pending |= (1<<value);
+
+	if(old_pending != g_int_controller_pending && value > g_int_controller_highest_int)
+	{
+		g_int_controller_highest_int = value;
+		m68k_set_irq(g_int_controller_highest_int);
+	}
+}
+
+void int_controller_clear(unsigned int value)
+{
+	g_int_controller_pending &= ~(1<<value);
+
+	for(g_int_controller_highest_int = 7;g_int_controller_highest_int > 0;g_int_controller_highest_int--)
+		if(g_int_controller_pending & (1<<g_int_controller_highest_int))
+			break;
+
+	m68k_set_irq(g_int_controller_highest_int);
+}
+
+
+/* Parse user input and update any devices that need user input */
+void get_user_input(void)
+{
+	static int last_ch = -1;
+	int ch = osd_get_char();
+
+	if(ch >= 0)
+	{
+		switch(ch)
+		{
+			case 0x1b:
+				g_quit = 1;
+				break;
+			case '~':
+				if(last_ch != ch)
+					g_nmi = 1;
+				break;
+			default:
+				g_input_device_value = ch;
+		}
+	}
+	last_ch = ch;
+}
+
+/* Disassembler */
+void make_hex(char* buff, unsigned int pc, unsigned int length)
+{
+	char* ptr = buff;
+
+	for(;length>0;length -= 2)
+	{
+		sprintf(ptr, "%04x", cpu_read_word_dasm(pc));
+		pc += 2;
+		ptr += 4;
+		if(length > 2)
+			*ptr++ = ' ';
+	}
+}
+
+void disassemble_program()
+{
+	unsigned int pc;
+	unsigned int instr_size;
+	char buff[100];
+	char buff2[100];
+
+	pc = cpu_read_long_dasm(4);
+
+	while(pc <= 0x16e)
+	{
+		instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
+		make_hex(buff2, pc, instr_size);
+		printf("%03x: %-20s: %s\n", pc, buff2, buff);
+		pc += instr_size;
+	}
+	fflush(stdout);
+}
+
+void cpu_instr_callback(int pc)
+{
+	(void)pc;
+/* The following code would print out instructions as they are executed */
+/*
+	static char buff[100];
+	static char buff2[100];
+	static unsigned int pc;
+	static unsigned int instr_size;
+
+	pc = m68k_get_reg(NULL, M68K_REG_PC);
+	instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
+	make_hex(buff2, pc, instr_size);
+	printf("E %03x: %-20s: %s\n", pc, buff2, buff);
+	fflush(stdout);
+*/
+}
+
+
+
+/* The main loop */
+int main(int argc, char* argv[])
+{
+	FILE* fhandle;
+
+	if(argc != 2)
+	{
+		printf("Usage: sim <program file>\n");
+		exit(-1);
+	}
+
+	if((fhandle = fopen(argv[1], "rb")) == NULL)
+		exit_error("Unable to open %s", argv[1]);
+
+	if(fread(g_rom, 1, MAX_ROM+1, fhandle) <= 0)
+		exit_error("Error reading %s", argv[1]);
+
+//	disassemble_program();
+
+	m68k_init();
+	m68k_set_cpu_type(M68K_CPU_TYPE_68000);
+	m68k_pulse_reset();
+	input_device_reset();
+	output_device_reset();
+	nmi_device_reset();
+
+	g_quit = 0;
+	while(!g_quit)
+	{
+		// Our loop requires some interleaving to allow us to update the
+		// input, output, and nmi devices.
+
+		get_user_input();
+
+		// Values to execute determine the interleave rate.
+		// Smaller values allow for more accurate interleaving with multiple
+		// devices/CPUs but is more processor intensive.
+		// 100000 is usually a good value to start at, then work from there.
+
+		// Note that I am not emulating the correct clock speed!
+		m68k_execute(100000);
+		output_device_update();
+		input_device_update();
+		nmi_device_update();
+	}
+
+	return 0;
+}
+
diff --git a/plat/linux68k/emu/musashi/example/sim.h b/plat/linux68k/emu/musashi/example/sim.h
index 75db0cba1..e778006b4 100755
--- a/plat/linux68k/emu/musashi/example/sim.h
+++ b/plat/linux68k/emu/musashi/example/sim.h
@@ -1,15 +1,15 @@
-#ifndef SIM__HEADER
-#define SIM__HEADER
-
-unsigned int cpu_read_byte(unsigned int address);
-unsigned int cpu_read_word(unsigned int address);
-unsigned int cpu_read_long(unsigned int address);
-void cpu_write_byte(unsigned int address, unsigned int value);
-void cpu_write_word(unsigned int address, unsigned int value);
-void cpu_write_long(unsigned int address, unsigned int value);
-void cpu_pulse_reset(void);
-void cpu_set_fc(unsigned int fc);
-int  cpu_irq_ack(int level);
-void cpu_instr_callback();
-
-#endif /* SIM__HEADER */
+#ifndef SIM__HEADER
+#define SIM__HEADER
+
+unsigned int cpu_read_byte(unsigned int address);
+unsigned int cpu_read_word(unsigned int address);
+unsigned int cpu_read_long(unsigned int address);
+void cpu_write_byte(unsigned int address, unsigned int value);
+void cpu_write_word(unsigned int address, unsigned int value);
+void cpu_write_long(unsigned int address, unsigned int value);
+void cpu_pulse_reset(void);
+void cpu_set_fc(unsigned int fc);
+int  cpu_irq_ack(int level);
+void cpu_instr_callback(int pc);
+
+#endif /* SIM__HEADER */
diff --git a/plat/linux68k/emu/musashi/m68k.h b/plat/linux68k/emu/musashi/m68k.h
index e2f0b273f..555b314f0 100755
--- a/plat/linux68k/emu/musashi/m68k.h
+++ b/plat/linux68k/emu/musashi/m68k.h
@@ -3,10 +3,10 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 3.32
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -30,14 +30,29 @@
 #ifndef M68K__HEADER
 #define M68K__HEADER
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef ARRAY_LENGTH
+#define ARRAY_LENGTH(x)         (sizeof(x) / sizeof(x[0]))
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#define TRUE 1
+#endif
 
 /* ======================================================================== */
 /* ============================= CONFIGURATION ============================ */
 /* ======================================================================== */
 
 /* Import the configuration for this build */
+#ifdef MUSASHI_CNF
+#include MUSASHI_CNF
+#else
 #include "m68kconf.h"
-
+#endif
 
 /* ======================================================================== */
 /* ============================ GENERAL DEFINES =========================== */
@@ -83,8 +98,12 @@ enum
 	M68K_CPU_TYPE_68010,
 	M68K_CPU_TYPE_68EC020,
 	M68K_CPU_TYPE_68020,
-	M68K_CPU_TYPE_68030,	/* Supported by disassembler ONLY */
-	M68K_CPU_TYPE_68040		/* Supported by disassembler ONLY */
+	M68K_CPU_TYPE_68EC030,
+	M68K_CPU_TYPE_68030,
+	M68K_CPU_TYPE_68EC040,
+	M68K_CPU_TYPE_68LC040,
+	M68K_CPU_TYPE_68040,
+	M68K_CPU_TYPE_SCC68070
 };
 
 /* Registers used by m68k_get_reg() and m68k_set_reg() */
@@ -241,6 +260,20 @@ void m68k_set_reset_instr_callback(void  (*callback)(void));
  */
 void m68k_set_pc_changed_callback(void  (*callback)(unsigned int new_pc));
 
+/* Set the callback for the TAS instruction.
+ * You must enable M68K_TAS_HAS_CALLBACK in m68kconf.h.
+ * The CPU calls this callback every time it encounters a TAS instruction.
+ * Default behavior: return 1, allow writeback.
+ */
+void m68k_set_tas_instr_callback(int  (*callback)(void));
+
+/* Set the callback for illegal instructions.
+ * You must enable M68K_ILLG_HAS_CALLBACK in m68kconf.h.
+ * The CPU calls this callback every time it encounters an illegal instruction
+ * which must return 1 if it handles the instruction normally or 0 if it's really an illegal instruction.
+ * Default behavior: return 0, exception will occur.
+ */
+void m68k_set_illg_instr_callback(int  (*callback)(int));
 
 /* Set the callback for CPU function code changes.
  * You must enable M68K_EMULATE_FC in m68kconf.h.
@@ -258,7 +291,7 @@ void m68k_set_fc_callback(void  (*callback)(unsigned int new_fc));
  * instruction cycle.
  * Default behavior: do nothing.
  */
-void m68k_set_instr_hook_callback(void  (*callback)(void));
+void m68k_set_instr_hook_callback(void  (*callback)(unsigned int pc));
 
 
 
@@ -304,11 +337,21 @@ void m68k_end_timeslice(void);          /* End timeslice now */
  */
 void m68k_set_irq(unsigned int int_level);
 
+/* Set the virtual irq lines, where the highest level
+ * active line is automatically selected.  If you use this function,
+ * do not use m68k_set_irq.
+ */
+void m68k_set_virq(unsigned int level, unsigned int active);
+unsigned int m68k_get_virq(unsigned int level);
 
 /* Halt the CPU as if you pulsed the HALT pin. */
 void m68k_pulse_halt(void);
 
 
+/* Trigger a bus error exception */
+void m68k_pulse_bus_error(void);
+
+
 /* Context switching to allow multiple CPUs */
 
 /* Get the size of the cpu context in bytes */
@@ -321,7 +364,7 @@ unsigned int m68k_get_context(void* dst);
 void m68k_set_context(void* dst);
 
 /* Register the CPU state information */
-void m68k_state_register(const char *type);
+void m68k_state_register(const char *type, int index);
 
 
 /* Peek at the internals of a CPU context.  This can either be a context
@@ -341,6 +384,11 @@ unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cp
  */
 unsigned int m68k_disassemble(char* str_buff, unsigned int pc, unsigned int cpu_type);
 
+/* Same as above but accepts raw opcode data directly rather than fetching
+ * via the read/write interfaces.
+ */
+unsigned int m68k_disassemble_raw(char* str_buff, unsigned int pc, const unsigned char* opdata, const unsigned char* argdata, unsigned int cpu_type);
+
 
 /* ======================================================================== */
 /* ============================== MAME STUFF ============================== */
@@ -355,4 +403,9 @@ unsigned int m68k_disassemble(char* str_buff, unsigned int pc, unsigned int cpu_
 /* ============================== END OF FILE ============================= */
 /* ======================================================================== */
 
+#ifdef __cplusplus
+}
+#endif
+
+
 #endif /* M68K__HEADER */
diff --git a/plat/linux68k/emu/musashi/m68k_in.c b/plat/linux68k/emu/musashi/m68k_in.c
index bc4217f5d..6e2831f04 100755
--- a/plat/linux68k/emu/musashi/m68k_in.c
+++ b/plat/linux68k/emu/musashi/m68k_in.c
@@ -8,10 +8,10 @@ must fix:
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 3.32
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -131,9 +131,10 @@ M68KMAKE_TABLE_HEADER
 /* ========================= OPCODE TABLE BUILDER ========================= */
 /* ======================================================================== */
 
+#include <stdio.h>
 #include "m68kops.h"
 
-#define NUM_CPU_TYPES 3
+#define NUM_CPU_TYPES 5
 
 void  (*m68ki_instruction_jump_table[0x10000])(void); /* opcode handler jump table */
 unsigned char m68ki_cycles[NUM_CPU_TYPES][0x10000]; /* Cycles used by CPU type */
@@ -149,23 +150,23 @@ typedef struct
 
 
 /* Opcode handler table */
-static opcode_handler_struct m68k_opcode_handler_table[] =
+static const opcode_handler_struct m68k_opcode_handler_table[] =
 {
-/*   function                      mask    match    000  010  020 */
+/*   function                      mask    match    000  010  020  040 */
 
 
 
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 M68KMAKE_TABLE_FOOTER
 
-	{0, 0, 0, {0, 0, 0}}
+	{0, 0, 0, {0, 0, 0, 0, 0}}
 };
 
 
 /* Build the opcode handler jump table */
 void m68ki_build_opcode_table(void)
 {
-	opcode_handler_struct *ostruct;
+	const opcode_handler_struct *ostruct;
 	int cycle_cost;
 	int instr;
 	int i;
@@ -278,7 +279,11 @@ void m68ki_build_opcode_table(void)
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 M68KMAKE_OPCODE_HANDLER_HEADER
 
+#include <stdio.h>
 #include "m68kcpu.h"
+extern void m68040_fpu_op0(void);
+extern void m68040_fpu_op1(void);
+extern void m68881_mmu_ops();
 
 /* ======================================================================== */
 /* ========================= INSTRUCTION HANDLERS ========================= */
@@ -368,523 +373,528 @@ cpu cycles:  Base number of cycles required to execute this opcode on the
 
 
 
-              spec  spec                    allowed ea  mode   cpu cycles
-name    size  proc   ea   bit pattern       A+-DXWLdxI  0 1 2  000 010 020  comments
-======  ====  ====  ====  ================  ==========  = = =  === === ===  =============
+              spec  spec                    allowed ea  mode       cpu cycles
+name    size  proc   ea   bit pattern       A+-DXWLdxI  0 1 2 3 4  000 010 020 030 040  comments
+======  ====  ====  ====  ================  ==========  = = = = =  === === === === === =============
 M68KMAKE_TABLE_START
-1010       0  .     .     1010............  ..........  U U U    4   4   4
-1111       0  .     .     1111............  ..........  U U U    4   4   4
-abcd       8  rr    .     1100...100000...  ..........  U U U    6   6   4
-abcd       8  mm    ax7   1100111100001...  ..........  U U U   18  18  16
-abcd       8  mm    ay7   1100...100001111  ..........  U U U   18  18  16
-abcd       8  mm    axy7  1100111100001111  ..........  U U U   18  18  16
-abcd       8  mm    .     1100...100001...  ..........  U U U   18  18  16
-add        8  er    d     1101...000000...  ..........  U U U    4   4   2
-add        8  er    .     1101...000......  A+-DXWLdxI  U U U    4   4   2
-add       16  er    d     1101...001000...  ..........  U U U    4   4   2
-add       16  er    a     1101...001001...  ..........  U U U    4   4   2
-add       16  er    .     1101...001......  A+-DXWLdxI  U U U    4   4   2
-add       32  er    d     1101...010000...  ..........  U U U    6   6   2
-add       32  er    a     1101...010001...  ..........  U U U    6   6   2
-add       32  er    .     1101...010......  A+-DXWLdxI  U U U    6   6   2
-add        8  re    .     1101...100......  A+-DXWL...  U U U    8   8   4
-add       16  re    .     1101...101......  A+-DXWL...  U U U    8   8   4
-add       32  re    .     1101...110......  A+-DXWL...  U U U   12  12   4
-adda      16  .     d     1101...011000...  ..........  U U U    8   8   2
-adda      16  .     a     1101...011001...  ..........  U U U    8   8   2
-adda      16  .     .     1101...011......  A+-DXWLdxI  U U U    8   8   2
-adda      32  .     d     1101...111000...  ..........  U U U    6   6   2
-adda      32  .     a     1101...111001...  ..........  U U U    6   6   2
-adda      32  .     .     1101...111......  A+-DXWLdxI  U U U    6   6   2
-addi       8  .     d     0000011000000...  ..........  U U U    8   8   2
-addi       8  .     .     0000011000......  A+-DXWL...  U U U   12  12   4
-addi      16  .     d     0000011001000...  ..........  U U U    8   8   2
-addi      16  .     .     0000011001......  A+-DXWL...  U U U   12  12   4
-addi      32  .     d     0000011010000...  ..........  U U U   16  14   2
-addi      32  .     .     0000011010......  A+-DXWL...  U U U   20  20   4
-addq       8  .     d     0101...000000...  ..........  U U U    4   4   2
-addq       8  .     .     0101...000......  A+-DXWL...  U U U    8   8   4
-addq      16  .     d     0101...001000...  ..........  U U U    4   4   2
-addq      16  .     a     0101...001001...  ..........  U U U    4   4   2
-addq      16  .     .     0101...001......  A+-DXWL...  U U U    8   8   4
-addq      32  .     d     0101...010000...  ..........  U U U    8   8   2
-addq      32  .     a     0101...010001...  ..........  U U U    8   8   2
-addq      32  .     .     0101...010......  A+-DXWL...  U U U   12  12   4
-addx       8  rr    .     1101...100000...  ..........  U U U    4   4   2
-addx      16  rr    .     1101...101000...  ..........  U U U    4   4   2
-addx      32  rr    .     1101...110000...  ..........  U U U    8   6   2
-addx       8  mm    ax7   1101111100001...  ..........  U U U   18  18  12
-addx       8  mm    ay7   1101...100001111  ..........  U U U   18  18  12
-addx       8  mm    axy7  1101111100001111  ..........  U U U   18  18  12
-addx       8  mm    .     1101...100001...  ..........  U U U   18  18  12
-addx      16  mm    .     1101...101001...  ..........  U U U   18  18  12
-addx      32  mm    .     1101...110001...  ..........  U U U   30  30  12
-and        8  er    d     1100...000000...  ..........  U U U    4   4   2
-and        8  er    .     1100...000......  A+-DXWLdxI  U U U    4   4   2
-and       16  er    d     1100...001000...  ..........  U U U    4   4   2
-and       16  er    .     1100...001......  A+-DXWLdxI  U U U    4   4   2
-and       32  er    d     1100...010000...  ..........  U U U    6   6   2
-and       32  er    .     1100...010......  A+-DXWLdxI  U U U    6   6   2
-and        8  re    .     1100...100......  A+-DXWL...  U U U    8   8   4
-and       16  re    .     1100...101......  A+-DXWL...  U U U    8   8   4
-and       32  re    .     1100...110......  A+-DXWL...  U U U   12  12   4
-andi       8  toc   .     0000001000111100  ..........  U U U   20  16  12
-andi      16  tos   .     0000001001111100  ..........  S S S   20  16  12
-andi       8  .     d     0000001000000...  ..........  U U U    8   8   2
-andi       8  .     .     0000001000......  A+-DXWL...  U U U   12  12   4
-andi      16  .     d     0000001001000...  ..........  U U U    8   8   2
-andi      16  .     .     0000001001......  A+-DXWL...  U U U   12  12   4
-andi      32  .     d     0000001010000...  ..........  U U U   14  14   2
-andi      32  .     .     0000001010......  A+-DXWL...  U U U   20  20   4
-asr        8  s     .     1110...000000...  ..........  U U U    6   6   6
-asr       16  s     .     1110...001000...  ..........  U U U    6   6   6
-asr       32  s     .     1110...010000...  ..........  U U U    8   8   6
-asr        8  r     .     1110...000100...  ..........  U U U    6   6   6
-asr       16  r     .     1110...001100...  ..........  U U U    6   6   6
-asr       32  r     .     1110...010100...  ..........  U U U    8   8   6
-asr       16  .     .     1110000011......  A+-DXWL...  U U U    8   8   5
-asl        8  s     .     1110...100000...  ..........  U U U    6   6   8
-asl       16  s     .     1110...101000...  ..........  U U U    6   6   8
-asl       32  s     .     1110...110000...  ..........  U U U    8   8   8
-asl        8  r     .     1110...100100...  ..........  U U U    6   6   8
-asl       16  r     .     1110...101100...  ..........  U U U    6   6   8
-asl       32  r     .     1110...110100...  ..........  U U U    8   8   8
-asl       16  .     .     1110000111......  A+-DXWL...  U U U    8   8   6
-bcc        8  .     .     0110............  ..........  U U U   10  10   6
-bcc       16  .     .     0110....00000000  ..........  U U U   10  10   6
-bcc       32  .     .     0110....11111111  ..........  . . U    .   .   6
-bchg       8  r     .     0000...101......  A+-DXWL...  U U U    8   8   4
-bchg      32  r     d     0000...101000...  ..........  U U U    8   8   4
-bchg       8  s     .     0000100001......  A+-DXWL...  U U U   12  12   4
-bchg      32  s     d     0000100001000...  ..........  U U U   12  12   4
-bclr       8  r     .     0000...110......  A+-DXWL...  U U U    8  10   4
-bclr      32  r     d     0000...110000...  ..........  U U U   10  10   4
-bclr       8  s     .     0000100010......  A+-DXWL...  U U U   12  12   4
-bclr      32  s     d     0000100010000...  ..........  U U U   14  14   4
-bfchg     32  .     d     1110101011000...  ..........  . . U    .   .  12  timing not quite correct
-bfchg     32  .     .     1110101011......  A..DXWL...  . . U    .   .  20
-bfclr     32  .     d     1110110011000...  ..........  . . U    .   .  12
-bfclr     32  .     .     1110110011......  A..DXWL...  . . U    .   .  20
-bfexts    32  .     d     1110101111000...  ..........  . . U    .   .   8
-bfexts    32  .     .     1110101111......  A..DXWLdx.  . . U    .   .  15
-bfextu    32  .     d     1110100111000...  ..........  . . U    .   .   8
-bfextu    32  .     .     1110100111......  A..DXWLdx.  . . U    .   .  15
-bfffo     32  .     d     1110110111000...  ..........  . . U    .   .  18
-bfffo     32  .     .     1110110111......  A..DXWLdx.  . . U    .   .  28
-bfins     32  .     d     1110111111000...  ..........  . . U    .   .  10
-bfins     32  .     .     1110111111......  A..DXWL...  . . U    .   .  17
-bfset     32  .     d     1110111011000...  ..........  . . U    .   .  12
-bfset     32  .     .     1110111011......  A..DXWL...  . . U    .   .  20
-bftst     32  .     d     1110100011000...  ..........  . . U    .   .   6
-bftst     32  .     .     1110100011......  A..DXWLdx.  . . U    .   .  13
-bkpt       0  .     .     0100100001001...  ..........  . U U    .  10  10
-bra        8  .     .     01100000........  ..........  U U U   10  10  10
-bra       16  .     .     0110000000000000  ..........  U U U   10  10  10
-bra       32  .     .     0110000011111111  ..........  U U U    .   .  10
-bset      32  r     d     0000...111000...  ..........  U U U    8   8   4
-bset       8  r     .     0000...111......  A+-DXWL...  U U U    8   8   4
-bset       8  s     .     0000100011......  A+-DXWL...  U U U   12  12   4
-bset      32  s     d     0000100011000...  ..........  U U U   12  12   4
-bsr        8  .     .     01100001........  ..........  U U U   18  18   7
-bsr       16  .     .     0110000100000000  ..........  U U U   18  18   7
-bsr       32  .     .     0110000111111111  ..........  . . U    .   .   7
-btst       8  r     .     0000...100......  A+-DXWLdxI  U U U    4   4   4
-btst      32  r     d     0000...100000...  ..........  U U U    6   6   4
-btst       8  s     .     0000100000......  A+-DXWLdx.  U U U    8   8   4
-btst      32  s     d     0000100000000...  ..........  U U U   10  10   4
-callm     32  .     .     0000011011......  A..DXWLdx.  . . U    .   .  60  not properly emulated
-cas        8  .     .     0000101011......  A+-DXWL...  . . U    .   .  12
-cas       16  .     .     0000110011......  A+-DXWL...  . . U    .   .  12
-cas       32  .     .     0000111011......  A+-DXWL...  . . U    .   .  12
-cas2      16  .     .     0000110011111100  ..........  . . U    .   .  12
-cas2      32  .     .     0000111011111100  ..........  . . U    .   .  12
-chk       16  .     d     0100...110000...  ..........  U U U   10   8   8
-chk       16  .     .     0100...110......  A+-DXWLdxI  U U U   10   8   8
-chk       32  .     d     0100...100000...  ..........  . . U    .   .   8
-chk       32  .     .     0100...100......  A+-DXWLdxI  . . U    .   .   8
-chk2cmp2   8  .     pcdi  0000000011111010  ..........  . . U    .   .  23
-chk2cmp2   8  .     pcix  0000000011111011  ..........  . . U    .   .  23
-chk2cmp2   8  .     .     0000000011......  A..DXWL...  . . U    .   .  18
-chk2cmp2  16  .     pcdi  0000001011111010  ..........  . . U    .   .  23
-chk2cmp2  16  .     pcix  0000001011111011  ..........  . . U    .   .  23
-chk2cmp2  16  .     .     0000001011......  A..DXWL...  . . U    .   .  18
-chk2cmp2  32  .     pcdi  0000010011111010  ..........  . . U    .   .  23
-chk2cmp2  32  .     pcix  0000010011111011  ..........  . . U    .   .  23
-chk2cmp2  32  .     .     0000010011......  A..DXWL...  . . U    .   .  18
-clr        8  .     d     0100001000000...  ..........  U U U    4   4   2
-clr        8  .     .     0100001000......  A+-DXWL...  U U U    8   4   4
-clr       16  .     d     0100001001000...  ..........  U U U    4   4   2
-clr       16  .     .     0100001001......  A+-DXWL...  U U U    8   4   4
-clr       32  .     d     0100001010000...  ..........  U U U    6   6   2
-clr       32  .     .     0100001010......  A+-DXWL...  U U U   12   6   4
-cmp        8  .     d     1011...000000...  ..........  U U U    4   4   2
-cmp        8  .     .     1011...000......  A+-DXWLdxI  U U U    4   4   2
-cmp       16  .     d     1011...001000...  ..........  U U U    4   4   2
-cmp       16  .     a     1011...001001...  ..........  U U U    4   4   2
-cmp       16  .     .     1011...001......  A+-DXWLdxI  U U U    4   4   2
-cmp       32  .     d     1011...010000...  ..........  U U U    6   6   2
-cmp       32  .     a     1011...010001...  ..........  U U U    6   6   2
-cmp       32  .     .     1011...010......  A+-DXWLdxI  U U U    6   6   2
-cmpa      16  .     d     1011...011000...  ..........  U U U    6   6   4
-cmpa      16  .     a     1011...011001...  ..........  U U U    6   6   4
-cmpa      16  .     .     1011...011......  A+-DXWLdxI  U U U    6   6   4
-cmpa      32  .     d     1011...111000...  ..........  U U U    6   6   4
-cmpa      32  .     a     1011...111001...  ..........  U U U    6   6   4
-cmpa      32  .     .     1011...111......  A+-DXWLdxI  U U U    6   6   4
-cmpi       8  .     d     0000110000000...  ..........  U U U    8   8   2
-cmpi       8  .     .     0000110000......  A+-DXWL...  U U U    8   8   2
-cmpi       8  .     pcdi  0000110000111010  ..........  . . U    .   .   7
-cmpi       8  .     pcix  0000110000111011  ..........  . . U    .   .   9
-cmpi      16  .     d     0000110001000...  ..........  U U U    8   8   2
-cmpi      16  .     .     0000110001......  A+-DXWL...  U U U    8   8   2
-cmpi      16  .     pcdi  0000110001111010  ..........  . . U    .   .   7
-cmpi      16  .     pcix  0000110001111011  ..........  . . U    .   .   9
-cmpi      32  .     d     0000110010000...  ..........  U U U   14  12   2
-cmpi      32  .     .     0000110010......  A+-DXWL...  U U U   12  12   2
-cmpi      32  .     pcdi  0000110010111010  ..........  . . U    .   .   7
-cmpi      32  .     pcix  0000110010111011  ..........  . . U    .   .   9
-cmpm       8  .     ax7   1011111100001...  ..........  U U U   12  12   9
-cmpm       8  .     ay7   1011...100001111  ..........  U U U   12  12   9
-cmpm       8  .     axy7  1011111100001111  ..........  U U U   12  12   9
-cmpm       8  .     .     1011...100001...  ..........  U U U   12  12   9
-cmpm      16  .     .     1011...101001...  ..........  U U U   12  12   9
-cmpm      32  .     .     1011...110001...  ..........  U U U   20  20   9
-cpbcc     32  .     .     1111...01.......  ..........  . . U    .   .   4  unemulated
-cpdbcc    32  .     .     1111...001001...  ..........  . . U    .   .   4  unemulated
-cpgen     32  .     .     1111...000......  ..........  . . U    .   .   4  unemulated
-cpscc     32  .     .     1111...001......  ..........  . . U    .   .   4  unemulated
-cptrapcc  32  .     .     1111...001111...  ..........  . . U    .   .   4  unemulated
-dbt       16  .     .     0101000011001...  ..........  U U U   12  12   6
-dbf       16  .     .     0101000111001...  ..........  U U U   12  12   6
-dbcc      16  .     .     0101....11001...  ..........  U U U   12  12   6
-divs      16  .     d     1000...111000...  ..........  U U U  158 122  56
-divs      16  .     .     1000...111......  A+-DXWLdxI  U U U  158 122  56
-divu      16  .     d     1000...011000...  ..........  U U U  140 108  44
-divu      16  .     .     1000...011......  A+-DXWLdxI  U U U  140 108  44
-divl      32  .     d     0100110001000...  ..........  . . U    .   .  84
-divl      32  .     .     0100110001......  A+-DXWLdxI  . . U    .   .  84
-eor        8  .     d     1011...100000...  ..........  U U U    4   4   2
-eor        8  .     .     1011...100......  A+-DXWL...  U U U    8   8   4
-eor       16  .     d     1011...101000...  ..........  U U U    4   4   2
-eor       16  .     .     1011...101......  A+-DXWL...  U U U    8   8   4
-eor       32  .     d     1011...110000...  ..........  U U U    8   6   2
-eor       32  .     .     1011...110......  A+-DXWL...  U U U   12  12   4
-eori       8  toc   .     0000101000111100  ..........  U U U   20  16  12
-eori      16  tos   .     0000101001111100  ..........  S S S   20  16  12
-eori       8  .     d     0000101000000...  ..........  U U U    8   8   2
-eori       8  .     .     0000101000......  A+-DXWL...  U U U   12  12   4
-eori      16  .     d     0000101001000...  ..........  U U U    8   8   2
-eori      16  .     .     0000101001......  A+-DXWL...  U U U   12  12   4
-eori      32  .     d     0000101010000...  ..........  U U U   16  14   2
-eori      32  .     .     0000101010......  A+-DXWL...  U U U   20  20   4
-exg       32  dd    .     1100...101000...  ..........  U U U    6   6   2
-exg       32  aa    .     1100...101001...  ..........  U U U    6   6   2
-exg       32  da    .     1100...110001...  ..........  U U U    6   6   2
-ext       16  .     .     0100100010000...  ..........  U U U    4   4   4
-ext       32  .     .     0100100011000...  ..........  U U U    4   4   4
-extb      32  .     .     0100100111000...  ..........  . . U    .   .   4
-illegal    0  .     .     0100101011111100  ..........  U U U    4   4   4
-jmp       32  .     .     0100111011......  A..DXWLdx.  U U U    4   4   0
-jsr       32  .     .     0100111010......  A..DXWLdx.  U U U   12  12   0
-lea       32  .     .     0100...111......  A..DXWLdx.  U U U    0   0   2
-link      16  .     a7    0100111001010111  ..........  U U U   16  16   5
-link      16  .     .     0100111001010...  ..........  U U U   16  16   5
-link      32  .     a7    0100100000001111  ..........  . . U    .   .   6
-link      32  .     .     0100100000001...  ..........  . . U    .   .   6
-lsr        8  s     .     1110...000001...  ..........  U U U    6   6   4
-lsr       16  s     .     1110...001001...  ..........  U U U    6   6   4
-lsr       32  s     .     1110...010001...  ..........  U U U    8   8   4
-lsr        8  r     .     1110...000101...  ..........  U U U    6   6   6
-lsr       16  r     .     1110...001101...  ..........  U U U    6   6   6
-lsr       32  r     .     1110...010101...  ..........  U U U    8   8   6
-lsr       16  .     .     1110001011......  A+-DXWL...  U U U    8   8   5
-lsl        8  s     .     1110...100001...  ..........  U U U    6   6   4
-lsl       16  s     .     1110...101001...  ..........  U U U    6   6   4
-lsl       32  s     .     1110...110001...  ..........  U U U    8   8   4
-lsl        8  r     .     1110...100101...  ..........  U U U    6   6   6
-lsl       16  r     .     1110...101101...  ..........  U U U    6   6   6
-lsl       32  r     .     1110...110101...  ..........  U U U    8   8   6
-lsl       16  .     .     1110001111......  A+-DXWL...  U U U    8   8   5
-move       8  d     d     0001...000000...  ..........  U U U    4   4   2
-move       8  d     .     0001...000......  A+-DXWLdxI  U U U    4   4   2
-move       8  ai    d     0001...010000...  ..........  U U U    8   8   4
-move       8  ai    .     0001...010......  A+-DXWLdxI  U U U    8   8   4
-move       8  pi    d     0001...011000...  ..........  U U U    8   8   4
-move       8  pi    .     0001...011......  A+-DXWLdxI  U U U    8   8   4
-move       8  pi7   d     0001111011000...  ..........  U U U    8   8   4
-move       8  pi7   .     0001111011......  A+-DXWLdxI  U U U    8   8   4
-move       8  pd    d     0001...100000...  ..........  U U U    8   8   5
-move       8  pd    .     0001...100......  A+-DXWLdxI  U U U    8   8   5
-move       8  pd7   d     0001111100000...  ..........  U U U    8   8   5
-move       8  pd7   .     0001111100......  A+-DXWLdxI  U U U    8   8   5
-move       8  di    d     0001...101000...  ..........  U U U   12  12   5
-move       8  di    .     0001...101......  A+-DXWLdxI  U U U   12  12   5
-move       8  ix    d     0001...110000...  ..........  U U U   14  14   7
-move       8  ix    .     0001...110......  A+-DXWLdxI  U U U   14  14   7
-move       8  aw    d     0001000111000...  ..........  U U U   12  12   4
-move       8  aw    .     0001000111......  A+-DXWLdxI  U U U   12  12   4
-move       8  al    d     0001001111000...  ..........  U U U   16  16   6
-move       8  al    .     0001001111......  A+-DXWLdxI  U U U   16  16   6
-move      16  d     d     0011...000000...  ..........  U U U    4   4   2
-move      16  d     a     0011...000001...  ..........  U U U    4   4   2
-move      16  d     .     0011...000......  A+-DXWLdxI  U U U    4   4   2
-move      16  ai    d     0011...010000...  ..........  U U U    8   8   4
-move      16  ai    a     0011...010001...  ..........  U U U    8   8   4
-move      16  ai    .     0011...010......  A+-DXWLdxI  U U U    8   8   4
-move      16  pi    d     0011...011000...  ..........  U U U    8   8   4
-move      16  pi    a     0011...011001...  ..........  U U U    8   8   4
-move      16  pi    .     0011...011......  A+-DXWLdxI  U U U    8   8   4
-move      16  pd    d     0011...100000...  ..........  U U U    8   8   5
-move      16  pd    a     0011...100001...  ..........  U U U    8   8   5
-move      16  pd    .     0011...100......  A+-DXWLdxI  U U U    8   8   5
-move      16  di    d     0011...101000...  ..........  U U U   12  12   5
-move      16  di    a     0011...101001...  ..........  U U U   12  12   5
-move      16  di    .     0011...101......  A+-DXWLdxI  U U U   12  12   5
-move      16  ix    d     0011...110000...  ..........  U U U   14  14   7
-move      16  ix    a     0011...110001...  ..........  U U U   14  14   7
-move      16  ix    .     0011...110......  A+-DXWLdxI  U U U   14  14   7
-move      16  aw    d     0011000111000...  ..........  U U U   12  12   4
-move      16  aw    a     0011000111001...  ..........  U U U   12  12   4
-move      16  aw    .     0011000111......  A+-DXWLdxI  U U U   12  12   4
-move      16  al    d     0011001111000...  ..........  U U U   16  16   6
-move      16  al    a     0011001111001...  ..........  U U U   16  16   6
-move      16  al    .     0011001111......  A+-DXWLdxI  U U U   16  16   6
-move      32  d     d     0010...000000...  ..........  U U U    4   4   2
-move      32  d     a     0010...000001...  ..........  U U U    4   4   2
-move      32  d     .     0010...000......  A+-DXWLdxI  U U U    4   4   2
-move      32  ai    d     0010...010000...  ..........  U U U   12  12   4
-move      32  ai    a     0010...010001...  ..........  U U U   12  12   4
-move      32  ai    .     0010...010......  A+-DXWLdxI  U U U   12  12   4
-move      32  pi    d     0010...011000...  ..........  U U U   12  12   4
-move      32  pi    a     0010...011001...  ..........  U U U   12  12   4
-move      32  pi    .     0010...011......  A+-DXWLdxI  U U U   12  12   4
-move      32  pd    d     0010...100000...  ..........  U U U   12  14   5
-move      32  pd    a     0010...100001...  ..........  U U U   12  14   5
-move      32  pd    .     0010...100......  A+-DXWLdxI  U U U   12  14   5
-move      32  di    d     0010...101000...  ..........  U U U   16  16   5
-move      32  di    a     0010...101001...  ..........  U U U   16  16   5
-move      32  di    .     0010...101......  A+-DXWLdxI  U U U   16  16   5
-move      32  ix    d     0010...110000...  ..........  U U U   18  18   7
-move      32  ix    a     0010...110001...  ..........  U U U   18  18   7
-move      32  ix    .     0010...110......  A+-DXWLdxI  U U U   18  18   7
-move      32  aw    d     0010000111000...  ..........  U U U   16  16   4
-move      32  aw    a     0010000111001...  ..........  U U U   16  16   4
-move      32  aw    .     0010000111......  A+-DXWLdxI  U U U   16  16   4
-move      32  al    d     0010001111000...  ..........  U U U   20  20   6
-move      32  al    a     0010001111001...  ..........  U U U   20  20   6
-move      32  al    .     0010001111......  A+-DXWLdxI  U U U   20  20   6
-movea     16  .     d     0011...001000...  ..........  U U U    4   4   2
-movea     16  .     a     0011...001001...  ..........  U U U    4   4   2
-movea     16  .     .     0011...001......  A+-DXWLdxI  U U U    4   4   2
-movea     32  .     d     0010...001000...  ..........  U U U    4   4   2
-movea     32  .     a     0010...001001...  ..........  U U U    4   4   2
-movea     32  .     .     0010...001......  A+-DXWLdxI  U U U    4   4   2
-move      16  frc   d     0100001011000...  ..........  . U U    .   4   4
-move      16  frc   .     0100001011......  A+-DXWL...  . U U    .   8   4
-move      16  toc   d     0100010011000...  ..........  U U U   12  12   4
-move      16  toc   .     0100010011......  A+-DXWLdxI  U U U   12  12   4
-move      16  frs   d     0100000011000...  ..........  U S S    6   4   8 U only for 000
-move      16  frs   .     0100000011......  A+-DXWL...  U S S    8   8   8 U only for 000
-move      16  tos   d     0100011011000...  ..........  S S S   12  12   8
-move      16  tos   .     0100011011......  A+-DXWLdxI  S S S   12  12   8
-move      32  fru   .     0100111001101...  ..........  S S S    4   6   2
-move      32  tou   .     0100111001100...  ..........  S S S    4   6   2
-movec     32  cr    .     0100111001111010  ..........  . S S    .  12   6
-movec     32  rc    .     0100111001111011  ..........  . S S    .  10  12
-movem     16  re    pd    0100100010100...  ..........  U U U    8   8   4
-movem     16  re    .     0100100010......  A..DXWL...  U U U    4   4   4  cycles for hypothetical d addressing mode (020 unverified)
-movem     32  re    pd    0100100011100...  ..........  U U U    8   8   4
-movem     32  re    .     0100100011......  A..DXWL...  U U U    0   0   4  cycles for hypothetical d addressing mode (020 unverified)
-movem     16  er    pi    0100110010011...  ..........  U U U   12  12   8
-movem     16  er    pcdi  0100110010111010  ..........  U U U   16  16   9
-movem     16  er    pcix  0100110010111011  ..........  U U U   18  18  11
-movem     16  er    .     0100110010......  A..DXWL...  U U U    8   8   8  cycles for hypothetical d addressing mode (020 unverified)
-movem     32  er    pi    0100110011011...  ..........  U U U   12  12   8
-movem     32  er    pcdi  0100110011111010  ..........  U U U   16  16   9
-movem     32  er    pcix  0100110011111011  ..........  U U U   18  18  11
-movem     32  er    .     0100110011......  A..DXWL...  U U U    4   4   8  cycles for hypothetical d addressing mode (020 unverified)
-movep     16  er    .     0000...100001...  ..........  U U U   16  16  12
-movep     32  er    .     0000...101001...  ..........  U U U   24  24  18
-movep     16  re    .     0000...110001...  ..........  U U U   16  16  11
-movep     32  re    .     0000...111001...  ..........  U U U   24  24  17
-moveq     32  .     .     0111...0........  ..........  U U U    4   4   2
-moves      8  .     .     0000111000......  A+-DXWL...  . S S    .  14   5
-moves     16  .     .     0000111001......  A+-DXWL...  . S S    .  14   5
-moves     32  .     .     0000111010......  A+-DXWL...  . S S    .  16   5
-muls      16  .     d     1100...111000...  ..........  U U U   54  32  27
-muls      16  .     .     1100...111......  A+-DXWLdxI  U U U   54  32  27
-mulu      16  .     d     1100...011000...  ..........  U U U   54  30  27
-mulu      16  .     .     1100...011......  A+-DXWLdxI  U U U   54  30  27
-mull      32  .     d     0100110000000...  ..........  . . U    .   .  43
-mull      32  .     .     0100110000......  A+-DXWLdxI  . . U    .   .  43
-nbcd       8  .     d     0100100000000...  ..........  U U U    6   6   6
-nbcd       8  .     .     0100100000......  A+-DXWL...  U U U    8   8   6
-neg        8  .     d     0100010000000...  ..........  U U U    4   4   2
-neg        8  .     .     0100010000......  A+-DXWL...  U U U    8   8   4
-neg       16  .     d     0100010001000...  ..........  U U U    4   4   2
-neg       16  .     .     0100010001......  A+-DXWL...  U U U    8   8   4
-neg       32  .     d     0100010010000...  ..........  U U U    6   6   2
-neg       32  .     .     0100010010......  A+-DXWL...  U U U   12  12   4
-negx       8  .     d     0100000000000...  ..........  U U U    4   4   2
-negx       8  .     .     0100000000......  A+-DXWL...  U U U    8   8   4
-negx      16  .     d     0100000001000...  ..........  U U U    4   4   2
-negx      16  .     .     0100000001......  A+-DXWL...  U U U    8   8   4
-negx      32  .     d     0100000010000...  ..........  U U U    6   6   2
-negx      32  .     .     0100000010......  A+-DXWL...  U U U   12  12   4
-nop        0  .     .     0100111001110001  ..........  U U U    4   4   2
-not        8  .     d     0100011000000...  ..........  U U U    4   4   2
-not        8  .     .     0100011000......  A+-DXWL...  U U U    8   8   4
-not       16  .     d     0100011001000...  ..........  U U U    4   4   2
-not       16  .     .     0100011001......  A+-DXWL...  U U U    8   8   4
-not       32  .     d     0100011010000...  ..........  U U U    6   6   2
-not       32  .     .     0100011010......  A+-DXWL...  U U U   12  12   4
-or         8  er    d     1000...000000...  ..........  U U U    4   4   2
-or         8  er    .     1000...000......  A+-DXWLdxI  U U U    4   4   2
-or        16  er    d     1000...001000...  ..........  U U U    4   4   2
-or        16  er    .     1000...001......  A+-DXWLdxI  U U U    4   4   2
-or        32  er    d     1000...010000...  ..........  U U U    6   6   2
-or        32  er    .     1000...010......  A+-DXWLdxI  U U U    6   6   2
-or         8  re    .     1000...100......  A+-DXWL...  U U U    8   8   4
-or        16  re    .     1000...101......  A+-DXWL...  U U U    8   8   4
-or        32  re    .     1000...110......  A+-DXWL...  U U U   12  12   4
-ori        8  toc   .     0000000000111100  ..........  U U U   20  16  12
-ori       16  tos   .     0000000001111100  ..........  S S S   20  16  12
-ori        8  .     d     0000000000000...  ..........  U U U    8   8   2
-ori        8  .     .     0000000000......  A+-DXWL...  U U U   12  12   4
-ori       16  .     d     0000000001000...  ..........  U U U    8   8   2
-ori       16  .     .     0000000001......  A+-DXWL...  U U U   12  12   4
-ori       32  .     d     0000000010000...  ..........  U U U   16  14   2
-ori       32  .     .     0000000010......  A+-DXWL...  U U U   20  20   4
-pack      16  rr    .     1000...101000...  ..........  . . U    .   .   6
-pack      16  mm    ax7   1000111101001...  ..........  . . U    .   .  13
-pack      16  mm    ay7   1000...101001111  ..........  . . U    .   .  13
-pack      16  mm    axy7  1000111101001111  ..........  . . U    .   .  13
-pack      16  mm    .     1000...101001...  ..........  . . U    .   .  13
-pea       32  .     .     0100100001......  A..DXWLdx.  U U U    6   6   5
-reset      0  .     .     0100111001110000  ..........  S S S    0   0   0
-ror        8  s     .     1110...000011...  ..........  U U U    6   6   8
-ror       16  s     .     1110...001011...  ..........  U U U    6   6   8
-ror       32  s     .     1110...010011...  ..........  U U U    8   8   8
-ror        8  r     .     1110...000111...  ..........  U U U    6   6   8
-ror       16  r     .     1110...001111...  ..........  U U U    6   6   8
-ror       32  r     .     1110...010111...  ..........  U U U    8   8   8
-ror       16  .     .     1110011011......  A+-DXWL...  U U U    8   8   7
-rol        8  s     .     1110...100011...  ..........  U U U    6   6   8
-rol       16  s     .     1110...101011...  ..........  U U U    6   6   8
-rol       32  s     .     1110...110011...  ..........  U U U    8   8   8
-rol        8  r     .     1110...100111...  ..........  U U U    6   6   8
-rol       16  r     .     1110...101111...  ..........  U U U    6   6   8
-rol       32  r     .     1110...110111...  ..........  U U U    8   8   8
-rol       16  .     .     1110011111......  A+-DXWL...  U U U    8   8   7
-roxr       8  s     .     1110...000010...  ..........  U U U    6   6  12
-roxr      16  s     .     1110...001010...  ..........  U U U    6   6  12
-roxr      32  s     .     1110...010010...  ..........  U U U    8   8  12
-roxr       8  r     .     1110...000110...  ..........  U U U    6   6  12
-roxr      16  r     .     1110...001110...  ..........  U U U    6   6  12
-roxr      32  r     .     1110...010110...  ..........  U U U    8   8  12
-roxr      16  .     .     1110010011......  A+-DXWL...  U U U    8   8   5
-roxl       8  s     .     1110...100010...  ..........  U U U    6   6  12
-roxl      16  s     .     1110...101010...  ..........  U U U    6   6  12
-roxl      32  s     .     1110...110010...  ..........  U U U    8   8  12
-roxl       8  r     .     1110...100110...  ..........  U U U    6   6  12
-roxl      16  r     .     1110...101110...  ..........  U U U    6   6  12
-roxl      32  r     .     1110...110110...  ..........  U U U    8   8  12
-roxl      16  .     .     1110010111......  A+-DXWL...  U U U    8   8   5
-rtd       32  .     .     0100111001110100  ..........  . U U    .  16  10
-rte       32  .     .     0100111001110011  ..........  S S S   20  24  20  bus fault not emulated
-rtm       32  .     .     000001101100....  ..........  . . U    .   .  19  not properly emulated
-rtr       32  .     .     0100111001110111  ..........  U U U   20  20  14
-rts       32  .     .     0100111001110101  ..........  U U U   16  16  10
-sbcd       8  rr    .     1000...100000...  ..........  U U U    6   6   4
-sbcd       8  mm    ax7   1000111100001...  ..........  U U U   18  18  16
-sbcd       8  mm    ay7   1000...100001111  ..........  U U U   18  18  16
-sbcd       8  mm    axy7  1000111100001111  ..........  U U U   18  18  16
-sbcd       8  mm    .     1000...100001...  ..........  U U U   18  18  16
-st         8  .     d     0101000011000...  ..........  U U U    6   4   4
-st         8  .     .     0101000011......  A+-DXWL...  U U U    8   8   6
-sf         8  .     d     0101000111000...  ..........  U U U    4   4   4
-sf         8  .     .     0101000111......  A+-DXWL...  U U U    8   8   6
-scc        8  .     d     0101....11000...  ..........  U U U    4   4   4
-scc        8  .     .     0101....11......  A+-DXWL...  U U U    8   8   6
-stop       0  .     .     0100111001110010  ..........  S S S    4   4   8
-sub        8  er    d     1001...000000...  ..........  U U U    4   4   2
-sub        8  er    .     1001...000......  A+-DXWLdxI  U U U    4   4   2
-sub       16  er    d     1001...001000...  ..........  U U U    4   4   2
-sub       16  er    a     1001...001001...  ..........  U U U    4   4   2
-sub       16  er    .     1001...001......  A+-DXWLdxI  U U U    4   4   2
-sub       32  er    d     1001...010000...  ..........  U U U    6   6   2
-sub       32  er    a     1001...010001...  ..........  U U U    6   6   2
-sub       32  er    .     1001...010......  A+-DXWLdxI  U U U    6   6   2
-sub        8  re    .     1001...100......  A+-DXWL...  U U U    8   8   4
-sub       16  re    .     1001...101......  A+-DXWL...  U U U    8   8   4
-sub       32  re    .     1001...110......  A+-DXWL...  U U U   12  12   4
-suba      16  .     d     1001...011000...  ..........  U U U    8   8   2
-suba      16  .     a     1001...011001...  ..........  U U U    8   8   2
-suba      16  .     .     1001...011......  A+-DXWLdxI  U U U    8   8   2
-suba      32  .     d     1001...111000...  ..........  U U U    6   6   2
-suba      32  .     a     1001...111001...  ..........  U U U    6   6   2
-suba      32  .     .     1001...111......  A+-DXWLdxI  U U U    6   6   2
-subi       8  .     d     0000010000000...  ..........  U U U    8   8   2
-subi       8  .     .     0000010000......  A+-DXWL...  U U U   12  12   4
-subi      16  .     d     0000010001000...  ..........  U U U    8   8   2
-subi      16  .     .     0000010001......  A+-DXWL...  U U U   12  12   4
-subi      32  .     d     0000010010000...  ..........  U U U   16  14   2
-subi      32  .     .     0000010010......  A+-DXWL...  U U U   20  20   4
-subq       8  .     d     0101...100000...  ..........  U U U    4   4   2
-subq       8  .     .     0101...100......  A+-DXWL...  U U U    8   8   4
-subq      16  .     d     0101...101000...  ..........  U U U    4   4   2
-subq      16  .     a     0101...101001...  ..........  U U U    8   4   2
-subq      16  .     .     0101...101......  A+-DXWL...  U U U    8   8   4
-subq      32  .     d     0101...110000...  ..........  U U U    8   8   2
-subq      32  .     a     0101...110001...  ..........  U U U    8   8   2
-subq      32  .     .     0101...110......  A+-DXWL...  U U U   12  12   4
-subx       8  rr    .     1001...100000...  ..........  U U U    4   4   2
-subx      16  rr    .     1001...101000...  ..........  U U U    4   4   2
-subx      32  rr    .     1001...110000...  ..........  U U U    8   6   2
-subx       8  mm    ax7   1001111100001...  ..........  U U U   18  18  12
-subx       8  mm    ay7   1001...100001111  ..........  U U U   18  18  12
-subx       8  mm    axy7  1001111100001111  ..........  U U U   18  18  12
-subx       8  mm    .     1001...100001...  ..........  U U U   18  18  12
-subx      16  mm    .     1001...101001...  ..........  U U U   18  18  12
-subx      32  mm    .     1001...110001...  ..........  U U U   30  30  12
-swap      32  .     .     0100100001000...  ..........  U U U    4   4   4
-tas        8  .     d     0100101011000...  ..........  U U U    4   4   4
-tas        8  .     .     0100101011......  A+-DXWL...  U U U   14  14  12
-trap       0  .     .     010011100100....  ..........  U U U    4   4   4
-trapt      0  .     .     0101000011111100  ..........  . . U    .   .   4
-trapt     16  .     .     0101000011111010  ..........  . . U    .   .   6
-trapt     32  .     .     0101000011111011  ..........  . . U    .   .   8
-trapf      0  .     .     0101000111111100  ..........  . . U    .   .   4
-trapf     16  .     .     0101000111111010  ..........  . . U    .   .   6
-trapf     32  .     .     0101000111111011  ..........  . . U    .   .   8
-trapcc     0  .     .     0101....11111100  ..........  . . U    .   .   4
-trapcc    16  .     .     0101....11111010  ..........  . . U    .   .   6
-trapcc    32  .     .     0101....11111011  ..........  . . U    .   .   8
-trapv      0  .     .     0100111001110110  ..........  U U U    4   4   4
-tst        8  .     d     0100101000000...  ..........  U U U    4   4   2
-tst        8  .     .     0100101000......  A+-DXWL...  U U U    4   4   2
-tst        8  .     pcdi  0100101000111010  ..........  . . U    .   .   7
-tst        8  .     pcix  0100101000111011  ..........  . . U    .   .   9
-tst        8  .     i     0100101000111100  ..........  . . U    .   .   6
-tst       16  .     d     0100101001000...  ..........  U U U    4   4   2
-tst       16  .     a     0100101001001...  ..........  . . U    .   .   2
-tst       16  .     .     0100101001......  A+-DXWL...  U U U    4   4   2
-tst       16  .     pcdi  0100101001111010  ..........  . . U    .   .   7
-tst       16  .     pcix  0100101001111011  ..........  . . U    .   .   9
-tst       16  .     i     0100101001111100  ..........  . . U    .   .   6
-tst       32  .     d     0100101010000...  ..........  U U U    4   4   2
-tst       32  .     a     0100101010001...  ..........  . . U    .   .   2
-tst       32  .     .     0100101010......  A+-DXWL...  U U U    4   4   2
-tst       32  .     pcdi  0100101010111010  ..........  . . U    .   .   7
-tst       32  .     pcix  0100101010111011  ..........  . . U    .   .   9
-tst       32  .     i     0100101010111100  ..........  . . U    .   .   6
-unlk      32  .     a7    0100111001011111  ..........  U U U   12  12   6
-unlk      32  .     .     0100111001011...  ..........  U U U   12  12   6
-unpk      16  rr    .     1000...110000...  ..........  . . U    .   .   8
-unpk      16  mm    ax7   1000111110001...  ..........  . . U    .   .  13
-unpk      16  mm    ay7   1000...110001111  ..........  . . U    .   .  13
-unpk      16  mm    axy7  1000111110001111  ..........  . . U    .   .  13
-unpk      16  mm    .     1000...110001...  ..........  . . U    .   .  13
+1010       0  .     .     1010............  ..........  U U U U U   4   4   4   4   4
+1111       0  .     .     1111............  ..........  U U U U U   4   4   4   4   4
+040fpu0   32  .     .     11110010........  ..........  . . . . U   .   .   .   .   0
+040fpu1   32  .     .     11110011........  ..........  . . . . U   .   .   .   .   0
+abcd       8  rr    .     1100...100000...  ..........  U U U U U   6   6   4   4   4
+abcd       8  mm    ax7   1100111100001...  ..........  U U U U U  18  18  16  16  16
+abcd       8  mm    ay7   1100...100001111  ..........  U U U U U  18  18  16  16  16
+abcd       8  mm    axy7  1100111100001111  ..........  U U U U U  18  18  16  16  16
+abcd       8  mm    .     1100...100001...  ..........  U U U U U  18  18  16  16  16
+add        8  er    d     1101...000000...  ..........  U U U U U   4   4   2   2   2
+add        8  er    .     1101...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+add       16  er    d     1101...001000...  ..........  U U U U U   4   4   2   2   2
+add       16  er    a     1101...001001...  ..........  U U U U U   4   4   2   2   2
+add       16  er    .     1101...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+add       32  er    d     1101...010000...  ..........  U U U U U   6   6   2   2   2
+add       32  er    a     1101...010001...  ..........  U U U U U   6   6   2   2   2
+add       32  er    .     1101...010......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+add        8  re    .     1101...100......  A+-DXWL...  U U U U U   8   8   4   4   4
+add       16  re    .     1101...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+add       32  re    .     1101...110......  A+-DXWL...  U U U U U  12  12   4   4   4
+adda      16  .     d     1101...011000...  ..........  U U U U U   8   8   2   2   2
+adda      16  .     a     1101...011001...  ..........  U U U U U   8   8   2   2   2
+adda      16  .     .     1101...011......  A+-DXWLdxI  U U U U U   8   8   2   2   2
+adda      32  .     d     1101...111000...  ..........  U U U U U   6   6   2   2   2
+adda      32  .     a     1101...111001...  ..........  U U U U U   6   6   2   2   2
+adda      32  .     .     1101...111......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+addi       8  .     d     0000011000000...  ..........  U U U U U   8   8   2   2   2
+addi       8  .     .     0000011000......  A+-DXWL...  U U U U U  12  12   4   4   4
+addi      16  .     d     0000011001000...  ..........  U U U U U   8   8   2   2   2
+addi      16  .     .     0000011001......  A+-DXWL...  U U U U U  12  12   4   4   4
+addi      32  .     d     0000011010000...  ..........  U U U U U  16  14   2   2   2
+addi      32  .     .     0000011010......  A+-DXWL...  U U U U U  20  20   4   4   4
+addq       8  .     d     0101...000000...  ..........  U U U U U   4   4   2   2   2
+addq       8  .     .     0101...000......  A+-DXWL...  U U U U U   8   8   4   4   4
+addq      16  .     d     0101...001000...  ..........  U U U U U   4   4   2   2   2
+addq      16  .     a     0101...001001...  ..........  U U U U U   4   4   2   2   2
+addq      16  .     .     0101...001......  A+-DXWL...  U U U U U   8   8   4   4   4
+addq      32  .     d     0101...010000...  ..........  U U U U U   8   8   2   2   2
+addq      32  .     a     0101...010001...  ..........  U U U U U   8   8   2   2   2
+addq      32  .     .     0101...010......  A+-DXWL...  U U U U U  12  12   4   4   4
+addx       8  rr    .     1101...100000...  ..........  U U U U U   4   4   2   2   2
+addx      16  rr    .     1101...101000...  ..........  U U U U U   4   4   2   2   2
+addx      32  rr    .     1101...110000...  ..........  U U U U U   8   6   2   2   2
+addx       8  mm    ax7   1101111100001...  ..........  U U U U U  18  18  12  12  12
+addx       8  mm    ay7   1101...100001111  ..........  U U U U U  18  18  12  12  12
+addx       8  mm    axy7  1101111100001111  ..........  U U U U U  18  18  12  12  12
+addx       8  mm    .     1101...100001...  ..........  U U U U U  18  18  12  12  12
+addx      16  mm    .     1101...101001...  ..........  U U U U U  18  18  12  12  12
+addx      32  mm    .     1101...110001...  ..........  U U U U U  30  30  12  12  12
+and        8  er    d     1100...000000...  ..........  U U U U U   4   4   2   2   2
+and        8  er    .     1100...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+and       16  er    d     1100...001000...  ..........  U U U U U   4   4   2   2   2
+and       16  er    .     1100...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+and       32  er    d     1100...010000...  ..........  U U U U U   6   6   2   2   2
+and       32  er    .     1100...010......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+and        8  re    .     1100...100......  A+-DXWL...  U U U U U   8   8   4   4   4
+and       16  re    .     1100...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+and       32  re    .     1100...110......  A+-DXWL...  U U U U U  12  12   4   4   4
+andi      16  toc   .     0000001000111100  ..........  U U U U U  20  16  12  12  12
+andi      16  tos   .     0000001001111100  ..........  S S S S S  20  16  12  12  12
+andi       8  .     d     0000001000000...  ..........  U U U U U   8   8   2   2   2
+andi       8  .     .     0000001000......  A+-DXWL...  U U U U U  12  12   4   4   4
+andi      16  .     d     0000001001000...  ..........  U U U U U   8   8   2   2   2
+andi      16  .     .     0000001001......  A+-DXWL...  U U U U U  12  12   4   4   4
+andi      32  .     d     0000001010000...  ..........  U U U U U  14  14   2   2   2
+andi      32  .     .     0000001010......  A+-DXWL...  U U U U U  20  20   4   4   4
+asr        8  s     .     1110...000000...  ..........  U U U U U   6   6   6   6   6
+asr       16  s     .     1110...001000...  ..........  U U U U U   6   6   6   6   6
+asr       32  s     .     1110...010000...  ..........  U U U U U   8   8   6   6   6
+asr        8  r     .     1110...000100...  ..........  U U U U U   6   6   6   6   6
+asr       16  r     .     1110...001100...  ..........  U U U U U   6   6   6   6   6
+asr       32  r     .     1110...010100...  ..........  U U U U U   8   8   6   6   6
+asr       16  .     .     1110000011......  A+-DXWL...  U U U U U   8   8   5   5   5
+asl        8  s     .     1110...100000...  ..........  U U U U U   6   6   8   8   8
+asl       16  s     .     1110...101000...  ..........  U U U U U   6   6   8   8   8
+asl       32  s     .     1110...110000...  ..........  U U U U U   8   8   8   8   8
+asl        8  r     .     1110...100100...  ..........  U U U U U   6   6   8   8   8
+asl       16  r     .     1110...101100...  ..........  U U U U U   6   6   8   8   8
+asl       32  r     .     1110...110100...  ..........  U U U U U   8   8   8   8   8
+asl       16  .     .     1110000111......  A+-DXWL...  U U U U U   8   8   6   6   6
+bcc        8  .     .     0110............  ..........  U U U U U  10  10   6   6   6
+bcc       16  .     .     0110....00000000  ..........  U U U U U  10  10   6   6   6
+bcc       32  .     .     0110....11111111  ..........  U U U U U  10  10   6   6   6
+bchg       8  r     .     0000...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+bchg      32  r     d     0000...101000...  ..........  U U U U U   8   8   4   4   4
+bchg       8  s     .     0000100001......  A+-DXWL...  U U U U U  12  12   4   4   4
+bchg      32  s     d     0000100001000...  ..........  U U U U U  12  12   4   4   4
+bclr       8  r     .     0000...110......  A+-DXWL...  U U U U U   8  10   4   4   4
+bclr      32  r     d     0000...110000...  ..........  U U U U U  10  10   4   4   4
+bclr       8  s     .     0000100010......  A+-DXWL...  U U U U U  12  12   4   4   4
+bclr      32  s     d     0000100010000...  ..........  U U U U U  14  14   4   4   4
+bfchg     32  .     d     1110101011000...  ..........  . . U U U   .   .  12  12  12  timing not quite correct
+bfchg     32  .     .     1110101011......  A..DXWL...  . . U U U   .   .  20  20  20
+bfclr     32  .     d     1110110011000...  ..........  . . U U U   .   .  12  12  12
+bfclr     32  .     .     1110110011......  A..DXWL...  . . U U U   .   .  20  20  20
+bfexts    32  .     d     1110101111000...  ..........  . . U U U   .   .   8   8   8
+bfexts    32  .     .     1110101111......  A..DXWLdx.  . . U U U   .   .  15  15  15
+bfextu    32  .     d     1110100111000...  ..........  . . U U U   .   .   8   8   8
+bfextu    32  .     .     1110100111......  A..DXWLdx.  . . U U U   .   .  15  15  15
+bfffo     32  .     d     1110110111000...  ..........  . . U U U   .   .  18  18  18
+bfffo     32  .     .     1110110111......  A..DXWLdx.  . . U U U   .   .  28  28  28
+bfins     32  .     d     1110111111000...  ..........  . . U U U   .   .  10  10  10
+bfins     32  .     .     1110111111......  A..DXWL...  . . U U U   .   .  17  17  17
+bfset     32  .     d     1110111011000...  ..........  . . U U U   .   .  12  12  12
+bfset     32  .     .     1110111011......  A..DXWL...  . . U U U   .   .  20  20  20
+bftst     32  .     d     1110100011000...  ..........  . . U U U   .   .   6   6   6
+bftst     32  .     .     1110100011......  A..DXWLdx.  . . U U U   .   .  13  13  13
+bkpt       0  .     .     0100100001001...  ..........  . U U U U   .  10  10  10  10
+bra        8  .     .     01100000........  ..........  U U U U U  10  10  10  10  10
+bra       16  .     .     0110000000000000  ..........  U U U U U  10  10  10  10  10
+bra       32  .     .     0110000011111111  ..........  U U U U U  10  10  10  10  10
+bset      32  r     d     0000...111000...  ..........  U U U U U   8   8   4   4   4
+bset       8  r     .     0000...111......  A+-DXWL...  U U U U U   8   8   4   4   4
+bset       8  s     .     0000100011......  A+-DXWL...  U U U U U  12  12   4   4   4
+bset      32  s     d     0000100011000...  ..........  U U U U U  12  12   4   4   4
+bsr        8  .     .     01100001........  ..........  U U U U U  18  18   7   7   7
+bsr       16  .     .     0110000100000000  ..........  U U U U U  18  18   7   7   7
+bsr       32  .     .     0110000111111111  ..........  U U U U U  18  18   7   7   7
+btst       8  r     .     0000...100......  A+-DXWLdxI  U U U U U   4   4   4   4   4
+btst      32  r     d     0000...100000...  ..........  U U U U U   6   6   4   4   4
+btst       8  s     .     0000100000......  A+-DXWLdx.  U U U U U   8   8   4   4   4
+btst      32  s     d     0000100000000...  ..........  U U U U U  10  10   4   4   4
+callm     32  .     .     0000011011......  A..DXWLdx.  . . U U U   .   .  60  60  60  not properly emulated
+cas        8  .     .     0000101011......  A+-DXWL...  . . U U U   .   .  12  12  12
+cas       16  .     .     0000110011......  A+-DXWL...  . . U U U   .   .  12  12  12
+cas       32  .     .     0000111011......  A+-DXWL...  . . U U U   .   .  12  12  12
+cas2      16  .     .     0000110011111100  ..........  . . U U U   .   .  12  12  12
+cas2      32  .     .     0000111011111100  ..........  . . U U U   .   .  12  12  12
+chk       16  .     d     0100...110000...  ..........  U U U U U  10   8   8   8   8
+chk       16  .     .     0100...110......  A+-DXWLdxI  U U U U U  10   8   8   8   8
+chk       32  .     d     0100...100000...  ..........  . . U U U   .   .   8   8   8
+chk       32  .     .     0100...100......  A+-DXWLdxI  . . U U U   .   .   8   8   8
+chk2cmp2   8  .     pcdi  0000000011111010  ..........  . . U U U   .   .  23  23  23
+chk2cmp2   8  .     pcix  0000000011111011  ..........  . . U U U   .   .  23  23  23
+chk2cmp2   8  .     .     0000000011......  A..DXWL...  . . U U U   .   .  18  18  18
+chk2cmp2  16  .     pcdi  0000001011111010  ..........  . . U U U   .   .  23  23  23
+chk2cmp2  16  .     pcix  0000001011111011  ..........  . . U U U   .   .  23  23  23
+chk2cmp2  16  .     .     0000001011......  A..DXWL...  . . U U U   .   .  18  18  18
+chk2cmp2  32  .     pcdi  0000010011111010  ..........  . . U U U   .   .  23  23  23
+chk2cmp2  32  .     pcix  0000010011111011  ..........  . . U U U   .   .  23  23  23
+chk2cmp2  32  .     .     0000010011......  A..DXWL...  . . U U U   .   .  18  18  18
+clr        8  .     d     0100001000000...  ..........  U U U U U   4   4   2   2   2
+clr        8  .     .     0100001000......  A+-DXWL...  U U U U U   8   4   4   4   4
+clr       16  .     d     0100001001000...  ..........  U U U U U   4   4   2   2   2
+clr       16  .     .     0100001001......  A+-DXWL...  U U U U U   8   4   4   4   4
+clr       32  .     d     0100001010000...  ..........  U U U U U   6   6   2   2   2
+clr       32  .     .     0100001010......  A+-DXWL...  U U U U U  12   6   4   4   4
+cmp        8  .     d     1011...000000...  ..........  U U U U U   4   4   2   2   2
+cmp        8  .     .     1011...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+cmp       16  .     d     1011...001000...  ..........  U U U U U   4   4   2   2   2
+cmp       16  .     a     1011...001001...  ..........  U U U U U   4   4   2   2   2
+cmp       16  .     .     1011...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+cmp       32  .     d     1011...010000...  ..........  U U U U U   6   6   2   2   2
+cmp       32  .     a     1011...010001...  ..........  U U U U U   6   6   2   2   2
+cmp       32  .     .     1011...010......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+cmpa      16  .     d     1011...011000...  ..........  U U U U U   6   6   4   4   4
+cmpa      16  .     a     1011...011001...  ..........  U U U U U   6   6   4   4   4
+cmpa      16  .     .     1011...011......  A+-DXWLdxI  U U U U U   6   6   4   4   4
+cmpa      32  .     d     1011...111000...  ..........  U U U U U   6   6   4   4   4
+cmpa      32  .     a     1011...111001...  ..........  U U U U U   6   6   4   4   4
+cmpa      32  .     .     1011...111......  A+-DXWLdxI  U U U U U   6   6   4   4   4
+cmpi       8  .     d     0000110000000...  ..........  U U U U U   8   8   2   2   2
+cmpi       8  .     .     0000110000......  A+-DXWL...  U U U U U   8   8   2   2   2
+cmpi       8  .     pcdi  0000110000111010  ..........  . . U U U   .   .   7   7   7
+cmpi       8  .     pcix  0000110000111011  ..........  . . U U U   .   .   9   9   9
+cmpi      16  .     d     0000110001000...  ..........  U U U U U   8   8   2   2   2
+cmpi      16  .     .     0000110001......  A+-DXWL...  U U U U U   8   8   2   2   2
+cmpi      16  .     pcdi  0000110001111010  ..........  . . U U U   .   .   7   7   7
+cmpi      16  .     pcix  0000110001111011  ..........  . . U U U   .   .   9   9   9
+cmpi      32  .     d     0000110010000...  ..........  U U U U U  14  12   2   2   2
+cmpi      32  .     .     0000110010......  A+-DXWL...  U U U U U  12  12   2   2   2
+cmpi      32  .     pcdi  0000110010111010  ..........  . . U U U   .   .   7   7   7
+cmpi      32  .     pcix  0000110010111011  ..........  . . U U U   .   .   9   9   9
+cmpm       8  .     ax7   1011111100001...  ..........  U U U U U  12  12   9   9   9
+cmpm       8  .     ay7   1011...100001111  ..........  U U U U U  12  12   9   9   9
+cmpm       8  .     axy7  1011111100001111  ..........  U U U U U  12  12   9   9   9
+cmpm       8  .     .     1011...100001...  ..........  U U U U U  12  12   9   9   9
+cmpm      16  .     .     1011...101001...  ..........  U U U U U  12  12   9   9   9
+cmpm      32  .     .     1011...110001...  ..........  U U U U U  20  20   9   9   9
+cpbcc     32  .     .     1111...01.......  ..........  . . U U .   .   .   4   4   .  unemulated
+cpdbcc    32  .     .     1111...001001...  ..........  . . U U .   .   .   4   4   .  unemulated
+cpgen     32  .     .     1111...000......  ..........  . . U U .   .   .   4   4   .  unemulated
+cpscc     32  .     .     1111...001......  ..........  . . U U .   .   .   4   4   .  unemulated
+cptrapcc  32  .     .     1111...001111...  ..........  . . U U .   .   .   4   4   .  unemulated
+dbt       16  .     .     0101000011001...  ..........  U U U U U  12  12   6   6   6
+dbf       16  .     .     0101000111001...  ..........  U U U U U  12  12   6   6   6
+dbcc      16  .     .     0101....11001...  ..........  U U U U U  12  12   6   6   6
+divs      16  .     d     1000...111000...  ..........  U U U U U 158 122  56  56  56
+divs      16  .     .     1000...111......  A+-DXWLdxI  U U U U U 158 122  56  56  56
+divu      16  .     d     1000...011000...  ..........  U U U U U 140 108  44  44  44
+divu      16  .     .     1000...011......  A+-DXWLdxI  U U U U U 140 108  44  44  44
+divl      32  .     d     0100110001000...  ..........  . . U U U   .   .  84  84  84
+divl      32  .     .     0100110001......  A+-DXWLdxI  . . U U U   .   .  84  84  84
+eor        8  .     d     1011...100000...  ..........  U U U U U   4   4   2   2   2
+eor        8  .     .     1011...100......  A+-DXWL...  U U U U U   8   8   4   4   4
+eor       16  .     d     1011...101000...  ..........  U U U U U   4   4   2   2   2
+eor       16  .     .     1011...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+eor       32  .     d     1011...110000...  ..........  U U U U U   8   6   2   2   2
+eor       32  .     .     1011...110......  A+-DXWL...  U U U U U  12  12   4   4   4
+eori      16  toc   .     0000101000111100  ..........  U U U U U  20  16  12  12  12
+eori      16  tos   .     0000101001111100  ..........  S S S S S  20  16  12  12  12
+eori       8  .     d     0000101000000...  ..........  U U U U U   8   8   2   2   2
+eori       8  .     .     0000101000......  A+-DXWL...  U U U U U  12  12   4   4   4
+eori      16  .     d     0000101001000...  ..........  U U U U U   8   8   2   2   2
+eori      16  .     .     0000101001......  A+-DXWL...  U U U U U  12  12   4   4   4
+eori      32  .     d     0000101010000...  ..........  U U U U U  16  14   2   2   2
+eori      32  .     .     0000101010......  A+-DXWL...  U U U U U  20  20   4   4   4
+exg       32  dd    .     1100...101000...  ..........  U U U U U   6   6   2   2   2
+exg       32  aa    .     1100...101001...  ..........  U U U U U   6   6   2   2   2
+exg       32  da    .     1100...110001...  ..........  U U U U U   6   6   2   2   2
+ext       16  .     .     0100100010000...  ..........  U U U U U   4   4   4   4   4
+ext       32  .     .     0100100011000...  ..........  U U U U U   4   4   4   4   4
+extb      32  .     .     0100100111000...  ..........  . . U U U   .   .   4   4   4
+illegal    0  .     .     0100101011111100  ..........  U U U U U   4   4   4   4   4
+jmp       32  .     .     0100111011......  A..DXWLdx.  U U U U U   4   4   0   0   0
+jsr       32  .     .     0100111010......  A..DXWLdx.  U U U U U  12  12   0   0   0
+lea       32  .     .     0100...111......  A..DXWLdx.  U U U U U   0   0   2   2   2
+link      16  .     a7    0100111001010111  ..........  U U U U U  16  16   5   5   5
+link      16  .     .     0100111001010...  ..........  U U U U U  16  16   5   5   5
+link      32  .     a7    0100100000001111  ..........  . . U U U   .   .   6   6   6
+link      32  .     .     0100100000001...  ..........  . . U U U   .   .   6   6   6
+lsr        8  s     .     1110...000001...  ..........  U U U U U   6   6   4   4   4
+lsr       16  s     .     1110...001001...  ..........  U U U U U   6   6   4   4   4
+lsr       32  s     .     1110...010001...  ..........  U U U U U   8   8   4   4   4
+lsr        8  r     .     1110...000101...  ..........  U U U U U   6   6   6   6   6
+lsr       16  r     .     1110...001101...  ..........  U U U U U   6   6   6   6   6
+lsr       32  r     .     1110...010101...  ..........  U U U U U   8   8   6   6   6
+lsr       16  .     .     1110001011......  A+-DXWL...  U U U U U   8   8   5   5   5
+lsl        8  s     .     1110...100001...  ..........  U U U U U   6   6   4   4   4
+lsl       16  s     .     1110...101001...  ..........  U U U U U   6   6   4   4   4
+lsl       32  s     .     1110...110001...  ..........  U U U U U   8   8   4   4   4
+lsl        8  r     .     1110...100101...  ..........  U U U U U   6   6   6   6   6
+lsl       16  r     .     1110...101101...  ..........  U U U U U   6   6   6   6   6
+lsl       32  r     .     1110...110101...  ..........  U U U U U   8   8   6   6   6
+lsl       16  .     .     1110001111......  A+-DXWL...  U U U U U   8   8   5   5   5
+move       8  d     d     0001...000000...  ..........  U U U U U   4   4   2   2   2
+move       8  d     .     0001...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+move       8  ai    d     0001...010000...  ..........  U U U U U   8   8   4   4   4
+move       8  ai    .     0001...010......  A+-DXWLdxI  U U U U U   8   8   4   4   4
+move       8  pi    d     0001...011000...  ..........  U U U U U   8   8   4   4   4
+move       8  pi    .     0001...011......  A+-DXWLdxI  U U U U U   8   8   4   4   4
+move       8  pi7   d     0001111011000...  ..........  U U U U U   8   8   4   4   4
+move       8  pi7   .     0001111011......  A+-DXWLdxI  U U U U U   8   8   4   4   4
+move       8  pd    d     0001...100000...  ..........  U U U U U   8   8   5   5   5
+move       8  pd    .     0001...100......  A+-DXWLdxI  U U U U U   8   8   5   5   5
+move       8  pd7   d     0001111100000...  ..........  U U U U U   8   8   5   5   5
+move       8  pd7   .     0001111100......  A+-DXWLdxI  U U U U U   8   8   5   5   5
+move       8  di    d     0001...101000...  ..........  U U U U U  12  12   5   5   5
+move       8  di    .     0001...101......  A+-DXWLdxI  U U U U U  12  12   5   5   5
+move       8  ix    d     0001...110000...  ..........  U U U U U  14  14   7   7   7
+move       8  ix    .     0001...110......  A+-DXWLdxI  U U U U U  14  14   7   7   7
+move       8  aw    d     0001000111000...  ..........  U U U U U  12  12   4   4   4
+move       8  aw    .     0001000111......  A+-DXWLdxI  U U U U U  12  12   4   4   4
+move       8  al    d     0001001111000...  ..........  U U U U U  16  16   6   6   6
+move       8  al    .     0001001111......  A+-DXWLdxI  U U U U U  16  16   6   6   6
+move      16  d     d     0011...000000...  ..........  U U U U U   4   4   2   2   2
+move      16  d     a     0011...000001...  ..........  U U U U U   4   4   2   2   2
+move      16  d     .     0011...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+move      16  ai    d     0011...010000...  ..........  U U U U U   8   8   4   4   4
+move      16  ai    a     0011...010001...  ..........  U U U U U   8   8   4   4   4
+move      16  ai    .     0011...010......  A+-DXWLdxI  U U U U U   8   8   4   4   4
+move      16  pi    d     0011...011000...  ..........  U U U U U   8   8   4   4   4
+move      16  pi    a     0011...011001...  ..........  U U U U U   8   8   4   4   4
+move      16  pi    .     0011...011......  A+-DXWLdxI  U U U U U   8   8   4   4   4
+move      16  pd    d     0011...100000...  ..........  U U U U U   8   8   5   5   5
+move      16  pd    a     0011...100001...  ..........  U U U U U   8   8   5   5   5
+move      16  pd    .     0011...100......  A+-DXWLdxI  U U U U U   8   8   5   5   5
+move      16  di    d     0011...101000...  ..........  U U U U U  12  12   5   5   5
+move      16  di    a     0011...101001...  ..........  U U U U U  12  12   5   5   5
+move      16  di    .     0011...101......  A+-DXWLdxI  U U U U U  12  12   5   5   5
+move      16  ix    d     0011...110000...  ..........  U U U U U  14  14   7   7   7
+move      16  ix    a     0011...110001...  ..........  U U U U U  14  14   7   7   7
+move      16  ix    .     0011...110......  A+-DXWLdxI  U U U U U  14  14   7   7   7
+move      16  aw    d     0011000111000...  ..........  U U U U U  12  12   4   4   4
+move      16  aw    a     0011000111001...  ..........  U U U U U  12  12   4   4   4
+move      16  aw    .     0011000111......  A+-DXWLdxI  U U U U U  12  12   4   4   4
+move      16  al    d     0011001111000...  ..........  U U U U U  16  16   6   6   6
+move      16  al    a     0011001111001...  ..........  U U U U U  16  16   6   6   6
+move      16  al    .     0011001111......  A+-DXWLdxI  U U U U U  16  16   6   6   6
+move      32  d     d     0010...000000...  ..........  U U U U U   4   4   2   2   2
+move      32  d     a     0010...000001...  ..........  U U U U U   4   4   2   2   2
+move      32  d     .     0010...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+move      32  ai    d     0010...010000...  ..........  U U U U U  12  12   4   4   4
+move      32  ai    a     0010...010001...  ..........  U U U U U  12  12   4   4   4
+move      32  ai    .     0010...010......  A+-DXWLdxI  U U U U U  12  12   4   4   4
+move      32  pi    d     0010...011000...  ..........  U U U U U  12  12   4   4   4
+move      32  pi    a     0010...011001...  ..........  U U U U U  12  12   4   4   4
+move      32  pi    .     0010...011......  A+-DXWLdxI  U U U U U  12  12   4   4   4
+move      32  pd    d     0010...100000...  ..........  U U U U U  12  14   5   5   5
+move      32  pd    a     0010...100001...  ..........  U U U U U  12  14   5   5   5
+move      32  pd    .     0010...100......  A+-DXWLdxI  U U U U U  12  14   5   5   5
+move      32  di    d     0010...101000...  ..........  U U U U U  16  16   5   5   5
+move      32  di    a     0010...101001...  ..........  U U U U U  16  16   5   5   5
+move      32  di    .     0010...101......  A+-DXWLdxI  U U U U U  16  16   5   5   5
+move      32  ix    d     0010...110000...  ..........  U U U U U  18  18   7   7   7
+move      32  ix    a     0010...110001...  ..........  U U U U U  18  18   7   7   7
+move      32  ix    .     0010...110......  A+-DXWLdxI  U U U U U  18  18   7   7   7
+move      32  aw    d     0010000111000...  ..........  U U U U U  16  16   4   4   4
+move      32  aw    a     0010000111001...  ..........  U U U U U  16  16   4   4   4
+move      32  aw    .     0010000111......  A+-DXWLdxI  U U U U U  16  16   4   4   4
+move      32  al    d     0010001111000...  ..........  U U U U U  20  20   6   6   6
+move      32  al    a     0010001111001...  ..........  U U U U U  20  20   6   6   6
+move      32  al    .     0010001111......  A+-DXWLdxI  U U U U U  20  20   6   6   6
+movea     16  .     d     0011...001000...  ..........  U U U U U   4   4   2   2   2
+movea     16  .     a     0011...001001...  ..........  U U U U U   4   4   2   2   2
+movea     16  .     .     0011...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+movea     32  .     d     0010...001000...  ..........  U U U U U   4   4   2   2   2
+movea     32  .     a     0010...001001...  ..........  U U U U U   4   4   2   2   2
+movea     32  .     .     0010...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+move      16  frc   d     0100001011000...  ..........  . U U U U   .   4   4   4   4
+move      16  frc   .     0100001011......  A+-DXWL...  . U U U U   .   8   4   4   4
+move      16  toc   d     0100010011000...  ..........  U U U U U  12  12   4   4   4
+move      16  toc   .     0100010011......  A+-DXWLdxI  U U U U U  12  12   4   4   4
+move      16  frs   d     0100000011000...  ..........  U S S S S   6   4   8   8   8  U only for 000
+move      16  frs   .     0100000011......  A+-DXWL...  U S S S S   8   8   8   8   8  U only for 000
+move      16  tos   d     0100011011000...  ..........  S S S S S  12  12   8   8   8
+move      16  tos   .     0100011011......  A+-DXWLdxI  S S S S S  12  12   8   8   8
+move      32  fru   .     0100111001101...  ..........  S S S S S   4   6   2   2   2
+move      32  tou   .     0100111001100...  ..........  S S S S S   4   6   2   2   2
+movec     32  cr    .     0100111001111010  ..........  . S S S S   .  12   6   6   6
+movec     32  rc    .     0100111001111011  ..........  . S S S S   .  10  12  12  12
+movem     16  re    pd    0100100010100...  ..........  U U U U U   8   8   4   4   4
+movem     16  re    .     0100100010......  A..DXWL...  U U U U U   8   8   4   4   4
+movem     32  re    pd    0100100011100...  ..........  U U U U U   8   8   4   4   4
+movem     32  re    .     0100100011......  A..DXWL...  U U U U U   8   8   4   4   4
+movem     16  er    pi    0100110010011...  ..........  U U U U U  12  12   8   8   8
+movem     16  er    pcdi  0100110010111010  ..........  U U U U U  16  16   9   9   9
+movem     16  er    pcix  0100110010111011  ..........  U U U U U  18  18  11  11  11
+movem     16  er    .     0100110010......  A..DXWL...  U U U U U  12  12   8   8   8
+movem     32  er    pi    0100110011011...  ..........  U U U U U  12  12   8   8   8
+movem     32  er    pcdi  0100110011111010  ..........  U U U U U  16  16   9   9   9
+movem     32  er    pcix  0100110011111011  ..........  U U U U U  18  18  11  11  11
+movem     32  er    .     0100110011......  A..DXWL...  U U U U U  12  12   8   8   8
+movep     16  er    .     0000...100001...  ..........  U U U U U  16  16  12  12  12
+movep     32  er    .     0000...101001...  ..........  U U U U U  24  24  18  18  18
+movep     16  re    .     0000...110001...  ..........  U U U U U  16  16  11  11  11
+movep     32  re    .     0000...111001...  ..........  U U U U U  24  24  17  17  17
+moveq     32  .     .     0111...0........  ..........  U U U U U   4   4   2   2   2
+moves      8  .     .     0000111000......  A+-DXWL...  . S S S S   .  14   5   5   5
+moves     16  .     .     0000111001......  A+-DXWL...  . S S S S   .  14   5   5   5
+moves     32  .     .     0000111010......  A+-DXWL...  . S S S S   .  16   5   5   5
+move16    32  .     .     1111011000100...  ..........  . . . . U   .   .   .   .   4  TODO: correct timing
+muls      16  .     d     1100...111000...  ..........  U U U U U  54  32  27  27  27
+muls      16  .     .     1100...111......  A+-DXWLdxI  U U U U U  54  32  27  27  27
+mulu      16  .     d     1100...011000...  ..........  U U U U U  54  30  27  27  27
+mulu      16  .     .     1100...011......  A+-DXWLdxI  U U U U U  54  30  27  27  27
+mull      32  .     d     0100110000000...  ..........  . . U U U   .   .  43  43  43
+mull      32  .     .     0100110000......  A+-DXWLdxI  . . U U U   .   .  43  43  43
+nbcd       8  .     d     0100100000000...  ..........  U U U U U   6   6   6   6   6
+nbcd       8  .     .     0100100000......  A+-DXWL...  U U U U U   8   8   6   6   6
+neg        8  .     d     0100010000000...  ..........  U U U U U   4   4   2   2   2
+neg        8  .     .     0100010000......  A+-DXWL...  U U U U U   8   8   4   4   4
+neg       16  .     d     0100010001000...  ..........  U U U U U   4   4   2   2   2
+neg       16  .     .     0100010001......  A+-DXWL...  U U U U U   8   8   4   4   4
+neg       32  .     d     0100010010000...  ..........  U U U U U   6   6   2   2   2
+neg       32  .     .     0100010010......  A+-DXWL...  U U U U U  12  12   4   4   4
+negx       8  .     d     0100000000000...  ..........  U U U U U   4   4   2   2   2
+negx       8  .     .     0100000000......  A+-DXWL...  U U U U U   8   8   4   4   4
+negx      16  .     d     0100000001000...  ..........  U U U U U   4   4   2   2   2
+negx      16  .     .     0100000001......  A+-DXWL...  U U U U U   8   8   4   4   4
+negx      32  .     d     0100000010000...  ..........  U U U U U   6   6   2   2   2
+negx      32  .     .     0100000010......  A+-DXWL...  U U U U U  12  12   4   4   4
+nop        0  .     .     0100111001110001  ..........  U U U U U   4   4   2   2   2
+not        8  .     d     0100011000000...  ..........  U U U U U   4   4   2   2   2
+not        8  .     .     0100011000......  A+-DXWL...  U U U U U   8   8   4   4   4
+not       16  .     d     0100011001000...  ..........  U U U U U   4   4   2   2   2
+not       16  .     .     0100011001......  A+-DXWL...  U U U U U   8   8   4   4   4
+not       32  .     d     0100011010000...  ..........  U U U U U   6   6   2   2   2
+not       32  .     .     0100011010......  A+-DXWL...  U U U U U  12  12   4   4   4
+or         8  er    d     1000...000000...  ..........  U U U U U   4   4   2   2   2
+or         8  er    .     1000...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+or        16  er    d     1000...001000...  ..........  U U U U U   4   4   2   2   2
+or        16  er    .     1000...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+or        32  er    d     1000...010000...  ..........  U U U U U   6   6   2   2   2
+or        32  er    .     1000...010......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+or         8  re    .     1000...100......  A+-DXWL...  U U U U U   8   8   4   4   4
+or        16  re    .     1000...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+or        32  re    .     1000...110......  A+-DXWL...  U U U U U  12  12   4   4   4
+ori       16  toc   .     0000000000111100  ..........  U U U U U  20  16  12  12  12
+ori       16  tos   .     0000000001111100  ..........  S S S S S  20  16  12  12  12
+ori        8  .     d     0000000000000...  ..........  U U U U U   8   8   2   2   2
+ori        8  .     .     0000000000......  A+-DXWL...  U U U U U  12  12   4   4   4
+ori       16  .     d     0000000001000...  ..........  U U U U U   8   8   2   2   2
+ori       16  .     .     0000000001......  A+-DXWL...  U U U U U  12  12   4   4   4
+ori       32  .     d     0000000010000...  ..........  U U U U U  16  14   2   2   2
+ori       32  .     .     0000000010......  A+-DXWL...  U U U U U  20  20   4   4   4
+pack      16  rr    .     1000...101000...  ..........  . . U U U   .   .   6   6   6
+pack      16  mm    ax7   1000111101001...  ..........  . . U U U   .   .  13  13  13
+pack      16  mm    ay7   1000...101001111  ..........  . . U U U   .   .  13  13  13
+pack      16  mm    axy7  1000111101001111  ..........  . . U U U   .   .  13  13  13
+pack      16  mm    .     1000...101001...  ..........  . . U U U   .   .  13  13  13
+pea       32  .     .     0100100001......  A..DXWLdx.  U U U U U   6   6   5   5   5
+pflush    32  .     .     1111010100011000  ..........  . . . . S   .   .   .   .   4   TODO: correct timing
+pmmu      32  .     .     1111000.........  ..........  . . S S S   .   .   8   8   8
+reset      0  .     .     0100111001110000  ..........  S S S S S   0   0   0   0   0
+ror        8  s     .     1110...000011...  ..........  U U U U U   6   6   8   8   8
+ror       16  s     .     1110...001011...  ..........  U U U U U   6   6   8   8   8
+ror       32  s     .     1110...010011...  ..........  U U U U U   8   8   8   8   8
+ror        8  r     .     1110...000111...  ..........  U U U U U   6   6   8   8   8
+ror       16  r     .     1110...001111...  ..........  U U U U U   6   6   8   8   8
+ror       32  r     .     1110...010111...  ..........  U U U U U   8   8   8   8   8
+ror       16  .     .     1110011011......  A+-DXWL...  U U U U U   8   8   7   7   7
+rol        8  s     .     1110...100011...  ..........  U U U U U   6   6   8   8   8
+rol       16  s     .     1110...101011...  ..........  U U U U U   6   6   8   8   8
+rol       32  s     .     1110...110011...  ..........  U U U U U   8   8   8   8   8
+rol        8  r     .     1110...100111...  ..........  U U U U U   6   6   8   8   8
+rol       16  r     .     1110...101111...  ..........  U U U U U   6   6   8   8   8
+rol       32  r     .     1110...110111...  ..........  U U U U U   8   8   8   8   8
+rol       16  .     .     1110011111......  A+-DXWL...  U U U U U   8   8   7   7   7
+roxr       8  s     .     1110...000010...  ..........  U U U U U   6   6  12  12  12
+roxr      16  s     .     1110...001010...  ..........  U U U U U   6   6  12  12  12
+roxr      32  s     .     1110...010010...  ..........  U U U U U   8   8  12  12  12
+roxr       8  r     .     1110...000110...  ..........  U U U U U   6   6  12  12  12
+roxr      16  r     .     1110...001110...  ..........  U U U U U   6   6  12  12  12
+roxr      32  r     .     1110...010110...  ..........  U U U U U   8   8  12  12  12
+roxr      16  .     .     1110010011......  A+-DXWL...  U U U U U   8   8   5   5   5
+roxl       8  s     .     1110...100010...  ..........  U U U U U   6   6  12  12  12
+roxl      16  s     .     1110...101010...  ..........  U U U U U   6   6  12  12  12
+roxl      32  s     .     1110...110010...  ..........  U U U U U   8   8  12  12  12
+roxl       8  r     .     1110...100110...  ..........  U U U U U   6   6  12  12  12
+roxl      16  r     .     1110...101110...  ..........  U U U U U   6   6  12  12  12
+roxl      32  r     .     1110...110110...  ..........  U U U U U   8   8  12  12  12
+roxl      16  .     .     1110010111......  A+-DXWL...  U U U U U   8   8   5   5   5
+rtd       32  .     .     0100111001110100  ..........  . U U U U   .  16  10  10  10
+rte       32  .     .     0100111001110011  ..........  S S S S S  20  24  20  20  20  bus fault not emulated
+rtm       32  .     .     000001101100....  ..........  . . U U U   .   .  19  19  19  not properly emulated
+rtr       32  .     .     0100111001110111  ..........  U U U U U  20  20  14  14  14
+rts       32  .     .     0100111001110101  ..........  U U U U U  16  16  10  10  10
+sbcd       8  rr    .     1000...100000...  ..........  U U U U U   6   6   4   4   4
+sbcd       8  mm    ax7   1000111100001...  ..........  U U U U U  18  18  16  16  16
+sbcd       8  mm    ay7   1000...100001111  ..........  U U U U U  18  18  16  16  16
+sbcd       8  mm    axy7  1000111100001111  ..........  U U U U U  18  18  16  16  16
+sbcd       8  mm    .     1000...100001...  ..........  U U U U U  18  18  16  16  16
+st         8  .     d     0101000011000...  ..........  U U U U U   6   4   4   4   4
+st         8  .     .     0101000011......  A+-DXWL...  U U U U U   8   8   6   6   6
+sf         8  .     d     0101000111000...  ..........  U U U U U   4   4   4   4   4
+sf         8  .     .     0101000111......  A+-DXWL...  U U U U U   8   8   6   6   6
+scc        8  .     d     0101....11000...  ..........  U U U U U   4   4   4   4   4
+scc        8  .     .     0101....11......  A+-DXWL...  U U U U U   8   8   6   6   6
+stop       0  .     .     0100111001110010  ..........  S S S S S   4   4   8   8   8
+sub        8  er    d     1001...000000...  ..........  U U U U U   4   4   2   2   2
+sub        8  er    .     1001...000......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+sub       16  er    d     1001...001000...  ..........  U U U U U   4   4   2   2   2
+sub       16  er    a     1001...001001...  ..........  U U U U U   4   4   2   2   2
+sub       16  er    .     1001...001......  A+-DXWLdxI  U U U U U   4   4   2   2   2
+sub       32  er    d     1001...010000...  ..........  U U U U U   6   6   2   2   2
+sub       32  er    a     1001...010001...  ..........  U U U U U   6   6   2   2   2
+sub       32  er    .     1001...010......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+sub        8  re    .     1001...100......  A+-DXWL...  U U U U U   8   8   4   4   4
+sub       16  re    .     1001...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+sub       32  re    .     1001...110......  A+-DXWL...  U U U U U  12  12   4   4   4
+suba      16  .     d     1001...011000...  ..........  U U U U U   8   8   2   2   2
+suba      16  .     a     1001...011001...  ..........  U U U U U   8   8   2   2   2
+suba      16  .     .     1001...011......  A+-DXWLdxI  U U U U U   8   8   2   2   2
+suba      32  .     d     1001...111000...  ..........  U U U U U   6   6   2   2   2
+suba      32  .     a     1001...111001...  ..........  U U U U U   6   6   2   2   2
+suba      32  .     .     1001...111......  A+-DXWLdxI  U U U U U   6   6   2   2   2
+subi       8  .     d     0000010000000...  ..........  U U U U U   8   8   2   2   2
+subi       8  .     .     0000010000......  A+-DXWL...  U U U U U  12  12   4   4   4
+subi      16  .     d     0000010001000...  ..........  U U U U U   8   8   2   2   2
+subi      16  .     .     0000010001......  A+-DXWL...  U U U U U  12  12   4   4   4
+subi      32  .     d     0000010010000...  ..........  U U U U U  16  14   2   2   2
+subi      32  .     .     0000010010......  A+-DXWL...  U U U U U  20  20   4   4   4
+subq       8  .     d     0101...100000...  ..........  U U U U U   4   4   2   2   2
+subq       8  .     .     0101...100......  A+-DXWL...  U U U U U   8   8   4   4   4
+subq      16  .     d     0101...101000...  ..........  U U U U U   4   4   2   2   2
+subq      16  .     a     0101...101001...  ..........  U U U U U   8   4   2   2   2
+subq      16  .     .     0101...101......  A+-DXWL...  U U U U U   8   8   4   4   4
+subq      32  .     d     0101...110000...  ..........  U U U U U   8   8   2   2   2
+subq      32  .     a     0101...110001...  ..........  U U U U U   8   8   2   2   2
+subq      32  .     .     0101...110......  A+-DXWL...  U U U U U  12  12   4   4   4
+subx       8  rr    .     1001...100000...  ..........  U U U U U   4   4   2   2   2
+subx      16  rr    .     1001...101000...  ..........  U U U U U   4   4   2   2   2
+subx      32  rr    .     1001...110000...  ..........  U U U U U   8   6   2   2   2
+subx       8  mm    ax7   1001111100001...  ..........  U U U U U  18  18  12  12  12
+subx       8  mm    ay7   1001...100001111  ..........  U U U U U  18  18  12  12  12
+subx       8  mm    axy7  1001111100001111  ..........  U U U U U  18  18  12  12  12
+subx       8  mm    .     1001...100001...  ..........  U U U U U  18  18  12  12  12
+subx      16  mm    .     1001...101001...  ..........  U U U U U  18  18  12  12  12
+subx      32  mm    .     1001...110001...  ..........  U U U U U  30  30  12  12  12
+swap      32  .     .     0100100001000...  ..........  U U U U U   4   4   4   4   4
+tas        8  .     d     0100101011000...  ..........  U U U U U   4   4   4   4   4
+tas        8  .     .     0100101011......  A+-DXWL...  U U U U U  14  14  12  12  12
+trap       0  .     .     010011100100....  ..........  U U U U U   4   4   4   4   4
+trapt      0  .     .     0101000011111100  ..........  . . U U U   .   .   4   4   4
+trapt     16  .     .     0101000011111010  ..........  . . U U U   .   .   6   6   6
+trapt     32  .     .     0101000011111011  ..........  . . U U U   .   .   8   8   8
+trapf      0  .     .     0101000111111100  ..........  . . U U U   .   .   4   4   4
+trapf     16  .     .     0101000111111010  ..........  . . U U U   .   .   6   6   6
+trapf     32  .     .     0101000111111011  ..........  . . U U U   .   .   8   8   8
+trapcc     0  .     .     0101....11111100  ..........  . . U U U   .   .   4   4   4
+trapcc    16  .     .     0101....11111010  ..........  . . U U U   .   .   6   6   6
+trapcc    32  .     .     0101....11111011  ..........  . . U U U   .   .   8   8   8
+trapv      0  .     .     0100111001110110  ..........  U U U U U   4   4   4   4   4
+tst        8  .     d     0100101000000...  ..........  U U U U U   4   4   2   2   2
+tst        8  .     .     0100101000......  A+-DXWL...  U U U U U   4   4   2   2   2
+tst        8  .     pcdi  0100101000111010  ..........  . . U U U   .   .   7   7   7
+tst        8  .     pcix  0100101000111011  ..........  . . U U U   .   .   9   9   9
+tst        8  .     i     0100101000111100  ..........  . . U U U   .   .   6   6   6
+tst       16  .     d     0100101001000...  ..........  U U U U U   4   4   2   2   2
+tst       16  .     a     0100101001001...  ..........  . . U U U   .   .   2   2   2
+tst       16  .     .     0100101001......  A+-DXWL...  U U U U U   4   4   2   2   2
+tst       16  .     pcdi  0100101001111010  ..........  . . U U U   .   .   7   7   7
+tst       16  .     pcix  0100101001111011  ..........  . . U U U   .   .   9   9   9
+tst       16  .     i     0100101001111100  ..........  . . U U U   .   .   6   6   6
+tst       32  .     d     0100101010000...  ..........  U U U U U   4   4   2   2   2
+tst       32  .     a     0100101010001...  ..........  . . U U U   .   .   2   2   2
+tst       32  .     .     0100101010......  A+-DXWL...  U U U U U   4   4   2   2   2
+tst       32  .     pcdi  0100101010111010  ..........  . . U U U   .   .   7   7   7
+tst       32  .     pcix  0100101010111011  ..........  . . U U U   .   .   9   9   9
+tst       32  .     i     0100101010111100  ..........  . . U U U   .   .   6   6   6
+unlk      32  .     a7    0100111001011111  ..........  U U U U U  12  12   6   6   6
+unlk      32  .     .     0100111001011...  ..........  U U U U U  12  12   6   6   6
+unpk      16  rr    .     1000...110000...  ..........  . . U U U   .   .   8   8   8
+unpk      16  mm    ax7   1000111110001...  ..........  . . U U U   .   .  13  13  13
+unpk      16  mm    ay7   1000...110001111  ..........  . . U U U   .   .  13  13  13
+unpk      16  mm    axy7  1000111110001111  ..........  . . U U U   .   .  13  13  13
+unpk      16  mm    .     1000...110001...  ..........  . . U U U   .   .  13  13  13
 
 
 
@@ -903,6 +913,29 @@ M68KMAKE_OP(1111, 0, ., .)
 }
 
 
+M68KMAKE_OP(040fpu0, 32, ., .)
+{
+	if(CPU_TYPE_IS_030_PLUS(CPU_TYPE))
+	{
+		m68040_fpu_op0();
+		return;
+	}
+	m68ki_exception_1111();
+}
+
+
+M68KMAKE_OP(040fpu1, 32, ., .)
+{
+	if(CPU_TYPE_IS_030_PLUS(CPU_TYPE))
+	{
+		m68040_fpu_op1();
+		return;
+	}
+	m68ki_exception_1111();
+}
+
+
+
 M68KMAKE_OP(abcd, 8, rr, .)
 {
 	uint* r_dst = &DX;
@@ -1227,8 +1260,8 @@ M68KMAKE_OP(adda, 16, ., a)
 
 M68KMAKE_OP(adda, 16, ., .)
 {
-	signed short src = MAKE_INT_16(M68KMAKE_GET_OPER_AY_16);
 	uint* r_dst = &AX;
+	uint src = MAKE_INT_16(M68KMAKE_GET_OPER_AY_16);
 
 	*r_dst = MASK_OUT_ABOVE_32(*r_dst + src);
 }
@@ -1807,7 +1840,7 @@ M68KMAKE_OP(andi, 32, ., .)
 }
 
 
-M68KMAKE_OP(andi, 8, toc, .)
+M68KMAKE_OP(andi, 16, toc, .)
 {
 	m68ki_set_ccr(m68ki_get_ccr() & OPER_I_8());
 }
@@ -1833,6 +1866,9 @@ M68KMAKE_OP(asr, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = src >> shift;
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	if(GET_MSB_8(src))
 		res |= m68ki_shift_8_table[shift];
 
@@ -1852,6 +1888,9 @@ M68KMAKE_OP(asr, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = src >> shift;
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	if(GET_MSB_16(src))
 		res |= m68ki_shift_16_table[shift];
 
@@ -1871,6 +1910,9 @@ M68KMAKE_OP(asr, 32, s, .)
 	uint src = *r_dst;
 	uint res = src >> shift;
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	if(GET_MSB_32(src))
 		res |= m68ki_shift_32_table[shift];
 
@@ -2064,6 +2106,9 @@ M68KMAKE_OP(asl, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = MASK_OUT_ABOVE_8(src << shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_8(*r_dst) | res;
 
 	FLAG_X = FLAG_C = src << shift;
@@ -2081,6 +2126,9 @@ M68KMAKE_OP(asl, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = MASK_OUT_ABOVE_16(src << shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_16(*r_dst) | res;
 
 	FLAG_N = NFLAG_16(res);
@@ -2098,6 +2146,9 @@ M68KMAKE_OP(asl, 32, s, .)
 	uint src = *r_dst;
 	uint res = MASK_OUT_ABOVE_32(src << shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_N = NFLAG_32(res);
@@ -2277,7 +2328,16 @@ M68KMAKE_OP(bcc, 32, ., .)
 		REG_PC += 4;
 		return;
 	}
-	m68ki_exception_illegal();
+	else
+	{
+		if(M68KMAKE_CC)
+		{
+			m68ki_trace_t0();			   /* auto-disable (see m68kcpu.h) */
+			m68ki_branch_8(MASK_OUT_ABOVE_8(REG_IR));
+			return;
+		}
+		USE_CYCLES(CYC_BCC_NOTAKE_B);
+	}
 }
 
 
@@ -3121,7 +3181,13 @@ M68KMAKE_OP(bra, 32, ., .)
 			USE_ALL_CYCLES();
 		return;
 	}
-	m68ki_exception_illegal();
+	else
+	{
+		m68ki_trace_t0();				   /* auto-disable (see m68kcpu.h) */
+		m68ki_branch_8(MASK_OUT_ABOVE_8(REG_IR));
+		if(REG_PC == REG_PPC)
+			USE_ALL_CYCLES();
+	}
 }
 
 
@@ -3196,7 +3262,12 @@ M68KMAKE_OP(bsr, 32, ., .)
 		m68ki_branch_32(offset);
 		return;
 	}
-	m68ki_exception_illegal();
+	else
+	{
+		m68ki_trace_t0();				   /* auto-disable (see m68kcpu.h) */
+		m68ki_push_32(REG_PC);
+		m68ki_branch_8(MASK_OUT_ABOVE_8(REG_IR));
+	}
 }
 
 
@@ -3369,8 +3440,8 @@ M68KMAKE_OP(cas2, 16, ., .)
 				return;
 			}
 		}
-		*compare1 = BIT_1F(word2) ? MAKE_INT_16(dest1) : MASK_OUT_BELOW_16(*compare1) | dest1;
-		*compare2 = BIT_F(word2) ? MAKE_INT_16(dest2) : MASK_OUT_BELOW_16(*compare2) | dest2;
+		*compare1 = BIT_1F(word2) ? (uint)MAKE_INT_16(dest1) : MASK_OUT_BELOW_16(*compare1) | dest1;
+		*compare2 = BIT_F(word2) ? (uint)MAKE_INT_16(dest2) : MASK_OUT_BELOW_16(*compare2) | dest2;
 		return;
 	}
 	m68ki_exception_illegal();
@@ -3509,28 +3580,24 @@ M68KMAKE_OP(chk2cmp2, 8, ., pcdi)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15]&0xff;
+		sint compare = REG_DA[(word2 >> 12) & 15]&0xff;
 		uint ea = EA_PCDI_8();
-		uint lower_bound = m68ki_read_pcrel_8(ea);
-		uint upper_bound = m68ki_read_pcrel_8(ea + 1);
+		sint lower_bound = m68ki_read_pcrel_8(ea);
+		sint upper_bound = m68ki_read_pcrel_8(ea + 1);
 
 		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_8(compare) - MAKE_INT_8(lower_bound);
-		else
-			FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+			compare = (int32)(int8)compare;
+      
+		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
+
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
 
-		FLAG_C = upper_bound - compare;
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
 	}
+
+      
 	m68ki_exception_illegal();
 }
 
@@ -3540,27 +3607,21 @@ M68KMAKE_OP(chk2cmp2, 8, ., pcix)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15]&0xff;
+		sint compare = REG_DA[(word2 >> 12) & 15]&0xff;
 		uint ea = EA_PCIX_8();
-		uint lower_bound = m68ki_read_pcrel_8(ea);
-		uint upper_bound = m68ki_read_pcrel_8(ea + 1);
+		sint lower_bound = m68ki_read_pcrel_8(ea);
+		sint upper_bound = m68ki_read_pcrel_8(ea + 1);
 
 		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_8(compare) - MAKE_INT_8(lower_bound);
-		else
-			FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+			compare = (int32)(int8)compare;
+		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||, faster operation short circuits
+
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
 
-		FLAG_C = upper_bound - compare;
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
+
 	}
 	m68ki_exception_illegal();
 }
@@ -3571,24 +3632,18 @@ M68KMAKE_OP(chk2cmp2, 8, ., .)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15]&0xff;
+		sint compare = REG_DA[(word2 >> 12) & 15]&0xff;
 		uint ea = M68KMAKE_GET_EA_AY_8;
-		uint lower_bound = m68ki_read_8(ea);
-		uint upper_bound = m68ki_read_8(ea + 1);
+		sint lower_bound = (int8)m68ki_read_8(ea);
+		sint upper_bound = (int8)m68ki_read_8(ea + 1);
 
 		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_8(compare) - MAKE_INT_8(lower_bound);
-		else
-			FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+			compare = (int32)(int8)compare;
+      
+ 		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
+
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
 
-		FLAG_C = upper_bound - compare;
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -3602,29 +3657,17 @@ M68KMAKE_OP(chk2cmp2, 16, ., pcdi)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15]&0xffff;
+		sint compare = REG_DA[(word2 >> 12) & 15]&0xffff;
 		uint ea = EA_PCDI_16();
-		uint lower_bound = m68ki_read_pcrel_16(ea);
-		uint upper_bound = m68ki_read_pcrel_16(ea + 2);
+		sint lower_bound = (int16)m68ki_read_pcrel_16(ea);
+		sint upper_bound = (int16)m68ki_read_pcrel_16(ea + 2);
 
 		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_16(compare) - MAKE_INT_16(lower_bound);
-		else
-			FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		FLAG_C = CFLAG_16(FLAG_C);
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+			compare = (int32)(int16)compare;
+ 		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
+
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
 
-		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_16(upper_bound) - MAKE_INT_16(compare);
-		else
-			FLAG_C = upper_bound - compare;
-		FLAG_C = CFLAG_16(FLAG_C);
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -3638,29 +3681,17 @@ M68KMAKE_OP(chk2cmp2, 16, ., pcix)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15]&0xffff;
+		sint compare = REG_DA[(word2 >> 12) & 15]&0xffff;
 		uint ea = EA_PCIX_16();
-		uint lower_bound = m68ki_read_pcrel_16(ea);
-		uint upper_bound = m68ki_read_pcrel_16(ea + 2);
+		sint lower_bound = (int16)m68ki_read_pcrel_16(ea);
+		sint upper_bound = (int16)m68ki_read_pcrel_16(ea + 2);
 
 		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_16(compare) - MAKE_INT_16(lower_bound);
-		else
-			FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		FLAG_C = CFLAG_16(FLAG_C);
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+			compare = (int32)(int16)compare;
+ 		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
+
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
 
-		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_16(upper_bound) - MAKE_INT_16(compare);
-		else
-			FLAG_C = upper_bound - compare;
-		FLAG_C = CFLAG_16(FLAG_C);
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -3674,30 +3705,17 @@ M68KMAKE_OP(chk2cmp2, 16, ., .)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15]&0xffff;
+		sint compare = REG_DA[(word2 >> 12) & 15]&0xffff;
 		uint ea = M68KMAKE_GET_EA_AY_16;
-		uint lower_bound = m68ki_read_16(ea);
-		uint upper_bound = m68ki_read_16(ea + 2);
+		sint lower_bound = (int16)m68ki_read_16(ea);
+		sint upper_bound = (int16)m68ki_read_16(ea + 2);
 
 		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_16(compare) - MAKE_INT_16(lower_bound);
-		else
-			FLAG_C = compare - lower_bound;
+			compare = (int32)(int16)compare;
+ 		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
 
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		FLAG_C = CFLAG_16(FLAG_C);
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
-		if(!BIT_F(word2))
-			FLAG_C = MAKE_INT_16(upper_bound) - MAKE_INT_16(compare);
-		else
-			FLAG_C = upper_bound - compare;
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
 
-		FLAG_C = CFLAG_16(FLAG_C);
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -3711,23 +3729,15 @@ M68KMAKE_OP(chk2cmp2, 32, ., pcdi)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15];
+		sint compare = REG_DA[(word2 >> 12) & 15];
 		uint ea = EA_PCDI_32();
-		uint lower_bound = m68ki_read_pcrel_32(ea);
-		uint upper_bound = m68ki_read_pcrel_32(ea + 4);
+		sint lower_bound = m68ki_read_pcrel_32(ea);
+		sint upper_bound = m68ki_read_pcrel_32(ea + 4);
 
-		FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		FLAG_C = CFLAG_SUB_32(lower_bound, compare, FLAG_C);
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
 
-		FLAG_C = upper_bound - compare;
-		FLAG_C = CFLAG_SUB_32(compare, upper_bound, FLAG_C);
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
+    
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -3741,23 +3751,15 @@ M68KMAKE_OP(chk2cmp2, 32, ., pcix)
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15];
+		sint compare = REG_DA[(word2 >> 12) & 15];
 		uint ea = EA_PCIX_32();
-		uint lower_bound = m68ki_read_pcrel_32(ea);
-		uint upper_bound = m68ki_read_pcrel_32(ea + 4);
+		sint lower_bound = m68ki_read_32(ea);
+		sint upper_bound = m68ki_read_32(ea + 4);
 
-		FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		FLAG_C = CFLAG_SUB_32(lower_bound, compare, FLAG_C);
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
 
-		FLAG_C = upper_bound - compare;
-		FLAG_C = CFLAG_SUB_32(compare, upper_bound, FLAG_C);
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
+    
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -3769,25 +3771,18 @@ M68KMAKE_OP(chk2cmp2, 32, ., pcix)
 M68KMAKE_OP(chk2cmp2, 32, ., .)
 {
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
-	{
+	{   
 		uint word2 = OPER_I_16();
-		uint compare = REG_DA[(word2 >> 12) & 15];
+		// JFF changed the logic. chk2/cmp2 uses signed values, not unsigned
+		sint compare = REG_DA[(word2 >> 12) & 15];
 		uint ea = M68KMAKE_GET_EA_AY_32;
-		uint lower_bound = m68ki_read_32(ea);
-		uint upper_bound = m68ki_read_32(ea + 4);
+		sint lower_bound = m68ki_read_32(ea);
+		sint upper_bound = m68ki_read_32(ea + 4);
 
-		FLAG_C = compare - lower_bound;
-		FLAG_Z = !((upper_bound==compare) | (lower_bound==compare));
-		FLAG_C = CFLAG_SUB_32(lower_bound, compare, FLAG_C);
-		if(COND_CS())
-		{
-			if(BIT_B(word2))
-				m68ki_exception_trap(EXCEPTION_CHK);
-			return;
-		}
+		FLAG_Z = !((upper_bound==compare) || (lower_bound==compare));  // JFF: | => ||
 
-		FLAG_C = upper_bound - compare;
-		FLAG_C = CFLAG_SUB_32(compare, upper_bound, FLAG_C);
+    FLAG_C = (lower_bound <= upper_bound ? compare < lower_bound || compare > upper_bound : compare > upper_bound || compare < lower_bound) << 8;
+    
 		if(COND_CS() && BIT_B(word2))
 				m68ki_exception_trap(EXCEPTION_CHK);
 		return;
@@ -4174,6 +4169,7 @@ M68KMAKE_OP(cmpi, 32, ., d)
 	uint dst = DY;
 	uint res = dst - src;
 
+	m68ki_cmpild_callback(src, REG_IR & 7);		   /* auto-disable (see m68kcpu.h) */
 	FLAG_N = NFLAG_32(res);
 	FLAG_Z = MASK_OUT_ABOVE_32(res);
 	FLAG_V = VFLAG_SUB_32(src, dst, res);
@@ -4367,6 +4363,8 @@ M68KMAKE_OP(cptrapcc, 32, ., .)
 		M68K_DO_LOG((M68K_LOG_FILEHANDLE "%s at %08x: called unimplemented instruction %04x (%s)\n",
 					 m68ki_cpu_names[CPU_TYPE], ADDRESS_68K(REG_PC - 2), REG_IR,
 					 m68k_disassemble_quick(ADDRESS_68K(REG_PC - 2))));
+    // JFF: unsupported, but at least if the trap doesn't occur, app should still work, so at least PC increase is correct
+    REG_PC += 4;  
 		return;
 	}
 	m68ki_exception_1111();
@@ -5127,7 +5125,7 @@ M68KMAKE_OP(eori, 32, ., .)
 }
 
 
-M68KMAKE_OP(eori, 8, toc, .)
+M68KMAKE_OP(eori, 16, toc, .)
 {
 	m68ki_set_ccr(m68ki_get_ccr() ^ OPER_I_8());
 }
@@ -5302,6 +5300,9 @@ M68KMAKE_OP(lsr, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = src >> shift;
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_8(*r_dst) | res;
 
 	FLAG_N = NFLAG_CLEAR;
@@ -5318,6 +5319,9 @@ M68KMAKE_OP(lsr, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = src >> shift;
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_16(*r_dst) | res;
 
 	FLAG_N = NFLAG_CLEAR;
@@ -5334,6 +5338,9 @@ M68KMAKE_OP(lsr, 32, s, .)
 	uint src = *r_dst;
 	uint res = src >> shift;
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_N = NFLAG_CLEAR;
@@ -5475,6 +5482,9 @@ M68KMAKE_OP(lsl, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = MASK_OUT_ABOVE_8(src << shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_8(*r_dst) | res;
 
 	FLAG_N = NFLAG_8(res);
@@ -5491,6 +5501,9 @@ M68KMAKE_OP(lsl, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = MASK_OUT_ABOVE_16(src << shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_16(*r_dst) | res;
 
 	FLAG_N = NFLAG_16(res);
@@ -5507,6 +5520,9 @@ M68KMAKE_OP(lsl, 32, s, .)
 	uint src = *r_dst;
 	uint res = MASK_OUT_ABOVE_32(src << shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_N = NFLAG_32(res);
@@ -6388,7 +6404,8 @@ M68KMAKE_OP(move, 32, pd, d)
 	uint res = DY;
 	uint ea = EA_AX_PD_32();
 
-	m68ki_write_32(ea, res);
+	m68ki_write_16(ea+2, res & 0xFFFF );
+	m68ki_write_16(ea, (res >> 16) & 0xFFFF );
 
 	FLAG_N = NFLAG_32(res);
 	FLAG_Z = res;
@@ -6402,7 +6419,8 @@ M68KMAKE_OP(move, 32, pd, a)
 	uint res = AY;
 	uint ea = EA_AX_PD_32();
 
-	m68ki_write_32(ea, res);
+	m68ki_write_16(ea+2, res & 0xFFFF );
+	m68ki_write_16(ea, (res >> 16) & 0xFFFF );
 
 	FLAG_N = NFLAG_32(res);
 	FLAG_Z = res;
@@ -6416,7 +6434,8 @@ M68KMAKE_OP(move, 32, pd, .)
 	uint res = M68KMAKE_GET_OPER_AY_32;
 	uint ea = EA_AX_PD_32();
 
-	m68ki_write_32(ea, res);
+	m68ki_write_16(ea+2, res & 0xFFFF );
+	m68ki_write_16(ea, (res >> 16) & 0xFFFF );
 
 	FLAG_N = NFLAG_32(res);
 	FLAG_Z = res;
@@ -6787,6 +6806,70 @@ M68KMAKE_OP(movec, 32, cr, .)
 				}
 				m68ki_exception_illegal();
 				return;
+			case 0x003:				/* TC */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x004:				/* ITT0 */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x005:				/* ITT1 */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x006:				/* DTT0 */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x007:				/* DTT1 */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x805:				/* MMUSR */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x806:				/* URP */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x807:				/* SRP */
+				if(CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
 			default:
 				m68ki_exception_illegal();
 				return;
@@ -6817,9 +6900,22 @@ M68KMAKE_OP(movec, 32, rc, .)
 				REG_DFC = REG_DA[(word2 >> 12) & 15] & 7;
 				return;
 			case 0x002:			   /* CACR */
+				/* Only EC020 and later have CACR */
 				if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 				{
-					REG_CACR = REG_DA[(word2 >> 12) & 15];
+					/* 68030 can write all bits except 5-7, 040 can write all */
+					if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+					{
+						REG_CACR = REG_DA[(word2 >> 12) & 15];
+					}
+					else if (CPU_TYPE_IS_030_PLUS(CPU_TYPE))
+					{
+						REG_CACR = REG_DA[(word2 >> 12) & 15] & 0xff1f;
+					}
+					else
+					{
+						REG_CACR = REG_DA[(word2 >> 12) & 15] & 0x0f;
+					}
 					return;
 				}
 				m68ki_exception_illegal();
@@ -6865,6 +6961,70 @@ M68KMAKE_OP(movec, 32, rc, .)
 				}
 				m68ki_exception_illegal();
 				return;
+			case 0x003:			/* TC */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x004:			/* ITT0 */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x005:			/* ITT1 */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x006:			/* DTT0 */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x007:			/* DTT1 */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x805:			/* MMUSR */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x806:			/* URP */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
+			case 0x807:			/* SRP */
+				if (CPU_TYPE_IS_040_PLUS(CPU_TYPE))
+				{
+					/* TODO */
+					return;
+				}
+				m68ki_exception_illegal();
+				return;
 			default:
 				m68ki_exception_illegal();
 				return;
@@ -6927,7 +7087,8 @@ M68KMAKE_OP(movem, 32, re, pd)
 		if(register_list & (1 << i))
 		{
 			ea -= 4;
-			m68ki_write_32(ea, REG_DA[15-i]);
+			m68ki_write_16(ea+2, REG_DA[15-i] & 0xFFFF );
+			m68ki_write_16(ea, (REG_DA[15-i] >> 16) & 0xFFFF );
 			count++;
 		}
 	AY = ea;
@@ -7260,6 +7421,22 @@ M68KMAKE_OP(moveq, 32, ., .)
 }
 
 
+M68KMAKE_OP(move16, 32, ., .)
+{
+	uint16 w2 = OPER_I_16();
+	int ax = REG_IR & 7;
+	int ay = (w2 >> 12) & 7;
+
+	m68ki_write_32(REG_A[ay],    m68ki_read_32(REG_A[ax]));
+	m68ki_write_32(REG_A[ay]+4,  m68ki_read_32(REG_A[ax]+4));
+	m68ki_write_32(REG_A[ay]+8,  m68ki_read_32(REG_A[ax]+8));
+	m68ki_write_32(REG_A[ay]+12, m68ki_read_32(REG_A[ax]+12));
+
+	REG_A[ax] += 16;
+	REG_A[ay] += 16;
+}
+
+
 M68KMAKE_OP(muls, 16, ., d)
 {
 	uint* r_dst = &DX;
@@ -8090,7 +8267,7 @@ M68KMAKE_OP(ori, 32, ., .)
 }
 
 
-M68KMAKE_OP(ori, 8, toc, .)
+M68KMAKE_OP(ori, 16, toc, .)
 {
 	m68ki_set_ccr(m68ki_get_ccr() | OPER_I_8());
 }
@@ -8198,6 +8375,27 @@ M68KMAKE_OP(pea, 32, ., .)
 	m68ki_push_32(ea);
 }
 
+M68KMAKE_OP(pflush, 32, ., .)
+{
+	if ((CPU_TYPE_IS_EC020_PLUS(CPU_TYPE)) && (HAS_PMMU))
+	{
+		fprintf(stderr,"68040: unhandled PFLUSH\n");
+		return;
+	}
+	m68ki_exception_1111();
+}
+
+M68KMAKE_OP(pmmu, 32, ., .)
+{
+	if ((CPU_TYPE_IS_EC020_PLUS(CPU_TYPE)) && (HAS_PMMU))
+	{
+		m68881_mmu_ops();
+	}
+	else
+	{
+		m68ki_exception_1111();
+	}
+}
 
 M68KMAKE_OP(reset, 0, ., .)
 {
@@ -8219,6 +8417,9 @@ M68KMAKE_OP(ror, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = ROR_8(src, shift);
 
+	if(orig_shift != 0)
+		USE_CYCLES(orig_shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_8(*r_dst) | res;
 
 	FLAG_N = NFLAG_8(res);
@@ -8235,6 +8436,9 @@ M68KMAKE_OP(ror, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = ROR_16(src, shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_16(*r_dst) | res;
 
 	FLAG_N = NFLAG_16(res);
@@ -8251,6 +8455,9 @@ M68KMAKE_OP(ror, 32, s, .)
 	uint64 src = *r_dst;
 	uint res = ROR_32(src, shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_N = NFLAG_32(res);
@@ -8364,6 +8571,9 @@ M68KMAKE_OP(rol, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = ROL_8(src, shift);
 
+	if(orig_shift != 0)
+		USE_CYCLES(orig_shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_8(*r_dst) | res;
 
 	FLAG_N = NFLAG_8(res);
@@ -8380,6 +8590,9 @@ M68KMAKE_OP(rol, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = ROL_16(src, shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = MASK_OUT_BELOW_16(*r_dst) | res;
 
 	FLAG_N = NFLAG_16(res);
@@ -8396,6 +8609,9 @@ M68KMAKE_OP(rol, 32, s, .)
 	uint64 src = *r_dst;
 	uint res = ROL_32(src, shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_N = NFLAG_32(res);
@@ -8489,7 +8705,7 @@ M68KMAKE_OP(rol, 32, r, .)
 
 		*r_dst = res;
 
-		FLAG_C = (src >> (32 - shift)) << 8;
+		FLAG_C = (src >> ((32 - shift) & 0x1f)) << 8;
 		FLAG_N = NFLAG_32(res);
 		FLAG_Z = res;
 		FLAG_V = VFLAG_CLEAR;
@@ -8525,6 +8741,9 @@ M68KMAKE_OP(roxr, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = ROR_9(src | (XFLAG_AS_1() << 8), shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	FLAG_C = FLAG_X = res;
 	res = MASK_OUT_ABOVE_8(res);
 
@@ -8543,6 +8762,9 @@ M68KMAKE_OP(roxr, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = ROR_17(src | (XFLAG_AS_1() << 16), shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	FLAG_C = FLAG_X = res >> 8;
 	res = MASK_OUT_ABOVE_16(res);
 
@@ -8563,6 +8785,9 @@ M68KMAKE_OP(roxr, 32, s, .)
 	uint64 src   = *r_dst;
 	uint64 res   = src | (((uint64)XFLAG_AS_1()) << 32);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	res = ROR_33_64(res, shift);
 
 	FLAG_C = FLAG_X = res >> 24;
@@ -8582,6 +8807,9 @@ M68KMAKE_OP(roxr, 32, s, .)
 	uint res = MASK_OUT_ABOVE_32((ROR_33(src, shift) & ~(1 << (32 - shift))) | (XFLAG_AS_1() << (32 - shift)));
 	uint new_x_flag = src & (1 << (shift - 1));
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_C = FLAG_X = (new_x_flag != 0)<<8;
@@ -8737,6 +8965,9 @@ M68KMAKE_OP(roxl, 8, s, .)
 	uint src = MASK_OUT_ABOVE_8(*r_dst);
 	uint res = ROL_9(src | (XFLAG_AS_1() << 8), shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	FLAG_C = FLAG_X = res;
 	res = MASK_OUT_ABOVE_8(res);
 
@@ -8755,6 +8986,9 @@ M68KMAKE_OP(roxl, 16, s, .)
 	uint src = MASK_OUT_ABOVE_16(*r_dst);
 	uint res = ROL_17(src | (XFLAG_AS_1() << 16), shift);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	FLAG_C = FLAG_X = res >> 8;
 	res = MASK_OUT_ABOVE_16(res);
 
@@ -8775,6 +9009,9 @@ M68KMAKE_OP(roxl, 32, s, .)
 	uint64 src   = *r_dst;
 	uint64 res   = src | (((uint64)XFLAG_AS_1()) << 32);
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	res = ROL_33_64(res, shift);
 
 	FLAG_C = FLAG_X = res >> 24;
@@ -8794,6 +9031,9 @@ M68KMAKE_OP(roxl, 32, s, .)
 	uint res = MASK_OUT_ABOVE_32((ROL_33(src, shift) & ~(1 << (shift - 1))) | (XFLAG_AS_1() << (shift - 1)));
 	uint new_x_flag = src & (1 << (32 - shift));
 
+	if(shift != 0)
+		USE_CYCLES(shift<<CYC_SHIFT);
+
 	*r_dst = res;
 
 	FLAG_C = FLAG_X = (new_x_flag != 0)<<8;
@@ -8966,6 +9206,7 @@ M68KMAKE_OP(rte, 32, ., .)
 		uint new_pc;
 		uint format_word;
 
+		m68ki_rte_callback();		   /* auto-disable (see m68kcpu.h) */
 		m68ki_trace_t0();			   /* auto-disable (see m68kcpu.h) */
 
 		if(CPU_TYPE_IS_000(CPU_TYPE))
@@ -8994,10 +9235,36 @@ M68KMAKE_OP(rte, 32, ., .)
 				CPU_INSTR_MODE = INSTRUCTION_YES;
 				CPU_RUN_MODE = RUN_MODE_NORMAL;
 				return;
+			} else if (format_word == 8) {
+				/* Format 8 stack frame -- 68010 only. 29 word bus/address error */
+				new_sr = m68ki_pull_16();
+				new_pc = m68ki_pull_32();
+				m68ki_fake_pull_16();	/* format word */
+				m68ki_fake_pull_16();	/* special status word */
+				m68ki_fake_pull_32();	/* fault address */
+				m68ki_fake_pull_16();	/* unused/reserved */
+				m68ki_fake_pull_16();	/* data output buffer */
+				m68ki_fake_pull_16();	/* unused/reserved */
+				m68ki_fake_pull_16();	/* data input buffer */
+				m68ki_fake_pull_16();	/* unused/reserved */
+				m68ki_fake_pull_16();	/* instruction input buffer */
+				m68ki_fake_pull_32();	/* internal information, 16 words */
+				m68ki_fake_pull_32();	/* (actually, we use 8 DWORDs) */
+				m68ki_fake_pull_32();
+				m68ki_fake_pull_32();
+				m68ki_fake_pull_32();
+				m68ki_fake_pull_32();
+				m68ki_fake_pull_32();
+				m68ki_fake_pull_32();
+				m68ki_jump(new_pc);
+				m68ki_set_sr(new_sr);
+				CPU_INSTR_MODE = INSTRUCTION_YES;
+				CPU_RUN_MODE = RUN_MODE_NORMAL;
+				return;
 			}
 			CPU_INSTR_MODE = INSTRUCTION_YES;
 			CPU_RUN_MODE = RUN_MODE_NORMAL;
-			/* Not handling bus fault (9) */
+			/* Not handling other exception types (9) */
 			m68ki_exception_format_error();
 			return;
 		}
@@ -9456,8 +9723,8 @@ M68KMAKE_OP(suba, 16, ., a)
 
 M68KMAKE_OP(suba, 16, ., .)
 {
-	signed short src = MAKE_INT_16(M68KMAKE_GET_OPER_AY_16);
 	uint* r_dst = &AX;
+	uint src = MAKE_INT_16(M68KMAKE_GET_OPER_AY_16);
 
 	*r_dst = MASK_OUT_ABOVE_32(*r_dst - src);
 }
@@ -9888,12 +10155,20 @@ M68KMAKE_OP(tas, 8, ., .)
 {
 	uint ea = M68KMAKE_GET_EA_AY_8;
 	uint dst = m68ki_read_8(ea);
+	uint allow_writeback;
 
 	FLAG_Z = dst;
 	FLAG_N = NFLAG_8(dst);
 	FLAG_V = VFLAG_CLEAR;
 	FLAG_C = CFLAG_CLEAR;
-	m68ki_write_8(ea, dst | 0x80);
+
+	/* The Genesis/Megadrive games Gargoyles and Ex-Mutants need the TAS writeback
+       disabled in order to function properly.  Some Amiga software may also rely
+       on this, but only when accessing specific addresses so additional functionality
+       will be needed. */
+	allow_writeback = m68ki_tas_callback();
+
+	if (allow_writeback==1) m68ki_write_8(ea, dst | 0x80);
 }
 
 
@@ -9919,6 +10194,7 @@ M68KMAKE_OP(trapt, 16, ., .)
 {
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
+        REG_PC += 2; // JFF else stackframe & return addresses are incorrect
 		m68ki_exception_trap(EXCEPTION_TRAPV);	/* HJB 990403 */
 		return;
 	}
@@ -9930,6 +10206,7 @@ M68KMAKE_OP(trapt, 32, ., .)
 {
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
+        REG_PC += 4; // JFF else stackframe & return addresses are incorrect
 		m68ki_exception_trap(EXCEPTION_TRAPV);	/* HJB 990403 */
 		return;
 	}
@@ -9985,12 +10262,13 @@ M68KMAKE_OP(trapcc, 16, ., .)
 {
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
+     	REG_PC += 2;  /* JFF increase before or 1) stackframe is incorrect 2) RTE address is wrong if trap is taken */
 		if(M68KMAKE_CC)
 		{
 			m68ki_exception_trap(EXCEPTION_TRAPV);	/* HJB 990403 */
 			return;
 		}
-		REG_PC += 2;
+
 		return;
 	}
 	m68ki_exception_illegal();
@@ -10001,12 +10279,12 @@ M68KMAKE_OP(trapcc, 32, ., .)
 {
 	if(CPU_TYPE_IS_EC020_PLUS(CPU_TYPE))
 	{
+		REG_PC += 4;  /* JFF increase before or 1) stackframe is incorrect 2) RTE address is wrong if trap is taken */
 		if(M68KMAKE_CC)
 		{
 			m68ki_exception_trap(EXCEPTION_TRAPV);	/* HJB 990403 */
 			return;
 		}
-		REG_PC += 4;
 		return;
 	}
 	m68ki_exception_illegal();
diff --git a/plat/linux68k/emu/musashi/m68kconf.h b/plat/linux68k/emu/musashi/m68kconf.h
index 7edd073aa..8844952df 100755
--- a/plat/linux68k/emu/musashi/m68kconf.h
+++ b/plat/linux68k/emu/musashi/m68kconf.h
@@ -3,10 +3,10 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 3.32
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -52,7 +52,7 @@
  * to OPT_ON and use m68kmame.h to configure the 68k core.
  */
 #ifndef M68K_COMPILE_FOR_MAME
-#define M68K_COMPILE_FOR_MAME      OPT_ON
+#define M68K_COMPILE_FOR_MAME      OPT_OFF
 #endif /* M68K_COMPILE_FOR_MAME */
 
 
@@ -67,6 +67,8 @@
 #define M68K_EMULATE_010            OPT_ON
 #define M68K_EMULATE_EC020          OPT_ON
 #define M68K_EMULATE_020            OPT_ON
+#define M68K_EMULATE_030            OPT_ON
+#define M68K_EMULATE_040            OPT_ON
 
 
 /* If ON, the CPU will call m68k_read_immediate_xx() for immediate addressing
@@ -109,6 +111,35 @@
 #define M68K_EMULATE_RESET          OPT_OFF
 #define M68K_RESET_CALLBACK()       your_reset_handler_function()
 
+/* If ON, CPU will call the callback when it encounters a cmpi.l #v, dn
+ * instruction.
+ */
+#define M68K_CMPILD_HAS_CALLBACK     OPT_OFF
+#define M68K_CMPILD_CALLBACK(v,r)    your_cmpild_handler_function(v,r)
+
+
+/* If ON, CPU will call the callback when it encounters a rte
+ * instruction.
+ */
+#define M68K_RTE_HAS_CALLBACK       OPT_OFF
+#define M68K_RTE_CALLBACK()         your_rte_handler_function()
+
+/* If ON, CPU will call the callback when it encounters a tas
+ * instruction.
+ */
+#define M68K_TAS_HAS_CALLBACK       OPT_OFF
+#define M68K_TAS_CALLBACK()         your_tas_handler_function()
+
+/* If ON, CPU will call the callback when it encounters an illegal instruction
+ * passing the opcode as argument. If the callback returns 1, then it's considered
+ * as a normal instruction, and the illegal exception in canceled. If it returns 0,
+ * the exception occurs normally.
+ * The callback looks like int callback(int opcode)
+ * You should put OPT_SPECIFY_HANDLER here if you cant to use it, otherwise it will
+ * use a dummy default handler and you'll have to call m68k_set_illg_instr_callback explicitely
+ */
+#define M68K_ILLG_HAS_CALLBACK	    OPT_OFF
+#define M68K_ILLG_CALLBACK(opcode)  op_illg(opcode)
 
 /* If ON, CPU will call the set fc callback on every memory access to
  * differentiate between user/supervisor, program/data access like a real
@@ -119,7 +150,6 @@
 #define M68K_EMULATE_FC             OPT_OFF
 #define M68K_SET_FC_CALLBACK(A)     your_set_fc_handler_function(A)
 
-
 /* If ON, CPU will call the pc changed callback when it changes the PC by a
  * large value.  This allows host programs to be nicer when it comes to
  * fetching immediate data and instructions on a banked memory system.
@@ -132,7 +162,7 @@
  * instruction.
  */
 #define M68K_INSTRUCTION_HOOK       OPT_OFF
-#define M68K_INSTRUCTION_CALLBACK() your_instruction_hook_function()
+#define M68K_INSTRUCTION_CALLBACK(pc) your_instruction_hook_function(pc)
 
 
 /* If ON, the CPU will emulate the 4-byte prefetch queue of a real 68000 */
@@ -154,6 +184,9 @@
 #define M68K_LOG_1010_1111          OPT_OFF
 #define M68K_LOG_FILEHANDLE         some_file_handle
 
+/* Emulate PMMU : if you enable this, there will be a test to see if the current chip has some enabled pmmu added to every memory access,
+ * so enable this only if it's useful */
+#define M68K_EMULATE_PMMU   OPT_ON
 
 /* ----------------------------- COMPATIBILITY ---------------------------- */
 
@@ -165,21 +198,11 @@
 /* If ON, the enulation core will use 64-bit integers to speed up some
  * operations.
 */
-#define M68K_USE_64_BIT  OPT_OFF
+#define M68K_USE_64_BIT  OPT_ON
 
 
-/* Set to your compiler's static inline keyword to enable it, or
- * set it to blank to disable it.
- * If you define INLINE in the makefile, it will override this value.
- * NOTE: not enabling inline functions will SEVERELY slow down emulation.
- */
-#ifndef INLINE
-#define INLINE static __inline__
-#endif /* INLINE */
-
 #endif /* M68K_COMPILE_FOR_MAME */
 
-
 /* ======================================================================== */
 /* ============================== END OF FILE ============================= */
 /* ======================================================================== */
diff --git a/plat/linux68k/emu/musashi/m68kcpu.c b/plat/linux68k/emu/musashi/m68kcpu.c
index 89820c0f0..040980abd 100755
--- a/plat/linux68k/emu/musashi/m68kcpu.c
+++ b/plat/linux68k/emu/musashi/m68kcpu.c
@@ -3,10 +3,10 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 4.60
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -38,9 +38,19 @@
 /* ================================ INCLUDES ============================== */
 /* ======================================================================== */
 
+extern void m68040_fpu_op0(void);
+extern void m68040_fpu_op1(void);
+extern void m68881_mmu_ops();
+extern unsigned char m68ki_cycles[][0x10000];
+extern void (*m68ki_instruction_jump_table[0x10000])(void); /* opcode handler jump table */
+extern void m68ki_build_opcode_table(void);
+
 #include "m68kops.h"
 #include "m68kcpu.h"
 
+#include "m68kfpu.c"
+#include "m68kmmu.h" // uses some functions from m68kfpu.c which are static !
+
 /* ======================================================================== */
 /* ================================= DATA ================================= */
 /* ======================================================================== */
@@ -51,7 +61,7 @@ uint m68ki_tracing = 0;
 uint m68ki_address_space;
 
 #ifdef M68K_LOG_ENABLE
-char* m68ki_cpu_names[9] =
+const char *const m68ki_cpu_names[] =
 {
 	"Invalid CPU",
 	"M68000",
@@ -69,15 +79,21 @@ char* m68ki_cpu_names[9] =
 m68ki_cpu_core m68ki_cpu = {0};
 
 #if M68K_EMULATE_ADDRESS_ERROR
+#ifdef _BSD_SETJMP_H
+sigjmp_buf m68ki_aerr_trap;
+#else
 jmp_buf m68ki_aerr_trap;
+#endif
 #endif /* M68K_EMULATE_ADDRESS_ERROR */
 
 uint    m68ki_aerr_address;
 uint    m68ki_aerr_write_mode;
 uint    m68ki_aerr_fc;
 
+jmp_buf m68ki_bus_error_jmp_buf;
+
 /* Used by shift & rotate instructions */
-uint8 m68ki_shift_8_table[65] =
+const uint8 m68ki_shift_8_table[65] =
 {
 	0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff, 0xff, 0xff, 0xff,
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
@@ -86,7 +102,7 @@ uint8 m68ki_shift_8_table[65] =
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 	0xff, 0xff, 0xff, 0xff, 0xff
 };
-uint16 m68ki_shift_16_table[65] =
+const uint16 m68ki_shift_16_table[65] =
 {
 	0x0000, 0x8000, 0xc000, 0xe000, 0xf000, 0xf800, 0xfc00, 0xfe00, 0xff00,
 	0xff80, 0xffc0, 0xffe0, 0xfff0, 0xfff8, 0xfffc, 0xfffe, 0xffff, 0xffff,
@@ -97,7 +113,7 @@ uint16 m68ki_shift_16_table[65] =
 	0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
 	0xffff, 0xffff
 };
-uint m68ki_shift_32_table[65] =
+const uint m68ki_shift_32_table[65] =
 {
 	0x00000000, 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, 0xf8000000,
 	0xfc000000, 0xfe000000, 0xff000000, 0xff800000, 0xffc00000, 0xffe00000,
@@ -116,16 +132,16 @@ uint m68ki_shift_32_table[65] =
 /* Number of clock cycles to use for exception processing.
  * I used 4 for any vectors that are undocumented for processing times.
  */
-uint8 m68ki_exception_cycle_table[3][256] =
+const uint8 m68ki_exception_cycle_table[5][256] =
 {
 	{ /* 000 */
-		  4, /*  0: Reset - Initial Stack Pointer                      */
+		 40, /*  0: Reset - Initial Stack Pointer                      */
 		  4, /*  1: Reset - Initial Program Counter                    */
 		 50, /*  2: Bus Error                             (unemulated) */
 		 50, /*  3: Address Error                         (unemulated) */
 		 34, /*  4: Illegal Instruction                                */
-		 38, /*  5: Divide by Zero -- ASG: changed from 42             */
-		 40, /*  6: CHK -- ASG: chanaged from 44                       */
+		 38, /*  5: Divide by Zero                                     */
+		 40, /*  6: CHK                                                */
 		 34, /*  7: TRAPV                                              */
 		 34, /*  8: Privilege Violation                                */
 		 34, /*  9: Trace                                              */
@@ -151,7 +167,7 @@ uint8 m68ki_exception_cycle_table[3][256] =
 		 44, /* 29: Level 5 Interrupt Autovector                       */
 		 44, /* 30: Level 6 Interrupt Autovector                       */
 		 44, /* 31: Level 7 Interrupt Autovector                       */
-		 34, /* 32: TRAP #0 -- ASG: chanaged from 38                   */
+		 34, /* 32: TRAP #0                                            */
 		 34, /* 33: TRAP #1                                            */
 		 34, /* 34: TRAP #2                                            */
 		 34, /* 35: TRAP #3                                            */
@@ -192,7 +208,7 @@ uint8 m68ki_exception_cycle_table[3][256] =
 		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
 	},
 	{ /* 010 */
-		  4, /*  0: Reset - Initial Stack Pointer                      */
+		 40, /*  0: Reset - Initial Stack Pointer                      */
 		  4, /*  1: Reset - Initial Program Counter                    */
 		126, /*  2: Bus Error                             (unemulated) */
 		126, /*  3: Address Error                         (unemulated) */
@@ -336,10 +352,156 @@ uint8 m68ki_exception_cycle_table[3][256] =
 		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
 		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
 		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
+	},
+	{ /* 030 - not correct */
+		  4, /*  0: Reset - Initial Stack Pointer                      */
+		  4, /*  1: Reset - Initial Program Counter                    */
+		 50, /*  2: Bus Error                             (unemulated) */
+		 50, /*  3: Address Error                         (unemulated) */
+		 20, /*  4: Illegal Instruction                                */
+		 38, /*  5: Divide by Zero                                     */
+		 40, /*  6: CHK                                                */
+		 20, /*  7: TRAPV                                              */
+		 34, /*  8: Privilege Violation                                */
+		 25, /*  9: Trace                                              */
+		 20, /* 10: 1010                                               */
+		 20, /* 11: 1111                                               */
+		  4, /* 12: RESERVED                                           */
+		  4, /* 13: Coprocessor Protocol Violation        (unemulated) */
+		  4, /* 14: Format Error                                       */
+		 30, /* 15: Uninitialized Interrupt                            */
+		  4, /* 16: RESERVED                                           */
+		  4, /* 17: RESERVED                                           */
+		  4, /* 18: RESERVED                                           */
+		  4, /* 19: RESERVED                                           */
+		  4, /* 20: RESERVED                                           */
+		  4, /* 21: RESERVED                                           */
+		  4, /* 22: RESERVED                                           */
+		  4, /* 23: RESERVED                                           */
+		 30, /* 24: Spurious Interrupt                                 */
+		 30, /* 25: Level 1 Interrupt Autovector                       */
+		 30, /* 26: Level 2 Interrupt Autovector                       */
+		 30, /* 27: Level 3 Interrupt Autovector                       */
+		 30, /* 28: Level 4 Interrupt Autovector                       */
+		 30, /* 29: Level 5 Interrupt Autovector                       */
+		 30, /* 30: Level 6 Interrupt Autovector                       */
+		 30, /* 31: Level 7 Interrupt Autovector                       */
+		 20, /* 32: TRAP #0                                            */
+		 20, /* 33: TRAP #1                                            */
+		 20, /* 34: TRAP #2                                            */
+		 20, /* 35: TRAP #3                                            */
+		 20, /* 36: TRAP #4                                            */
+		 20, /* 37: TRAP #5                                            */
+		 20, /* 38: TRAP #6                                            */
+		 20, /* 39: TRAP #7                                            */
+		 20, /* 40: TRAP #8                                            */
+		 20, /* 41: TRAP #9                                            */
+		 20, /* 42: TRAP #10                                           */
+		 20, /* 43: TRAP #11                                           */
+		 20, /* 44: TRAP #12                                           */
+		 20, /* 45: TRAP #13                                           */
+		 20, /* 46: TRAP #14                                           */
+		 20, /* 47: TRAP #15                                           */
+		  4, /* 48: FP Branch or Set on Unknown Condition (unemulated) */
+		  4, /* 49: FP Inexact Result                     (unemulated) */
+		  4, /* 50: FP Divide by Zero                     (unemulated) */
+		  4, /* 51: FP Underflow                          (unemulated) */
+		  4, /* 52: FP Operand Error                      (unemulated) */
+		  4, /* 53: FP Overflow                           (unemulated) */
+		  4, /* 54: FP Signaling NAN                      (unemulated) */
+		  4, /* 55: FP Unimplemented Data Type            (unemulated) */
+		  4, /* 56: MMU Configuration Error               (unemulated) */
+		  4, /* 57: MMU Illegal Operation Error           (unemulated) */
+		  4, /* 58: MMU Access Level Violation Error      (unemulated) */
+		  4, /* 59: RESERVED                                           */
+		  4, /* 60: RESERVED                                           */
+		  4, /* 61: RESERVED                                           */
+		  4, /* 62: RESERVED                                           */
+		  4, /* 63: RESERVED                                           */
+		     /* 64-255: User Defined                                   */
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
+	},
+	{ /* 040 */ // TODO: these values are not correct
+		  4, /*  0: Reset - Initial Stack Pointer                      */
+		  4, /*  1: Reset - Initial Program Counter                    */
+		 50, /*  2: Bus Error                             (unemulated) */
+		 50, /*  3: Address Error                         (unemulated) */
+		 20, /*  4: Illegal Instruction                                */
+		 38, /*  5: Divide by Zero                                     */
+		 40, /*  6: CHK                                                */
+		 20, /*  7: TRAPV                                              */
+		 34, /*  8: Privilege Violation                                */
+		 25, /*  9: Trace                                              */
+		 20, /* 10: 1010                                               */
+		 20, /* 11: 1111                                               */
+		  4, /* 12: RESERVED                                           */
+		  4, /* 13: Coprocessor Protocol Violation        (unemulated) */
+		  4, /* 14: Format Error                                       */
+		 30, /* 15: Uninitialized Interrupt                            */
+		  4, /* 16: RESERVED                                           */
+		  4, /* 17: RESERVED                                           */
+		  4, /* 18: RESERVED                                           */
+		  4, /* 19: RESERVED                                           */
+		  4, /* 20: RESERVED                                           */
+		  4, /* 21: RESERVED                                           */
+		  4, /* 22: RESERVED                                           */
+		  4, /* 23: RESERVED                                           */
+		 30, /* 24: Spurious Interrupt                                 */
+		 30, /* 25: Level 1 Interrupt Autovector                       */
+		 30, /* 26: Level 2 Interrupt Autovector                       */
+		 30, /* 27: Level 3 Interrupt Autovector                       */
+		 30, /* 28: Level 4 Interrupt Autovector                       */
+		 30, /* 29: Level 5 Interrupt Autovector                       */
+		 30, /* 30: Level 6 Interrupt Autovector                       */
+		 30, /* 31: Level 7 Interrupt Autovector                       */
+		 20, /* 32: TRAP #0                                            */
+		 20, /* 33: TRAP #1                                            */
+		 20, /* 34: TRAP #2                                            */
+		 20, /* 35: TRAP #3                                            */
+		 20, /* 36: TRAP #4                                            */
+		 20, /* 37: TRAP #5                                            */
+		 20, /* 38: TRAP #6                                            */
+		 20, /* 39: TRAP #7                                            */
+		 20, /* 40: TRAP #8                                            */
+		 20, /* 41: TRAP #9                                            */
+		 20, /* 42: TRAP #10                                           */
+		 20, /* 43: TRAP #11                                           */
+		 20, /* 44: TRAP #12                                           */
+		 20, /* 45: TRAP #13                                           */
+		 20, /* 46: TRAP #14                                           */
+		 20, /* 47: TRAP #15                                           */
+		  4, /* 48: FP Branch or Set on Unknown Condition (unemulated) */
+		  4, /* 49: FP Inexact Result                     (unemulated) */
+		  4, /* 50: FP Divide by Zero                     (unemulated) */
+		  4, /* 51: FP Underflow                          (unemulated) */
+		  4, /* 52: FP Operand Error                      (unemulated) */
+		  4, /* 53: FP Overflow                           (unemulated) */
+		  4, /* 54: FP Signaling NAN                      (unemulated) */
+		  4, /* 55: FP Unimplemented Data Type            (unemulated) */
+		  4, /* 56: MMU Configuration Error               (unemulated) */
+		  4, /* 57: MMU Illegal Operation Error           (unemulated) */
+		  4, /* 58: MMU Access Level Violation Error      (unemulated) */
+		  4, /* 59: RESERVED                                           */
+		  4, /* 60: RESERVED                                           */
+		  4, /* 61: RESERVED                                           */
+		  4, /* 62: RESERVED                                           */
+		  4, /* 63: RESERVED                                           */
+		     /* 64-255: User Defined                                   */
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
 	}
 };
 
-uint8 m68ki_ea_idx_cycle_table[64] =
+const uint8 m68ki_ea_idx_cycle_table[64] =
 {
 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 	 0, /* ..01.000 no memory indirect, base NULL             */
@@ -390,6 +552,31 @@ static void default_reset_instr_callback(void)
 {
 }
 
+/* Called when a cmpi.l #v, dn instruction is executed */
+static void default_cmpild_instr_callback(unsigned int val, int reg)
+{
+	(void)val;
+	(void)reg;
+}
+
+/* Called when a rte instruction is executed */
+static void default_rte_instr_callback(void)
+{
+}
+
+/* Called when a tas instruction is executed */
+static int default_tas_instr_callback(void)
+{
+	return 1; // allow writeback
+}
+
+/* Called when an illegal instruction is encountered */
+static int default_illg_instr_callback(int opcode)
+{
+	(void)opcode;
+	return 0; // not handled : exception will occur
+}
+
 /* Called when the program counter changed by a large value */
 static unsigned int default_pc_changed_callback_data;
 static void default_pc_changed_callback(unsigned int new_pc)
@@ -405,8 +592,9 @@ static void default_set_fc_callback(unsigned int new_fc)
 }
 
 /* Called every instruction cycle prior to execution */
-static void default_instr_hook_callback(void)
+static void default_instr_hook_callback(unsigned int pc)
 {
+	(void)pc;
 }
 
 
@@ -415,7 +603,6 @@ static void default_instr_hook_callback(void)
 	jmp_buf m68ki_aerr_trap;
 #endif /* M68K_EMULATE_ADDRESS_ERROR */
 
-
 /* ======================================================================== */
 /* ================================= API ================================== */
 /* ======================================================================== */
@@ -474,6 +661,7 @@ unsigned int m68k_get_reg(void* context, m68k_register_t regnum)
 				case CPU_TYPE_010:		return (unsigned int)M68K_CPU_TYPE_68010;
 				case CPU_TYPE_EC020:	return (unsigned int)M68K_CPU_TYPE_68EC020;
 				case CPU_TYPE_020:		return (unsigned int)M68K_CPU_TYPE_68020;
+				case CPU_TYPE_040:		return (unsigned int)M68K_CPU_TYPE_68040;
 			}
 			return M68K_CPU_TYPE_INVALID;
 		default:			return 0;
@@ -502,7 +690,7 @@ void m68k_set_reg(m68k_register_t regnum, unsigned int value)
 		case M68K_REG_A6:	REG_A[6] = MASK_OUT_ABOVE_32(value); return;
 		case M68K_REG_A7:	REG_A[7] = MASK_OUT_ABOVE_32(value); return;
 		case M68K_REG_PC:	m68ki_jump(MASK_OUT_ABOVE_32(value)); return;
-		case M68K_REG_SR:	m68ki_set_sr(value); return;
+		case M68K_REG_SR:	m68ki_set_sr_noint_nosp(value); return;
 		case M68K_REG_SP:	REG_SP = MASK_OUT_ABOVE_32(value); return;
 		case M68K_REG_USP:	if(FLAG_S)
 								REG_USP = MASK_OUT_ABOVE_32(value);
@@ -547,6 +735,26 @@ void m68k_set_reset_instr_callback(void  (*callback)(void))
 	CALLBACK_RESET_INSTR = callback ? callback : default_reset_instr_callback;
 }
 
+void m68k_set_cmpild_instr_callback(void  (*callback)(unsigned int, int))
+{
+	CALLBACK_CMPILD_INSTR = callback ? callback : default_cmpild_instr_callback;
+}
+
+void m68k_set_rte_instr_callback(void  (*callback)(void))
+{
+	CALLBACK_RTE_INSTR = callback ? callback : default_rte_instr_callback;
+}
+
+void m68k_set_tas_instr_callback(int  (*callback)(void))
+{
+	CALLBACK_TAS_INSTR = callback ? callback : default_tas_instr_callback;
+}
+
+void m68k_set_illg_instr_callback(int  (*callback)(int))
+{
+	CALLBACK_ILLG_INSTR = callback ? callback : default_illg_instr_callback;
+}
+
 void m68k_set_pc_changed_callback(void  (*callback)(unsigned int new_pc))
 {
 	CALLBACK_PC_CHANGED = callback ? callback : default_pc_changed_callback;
@@ -557,7 +765,7 @@ void m68k_set_fc_callback(void  (*callback)(unsigned int new_fc))
 	CALLBACK_SET_FC = callback ? callback : default_set_fc_callback;
 }
 
-void m68k_set_instr_hook_callback(void  (*callback)(void))
+void m68k_set_instr_hook_callback(void  (*callback)(unsigned int pc))
 {
 	CALLBACK_INSTR_HOOK = callback ? callback : default_instr_hook_callback;
 }
@@ -582,6 +790,12 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 3;
 			CYC_SHIFT        = 1;
 			CYC_RESET        = 132;
+			HAS_PMMU	 = 0;
+			return;
+		case M68K_CPU_TYPE_SCC68070:
+			m68k_set_cpu_type(M68K_CPU_TYPE_68010);
+			CPU_ADDRESS_MASK = 0xffffffff;
+			CPU_TYPE         = CPU_TYPE_SCC070;
 			return;
 		case M68K_CPU_TYPE_68010:
 			CPU_TYPE         = CPU_TYPE_010;
@@ -598,6 +812,7 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 3;
 			CYC_SHIFT        = 1;
 			CYC_RESET        = 130;
+			HAS_PMMU	 = 0;
 			return;
 		case M68K_CPU_TYPE_68EC020:
 			CPU_TYPE         = CPU_TYPE_EC020;
@@ -614,6 +829,7 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
+			HAS_PMMU	 = 0;
 			return;
 		case M68K_CPU_TYPE_68020:
 			CPU_TYPE         = CPU_TYPE_020;
@@ -630,6 +846,91 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
+			HAS_PMMU	 = 0;
+			return;
+		case M68K_CPU_TYPE_68030:
+			CPU_TYPE         = CPU_TYPE_030;
+			CPU_ADDRESS_MASK = 0xffffffff;
+			CPU_SR_MASK      = 0xf71f; /* T1 T0 S  M  -- I2 I1 I0 -- -- -- X  N  Z  V  C  */
+			CYC_INSTRUCTION  = m68ki_cycles[3];
+			CYC_EXCEPTION    = m68ki_exception_cycle_table[3];
+			CYC_BCC_NOTAKE_B = -2;
+			CYC_BCC_NOTAKE_W = 0;
+			CYC_DBCC_F_NOEXP = 0;
+			CYC_DBCC_F_EXP   = 4;
+			CYC_SCC_R_TRUE   = 0;
+			CYC_MOVEM_W      = 2;
+			CYC_MOVEM_L      = 2;
+			CYC_SHIFT        = 0;
+			CYC_RESET        = 518;
+			HAS_PMMU	       = 1;
+			return;
+		case M68K_CPU_TYPE_68EC030:
+			CPU_TYPE         = CPU_TYPE_EC030;
+			CPU_ADDRESS_MASK = 0xffffffff;
+			CPU_SR_MASK          = 0xf71f; /* T1 T0 S  M  -- I2 I1 I0 -- -- -- X  N  Z  V  C  */
+			CYC_INSTRUCTION  = m68ki_cycles[3];
+			CYC_EXCEPTION    = m68ki_exception_cycle_table[3];
+			CYC_BCC_NOTAKE_B = -2;
+			CYC_BCC_NOTAKE_W = 0;
+			CYC_DBCC_F_NOEXP = 0;
+			CYC_DBCC_F_EXP   = 4;
+			CYC_SCC_R_TRUE   = 0;
+			CYC_MOVEM_W      = 2;
+			CYC_MOVEM_L      = 2;
+			CYC_SHIFT        = 0;
+			CYC_RESET        = 518;
+			HAS_PMMU	       = 0;		/* EC030 lacks the PMMU and is effectively a die-shrink 68020 */
+			return;
+		case M68K_CPU_TYPE_68040:		// TODO: these values are not correct
+			CPU_TYPE         = CPU_TYPE_040;
+			CPU_ADDRESS_MASK = 0xffffffff;
+			CPU_SR_MASK      = 0xf71f; /* T1 T0 S  M  -- I2 I1 I0 -- -- -- X  N  Z  V  C  */
+			CYC_INSTRUCTION  = m68ki_cycles[4];
+			CYC_EXCEPTION    = m68ki_exception_cycle_table[4];
+			CYC_BCC_NOTAKE_B = -2;
+			CYC_BCC_NOTAKE_W = 0;
+			CYC_DBCC_F_NOEXP = 0;
+			CYC_DBCC_F_EXP   = 4;
+			CYC_SCC_R_TRUE   = 0;
+			CYC_MOVEM_W      = 2;
+			CYC_MOVEM_L      = 2;
+			CYC_SHIFT        = 0;
+			CYC_RESET        = 518;
+			HAS_PMMU	 = 1;
+			return;
+		case M68K_CPU_TYPE_68EC040: // Just a 68040 without pmmu apparently...
+			CPU_TYPE         = CPU_TYPE_EC040;
+			CPU_ADDRESS_MASK = 0xffffffff;
+			CPU_SR_MASK      = 0xf71f; /* T1 T0 S  M  -- I2 I1 I0 -- -- -- X  N  Z  V  C  */
+			CYC_INSTRUCTION  = m68ki_cycles[4];
+			CYC_EXCEPTION    = m68ki_exception_cycle_table[4];
+			CYC_BCC_NOTAKE_B = -2;
+			CYC_BCC_NOTAKE_W = 0;
+			CYC_DBCC_F_NOEXP = 0;
+			CYC_DBCC_F_EXP   = 4;
+			CYC_SCC_R_TRUE   = 0;
+			CYC_MOVEM_W      = 2;
+			CYC_MOVEM_L      = 2;
+			CYC_SHIFT        = 0;
+			CYC_RESET        = 518;
+			HAS_PMMU	 = 0;
+			return;
+		case M68K_CPU_TYPE_68LC040:
+			CPU_TYPE         = CPU_TYPE_LC040;
+			m68ki_cpu.sr_mask          = 0xf71f; /* T1 T0 S  M  -- I2 I1 I0 -- -- -- X  N  Z  V  C  */
+			m68ki_cpu.cyc_instruction  = m68ki_cycles[4];
+			m68ki_cpu.cyc_exception    = m68ki_exception_cycle_table[4];
+			m68ki_cpu.cyc_bcc_notake_b = -2;
+			m68ki_cpu.cyc_bcc_notake_w = 0;
+			m68ki_cpu.cyc_dbcc_f_noexp = 0;
+			m68ki_cpu.cyc_dbcc_f_exp   = 4;
+			m68ki_cpu.cyc_scc_r_true   = 0;
+			m68ki_cpu.cyc_movem_w      = 2;
+			m68ki_cpu.cyc_movem_l      = 2;
+			m68ki_cpu.cyc_shift        = 0;
+			m68ki_cpu.cyc_reset        = 518;
+			HAS_PMMU	       = 1;
 			return;
 	}
 }
@@ -638,23 +939,34 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 /* ASG: removed per-instruction interrupt checks */
 int m68k_execute(int num_cycles)
 {
+	/* eat up any reset cycles */
+	if (RESET_CYCLES) {
+	    int rc = RESET_CYCLES;
+	    RESET_CYCLES = 0;
+	    num_cycles -= rc;
+	    if (num_cycles <= 0)
+		return rc;
+	}
+
+	/* Set our pool of clock cycles available */
+	SET_CYCLES(num_cycles);
+	m68ki_initial_cycles = num_cycles;
+
+	/* See if interrupts came in */
+	m68ki_check_interrupts();
+
 	/* Make sure we're not stopped */
 	if(!CPU_STOPPED)
 	{
-		/* Set our pool of clock cycles available */
-		SET_CYCLES(num_cycles);
-		m68ki_initial_cycles = num_cycles;
-
-		/* ASG: update cycles */
-		USE_CYCLES(CPU_INT_CYCLES);
-		CPU_INT_CYCLES = 0;
-
 		/* Return point if we had an address error */
 		m68ki_set_address_error_trap(); /* auto-disable (see m68kcpu.h) */
 
+		m68ki_check_bus_error_trap();
+
 		/* Main loop.  Keep going until we run out of clock cycles */
 		do
 		{
+			int i;
 			/* Set tracing accodring to T1. (T0 is done inside instruction) */
 			m68ki_trace_t1(); /* auto-disable (see m68kcpu.h) */
 
@@ -662,11 +974,16 @@ int m68k_execute(int num_cycles)
 			m68ki_use_data_space(); /* auto-disable (see m68kcpu.h) */
 
 			/* Call external hook to peek at CPU */
-			m68ki_instr_hook(); /* auto-disable (see m68kcpu.h) */
+			m68ki_instr_hook(REG_PC); /* auto-disable (see m68kcpu.h) */
 
 			/* Record previous program counter */
 			REG_PPC = REG_PC;
 
+			/* Record previous D/A register state (in case of bus error) */
+			for (i = 15; i >= 0; i--){
+				REG_DA_SAVE[i] = REG_DA[i];
+			}
+
 			/* Read an instruction and call its handler */
 			REG_IR = m68ki_read_imm_16();
 			m68ki_instruction_jump_table[REG_IR]();
@@ -678,20 +995,12 @@ int m68k_execute(int num_cycles)
 
 		/* set previous PC to current PC for the next entry into the loop */
 		REG_PPC = REG_PC;
-
-		/* ASG: update cycles */
-		USE_CYCLES(CPU_INT_CYCLES);
-		CPU_INT_CYCLES = 0;
-
-		/* return how many clocks we used */
-		return m68ki_initial_cycles - GET_CYCLES();
 	}
+	else
+		SET_CYCLES(0);
 
-	/* We get here if the CPU is stopped or halted */
-	SET_CYCLES(0);
-	CPU_INT_CYCLES = 0;
-
-	return num_cycles;
+	/* return how many clocks we used */
+	return m68ki_initial_cycles - GET_CYCLES();
 }
 
 
@@ -732,9 +1041,29 @@ void m68k_set_irq(unsigned int int_level)
 	/* A transition from < 7 to 7 always interrupts (NMI) */
 	/* Note: Level 7 can also level trigger like a normal IRQ */
 	if(old_level != 0x0700 && CPU_INT_LEVEL == 0x0700)
-		m68ki_exception_interrupt(7); /* Edge triggered level 7 (NMI) */
+		m68ki_cpu.nmi_pending = TRUE;
+}
+
+void m68k_set_virq(unsigned int level, unsigned int active)
+{
+	uint state = m68ki_cpu.virq_state;
+	uint blevel;
+
+	if(active)
+		state |= 1 << level;
 	else
-		m68ki_check_interrupts(); /* Level triggered (IRQ) */
+		state &= ~(1 << level);
+	m68ki_cpu.virq_state = state;
+
+	for(blevel = 7; blevel > 0; blevel--)
+		if(state & (1 << blevel))
+			break;
+	m68k_set_irq(blevel);
+}
+
+unsigned int m68k_get_virq(unsigned int level)
+{
+	return (m68ki_cpu.virq_state & (1 << level)) ? 1 : 0;
 }
 
 void m68k_init(void)
@@ -751,14 +1080,27 @@ void m68k_init(void)
 	m68k_set_int_ack_callback(NULL);
 	m68k_set_bkpt_ack_callback(NULL);
 	m68k_set_reset_instr_callback(NULL);
+	m68k_set_cmpild_instr_callback(NULL);
+	m68k_set_rte_instr_callback(NULL);
+	m68k_set_tas_instr_callback(NULL);
+	m68k_set_illg_instr_callback(NULL);
 	m68k_set_pc_changed_callback(NULL);
 	m68k_set_fc_callback(NULL);
 	m68k_set_instr_hook_callback(NULL);
 }
 
+/* Trigger a Bus Error exception */
+void m68k_pulse_bus_error(void)
+{
+	m68ki_exception_bus_error();
+}
+
 /* Pulse the RESET line on the CPU */
 void m68k_pulse_reset(void)
 {
+	/* Disable the PMMU on reset */
+	m68ki_cpu.pmmu_enabled = 0;
+
 	/* Clear all stop levels and eat up all remaining cycles */
 	CPU_STOPPED = 0;
 	SET_CYCLES(0);
@@ -771,6 +1113,8 @@ void m68k_pulse_reset(void)
 	m68ki_clear_trace();
 	/* Interrupt mask to level 7 */
 	FLAG_INT_MASK = 0x0700;
+	CPU_INT_LEVEL = 0;
+	m68ki_cpu.virq_state = 0;
 	/* Reset VBR */
 	REG_VBR = 0;
 	/* Go to supervisor mode */
@@ -789,6 +1133,8 @@ void m68k_pulse_reset(void)
 	m68ki_jump(REG_PC);
 
 	CPU_RUN_MODE = RUN_MODE_NORMAL;
+
+	RESET_CYCLES = CYC_EXCEPTION[EXCEPTION_RESET];
 }
 
 /* Pulse the HALT line on the CPU */
@@ -797,7 +1143,6 @@ void m68k_pulse_halt(void)
 	CPU_STOPPED |= STOP_LEVEL_HALT;
 }
 
-
 /* Get and set the current CPU context */
 /* This is to allow for multiple CPUs */
 unsigned int m68k_context_size()
@@ -816,20 +1161,16 @@ void m68k_set_context(void* src)
 	if(src) m68ki_cpu = *(m68ki_cpu_core*)src;
 }
 
-
-
 /* ======================================================================== */
 /* ============================== MAME STUFF ============================== */
 /* ======================================================================== */
 
 #if M68K_COMPILE_FOR_MAME == OPT_ON
 
-#include "state.h"
-
 static struct {
 	UINT16 sr;
-	int stopped;
-	int halted;
+	UINT8 stopped;
+	UINT8 halted;
 } m68k_substate;
 
 static void m68k_prepare_substate(void)
@@ -847,29 +1188,26 @@ static void m68k_post_load(void)
 	m68ki_jump(REG_PC);
 }
 
-void m68k_state_register(const char *type)
+void m68k_state_register(const char *type, int index)
 {
-	int cpu = cpu_getactivecpu();
-
-	state_save_register_UINT32(type, cpu, "D"         , REG_D, 8);
-	state_save_register_UINT32(type, cpu, "A"         , REG_A, 8);
-	state_save_register_UINT32(type, cpu, "PPC"       , &REG_PPC, 1);
-	state_save_register_UINT32(type, cpu, "PC"        , &REG_PC, 1);
-	state_save_register_UINT32(type, cpu, "USP"       , &REG_USP, 1);
-	state_save_register_UINT32(type, cpu, "ISP"       , &REG_ISP, 1);
-	state_save_register_UINT32(type, cpu, "MSP"       , &REG_MSP, 1);
-	state_save_register_UINT32(type, cpu, "VBR"       , &REG_VBR, 1);
-	state_save_register_UINT32(type, cpu, "SFC"       , &REG_SFC, 1);
-	state_save_register_UINT32(type, cpu, "DFC"       , &REG_DFC, 1);
-	state_save_register_UINT32(type, cpu, "CACR"      , &REG_CACR, 1);
-	state_save_register_UINT32(type, cpu, "CAAR"      , &REG_CAAR, 1);
-	state_save_register_UINT16(type, cpu, "SR"        , &m68k_substate.sr, 1);
-	state_save_register_UINT32(type, cpu, "INT_LEVEL" , &CPU_INT_LEVEL, 1);
-	state_save_register_UINT32(type, cpu, "INT_CYCLES", &CPU_INT_CYCLES, 1);
-	state_save_register_int   (type, cpu, "STOPPED"   , &m68k_substate.stopped);
-	state_save_register_int   (type, cpu, "HALTED"    , &m68k_substate.halted);
-	state_save_register_UINT32(type, cpu, "PREF_ADDR" , &CPU_PREF_ADDR, 1);
-	state_save_register_UINT32(type, cpu, "PREF_DATA" , &CPU_PREF_DATA, 1);
+	/* Note, D covers A because the dar array is common, REG_A=REG_D+8 */
+	state_save_register_item_array(type, index, REG_D);
+	state_save_register_item(type, index, REG_PPC);
+	state_save_register_item(type, index, REG_PC);
+	state_save_register_item(type, index, REG_USP);
+	state_save_register_item(type, index, REG_ISP);
+	state_save_register_item(type, index, REG_MSP);
+	state_save_register_item(type, index, REG_VBR);
+	state_save_register_item(type, index, REG_SFC);
+	state_save_register_item(type, index, REG_DFC);
+	state_save_register_item(type, index, REG_CACR);
+	state_save_register_item(type, index, REG_CAAR);
+	state_save_register_item(type, index, m68k_substate.sr);
+	state_save_register_item(type, index, CPU_INT_LEVEL);
+	state_save_register_item(type, index, m68k_substate.stopped);
+	state_save_register_item(type, index, m68k_substate.halted);
+	state_save_register_item(type, index, CPU_PREF_ADDR);
+	state_save_register_item(type, index, CPU_PREF_DATA);
 	state_save_register_func_presave(m68k_prepare_substate);
 	state_save_register_func_postload(m68k_post_load);
 }
diff --git a/plat/linux68k/emu/musashi/m68kcpu.h b/plat/linux68k/emu/musashi/m68kcpu.h
index d4e92cc9d..63afc7c48 100755
--- a/plat/linux68k/emu/musashi/m68kcpu.h
+++ b/plat/linux68k/emu/musashi/m68kcpu.h
@@ -3,10 +3,10 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 4.5
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -33,12 +33,15 @@
 #ifndef M68KCPU__HEADER
 #define M68KCPU__HEADER
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #include "m68k.h"
+
 #include <limits.h>
 
-#if M68K_EMULATE_ADDRESS_ERROR
 #include <setjmp.h>
-#endif /* M68K_EMULATE_ADDRESS_ERROR */
 
 /* ======================================================================== */
 /* ==================== ARCHITECTURE-DEPENDANT DEFINES ==================== */
@@ -63,26 +66,37 @@
 #undef sint
 #undef uint
 
-#define sint8  signed   char			/* ASG: changed from char to signed char */
-#define sint16 signed   short
-#define sint32 signed   long
-#define uint8  unsigned char
-#define uint16 unsigned short
-#define uint32 unsigned long
+typedef signed   char  sint8;  		/* ASG: changed from char to signed char */
+typedef signed   short sint16;
+typedef signed   int   sint32; 		/* AWJ: changed from long to int */
+typedef unsigned char  uint8;
+typedef unsigned short uint16;
+typedef unsigned int   uint32; 			/* AWJ: changed from long to int */
 
 /* signed and unsigned int must be at least 32 bits wide */
-#define sint   signed   int
-#define uint   unsigned int
+typedef signed   int sint;
+typedef unsigned int uint;
 
 
 #if M68K_USE_64_BIT
-#define sint64 signed   long long
-#define uint64 unsigned long long
+typedef signed   long long sint64;
+typedef unsigned long long uint64;
 #else
-#define sint64 sint32
-#define uint64 uint32
+typedef sint32 sint64;
+typedef uint32 uint64;
 #endif /* M68K_USE_64_BIT */
 
+/* U64 and S64 are used to wrap long integer constants. */
+#ifdef __GNUC__
+#define U64(val) val##ULL
+#define S64(val) val##LL
+#else
+#define U64(val) val
+#define S64(val) val
+#endif
+
+#include "softfloat/milieu.h"
+#include "softfloat/softfloat.h"
 
 
 /* Allow for architectures that don't have 8-bit sizes */
@@ -93,7 +107,7 @@
 	#define sint8  signed   int
 	#undef  uint8
 	#define uint8  unsigned int
-	INLINE sint MAKE_INT_8(uint value)
+	static inline sint MAKE_INT_8(uint value)
 	{
 		return (value & 0x80) ? value | ~0xff : value & 0xff;
 	}
@@ -108,7 +122,7 @@
 	#define sint16 signed   int
 	#undef  uint16
 	#define uint16 unsigned int
-	INLINE sint MAKE_INT_16(uint value)
+	static inline sint MAKE_INT_16(uint value)
 	{
 		return (value & 0x8000) ? value | ~0xffff : value & 0xffff;
 	}
@@ -116,18 +130,18 @@
 
 
 /* Allow for architectures that don't have 32-bit sizes */
-#if ULONG_MAX == 0xffffffff
+#if UINT_MAX == 0xffffffff
 	#define MAKE_INT_32(A) (sint32)(A)
 #else
 	#undef  sint32
 	#define sint32  signed   int
 	#undef  uint32
 	#define uint32  unsigned int
-	INLINE sint MAKE_INT_32(uint value)
+	static inline sint MAKE_INT_32(uint value)
 	{
 		return (value & 0x80000000) ? value | ~0xffffffff : value & 0xffffffff;
 	}
-#endif /* ULONG_MAX == 0xffffffff */
+#endif /* UINT_MAX == 0xffffffff */
 
 
 
@@ -137,6 +151,7 @@
 /* ======================================================================== */
 
 /* Exception Vectors handled by emulation */
+#define EXCEPTION_RESET                    0
 #define EXCEPTION_BUS_ERROR                2 /* This one is not emulated! */
 #define EXCEPTION_ADDRESS_ERROR            3 /* This one is partially emulated (doesn't stack a proper frame yet) */
 #define EXCEPTION_ILLEGAL_INSTRUCTION      4
@@ -161,10 +176,17 @@
 #define FUNCTION_CODE_CPU_SPACE          7
 
 /* CPU types for deciding what to emulate */
-#define CPU_TYPE_000   1
-#define CPU_TYPE_010   2
-#define CPU_TYPE_EC020 4
-#define CPU_TYPE_020   8
+#define CPU_TYPE_000	(0x00000001)
+#define CPU_TYPE_008    (0x00000002)
+#define CPU_TYPE_010    (0x00000004)
+#define CPU_TYPE_EC020  (0x00000008)
+#define CPU_TYPE_020    (0x00000010)
+#define CPU_TYPE_EC030  (0x00000020)
+#define CPU_TYPE_030    (0x00000040)
+#define CPU_TYPE_EC040  (0x00000080)
+#define CPU_TYPE_LC040  (0x00000100)
+#define CPU_TYPE_040    (0x00000200)
+#define CPU_TYPE_SCC070 (0x00000400)
 
 /* Different ways to stop the CPU */
 #define STOP_LEVEL_STOP 1
@@ -305,6 +327,7 @@
 #define CPU_TYPE         m68ki_cpu.cpu_type
 
 #define REG_DA           m68ki_cpu.dar /* easy access to data and address regs */
+#define REG_DA_SAVE           m68ki_cpu.dar_save
 #define REG_D            m68ki_cpu.dar
 #define REG_A            (m68ki_cpu.dar+8)
 #define REG_PPC 		 m68ki_cpu.ppc
@@ -321,6 +344,11 @@
 #define REG_CAAR         m68ki_cpu.caar
 #define REG_IR           m68ki_cpu.ir
 
+#define REG_FP           m68ki_cpu.fpr
+#define REG_FPCR         m68ki_cpu.fpcr
+#define REG_FPSR         m68ki_cpu.fpsr
+#define REG_FPIAR        m68ki_cpu.fpiar
+
 #define FLAG_T1          m68ki_cpu.t1_flag
 #define FLAG_T0          m68ki_cpu.t0_flag
 #define FLAG_S           m68ki_cpu.s_flag
@@ -333,7 +361,6 @@
 #define FLAG_INT_MASK    m68ki_cpu.int_mask
 
 #define CPU_INT_LEVEL    m68ki_cpu.int_level /* ASG: changed from CPU_INTS_PENDING */
-#define CPU_INT_CYCLES   m68ki_cpu.int_cycles /* ASG */
 #define CPU_STOPPED      m68ki_cpu.stopped
 #define CPU_PREF_ADDR    m68ki_cpu.pref_addr
 #define CPU_PREF_DATA    m68ki_cpu.pref_data
@@ -353,11 +380,18 @@
 #define CYC_MOVEM_L      m68ki_cpu.cyc_movem_l
 #define CYC_SHIFT        m68ki_cpu.cyc_shift
 #define CYC_RESET        m68ki_cpu.cyc_reset
+#define HAS_PMMU	 m68ki_cpu.has_pmmu
+#define PMMU_ENABLED	 m68ki_cpu.pmmu_enabled
+#define RESET_CYCLES	 m68ki_cpu.reset_cycles
 
 
 #define CALLBACK_INT_ACK     m68ki_cpu.int_ack_callback
 #define CALLBACK_BKPT_ACK    m68ki_cpu.bkpt_ack_callback
 #define CALLBACK_RESET_INSTR m68ki_cpu.reset_instr_callback
+#define CALLBACK_CMPILD_INSTR m68ki_cpu.cmpild_instr_callback
+#define CALLBACK_RTE_INSTR    m68ki_cpu.rte_instr_callback
+#define CALLBACK_TAS_INSTR    m68ki_cpu.tas_instr_callback
+#define CALLBACK_ILLG_INSTR    m68ki_cpu.illg_instr_callback
 #define CALLBACK_PC_CHANGED  m68ki_cpu.pc_changed_callback
 #define CALLBACK_SET_FC      m68ki_cpu.set_fc_callback
 #define CALLBACK_INSTR_HOOK  m68ki_cpu.instr_hook_callback
@@ -369,8 +403,24 @@
 /* These defines are dependant on the configuration defines in m68kconf.h */
 
 /* Disable certain comparisons if we're not using all CPU types */
+#if M68K_EMULATE_040
+#define CPU_TYPE_IS_040_PLUS(A)    ((A) & (CPU_TYPE_040 | CPU_TYPE_EC040))
+	#define CPU_TYPE_IS_040_LESS(A)    1
+#else
+	#define CPU_TYPE_IS_040_PLUS(A)    0
+	#define CPU_TYPE_IS_040_LESS(A)    1
+#endif
+
+#if M68K_EMULATE_030
+#define CPU_TYPE_IS_030_PLUS(A)    ((A) & (CPU_TYPE_030 | CPU_TYPE_EC030 | CPU_TYPE_040 | CPU_TYPE_EC040))
+#define CPU_TYPE_IS_030_LESS(A)    1
+#else
+#define CPU_TYPE_IS_030_PLUS(A)	0
+#define CPU_TYPE_IS_030_LESS(A)    1
+#endif
+
 #if M68K_EMULATE_020
-	#define CPU_TYPE_IS_020_PLUS(A)    ((A) & CPU_TYPE_020)
+#define CPU_TYPE_IS_020_PLUS(A)    ((A) & (CPU_TYPE_020 | CPU_TYPE_030 | CPU_TYPE_EC030 | CPU_TYPE_040 | CPU_TYPE_EC040))
 	#define CPU_TYPE_IS_020_LESS(A)    1
 #else
 	#define CPU_TYPE_IS_020_PLUS(A)    0
@@ -378,7 +428,7 @@
 #endif
 
 #if M68K_EMULATE_EC020
-	#define CPU_TYPE_IS_EC020_PLUS(A)  ((A) & (CPU_TYPE_EC020 | CPU_TYPE_020))
+#define CPU_TYPE_IS_EC020_PLUS(A)  ((A) & (CPU_TYPE_EC020 | CPU_TYPE_020 | CPU_TYPE_030 | CPU_TYPE_EC030 | CPU_TYPE_040 | CPU_TYPE_EC040))
 	#define CPU_TYPE_IS_EC020_LESS(A)  ((A) & (CPU_TYPE_000 | CPU_TYPE_010 | CPU_TYPE_EC020))
 #else
 	#define CPU_TYPE_IS_EC020_PLUS(A)  CPU_TYPE_IS_020_PLUS(A)
@@ -387,8 +437,8 @@
 
 #if M68K_EMULATE_010
 	#define CPU_TYPE_IS_010(A)         ((A) == CPU_TYPE_010)
-	#define CPU_TYPE_IS_010_PLUS(A)    ((A) & (CPU_TYPE_010 | CPU_TYPE_EC020 | CPU_TYPE_020))
-	#define CPU_TYPE_IS_010_LESS(A)    ((A) & (CPU_TYPE_000 | CPU_TYPE_010))
+#define CPU_TYPE_IS_010_PLUS(A)    ((A) & (CPU_TYPE_010 | CPU_TYPE_EC020 | CPU_TYPE_020 | CPU_TYPE_EC030 | CPU_TYPE_030 | CPU_TYPE_040 | CPU_TYPE_EC040))
+#define CPU_TYPE_IS_010_LESS(A)    ((A) & (CPU_TYPE_000 | CPU_TYPE_008 | CPU_TYPE_010))
 #else
 	#define CPU_TYPE_IS_010(A)         0
 	#define CPU_TYPE_IS_010_PLUS(A)    CPU_TYPE_IS_EC020_PLUS(A)
@@ -401,7 +451,7 @@
 	#define CPU_TYPE_IS_020_VARIANT(A) 0
 #endif
 
-#if M68K_EMULATE_020 || M68K_EMULATE_EC020 || M68K_EMULATE_010
+#if M68K_EMULATE_040 || M68K_EMULATE_020 || M68K_EMULATE_EC020 || M68K_EMULATE_010
 	#define CPU_TYPE_IS_000(A)         ((A) == CPU_TYPE_000)
 #else
 	#define CPU_TYPE_IS_000(A)         1
@@ -450,14 +500,54 @@
 	#define m68ki_output_reset()
 #endif /* M68K_EMULATE_RESET */
 
-#if M68K_INSTRUCTION_HOOK
-	#if M68K_INSTRUCTION_HOOK == OPT_SPECIFY_HANDLER
-		#define m68ki_instr_hook() M68K_INSTRUCTION_CALLBACK()
+#if M68K_CMPILD_HAS_CALLBACK
+	#if M68K_CMPILD_HAS_CALLBACK == OPT_SPECIFY_HANDLER
+		#define m68ki_cmpild_callback(v,r) M68K_CMPILD_CALLBACK(v,r)
 	#else
-		#define m68ki_instr_hook() CALLBACK_INSTR_HOOK()
+		#define m68ki_cmpild_callback(v,r) CALLBACK_CMPILD_INSTR(v,r)
 	#endif
 #else
-	#define m68ki_instr_hook()
+	#define m68ki_cmpild_callback(v,r)
+#endif /* M68K_CMPILD_HAS_CALLBACK */
+
+#if M68K_RTE_HAS_CALLBACK
+	#if M68K_RTE_HAS_CALLBACK == OPT_SPECIFY_HANDLER
+		#define m68ki_rte_callback() M68K_RTE_CALLBACK()
+	#else
+		#define m68ki_rte_callback() CALLBACK_RTE_INSTR()
+	#endif
+#else
+	#define m68ki_rte_callback()
+#endif /* M68K_RTE_HAS_CALLBACK */
+
+#if M68K_TAS_HAS_CALLBACK
+	#if M68K_TAS_HAS_CALLBACK == OPT_SPECIFY_HANDLER
+		#define m68ki_tas_callback() M68K_TAS_CALLBACK()
+	#else
+		#define m68ki_tas_callback() CALLBACK_TAS_INSTR()
+	#endif
+#else
+	#define m68ki_tas_callback() 1
+#endif /* M68K_TAS_HAS_CALLBACK */
+
+#if M68K_ILLG_HAS_CALLBACK
+	#if M68K_ILLG_HAS_CALLBACK == OPT_SPECIFY_HANDLER
+		#define m68ki_illg_callback(opcode) M68K_ILLG_CALLBACK(opcode)
+	#else
+		#define m68ki_illg_callback(opcode) CALLBACK_ILLG_INSTR(opcode)
+	#endif
+#else
+	#define m68ki_illg_callback(opcode) 0 // Default is 0 = not handled, exception will occur
+#endif /* M68K_ILLG_HAS_CALLBACK */
+
+#if M68K_INSTRUCTION_HOOK
+	#if M68K_INSTRUCTION_HOOK == OPT_SPECIFY_HANDLER
+		#define m68ki_instr_hook(pc) M68K_INSTRUCTION_CALLBACK(pc)
+	#else
+		#define m68ki_instr_hook(pc) CALLBACK_INSTR_HOOK(pc)
+	#endif
+#else
+	#define m68ki_instr_hook(pc)
 #endif /* M68K_INSTRUCTION_HOOK */
 
 #if M68K_MONITOR_PC
@@ -511,8 +601,32 @@
 /* Address error */
 #if M68K_EMULATE_ADDRESS_ERROR
 	#include <setjmp.h>
-	extern jmp_buf m68ki_aerr_trap;
 
+/* sigjmp() on Mac OS X and *BSD in general saves signal contexts and is super-slow, use sigsetjmp() to tell it not to */
+#ifdef _BSD_SETJMP_H
+extern sigjmp_buf m68ki_aerr_trap;
+#define m68ki_set_address_error_trap(m68k) \
+	if(sigsetjmp(m68ki_aerr_trap, 0) != 0) \
+	{ \
+		m68ki_exception_address_error(m68k); \
+		if(m68ki_stopped) \
+		{ \
+			if (m68ki_remaining_cycles > 0) \
+				m68ki_remaining_cycles = 0; \
+			return m68ki_initial_cycles; \
+		} \
+	}
+
+#define m68ki_check_address_error(ADDR, WRITE_MODE, FC) \
+	if((ADDR)&1) \
+	{ \
+		m68ki_aerr_address = ADDR; \
+		m68ki_aerr_write_mode = WRITE_MODE; \
+		m68ki_aerr_fc = FC; \
+		siglongjmp(m68ki_aerr_trap, 1); \
+	}
+#else
+extern jmp_buf m68ki_aerr_trap;
 	#define m68ki_set_address_error_trap() \
 		if(setjmp(m68ki_aerr_trap) != 0) \
 		{ \
@@ -520,7 +634,6 @@
 			if(CPU_STOPPED) \
 			{ \
 				SET_CYCLES(0); \
-				CPU_INT_CYCLES = 0; \
 				return m68ki_initial_cycles; \
 			} \
 			/* ensure we don't re-enter execution loop after an
@@ -540,16 +653,24 @@
 			m68ki_aerr_fc = FC; \
 			longjmp(m68ki_aerr_trap, 1); \
 		}
+#endif
+
+	#define m68ki_check_address_error_010_less(ADDR, WRITE_MODE, FC) \
+		if (CPU_TYPE_IS_010_LESS(CPU_TYPE)) \
+		{ \
+			m68ki_check_address_error(ADDR, WRITE_MODE, FC) \
+		}
 #else
 	#define m68ki_set_address_error_trap()
 	#define m68ki_check_address_error(ADDR, WRITE_MODE, FC)
+	#define m68ki_check_address_error_010_less(ADDR, WRITE_MODE, FC)
 #endif /* M68K_ADDRESS_ERROR */
 
 /* Logging */
 #if M68K_LOG_ENABLE
 	#include <stdio.h>
 	extern FILE* M68K_LOG_FILEHANDLE
-	extern char* m68ki_cpu_names[];
+	extern const char *const m68ki_cpu_names[];
 
 	#define M68K_DO_LOG(A) if(M68K_LOG_FILEHANDLE) fprintf A
 	#if M68K_LOG_1010_1111
@@ -774,9 +895,6 @@
 #define m68ki_write_32_pd(A, V) m68ki_write_32_fc(A, FLAG_S | FUNCTION_CODE_USER_DATA, V)
 #endif
 
-/* map read immediate 8 to read immediate 16 */
-#define m68ki_read_imm_8() MASK_OUT_ABOVE_8(m68ki_read_imm_16())
-
 /* Map PC-relative reads */
 #define m68ki_read_pcrel_8(A) m68k_read_pcrelative_8(A)
 #define m68ki_read_pcrel_16(A) m68k_read_pcrelative_16(A)
@@ -798,10 +916,18 @@
 /* =============================== PROTOTYPES ============================= */
 /* ======================================================================== */
 
+typedef union
+{
+	uint64 i;
+	double f;
+} fp_reg;
+
 typedef struct
 {
-	uint cpu_type;     /* CPU Type: 68000, 68010, 68EC020, or 68020 */
+	uint cpu_type;     /* CPU Type: 68000, 68008, 68010, 68EC020, 68020, 68EC030, 68030, 68EC040, or 68040 */
 	uint dar[16];      /* Data and Address Registers */
+	uint dar_save[16];  /* Saved Data and Address Registers (pushed onto the
+						   stack when a bus error occurs)*/
 	uint ppc;		   /* Previous program counter */
 	uint pc;           /* Program Counter */
 	uint sp[7];        /* User, Interrupt, and Master Stack Pointers */
@@ -811,6 +937,10 @@ typedef struct
 	uint cacr;         /* Cache Control Register (m68020, unemulated) */
 	uint caar;         /* Cache Address Register (m68020, unemulated) */
 	uint ir;           /* Instruction Register */
+	floatx80 fpr[8];     /* FPU Data Register (m68030/040) */
+	uint fpiar;        /* FPU Instruction Address Register (m68040) */
+	uint fpsr;         /* FPU Status Register (m68040) */
+	uint fpcr;         /* FPU Control Register (m68040) */
 	uint t1_flag;      /* Trace 1 */
 	uint t0_flag;      /* Trace 0 */
 	uint s_flag;       /* Supervisor */
@@ -822,7 +952,6 @@ typedef struct
 	uint c_flag;       /* Carry */
 	uint int_mask;     /* I0-I2 */
 	uint int_level;    /* State of interrupt pins IPL0-IPL2 -- ASG: changed from ints_pending */
-	uint int_cycles;   /* ASG: extra cycles from generated interrupts */
 	uint stopped;      /* Stopped state */
 	uint pref_addr;    /* Last prefetch address */
 	uint pref_data;    /* Data in the prefetch queue */
@@ -830,6 +959,10 @@ typedef struct
 	uint sr_mask;      /* Implemented status register bits */
 	uint instr_mode;   /* Stores whether we are in instruction mode or group 0/1 exception mode */
 	uint run_mode;     /* Stores whether we are processing a reset, bus error, address error, or something else */
+	int    has_pmmu;     /* Indicates if a PMMU available (yes on 030, 040, no on EC030) */
+	int    pmmu_enabled; /* Indicates if the PMMU is enabled */
+	int    fpu_just_reset; /* Indicates the FPU was just reset */
+	uint reset_cycles;
 
 	/* Clocks required for instructions / exceptions */
 	uint cyc_bcc_notake_b;
@@ -841,16 +974,31 @@ typedef struct
 	uint cyc_movem_l;
 	uint cyc_shift;
 	uint cyc_reset;
-	uint8* cyc_instruction;
-	uint8* cyc_exception;
+
+	/* Virtual IRQ lines state */
+	uint virq_state;
+	uint nmi_pending;
+
+	/* PMMU registers */
+	uint mmu_crp_aptr, mmu_crp_limit;
+	uint mmu_srp_aptr, mmu_srp_limit;
+	uint mmu_tc;
+	uint16 mmu_sr;
+
+	const uint8* cyc_instruction;
+	const uint8* cyc_exception;
 
 	/* Callbacks to host */
 	int  (*int_ack_callback)(int int_line);           /* Interrupt Acknowledge */
 	void (*bkpt_ack_callback)(unsigned int data);     /* Breakpoint Acknowledge */
 	void (*reset_instr_callback)(void);               /* Called when a RESET instruction is encountered */
+ 	void (*cmpild_instr_callback)(unsigned int, int); /* Called when a CMPI.L #v, Dn instruction is encountered */
+ 	void (*rte_instr_callback)(void);                 /* Called when a RTE instruction is encountered */
+	int  (*tas_instr_callback)(void);                 /* Called when a TAS instruction is encountered, allows / disallows writeback */
+	int  (*illg_instr_callback)(int);                 /* Called when an illegal instruction is encountered, allows handling */
 	void (*pc_changed_callback)(unsigned int new_pc); /* Called when the PC changes by a large amount */
 	void (*set_fc_callback)(unsigned int new_fc);     /* Called when the CPU function code changes */
-	void (*instr_hook_callback)(void);                /* Called every instruction cycle prior to execution */
+	void (*instr_hook_callback)(unsigned int pc);     /* Called every instruction cycle prior to execution */
 
 } m68ki_cpu_core;
 
@@ -858,132 +1006,22 @@ typedef struct
 extern m68ki_cpu_core m68ki_cpu;
 extern sint           m68ki_remaining_cycles;
 extern uint           m68ki_tracing;
-extern uint8          m68ki_shift_8_table[];
-extern uint16         m68ki_shift_16_table[];
-extern uint           m68ki_shift_32_table[];
-extern uint8          m68ki_exception_cycle_table[][256];
+extern const uint8    m68ki_shift_8_table[];
+extern const uint16   m68ki_shift_16_table[];
+extern const uint     m68ki_shift_32_table[];
+extern const uint8    m68ki_exception_cycle_table[][256];
 extern uint           m68ki_address_space;
-extern uint8          m68ki_ea_idx_cycle_table[];
+extern const uint8    m68ki_ea_idx_cycle_table[];
 
 extern uint           m68ki_aerr_address;
 extern uint           m68ki_aerr_write_mode;
 extern uint           m68ki_aerr_fc;
 
-/* Read data immediately after the program counter */
-INLINE uint m68ki_read_imm_16(void);
-INLINE uint m68ki_read_imm_32(void);
-
-/* Read data with specific function code */
-INLINE uint m68ki_read_8_fc  (uint address, uint fc);
-INLINE uint m68ki_read_16_fc (uint address, uint fc);
-INLINE uint m68ki_read_32_fc (uint address, uint fc);
-
-/* Write data with specific function code */
-INLINE void m68ki_write_8_fc (uint address, uint fc, uint value);
-INLINE void m68ki_write_16_fc(uint address, uint fc, uint value);
-INLINE void m68ki_write_32_fc(uint address, uint fc, uint value);
-#if M68K_SIMULATE_PD_WRITES
-INLINE void m68ki_write_32_pd_fc(uint address, uint fc, uint value);
-#endif /* M68K_SIMULATE_PD_WRITES */
-
-/* Indexed and PC-relative ea fetching */
-INLINE uint m68ki_get_ea_pcdi(void);
-INLINE uint m68ki_get_ea_pcix(void);
-INLINE uint m68ki_get_ea_ix(uint An);
-
-/* Operand fetching */
-INLINE uint OPER_AY_AI_8(void);
-INLINE uint OPER_AY_AI_16(void);
-INLINE uint OPER_AY_AI_32(void);
-INLINE uint OPER_AY_PI_8(void);
-INLINE uint OPER_AY_PI_16(void);
-INLINE uint OPER_AY_PI_32(void);
-INLINE uint OPER_AY_PD_8(void);
-INLINE uint OPER_AY_PD_16(void);
-INLINE uint OPER_AY_PD_32(void);
-INLINE uint OPER_AY_DI_8(void);
-INLINE uint OPER_AY_DI_16(void);
-INLINE uint OPER_AY_DI_32(void);
-INLINE uint OPER_AY_IX_8(void);
-INLINE uint OPER_AY_IX_16(void);
-INLINE uint OPER_AY_IX_32(void);
-
-INLINE uint OPER_AX_AI_8(void);
-INLINE uint OPER_AX_AI_16(void);
-INLINE uint OPER_AX_AI_32(void);
-INLINE uint OPER_AX_PI_8(void);
-INLINE uint OPER_AX_PI_16(void);
-INLINE uint OPER_AX_PI_32(void);
-INLINE uint OPER_AX_PD_8(void);
-INLINE uint OPER_AX_PD_16(void);
-INLINE uint OPER_AX_PD_32(void);
-INLINE uint OPER_AX_DI_8(void);
-INLINE uint OPER_AX_DI_16(void);
-INLINE uint OPER_AX_DI_32(void);
-INLINE uint OPER_AX_IX_8(void);
-INLINE uint OPER_AX_IX_16(void);
-INLINE uint OPER_AX_IX_32(void);
-
-INLINE uint OPER_A7_PI_8(void);
-INLINE uint OPER_A7_PD_8(void);
-
-INLINE uint OPER_AW_8(void);
-INLINE uint OPER_AW_16(void);
-INLINE uint OPER_AW_32(void);
-INLINE uint OPER_AL_8(void);
-INLINE uint OPER_AL_16(void);
-INLINE uint OPER_AL_32(void);
-INLINE uint OPER_PCDI_8(void);
-INLINE uint OPER_PCDI_16(void);
-INLINE uint OPER_PCDI_32(void);
-INLINE uint OPER_PCIX_8(void);
-INLINE uint OPER_PCIX_16(void);
-INLINE uint OPER_PCIX_32(void);
-
-/* Stack operations */
-INLINE void m68ki_push_16(uint value);
-INLINE void m68ki_push_32(uint value);
-INLINE uint m68ki_pull_16(void);
-INLINE uint m68ki_pull_32(void);
-
-/* Program flow operations */
-INLINE void m68ki_jump(uint new_pc);
-INLINE void m68ki_jump_vector(uint vector);
-INLINE void m68ki_branch_8(uint offset);
-INLINE void m68ki_branch_16(uint offset);
-INLINE void m68ki_branch_32(uint offset);
-
-/* Status register operations. */
-INLINE void m68ki_set_s_flag(uint value);            /* Only bit 2 of value should be set (i.e. 4 or 0) */
-INLINE void m68ki_set_sm_flag(uint value);           /* only bits 1 and 2 of value should be set */
-INLINE void m68ki_set_ccr(uint value);               /* set the condition code register */
-INLINE void m68ki_set_sr(uint value);                /* set the status register */
-INLINE void m68ki_set_sr_noint(uint value);          /* set the status register */
-
-/* Exception processing */
-INLINE uint m68ki_init_exception(void);              /* Initial exception processing */
-
-INLINE void m68ki_stack_frame_3word(uint pc, uint sr); /* Stack various frame types */
-INLINE void m68ki_stack_frame_buserr(uint sr);
-
-INLINE void m68ki_stack_frame_0000(uint pc, uint sr, uint vector);
-INLINE void m68ki_stack_frame_0001(uint pc, uint sr, uint vector);
-INLINE void m68ki_stack_frame_0010(uint sr, uint vector);
-INLINE void m68ki_stack_frame_1000(uint pc, uint sr, uint vector);
-INLINE void m68ki_stack_frame_1010(uint sr, uint vector, uint pc);
-INLINE void m68ki_stack_frame_1011(uint sr, uint vector, uint pc);
-
-INLINE void m68ki_exception_trap(uint vector);
-INLINE void m68ki_exception_trapN(uint vector);
-INLINE void m68ki_exception_trace(void);
-INLINE void m68ki_exception_privilege_violation(void);
-INLINE void m68ki_exception_1010(void);
-INLINE void m68ki_exception_1111(void);
-INLINE void m68ki_exception_illegal(void);
-INLINE void m68ki_exception_format_error(void);
-INLINE void m68ki_exception_address_error(void);
-INLINE void m68ki_exception_interrupt(uint int_level);
-INLINE void m68ki_check_interrupts(void);            /* ASG: check for interrupts */
+/* Forward declarations to keep some of the macros happy */
+static inline uint m68ki_read_16_fc (uint address, uint fc);
+static inline uint m68ki_read_32_fc (uint address, uint fc);
+static inline uint m68ki_get_ea_ix(uint An);
+static inline void m68ki_check_interrupts(void);            /* ASG: check for interrupts */
 
 /* quick disassembly (used for logging) */
 char* m68ki_disassemble_quick(unsigned int pc, unsigned int cpu_type);
@@ -996,59 +1034,93 @@ char* m68ki_disassemble_quick(unsigned int pc, unsigned int cpu_type);
 
 /* ---------------------------- Read Immediate ---------------------------- */
 
+extern uint pmmu_translate_addr(uint addr_in);
+
 /* Handles all immediate reads, does address error check, function code setting,
  * and prefetching if they are enabled in m68kconf.h
  */
-INLINE uint m68ki_read_imm_16(void)
+static inline uint m68ki_read_imm_16(void)
 {
 	m68ki_set_fc(FLAG_S | FUNCTION_CODE_USER_PROGRAM); /* auto-disable (see m68kcpu.h) */
 	m68ki_check_address_error(REG_PC, MODE_READ, FLAG_S | FUNCTION_CODE_USER_PROGRAM); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_SEPARATE_READS
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+#endif
+
 #if M68K_EMULATE_PREFETCH
-	if(MASK_OUT_BELOW_2(REG_PC) != CPU_PREF_ADDR)
+{
+	uint result;
+	if(REG_PC != CPU_PREF_ADDR)
 	{
-		CPU_PREF_ADDR = MASK_OUT_BELOW_2(REG_PC);
-		CPU_PREF_DATA = m68k_read_immediate_32(ADDRESS_68K(CPU_PREF_ADDR));
+		CPU_PREF_ADDR = REG_PC;
+		CPU_PREF_DATA = m68k_read_immediate_16(ADDRESS_68K(CPU_PREF_ADDR));
 	}
+	result = MASK_OUT_ABOVE_16(CPU_PREF_DATA);
 	REG_PC += 2;
-	return MASK_OUT_ABOVE_16(CPU_PREF_DATA >> ((2-((REG_PC-2)&2))<<3));
+	CPU_PREF_ADDR = REG_PC;
+	CPU_PREF_DATA = m68k_read_immediate_16(ADDRESS_68K(CPU_PREF_ADDR));
+	return result;
+}
 #else
 	REG_PC += 2;
 	return m68k_read_immediate_16(ADDRESS_68K(REG_PC-2));
 #endif /* M68K_EMULATE_PREFETCH */
 }
-INLINE uint m68ki_read_imm_32(void)
+
+static inline uint m68ki_read_imm_8(void)
 {
+	/* map read immediate 8 to read immediate 16 */
+	return MASK_OUT_ABOVE_8(m68ki_read_imm_16());
+}
+
+static inline uint m68ki_read_imm_32(void)
+{
+#if M68K_SEPARATE_READS
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+#endif
+
 #if M68K_EMULATE_PREFETCH
 	uint temp_val;
 
+        printf("(%s) now at line %d\n", __FILE__, __LINE__);
+
 	m68ki_set_fc(FLAG_S | FUNCTION_CODE_USER_PROGRAM); /* auto-disable (see m68kcpu.h) */
 	m68ki_check_address_error(REG_PC, MODE_READ, FLAG_S | FUNCTION_CODE_USER_PROGRAM); /* auto-disable (see m68kcpu.h) */
-	if(MASK_OUT_BELOW_2(REG_PC) != CPU_PREF_ADDR)
+
+        printf("(%s) now at line %d\n", __FILE__, __LINE__);
+
+        if(REG_PC != CPU_PREF_ADDR)
 	{
-		CPU_PREF_ADDR = MASK_OUT_BELOW_2(REG_PC);
-		CPU_PREF_DATA = m68k_read_immediate_32(ADDRESS_68K(CPU_PREF_ADDR));
+		CPU_PREF_ADDR = REG_PC;
+		CPU_PREF_DATA = m68k_read_immediate_16(ADDRESS_68K(CPU_PREF_ADDR));
 	}
-	temp_val = CPU_PREF_DATA;
+	temp_val = MASK_OUT_ABOVE_16(CPU_PREF_DATA);
 	REG_PC += 2;
-	if(MASK_OUT_BELOW_2(REG_PC) != CPU_PREF_ADDR)
-	{
-		CPU_PREF_ADDR = MASK_OUT_BELOW_2(REG_PC);
-		CPU_PREF_DATA = m68k_read_immediate_32(ADDRESS_68K(CPU_PREF_ADDR));
-		temp_val = MASK_OUT_ABOVE_32((temp_val << 16) | (CPU_PREF_DATA >> 16));
-	}
+	CPU_PREF_ADDR = REG_PC;
+	CPU_PREF_DATA = m68k_read_immediate_16(ADDRESS_68K(CPU_PREF_ADDR));
+
+	temp_val = MASK_OUT_ABOVE_32((temp_val << 16) | MASK_OUT_ABOVE_16(CPU_PREF_DATA));
 	REG_PC += 2;
+	CPU_PREF_ADDR = REG_PC;
+	CPU_PREF_DATA = m68k_read_immediate_16(ADDRESS_68K(CPU_PREF_ADDR));
 
 	return temp_val;
 #else
 	m68ki_set_fc(FLAG_S | FUNCTION_CODE_USER_PROGRAM); /* auto-disable (see m68kcpu.h) */
 	m68ki_check_address_error(REG_PC, MODE_READ, FLAG_S | FUNCTION_CODE_USER_PROGRAM); /* auto-disable (see m68kcpu.h) */
 	REG_PC += 4;
-	return m68k_read_immediate_32(ADDRESS_68K(REG_PC-4));
+        //printf("(%s) now at line %d\n", __FILE__, __LINE__);
+        return m68k_read_immediate_32(ADDRESS_68K(REG_PC-4));
 #endif /* M68K_EMULATE_PREFETCH */
 }
 
-
-
 /* ------------------------- Top level read/write ------------------------- */
 
 /* Handles all memory accesses (except for immediate reads if they are
@@ -1057,58 +1129,106 @@ INLINE uint m68ki_read_imm_32(void)
  * These functions will also check for address error and set the function
  * code if they are enabled in m68kconf.h.
  */
-INLINE uint m68ki_read_8_fc(uint address, uint fc)
+static inline uint m68ki_read_8_fc(uint address, uint fc)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	return m68k_read_memory_8(ADDRESS_68K(address));
 }
-INLINE uint m68ki_read_16_fc(uint address, uint fc)
+static inline uint m68ki_read_16_fc(uint address, uint fc)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
-	m68ki_check_address_error(address, MODE_READ, fc); /* auto-disable (see m68kcpu.h) */
+	m68ki_check_address_error_010_less(address, MODE_READ, fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	return m68k_read_memory_16(ADDRESS_68K(address));
 }
-INLINE uint m68ki_read_32_fc(uint address, uint fc)
+static inline uint m68ki_read_32_fc(uint address, uint fc)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
-	m68ki_check_address_error(address, MODE_READ, fc); /* auto-disable (see m68kcpu.h) */
+	m68ki_check_address_error_010_less(address, MODE_READ, fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	return m68k_read_memory_32(ADDRESS_68K(address));
 }
 
-INLINE void m68ki_write_8_fc(uint address, uint fc, uint value)
+static inline void m68ki_write_8_fc(uint address, uint fc, uint value)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	m68k_write_memory_8(ADDRESS_68K(address), value);
 }
-INLINE void m68ki_write_16_fc(uint address, uint fc, uint value)
+static inline void m68ki_write_16_fc(uint address, uint fc, uint value)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
-	m68ki_check_address_error(address, MODE_WRITE, fc); /* auto-disable (see m68kcpu.h) */
+	m68ki_check_address_error_010_less(address, MODE_WRITE, fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	m68k_write_memory_16(ADDRESS_68K(address), value);
 }
-INLINE void m68ki_write_32_fc(uint address, uint fc, uint value)
+static inline void m68ki_write_32_fc(uint address, uint fc, uint value)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
-	m68ki_check_address_error(address, MODE_WRITE, fc); /* auto-disable (see m68kcpu.h) */
+	m68ki_check_address_error_010_less(address, MODE_WRITE, fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	m68k_write_memory_32(ADDRESS_68K(address), value);
 }
 
 #if M68K_SIMULATE_PD_WRITES
-INLINE void m68ki_write_32_pd_fc(uint address, uint fc, uint value)
+static inline void m68ki_write_32_pd_fc(uint address, uint fc, uint value)
 {
+	(void)fc;
 	m68ki_set_fc(fc); /* auto-disable (see m68kcpu.h) */
-	m68ki_check_address_error(address, MODE_WRITE, fc); /* auto-disable (see m68kcpu.h) */
+	m68ki_check_address_error_010_less(address, MODE_WRITE, fc); /* auto-disable (see m68kcpu.h) */
+
+#if M68K_EMULATE_PMMU
+	if (PMMU_ENABLED)
+	    address = pmmu_translate_addr(address);
+#endif
+
 	m68k_write_memory_32_pd(ADDRESS_68K(address), value);
 }
 #endif
 
-
 /* --------------------- Effective Address Calculation -------------------- */
 
 /* The program counter relative addressing modes cause operands to be
  * retrieved from program space, not data space.
  */
-INLINE uint m68ki_get_ea_pcdi(void)
+static inline uint m68ki_get_ea_pcdi(void)
 {
 	uint old_pc = REG_PC;
 	m68ki_use_program_space(); /* auto-disable */
@@ -1116,7 +1236,7 @@ INLINE uint m68ki_get_ea_pcdi(void)
 }
 
 
-INLINE uint m68ki_get_ea_pcix(void)
+static inline uint m68ki_get_ea_pcix(void)
 {
 	m68ki_use_program_space(); /* auto-disable */
 	return m68ki_get_ea_ix(REG_PC);
@@ -1164,7 +1284,7 @@ INLINE uint m68ki_get_ea_pcix(void)
  * 1  011  mem indir with long outer
  * 1  100-111  reserved
  */
-INLINE uint m68ki_get_ea_ix(uint An)
+static inline uint m68ki_get_ea_ix(uint An)
 {
 	/* An = base register */
 	uint extension = m68ki_read_imm_16();
@@ -1217,7 +1337,7 @@ INLINE uint m68ki_get_ea_ix(uint An)
 
 	/* Check if base displacement is present */
 	if(BIT_5(extension))                /* BD SIZE */
-		bd = BIT_4(extension) ? m68ki_read_imm_32() : MAKE_INT_16(m68ki_read_imm_16());
+		bd = BIT_4(extension) ? m68ki_read_imm_32() : (uint32)MAKE_INT_16(m68ki_read_imm_16());
 
 	/* If no indirect action, we are done */
 	if(!(extension&7))                  /* No Memory Indirect */
@@ -1225,7 +1345,7 @@ INLINE uint m68ki_get_ea_ix(uint An)
 
 	/* Check if outer displacement is present */
 	if(BIT_1(extension))                /* I/IS:  od */
-		od = BIT_0(extension) ? m68ki_read_imm_32() : MAKE_INT_16(m68ki_read_imm_16());
+		od = BIT_0(extension) ? m68ki_read_imm_32() : (uint32)MAKE_INT_16(m68ki_read_imm_16());
 
 	/* Postindex */
 	if(BIT_2(extension))                /* I/IS:  0 = preindex, 1 = postindex */
@@ -1237,78 +1357,78 @@ INLINE uint m68ki_get_ea_ix(uint An)
 
 
 /* Fetch operands */
-INLINE uint OPER_AY_AI_8(void)  {uint ea = EA_AY_AI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AY_AI_16(void) {uint ea = EA_AY_AI_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AY_AI_32(void) {uint ea = EA_AY_AI_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AY_PI_8(void)  {uint ea = EA_AY_PI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AY_PI_16(void) {uint ea = EA_AY_PI_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AY_PI_32(void) {uint ea = EA_AY_PI_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AY_PD_8(void)  {uint ea = EA_AY_PD_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AY_PD_16(void) {uint ea = EA_AY_PD_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AY_PD_32(void) {uint ea = EA_AY_PD_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AY_DI_8(void)  {uint ea = EA_AY_DI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AY_DI_16(void) {uint ea = EA_AY_DI_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AY_DI_32(void) {uint ea = EA_AY_DI_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AY_IX_8(void)  {uint ea = EA_AY_IX_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AY_IX_16(void) {uint ea = EA_AY_IX_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AY_IX_32(void) {uint ea = EA_AY_IX_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AY_AI_8(void)  {uint ea = EA_AY_AI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AY_AI_16(void) {uint ea = EA_AY_AI_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AY_AI_32(void) {uint ea = EA_AY_AI_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AY_PI_8(void)  {uint ea = EA_AY_PI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AY_PI_16(void) {uint ea = EA_AY_PI_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AY_PI_32(void) {uint ea = EA_AY_PI_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AY_PD_8(void)  {uint ea = EA_AY_PD_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AY_PD_16(void) {uint ea = EA_AY_PD_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AY_PD_32(void) {uint ea = EA_AY_PD_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AY_DI_8(void)  {uint ea = EA_AY_DI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AY_DI_16(void) {uint ea = EA_AY_DI_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AY_DI_32(void) {uint ea = EA_AY_DI_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AY_IX_8(void)  {uint ea = EA_AY_IX_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AY_IX_16(void) {uint ea = EA_AY_IX_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AY_IX_32(void) {uint ea = EA_AY_IX_32(); return m68ki_read_32(ea);}
 
-INLINE uint OPER_AX_AI_8(void)  {uint ea = EA_AX_AI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AX_AI_16(void) {uint ea = EA_AX_AI_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AX_AI_32(void) {uint ea = EA_AX_AI_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AX_PI_8(void)  {uint ea = EA_AX_PI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AX_PI_16(void) {uint ea = EA_AX_PI_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AX_PI_32(void) {uint ea = EA_AX_PI_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AX_PD_8(void)  {uint ea = EA_AX_PD_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AX_PD_16(void) {uint ea = EA_AX_PD_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AX_PD_32(void) {uint ea = EA_AX_PD_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AX_DI_8(void)  {uint ea = EA_AX_DI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AX_DI_16(void) {uint ea = EA_AX_DI_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AX_DI_32(void) {uint ea = EA_AX_DI_32(); return m68ki_read_32(ea);}
-INLINE uint OPER_AX_IX_8(void)  {uint ea = EA_AX_IX_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_AX_IX_16(void) {uint ea = EA_AX_IX_16(); return m68ki_read_16(ea);}
-INLINE uint OPER_AX_IX_32(void) {uint ea = EA_AX_IX_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AX_AI_8(void)  {uint ea = EA_AX_AI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AX_AI_16(void) {uint ea = EA_AX_AI_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AX_AI_32(void) {uint ea = EA_AX_AI_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AX_PI_8(void)  {uint ea = EA_AX_PI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AX_PI_16(void) {uint ea = EA_AX_PI_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AX_PI_32(void) {uint ea = EA_AX_PI_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AX_PD_8(void)  {uint ea = EA_AX_PD_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AX_PD_16(void) {uint ea = EA_AX_PD_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AX_PD_32(void) {uint ea = EA_AX_PD_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AX_DI_8(void)  {uint ea = EA_AX_DI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AX_DI_16(void) {uint ea = EA_AX_DI_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AX_DI_32(void) {uint ea = EA_AX_DI_32(); return m68ki_read_32(ea);}
+static inline uint OPER_AX_IX_8(void)  {uint ea = EA_AX_IX_8();  return m68ki_read_8(ea); }
+static inline uint OPER_AX_IX_16(void) {uint ea = EA_AX_IX_16(); return m68ki_read_16(ea);}
+static inline uint OPER_AX_IX_32(void) {uint ea = EA_AX_IX_32(); return m68ki_read_32(ea);}
 
-INLINE uint OPER_A7_PI_8(void)  {uint ea = EA_A7_PI_8();  return m68ki_read_8(ea); }
-INLINE uint OPER_A7_PD_8(void)  {uint ea = EA_A7_PD_8();  return m68ki_read_8(ea); }
+static inline uint OPER_A7_PI_8(void)  {uint ea = EA_A7_PI_8();  return m68ki_read_8(ea); }
+static inline uint OPER_A7_PD_8(void)  {uint ea = EA_A7_PD_8();  return m68ki_read_8(ea); }
 
-INLINE uint OPER_AW_8(void)     {uint ea = EA_AW_8();     return m68ki_read_8(ea); }
-INLINE uint OPER_AW_16(void)    {uint ea = EA_AW_16();    return m68ki_read_16(ea);}
-INLINE uint OPER_AW_32(void)    {uint ea = EA_AW_32();    return m68ki_read_32(ea);}
-INLINE uint OPER_AL_8(void)     {uint ea = EA_AL_8();     return m68ki_read_8(ea); }
-INLINE uint OPER_AL_16(void)    {uint ea = EA_AL_16();    return m68ki_read_16(ea);}
-INLINE uint OPER_AL_32(void)    {uint ea = EA_AL_32();    return m68ki_read_32(ea);}
-INLINE uint OPER_PCDI_8(void)   {uint ea = EA_PCDI_8();   return m68ki_read_pcrel_8(ea); }
-INLINE uint OPER_PCDI_16(void)  {uint ea = EA_PCDI_16();  return m68ki_read_pcrel_16(ea);}
-INLINE uint OPER_PCDI_32(void)  {uint ea = EA_PCDI_32();  return m68ki_read_pcrel_32(ea);}
-INLINE uint OPER_PCIX_8(void)   {uint ea = EA_PCIX_8();   return m68ki_read_pcrel_8(ea); }
-INLINE uint OPER_PCIX_16(void)  {uint ea = EA_PCIX_16();  return m68ki_read_pcrel_16(ea);}
-INLINE uint OPER_PCIX_32(void)  {uint ea = EA_PCIX_32();  return m68ki_read_pcrel_32(ea);}
+static inline uint OPER_AW_8(void)     {uint ea = EA_AW_8();     return m68ki_read_8(ea); }
+static inline uint OPER_AW_16(void)    {uint ea = EA_AW_16();    return m68ki_read_16(ea);}
+static inline uint OPER_AW_32(void)    {uint ea = EA_AW_32();    return m68ki_read_32(ea);}
+static inline uint OPER_AL_8(void)     {uint ea = EA_AL_8();     return m68ki_read_8(ea); }
+static inline uint OPER_AL_16(void)    {uint ea = EA_AL_16();    return m68ki_read_16(ea);}
+static inline uint OPER_AL_32(void)    {uint ea = EA_AL_32();    return m68ki_read_32(ea);}
+static inline uint OPER_PCDI_8(void)   {uint ea = EA_PCDI_8();   return m68ki_read_pcrel_8(ea); }
+static inline uint OPER_PCDI_16(void)  {uint ea = EA_PCDI_16();  return m68ki_read_pcrel_16(ea);}
+static inline uint OPER_PCDI_32(void)  {uint ea = EA_PCDI_32();  return m68ki_read_pcrel_32(ea);}
+static inline uint OPER_PCIX_8(void)   {uint ea = EA_PCIX_8();   return m68ki_read_pcrel_8(ea); }
+static inline uint OPER_PCIX_16(void)  {uint ea = EA_PCIX_16();  return m68ki_read_pcrel_16(ea);}
+static inline uint OPER_PCIX_32(void)  {uint ea = EA_PCIX_32();  return m68ki_read_pcrel_32(ea);}
 
 
 
 /* ---------------------------- Stack Functions --------------------------- */
 
 /* Push/pull data from the stack */
-INLINE void m68ki_push_16(uint value)
+static inline void m68ki_push_16(uint value)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP - 2);
 	m68ki_write_16(REG_SP, value);
 }
 
-INLINE void m68ki_push_32(uint value)
+static inline void m68ki_push_32(uint value)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP - 4);
 	m68ki_write_32(REG_SP, value);
 }
 
-INLINE uint m68ki_pull_16(void)
+static inline uint m68ki_pull_16(void)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP + 2);
 	return m68ki_read_16(REG_SP-2);
 }
 
-INLINE uint m68ki_pull_32(void)
+static inline uint m68ki_pull_32(void)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP + 4);
 	return m68ki_read_32(REG_SP-4);
@@ -1318,22 +1438,22 @@ INLINE uint m68ki_pull_32(void)
 /* Increment/decrement the stack as if doing a push/pull but
  * don't do any memory access.
  */
-INLINE void m68ki_fake_push_16(void)
+static inline void m68ki_fake_push_16(void)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP - 2);
 }
 
-INLINE void m68ki_fake_push_32(void)
+static inline void m68ki_fake_push_32(void)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP - 4);
 }
 
-INLINE void m68ki_fake_pull_16(void)
+static inline void m68ki_fake_pull_16(void)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP + 2);
 }
 
-INLINE void m68ki_fake_pull_32(void)
+static inline void m68ki_fake_pull_32(void)
 {
 	REG_SP = MASK_OUT_ABOVE_32(REG_SP + 4);
 }
@@ -1345,13 +1465,13 @@ INLINE void m68ki_fake_pull_32(void)
  * These functions will also call the pc_changed callback if it was enabled
  * in m68kconf.h.
  */
-INLINE void m68ki_jump(uint new_pc)
+static inline void m68ki_jump(uint new_pc)
 {
 	REG_PC = new_pc;
 	m68ki_pc_changed(REG_PC);
 }
 
-INLINE void m68ki_jump_vector(uint vector)
+static inline void m68ki_jump_vector(uint vector)
 {
 	REG_PC = (vector<<2) + REG_VBR;
 	REG_PC = m68ki_read_data_32(REG_PC);
@@ -1364,30 +1484,28 @@ INLINE void m68ki_jump_vector(uint vector)
  * So far I've found no problems with not calling pc_changed for 8 or 16
  * bit branches.
  */
-INLINE void m68ki_branch_8(uint offset)
+static inline void m68ki_branch_8(uint offset)
 {
 	REG_PC += MAKE_INT_8(offset);
 }
 
-INLINE void m68ki_branch_16(uint offset)
+static inline void m68ki_branch_16(uint offset)
 {
 	REG_PC += MAKE_INT_16(offset);
 }
 
-INLINE void m68ki_branch_32(uint offset)
+static inline void m68ki_branch_32(uint offset)
 {
 	REG_PC += offset;
 	m68ki_pc_changed(REG_PC);
 }
 
-
-
 /* ---------------------------- Status Register --------------------------- */
 
 /* Set the S flag and change the active stack pointer.
  * Note that value MUST be 4 or 0.
  */
-INLINE void m68ki_set_s_flag(uint value)
+static inline void m68ki_set_s_flag(uint value)
 {
 	/* Backup the old stack pointer */
 	REG_SP_BASE[FLAG_S | ((FLAG_S>>1) & FLAG_M)] = REG_SP;
@@ -1400,7 +1518,7 @@ INLINE void m68ki_set_s_flag(uint value)
 /* Set the S and M flags and change the active stack pointer.
  * Note that value MUST be 0, 2, 4, or 6 (bit2 = S, bit1 = M).
  */
-INLINE void m68ki_set_sm_flag(uint value)
+static inline void m68ki_set_sm_flag(uint value)
 {
 	/* Backup the old stack pointer */
 	REG_SP_BASE[FLAG_S | ((FLAG_S>>1) & FLAG_M)] = REG_SP;
@@ -1412,7 +1530,7 @@ INLINE void m68ki_set_sm_flag(uint value)
 }
 
 /* Set the S and M flags.  Don't touch the stack pointer. */
-INLINE void m68ki_set_sm_flag_nosp(uint value)
+static inline void m68ki_set_sm_flag_nosp(uint value)
 {
 	/* Set the S and M flags */
 	FLAG_S = value & SFLAG_SET;
@@ -1421,7 +1539,7 @@ INLINE void m68ki_set_sm_flag_nosp(uint value)
 
 
 /* Set the condition code register */
-INLINE void m68ki_set_ccr(uint value)
+static inline void m68ki_set_ccr(uint value)
 {
 	FLAG_X = BIT_4(value)  << 4;
 	FLAG_N = BIT_3(value)  << 4;
@@ -1431,7 +1549,7 @@ INLINE void m68ki_set_ccr(uint value)
 }
 
 /* Set the status register but don't check for interrupts */
-INLINE void m68ki_set_sr_noint(uint value)
+static inline void m68ki_set_sr_noint(uint value)
 {
 	/* Mask out the "unimplemented" bits */
 	value &= CPU_SR_MASK;
@@ -1447,7 +1565,7 @@ INLINE void m68ki_set_sr_noint(uint value)
 /* Set the status register but don't check for interrupts nor
  * change the stack pointer
  */
-INLINE void m68ki_set_sr_noint_nosp(uint value)
+static inline void m68ki_set_sr_noint_nosp(uint value)
 {
 	/* Mask out the "unimplemented" bits */
 	value &= CPU_SR_MASK;
@@ -1461,7 +1579,7 @@ INLINE void m68ki_set_sr_noint_nosp(uint value)
 }
 
 /* Set the status register and check for interrupts */
-INLINE void m68ki_set_sr(uint value)
+static inline void m68ki_set_sr(uint value)
 {
 	m68ki_set_sr_noint(value);
 	m68ki_check_interrupts();
@@ -1471,7 +1589,7 @@ INLINE void m68ki_set_sr(uint value)
 /* ------------------------- Exception Processing ------------------------- */
 
 /* Initiate exception processing */
-INLINE uint m68ki_init_exception(void)
+static inline uint m68ki_init_exception(void)
 {
 	/* Save the old status register */
 	uint sr = m68ki_get_sr();
@@ -1486,7 +1604,7 @@ INLINE uint m68ki_init_exception(void)
 }
 
 /* 3 word stack frame (68000 only) */
-INLINE void m68ki_stack_frame_3word(uint pc, uint sr)
+static inline void m68ki_stack_frame_3word(uint pc, uint sr)
 {
 	m68ki_push_32(pc);
 	m68ki_push_16(sr);
@@ -1495,7 +1613,7 @@ INLINE void m68ki_stack_frame_3word(uint pc, uint sr)
 /* Format 0 stack frame.
  * This is the standard stack frame for 68010+.
  */
-INLINE void m68ki_stack_frame_0000(uint pc, uint sr, uint vector)
+static inline void m68ki_stack_frame_0000(uint pc, uint sr, uint vector)
 {
 	/* Stack a 3-word frame if we are 68000 */
 	if(CPU_TYPE == CPU_TYPE_000)
@@ -1511,7 +1629,7 @@ INLINE void m68ki_stack_frame_0000(uint pc, uint sr, uint vector)
 /* Format 1 stack frame (68020).
  * For 68020, this is the 4 word throwaway frame.
  */
-INLINE void m68ki_stack_frame_0001(uint pc, uint sr, uint vector)
+static inline void m68ki_stack_frame_0001(uint pc, uint sr, uint vector)
 {
 	m68ki_push_16(0x1000 | (vector<<2));
 	m68ki_push_32(pc);
@@ -1521,7 +1639,7 @@ INLINE void m68ki_stack_frame_0001(uint pc, uint sr, uint vector)
 /* Format 2 stack frame.
  * This is used only by 68020 for trap exceptions.
  */
-INLINE void m68ki_stack_frame_0010(uint sr, uint vector)
+static inline void m68ki_stack_frame_0010(uint sr, uint vector)
 {
 	m68ki_push_32(REG_PPC);
 	m68ki_push_16(0x2000 | (vector<<2));
@@ -1532,7 +1650,7 @@ INLINE void m68ki_stack_frame_0010(uint sr, uint vector)
 
 /* Bus error stack frame (68000 only).
  */
-INLINE void m68ki_stack_frame_buserr(uint sr)
+static inline void m68ki_stack_frame_buserr(uint sr)
 {
 	m68ki_push_32(REG_PC);
 	m68ki_push_16(sr);
@@ -1549,7 +1667,7 @@ INLINE void m68ki_stack_frame_buserr(uint sr)
 /* Format 8 stack frame (68010).
  * 68010 only.  This is the 29 word bus/address error frame.
  */
-void m68ki_stack_frame_1000(uint pc, uint sr, uint vector)
+static inline void m68ki_stack_frame_1000(uint pc, uint sr, uint vector)
 {
 	/* VERSION
 	 * NUMBER
@@ -1603,7 +1721,7 @@ void m68ki_stack_frame_1000(uint pc, uint sr, uint vector)
  * if the error happens at an instruction boundary.
  * PC stacked is address of next instruction.
  */
-void m68ki_stack_frame_1010(uint sr, uint vector, uint pc)
+static inline void m68ki_stack_frame_1010(uint sr, uint vector, uint pc)
 {
 	/* INTERNAL REGISTER */
 	m68ki_push_16(0);
@@ -1650,7 +1768,7 @@ void m68ki_stack_frame_1010(uint sr, uint vector, uint pc)
  * if the error happens during instruction execution.
  * PC stacked is address of instruction in progress.
  */
-void m68ki_stack_frame_1011(uint sr, uint vector, uint pc)
+static inline void m68ki_stack_frame_1011(uint sr, uint vector, uint pc)
 {
 	/* INTERNAL REGISTERS (18 words) */
 	m68ki_push_32(0);
@@ -1721,7 +1839,7 @@ void m68ki_stack_frame_1011(uint sr, uint vector, uint pc)
 /* Used for Group 2 exceptions.
  * These stack a type 2 frame on the 020.
  */
-INLINE void m68ki_exception_trap(uint vector)
+static inline void m68ki_exception_trap(uint vector)
 {
 	uint sr = m68ki_init_exception();
 
@@ -1737,7 +1855,7 @@ INLINE void m68ki_exception_trap(uint vector)
 }
 
 /* Trap#n stacks a 0 frame but behaves like group2 otherwise */
-INLINE void m68ki_exception_trapN(uint vector)
+static inline void m68ki_exception_trapN(uint vector)
 {
 	uint sr = m68ki_init_exception();
 	m68ki_stack_frame_0000(REG_PC, sr, vector);
@@ -1748,7 +1866,7 @@ INLINE void m68ki_exception_trapN(uint vector)
 }
 
 /* Exception for trace mode */
-INLINE void m68ki_exception_trace(void)
+static inline void m68ki_exception_trace(void)
 {
 	uint sr = m68ki_init_exception();
 
@@ -1775,7 +1893,7 @@ INLINE void m68ki_exception_trace(void)
 }
 
 /* Exception for privilege violation */
-INLINE void m68ki_exception_privilege_violation(void)
+static inline void m68ki_exception_privilege_violation(void)
 {
 	uint sr = m68ki_init_exception();
 
@@ -1793,8 +1911,45 @@ INLINE void m68ki_exception_privilege_violation(void)
 	USE_CYCLES(CYC_EXCEPTION[EXCEPTION_PRIVILEGE_VIOLATION] - CYC_INSTRUCTION[REG_IR]);
 }
 
+extern jmp_buf m68ki_bus_error_jmp_buf;
+
+#define m68ki_check_bus_error_trap() setjmp(m68ki_bus_error_jmp_buf)
+
+/* Exception for bus error */
+static inline void m68ki_exception_bus_error(void)
+{
+	int i;
+
+	/* If we were processing a bus error, address error, or reset,
+	 * this is a catastrophic failure.
+	 * Halt the CPU
+	 */
+	if(CPU_RUN_MODE == RUN_MODE_BERR_AERR_RESET)
+	{
+m68k_read_memory_8(0x00ffff01);
+		CPU_STOPPED = STOP_LEVEL_HALT;
+		return;
+	}
+	CPU_RUN_MODE = RUN_MODE_BERR_AERR_RESET;
+
+	/* Use up some clock cycles and undo the instruction's cycles */
+	USE_CYCLES(CYC_EXCEPTION[EXCEPTION_BUS_ERROR] - CYC_INSTRUCTION[REG_IR]);
+
+	for (i = 15; i >= 0; i--){
+		REG_DA[i] = REG_DA_SAVE[i];
+	}
+
+	uint sr = m68ki_init_exception();
+	m68ki_stack_frame_1000(REG_PPC, sr, EXCEPTION_BUS_ERROR);
+
+	m68ki_jump_vector(EXCEPTION_BUS_ERROR);
+	longjmp(m68ki_bus_error_jmp_buf, 1);
+}
+
+extern int cpu_log_enabled;
+
 /* Exception for A-Line instructions */
-INLINE void m68ki_exception_1010(void)
+static inline void m68ki_exception_1010(void)
 {
 	uint sr;
 #if M68K_LOG_1010_1111 == OPT_ON
@@ -1812,7 +1967,7 @@ INLINE void m68ki_exception_1010(void)
 }
 
 /* Exception for F-Line instructions */
-INLINE void m68ki_exception_1111(void)
+static inline void m68ki_exception_1111(void)
 {
 	uint sr;
 
@@ -1830,14 +1985,20 @@ INLINE void m68ki_exception_1111(void)
 	USE_CYCLES(CYC_EXCEPTION[EXCEPTION_1111] - CYC_INSTRUCTION[REG_IR]);
 }
 
+#if M68K_ILLG_HAS_CALLBACK == OPT_SPECIFY_HANDLER
+extern int m68ki_illg_callback(int);
+#endif
+
 /* Exception for illegal instructions */
-INLINE void m68ki_exception_illegal(void)
+static inline void m68ki_exception_illegal(void)
 {
 	uint sr;
 
 	M68K_DO_LOG((M68K_LOG_FILEHANDLE "%s at %08x: illegal instruction %04x (%s)\n",
 				 m68ki_cpu_names[CPU_TYPE], ADDRESS_68K(REG_PPC), REG_IR,
 				 m68ki_disassemble_quick(ADDRESS_68K(REG_PPC))));
+	if (m68ki_illg_callback(REG_IR))
+	    return;
 
 	sr = m68ki_init_exception();
 
@@ -1856,7 +2017,7 @@ INLINE void m68ki_exception_illegal(void)
 }
 
 /* Exception for format errror in RTE */
-INLINE void m68ki_exception_format_error(void)
+static inline void m68ki_exception_format_error(void)
 {
 	uint sr = m68ki_init_exception();
 	m68ki_stack_frame_0000(REG_PC, sr, EXCEPTION_FORMAT_ERROR);
@@ -1867,7 +2028,7 @@ INLINE void m68ki_exception_format_error(void)
 }
 
 /* Exception for address error */
-INLINE void m68ki_exception_address_error(void)
+static inline void m68ki_exception_address_error(void)
 {
 	uint sr = m68ki_init_exception();
 
@@ -1888,16 +2049,16 @@ m68k_read_memory_8(0x00ffff01);
 
 	m68ki_jump_vector(EXCEPTION_ADDRESS_ERROR);
 
-	/* Use up some clock cycles. Note that we don't need to undo the 
+	/* Use up some clock cycles. Note that we don't need to undo the
 	instruction's cycles here as we've longjmp:ed directly from the
 	instruction handler without passing the part of the excecute loop
 	that deducts instruction cycles */
-	USE_CYCLES(CYC_EXCEPTION[EXCEPTION_ADDRESS_ERROR]); 
+	USE_CYCLES(CYC_EXCEPTION[EXCEPTION_ADDRESS_ERROR]);
 }
 
 
 /* Service an interrupt request and start exception processing */
-void m68ki_exception_interrupt(uint int_level)
+static inline void m68ki_exception_interrupt(uint int_level)
 {
 	uint vector;
 	uint sr;
@@ -1960,7 +2121,7 @@ void m68ki_exception_interrupt(uint int_level)
 	m68ki_jump(new_pc);
 
 	/* Defer cycle counting until later */
-	CPU_INT_CYCLES += CYC_EXCEPTION[vector];
+	USE_CYCLES(CYC_EXCEPTION[vector]);
 
 #if !M68K_EMULATE_INT_ACK
 	/* Automatically clear IRQ if we are not using an acknowledge scheme */
@@ -1970,9 +2131,14 @@ void m68ki_exception_interrupt(uint int_level)
 
 
 /* ASG: Check for interrupts */
-INLINE void m68ki_check_interrupts(void)
+static inline void m68ki_check_interrupts(void)
 {
-	if(CPU_INT_LEVEL > FLAG_INT_MASK)
+	if(m68ki_cpu.nmi_pending)
+	{
+		m68ki_cpu.nmi_pending = FALSE;
+		m68ki_exception_interrupt(7);
+	}
+	else if(CPU_INT_LEVEL > FLAG_INT_MASK)
 		m68ki_exception_interrupt(CPU_INT_LEVEL>>8);
 }
 
@@ -1982,4 +2148,8 @@ INLINE void m68ki_check_interrupts(void)
 /* ============================== END OF FILE ============================= */
 /* ======================================================================== */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* M68KCPU__HEADER */
diff --git a/plat/linux68k/emu/musashi/m68kdasm.c b/plat/linux68k/emu/musashi/m68kdasm.c
index 146da471a..fbbde31c0 100755
--- a/plat/linux68k/emu/musashi/m68kdasm.c
+++ b/plat/linux68k/emu/musashi/m68kdasm.c
@@ -3,10 +3,10 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 3.32
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -38,6 +38,14 @@
 #include <string.h>
 #include "m68k.h"
 
+#ifndef uint32
+#define uint32 uint
+#endif
+
+#ifndef uint16
+#define uint16 unsigned short
+#endif
+
 #ifndef DECL_SPEC
 #define DECL_SPEC
 #endif
@@ -129,6 +137,15 @@
 #define EXT_OUTER_DISPLACEMENT_LONG(A)    (((A)&3) == 3 && ((A)&0x47) < 0x44)
 
 
+/* Opcode flags */
+#if M68K_COMPILE_FOR_MAME == OPT_ON
+#define SET_OPCODE_FLAGS(x)	g_opcode_type = x;
+#define COMBINE_OPCODE_FLAGS(x) ((x) | g_opcode_type | DASMFLAG_SUPPORTED)
+#else
+#define SET_OPCODE_FLAGS(x)
+#define COMBINE_OPCODE_FLAGS(X) (X)
+#endif
+
 
 /* ======================================================================== */
 /* =============================== PROTOTYPES ============================= */
@@ -147,6 +164,7 @@ uint  peek_imm_32(void);
 /* make signed integers 100% portably */
 static int make_int_8(int value);
 static int make_int_16(int value);
+static int make_int_32(int value);
 
 /* make a string of a hex value */
 static char* make_signed_hex_str_8(uint val);
@@ -201,20 +219,23 @@ static char g_helper_str[100]; /* string to hold helpful info */
 static uint g_cpu_pc;        /* program counter */
 static uint g_cpu_ir;        /* instruction register */
 static uint g_cpu_type;
+static uint g_opcode_type;
+static const unsigned char* g_rawop;
+static uint g_rawbasepc;
 
 /* used by ops like asr, ror, addq, etc */
-static uint g_3bit_qdata_table[8] = {8, 1, 2, 3, 4, 5, 6, 7};
+static const uint g_3bit_qdata_table[8] = {8, 1, 2, 3, 4, 5, 6, 7};
 
-static uint g_5bit_data_table[32] =
+static const uint g_5bit_data_table[32] =
 {
 	32,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
 };
 
-static char* g_cc[16] =
+static const char *const g_cc[16] =
 {"t", "f", "hi", "ls", "cc", "cs", "ne", "eq", "vc", "vs", "pl", "mi", "ge", "lt", "gt", "le"};
 
-static char* g_cpcc[64] =
+static const char *const g_cpcc[64] =
 {/* 000    001    010    011    100    101    110    111 */
 	  "f",  "eq", "ogt", "oge", "olt", "ole", "ogl",  "or", /* 000 */
 	 "un", "ueq", "ugt", "uge", "ult", "ule",  "ne",   "t", /* 001 */
@@ -226,6 +247,16 @@ static char* g_cpcc[64] =
 	  "?",   "?",   "?",   "?",   "?",   "?",   "?",   "?"  /* 111 */
 };
 
+static const char *const g_mmuregs[8] =
+{
+	"tc", "drp", "srp", "crp", "cal", "val", "sccr", "acr"
+};
+
+static const char *const g_mmucond[16] =
+{
+	"bs", "bc", "ls", "lc", "ss", "sc", "as", "ac",
+	"ws", "wc", "is", "ic", "gs", "gc", "cs", "cc"
+};
 
 /* ======================================================================== */
 /* =========================== UTILITY FUNCTIONS ========================== */
@@ -234,17 +265,56 @@ static char* g_cpcc[64] =
 #define LIMIT_CPU_TYPES(ALLOWED_CPU_TYPES)	\
 	if(!(g_cpu_type & ALLOWED_CPU_TYPES))	\
 	{										\
-		d68000_illegal();					\
+		if((g_cpu_ir & 0xf000) == 0xf000)	\
+			d68000_1111();					\
+		else d68000_illegal();				\
 		return;								\
 	}
 
-#define read_imm_8()  (m68k_read_disassembler_16(((g_cpu_pc+=2)-2)&g_address_mask)&0xff)
-#define read_imm_16() m68k_read_disassembler_16(((g_cpu_pc+=2)-2)&g_address_mask)
-#define read_imm_32() m68k_read_disassembler_32(((g_cpu_pc+=4)-4)&g_address_mask)
+static uint dasm_read_imm_8(uint advance)
+{
+	uint result;
+	if (g_rawop)
+		result = g_rawop[g_cpu_pc + 1 - g_rawbasepc];
+	else
+		result = m68k_read_disassembler_16(g_cpu_pc & g_address_mask) & 0xff;
+	g_cpu_pc += advance;
+	return result;
+}
 
-#define peek_imm_8()  (m68k_read_disassembler_16(g_cpu_pc & g_address_mask)&0xff)
-#define peek_imm_16() m68k_read_disassembler_16(g_cpu_pc & g_address_mask)
-#define peek_imm_32() m68k_read_disassembler_32(g_cpu_pc & g_address_mask)
+static uint dasm_read_imm_16(uint advance)
+{
+	uint result;
+	if (g_rawop)
+		result = (g_rawop[g_cpu_pc + 0 - g_rawbasepc] << 8) |
+		          g_rawop[g_cpu_pc + 1 - g_rawbasepc];
+	else
+		result = m68k_read_disassembler_16(g_cpu_pc & g_address_mask) & 0xffff;
+	g_cpu_pc += advance;
+	return result;
+}
+
+static uint dasm_read_imm_32(uint advance)
+{
+	uint result;
+	if (g_rawop)
+		result = (g_rawop[g_cpu_pc + 0 - g_rawbasepc] << 24) |
+		         (g_rawop[g_cpu_pc + 1 - g_rawbasepc] << 16) |
+		         (g_rawop[g_cpu_pc + 2 - g_rawbasepc] << 8) |
+		          g_rawop[g_cpu_pc + 3 - g_rawbasepc];
+	else
+		result = m68k_read_disassembler_32(g_cpu_pc & g_address_mask) & 0xffffffff;
+	g_cpu_pc += advance;
+	return result;
+}
+
+#define read_imm_8()  dasm_read_imm_8(2)
+#define read_imm_16() dasm_read_imm_16(2)
+#define read_imm_32() dasm_read_imm_32(4)
+
+#define peek_imm_8()  dasm_read_imm_8(0)
+#define peek_imm_16() dasm_read_imm_16(0)
+#define peek_imm_32() dasm_read_imm_32(0)
 
 /* Fake a split interface */
 #define get_ea_mode_str_8(instruction) get_ea_mode_str(instruction, 0)
@@ -259,6 +329,11 @@ static char* g_cpcc[64] =
 #define get_imm_str_u16() get_imm_str_u(1)
 #define get_imm_str_u32() get_imm_str_u(2)
 
+static int sext_7bit_int(int value)
+{
+	return (value & 0x40) ? (value | 0xffffff80) : (value & 0x7f);
+}
+
 
 /* 100% portable signed int generators */
 static int make_int_8(int value)
@@ -271,6 +346,10 @@ static int make_int_16(int value)
 	return (value & 0x8000) ? value | ~0xffff : value & 0xffff;
 }
 
+static int make_int_32(int value)
+{
+	return (value & 0x80000000) ? value | ~0xffffffff : value & 0xffffffff;
+}
 
 /* Get string representation of hex values */
 static char* make_signed_hex_str_8(uint val)
@@ -425,7 +504,14 @@ static char* get_ea_mode_str(uint instruction, uint size)
 					strcat(mode, "[");
 				if(base)
 				{
-					strcat(mode, make_signed_hex_str_16(base));
+					if (EXT_BASE_DISPLACEMENT_LONG(extension))
+					{
+						strcat(mode, make_signed_hex_str_32(base));
+					}
+					else
+					{
+						strcat(mode, make_signed_hex_str_16(base));
+					}
 					comma = 1;
 				}
 				if(*base_reg)
@@ -875,20 +961,20 @@ static void d68000_asl_ea(void)
 static void d68000_bcc_8(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "b%-2s     %x", g_cc[(g_cpu_ir>>8)&0xf], temp_pc + make_int_8(g_cpu_ir));
+	sprintf(g_dasm_str, "b%-2s     $%x", g_cc[(g_cpu_ir>>8)&0xf], temp_pc + make_int_8(g_cpu_ir));
 }
 
 static void d68000_bcc_16(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "b%-2s     %x", g_cc[(g_cpu_ir>>8)&0xf], temp_pc + make_int_16(read_imm_16()));
+	sprintf(g_dasm_str, "b%-2s     $%x", g_cc[(g_cpu_ir>>8)&0xf], temp_pc + make_int_16(read_imm_16()));
 }
 
 static void d68020_bcc_32(void)
 {
 	uint temp_pc = g_cpu_pc;
 	LIMIT_CPU_TYPES(M68020_PLUS);
-	sprintf(g_dasm_str, "b%-2s     %x; (2+)", g_cc[(g_cpu_ir>>8)&0xf], temp_pc + read_imm_32());
+	sprintf(g_dasm_str, "b%-2s     $%x; (2+)", g_cc[(g_cpu_ir>>8)&0xf], temp_pc + read_imm_32());
 }
 
 static void d68000_bchg_r(void)
@@ -1090,20 +1176,20 @@ static void d68020_bftst(void)
 static void d68000_bra_8(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "bra     %x", temp_pc + make_int_8(g_cpu_ir));
+	sprintf(g_dasm_str, "bra     $%x", temp_pc + make_int_8(g_cpu_ir));
 }
 
 static void d68000_bra_16(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "bra     %x", temp_pc + make_int_16(read_imm_16()));
+	sprintf(g_dasm_str, "bra     $%x", temp_pc + make_int_16(read_imm_16()));
 }
 
 static void d68020_bra_32(void)
 {
 	uint temp_pc = g_cpu_pc;
 	LIMIT_CPU_TYPES(M68020_PLUS);
-	sprintf(g_dasm_str, "bra     %x; (2+)", temp_pc + read_imm_32());
+	sprintf(g_dasm_str, "bra     $%x; (2+)", temp_pc + read_imm_32());
 }
 
 static void d68000_bset_r(void)
@@ -1120,20 +1206,23 @@ static void d68000_bset_s(void)
 static void d68000_bsr_8(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "bsr     %x", temp_pc + make_int_8(g_cpu_ir));
+	sprintf(g_dasm_str, "bsr     $%x", temp_pc + make_int_8(g_cpu_ir));
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_bsr_16(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "bsr     %x", temp_pc + make_int_16(read_imm_16()));
+	sprintf(g_dasm_str, "bsr     $%x", temp_pc + make_int_16(read_imm_16()));
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68020_bsr_32(void)
 {
 	uint temp_pc = g_cpu_pc;
 	LIMIT_CPU_TYPES(M68020_PLUS);
-	sprintf(g_dasm_str, "bsr     %x; (2+)", temp_pc + peek_imm_32());
+	sprintf(g_dasm_str, "bsr     $%x; (2+)", temp_pc + read_imm_32());
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_btst_r(void)
@@ -1211,12 +1300,14 @@ static void d68020_cas2_32(void)
 static void d68000_chk_16(void)
 {
 	sprintf(g_dasm_str, "chk.w   %s, D%d", get_ea_mode_str_16(g_cpu_ir), (g_cpu_ir>>9)&7);
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68020_chk_32(void)
 {
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	sprintf(g_dasm_str, "chk.l   %s, D%d; (2+)", get_ea_mode_str_32(g_cpu_ir), (g_cpu_ir>>9)&7);
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68020_chk2_cmp2_8(void)
@@ -1392,7 +1483,7 @@ static void d68020_cpbcc_16(void)
 	uint new_pc = g_cpu_pc;
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	extension = read_imm_16();
-	new_pc += make_int_16(peek_imm_16());
+	new_pc += make_int_16(read_imm_16());
 	sprintf(g_dasm_str, "%db%-4s  %s; %x (extension = %x) (2-3)", (g_cpu_ir>>9)&7, g_cpcc[g_cpu_ir&0x3f], get_imm_str_s16(), new_pc, extension);
 }
 
@@ -1402,7 +1493,7 @@ static void d68020_cpbcc_32(void)
 	uint new_pc = g_cpu_pc;
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	extension = read_imm_16();
-	new_pc += peek_imm_32();
+	new_pc += read_imm_32();
 	sprintf(g_dasm_str, "%db%-4s  %s; %x (extension = %x) (2-3)", (g_cpu_ir>>9)&7, g_cpcc[g_cpu_ir&0x3f], get_imm_str_s16(), new_pc, extension);
 }
 
@@ -1414,7 +1505,7 @@ static void d68020_cpdbcc(void)
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	extension1 = read_imm_16();
 	extension2 = read_imm_16();
-	new_pc += make_int_16(peek_imm_16());
+	new_pc += make_int_16(read_imm_16());
 	sprintf(g_dasm_str, "%ddb%-4s D%d,%s; %x (extension = %x) (2-3)", (g_cpu_ir>>9)&7, g_cpcc[extension1&0x3f], g_cpu_ir&7, get_imm_str_s16(), new_pc, extension2);
 }
 
@@ -1427,13 +1518,27 @@ static void d68020_cpgen(void)
 static void d68020_cprestore(void)
 {
 	LIMIT_CPU_TYPES(M68020_PLUS);
-	sprintf(g_dasm_str, "%drestore %s; (2-3)", (g_cpu_ir>>9)&7, get_ea_mode_str_8(g_cpu_ir));
+	if (((g_cpu_ir>>9)&7) == 1)
+	{
+		sprintf(g_dasm_str, "frestore %s", get_ea_mode_str_8(g_cpu_ir));
+	}
+	else
+	{
+		sprintf(g_dasm_str, "%drestore %s; (2-3)", (g_cpu_ir>>9)&7, get_ea_mode_str_8(g_cpu_ir));
+	}
 }
 
 static void d68020_cpsave(void)
 {
 	LIMIT_CPU_TYPES(M68020_PLUS);
-	sprintf(g_dasm_str, "%dsave   %s; (2-3)", (g_cpu_ir>>9)&7, get_ea_mode_str_8(g_cpu_ir));
+	if (((g_cpu_ir>>9)&7) == 1)
+	{
+		sprintf(g_dasm_str, "fsave   %s", get_ea_mode_str_8(g_cpu_ir));
+	}
+	else
+	{
+		sprintf(g_dasm_str, "%dsave   %s; (2-3)", (g_cpu_ir>>9)&7, get_ea_mode_str_8(g_cpu_ir));
+	}
 }
 
 static void d68020_cpscc(void)
@@ -1499,13 +1604,15 @@ static void d68040_cpush(void)
 static void d68000_dbra(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "dbra    D%d, %x", g_cpu_ir & 7, temp_pc + make_int_16(read_imm_16()));
+	sprintf(g_dasm_str, "dbra    D%d, $%x", g_cpu_ir & 7, temp_pc + make_int_16(read_imm_16()));
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_dbcc(void)
 {
 	uint temp_pc = g_cpu_pc;
-	sprintf(g_dasm_str, "db%-2s    D%d, %x", g_cc[(g_cpu_ir>>8)&0xf], g_cpu_ir & 7, temp_pc + make_int_16(read_imm_16()));
+	sprintf(g_dasm_str, "db%-2s    D%d, $%x", g_cc[(g_cpu_ir>>8)&0xf], g_cpu_ir & 7, temp_pc + make_int_16(read_imm_16()));
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_divs(void)
@@ -1606,6 +1713,220 @@ static void d68020_extb_32(void)
 	sprintf(g_dasm_str, "extb.l  D%d; (2+)", g_cpu_ir&7);
 }
 
+static void d68040_fpu(void)
+{
+	char float_data_format[8][3] =
+	{
+		".l", ".s", ".x", ".p", ".w", ".d", ".b", ".p"
+	};
+
+	char mnemonic[40];
+	uint32 w2, src, dst_reg;
+	LIMIT_CPU_TYPES(M68030_PLUS);
+	w2 = read_imm_16();
+
+	src = (w2 >> 10) & 0x7;
+	dst_reg = (w2 >> 7) & 0x7;
+
+	// special override for FMOVECR
+	if ((((w2 >> 13) & 0x7) == 2) && (((w2>>10)&0x7) == 7))
+	{
+		sprintf(g_dasm_str, "fmovecr   #$%0x, fp%d", (w2&0x7f), dst_reg);
+		return;
+	}
+
+	switch ((w2 >> 13) & 0x7)
+	{
+		case 0x0:
+		case 0x2:
+		{
+			switch(w2 & 0x7f)
+			{
+				case 0x00:	sprintf(mnemonic, "fmove"); break;
+				case 0x01:	sprintf(mnemonic, "fint"); break;
+				case 0x02:	sprintf(mnemonic, "fsinh"); break;
+				case 0x03:	sprintf(mnemonic, "fintrz"); break;
+				case 0x04:	sprintf(mnemonic, "fsqrt"); break;
+				case 0x06:	sprintf(mnemonic, "flognp1"); break;
+				case 0x08:	sprintf(mnemonic, "fetoxm1"); break;
+				case 0x09:	sprintf(mnemonic, "ftanh1"); break;
+				case 0x0a:	sprintf(mnemonic, "fatan"); break;
+				case 0x0c:	sprintf(mnemonic, "fasin"); break;
+				case 0x0d:	sprintf(mnemonic, "fatanh"); break;
+				case 0x0e:	sprintf(mnemonic, "fsin"); break;
+				case 0x0f:	sprintf(mnemonic, "ftan"); break;
+				case 0x10:	sprintf(mnemonic, "fetox"); break;
+				case 0x11:	sprintf(mnemonic, "ftwotox"); break;
+				case 0x12:	sprintf(mnemonic, "ftentox"); break;
+				case 0x14:	sprintf(mnemonic, "flogn"); break;
+				case 0x15:	sprintf(mnemonic, "flog10"); break;
+				case 0x16:	sprintf(mnemonic, "flog2"); break;
+				case 0x18:	sprintf(mnemonic, "fabs"); break;
+				case 0x19:	sprintf(mnemonic, "fcosh"); break;
+				case 0x1a:	sprintf(mnemonic, "fneg"); break;
+				case 0x1c:	sprintf(mnemonic, "facos"); break;
+				case 0x1d:	sprintf(mnemonic, "fcos"); break;
+				case 0x1e:	sprintf(mnemonic, "fgetexp"); break;
+				case 0x1f:	sprintf(mnemonic, "fgetman"); break;
+				case 0x20:	sprintf(mnemonic, "fdiv"); break;
+				case 0x21:	sprintf(mnemonic, "fmod"); break;
+				case 0x22:	sprintf(mnemonic, "fadd"); break;
+				case 0x23:	sprintf(mnemonic, "fmul"); break;
+				case 0x24:	sprintf(mnemonic, "fsgldiv"); break;
+				case 0x25:	sprintf(mnemonic, "frem"); break;
+				case 0x26:	sprintf(mnemonic, "fscale"); break;
+				case 0x27:	sprintf(mnemonic, "fsglmul"); break;
+				case 0x28:	sprintf(mnemonic, "fsub"); break;
+				case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37:
+							sprintf(mnemonic, "fsincos"); break;
+				case 0x38:	sprintf(mnemonic, "fcmp"); break;
+				case 0x3a:	sprintf(mnemonic, "ftst"); break;
+				case 0x41:	sprintf(mnemonic, "fssqrt"); break;
+				case 0x45:	sprintf(mnemonic, "fdsqrt"); break;
+				case 0x58:	sprintf(mnemonic, "fsabs"); break;
+				case 0x5a:	sprintf(mnemonic, "fsneg"); break;
+				case 0x5c:	sprintf(mnemonic, "fdabs"); break;
+				case 0x5e:	sprintf(mnemonic, "fdneg"); break;
+				case 0x60:	sprintf(mnemonic, "fsdiv"); break;
+				case 0x62:	sprintf(mnemonic, "fsadd"); break;
+				case 0x63:	sprintf(mnemonic, "fsmul"); break;
+				case 0x64:	sprintf(mnemonic, "fddiv"); break;
+				case 0x66:	sprintf(mnemonic, "fdadd"); break;
+				case 0x67:	sprintf(mnemonic, "fdmul"); break;
+				case 0x68:	sprintf(mnemonic, "fssub"); break;
+				case 0x6c:	sprintf(mnemonic, "fdsub"); break;
+
+				default:	sprintf(mnemonic, "FPU (?)"); break;
+			}
+
+			if (w2 & 0x4000)
+			{
+				sprintf(g_dasm_str, "%s%s   %s, FP%d", mnemonic, float_data_format[src], get_ea_mode_str_32(g_cpu_ir), dst_reg);
+			}
+			else
+			{
+				sprintf(g_dasm_str, "%s.x   FP%d, FP%d", mnemonic, src, dst_reg);
+			}
+			break;
+		}
+
+		case 0x3:
+		{
+			switch ((w2>>10)&7)
+			{
+				case 3:		// packed decimal w/fixed k-factor
+					sprintf(g_dasm_str, "fmove%s   FP%d, %s {#%d}", float_data_format[(w2>>10)&7], dst_reg, get_ea_mode_str_32(g_cpu_ir), sext_7bit_int(w2&0x7f));
+					break;
+
+				case 7:		// packed decimal w/dynamic k-factor (register)
+					sprintf(g_dasm_str, "fmove%s   FP%d, %s {D%d}", float_data_format[(w2>>10)&7], dst_reg, get_ea_mode_str_32(g_cpu_ir), (w2>>4)&7);
+					break;
+
+				default:
+					sprintf(g_dasm_str, "fmove%s   FP%d, %s", float_data_format[(w2>>10)&7], dst_reg, get_ea_mode_str_32(g_cpu_ir));
+					break;
+			}
+			break;
+		}
+
+		case 0x4:	// ea to control
+		{
+			sprintf(g_dasm_str, "fmovem.l   %s, ", get_ea_mode_str_32(g_cpu_ir));
+			if (w2 & 0x1000) strcat(g_dasm_str, "fpcr");
+			if (w2 & 0x0800) strcat(g_dasm_str, "/fpsr");
+			if (w2 & 0x0400) strcat(g_dasm_str, "/fpiar");
+			break;
+		}
+
+		case 0x5:	// control to ea
+		{
+			
+			strcpy(g_dasm_str, "fmovem.l   ");
+			if (w2 & 0x1000) strcat(g_dasm_str, "fpcr");
+			if (w2 & 0x0800) strcat(g_dasm_str, "/fpsr");
+			if (w2 & 0x0400) strcat(g_dasm_str, "/fpiar");
+			strcat(g_dasm_str, ", ");
+			strcat(g_dasm_str, get_ea_mode_str_32(g_cpu_ir));
+			break;
+		}
+
+		case 0x6:	// memory to FPU, list
+		{
+			char temp[32];
+
+			if ((w2>>11) & 1)	// dynamic register list
+			{
+				sprintf(g_dasm_str, "fmovem.x   %s, D%d", get_ea_mode_str_32(g_cpu_ir), (w2>>4)&7);
+			}
+			else	// static register list
+			{
+				int i;
+
+				sprintf(g_dasm_str, "fmovem.x   %s, ", get_ea_mode_str_32(g_cpu_ir));
+
+				for (i = 0; i < 8; i++)
+				{
+					if (w2 & (1<<i))
+					{
+						if ((w2>>12) & 1)	// postincrement or control
+						{
+							sprintf(temp, "FP%d ", 7-i);
+						}
+						else			// predecrement
+						{
+							sprintf(temp, "FP%d ", i);
+						}
+						strcat(g_dasm_str, temp);
+					}
+				}
+			}
+			break;
+		}
+
+		case 0x7:	// FPU to memory, list
+		{
+			char temp[32];
+
+			if ((w2>>11) & 1)	// dynamic register list
+			{
+				sprintf(g_dasm_str, "fmovem.x   D%d, %s", (w2>>4)&7, get_ea_mode_str_32(g_cpu_ir));
+			}
+			else	// static register list
+			{
+				int i;
+
+				sprintf(g_dasm_str, "fmovem.x   ");
+
+				for (i = 0; i < 8; i++)
+				{
+					if (w2 & (1<<i))
+					{
+						if ((w2>>12) & 1)	// postincrement or control
+						{
+							sprintf(temp, "FP%d ", 7-i);
+						}
+						else			// predecrement
+						{
+							sprintf(temp, "FP%d ", i);
+						}
+						strcat(g_dasm_str, temp);
+					}
+				}
+
+				strcat(g_dasm_str, ", ");
+				strcat(g_dasm_str, get_ea_mode_str_32(g_cpu_ir));
+			}
+			break;
+		}
+
+		default:
+		{
+			sprintf(g_dasm_str, "FPU (?) ");
+			break;
+		}
+	}
+}
+
 static void d68000_jmp(void)
 {
 	sprintf(g_dasm_str, "jmp     %s", get_ea_mode_str_32(g_cpu_ir));
@@ -1614,6 +1935,7 @@ static void d68000_jmp(void)
 static void d68000_jsr(void)
 {
 	sprintf(g_dasm_str, "jsr     %s", get_ea_mode_str_32(g_cpu_ir));
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_lea(void)
@@ -1840,7 +2162,7 @@ static void d68010_movec(void)
 			processor = "?";
 	}
 
-	if(BIT_1(g_cpu_ir))
+	if(BIT_0(g_cpu_ir))
 		sprintf(g_dasm_str, "movec %c%d, %s; (%s)", BIT_F(extension) ? 'A' : 'D', (extension>>12)&7, reg_name, processor);
 	else
 		sprintf(g_dasm_str, "movec %s, %c%d; (%s)", reg_name, BIT_F(extension) ? 'A' : 'D', (extension>>12)&7, processor);
@@ -2374,6 +2696,21 @@ static void d68000_pea(void)
 	sprintf(g_dasm_str, "pea     %s", get_ea_mode_str_32(g_cpu_ir));
 }
 
+// this is a 68040-specific form of PFLUSH
+static void d68040_pflush(void)
+{
+	LIMIT_CPU_TYPES(M68040_PLUS);
+
+	if (g_cpu_ir & 0x10)
+	{
+		sprintf(g_dasm_str, "pflusha%s", (g_cpu_ir & 8) ? "" : "n");
+	}
+	else
+	{
+		sprintf(g_dasm_str, "pflush%s(A%d)", (g_cpu_ir & 8) ? "" : "n", g_cpu_ir & 7);
+	}
+}
+
 static void d68000_reset(void)
 {
 	sprintf(g_dasm_str, "reset");
@@ -2524,27 +2861,32 @@ static void d68010_rtd(void)
 {
 	LIMIT_CPU_TYPES(M68010_PLUS);
 	sprintf(g_dasm_str, "rtd     %s; (1+)", get_imm_str_s16());
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OUT);
 }
 
 static void d68000_rte(void)
 {
 	sprintf(g_dasm_str, "rte");
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OUT);
 }
 
 static void d68020_rtm(void)
 {
 	LIMIT_CPU_TYPES(M68020_ONLY);
 	sprintf(g_dasm_str, "rtm     %c%d; (2+)", BIT_3(g_cpu_ir) ? 'A' : 'D', g_cpu_ir&7);
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OUT);
 }
 
 static void d68000_rtr(void)
 {
 	sprintf(g_dasm_str, "rtr");
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OUT);
 }
 
 static void d68000_rts(void)
 {
 	sprintf(g_dasm_str, "rts");
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OUT);
 }
 
 static void d68000_sbcd_rr(void)
@@ -2689,23 +3031,27 @@ static void d68020_trapcc_0(void)
 {
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	sprintf(g_dasm_str, "trap%-2s; (2+)", g_cc[(g_cpu_ir>>8)&0xf]);
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68020_trapcc_16(void)
 {
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	sprintf(g_dasm_str, "trap%-2s  %s; (2+)", g_cc[(g_cpu_ir>>8)&0xf], get_imm_str_u16());
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68020_trapcc_32(void)
 {
 	LIMIT_CPU_TYPES(M68020_PLUS);
 	sprintf(g_dasm_str, "trap%-2s  %s; (2+)", g_cc[(g_cpu_ir>>8)&0xf], get_imm_str_u32());
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_trapv(void)
 {
 	sprintf(g_dasm_str, "trapv");
+	SET_OPCODE_FLAGS(DASMFLAG_STEP_OVER);
 }
 
 static void d68000_tst_8(void)
@@ -2807,6 +3153,137 @@ static void d68020_unpk_mm(void)
 }
 
 
+// PFLUSH:  001xxx0xxxxxxxxx
+// PLOAD:   001000x0000xxxxx
+// PVALID1: 0010100000000000
+// PVALID2: 0010110000000xxx
+// PMOVE 1: 010xxxx000000000
+// PMOVE 2: 011xxxx0000xxx00
+// PMOVE 3: 011xxxx000000000
+// PTEST:   100xxxxxxxxxxxxx
+// PFLUSHR:  1010000000000000
+static void d68851_p000(void)
+{
+	char* str;
+	uint modes = read_imm_16();
+
+	// do this after fetching the second PMOVE word so we properly get the 3rd if necessary
+	str = get_ea_mode_str_32(g_cpu_ir);
+
+	if ((modes & 0xfde0) == 0x2000)	// PLOAD
+	{
+		if (modes & 0x0200)
+		{
+	 		sprintf(g_dasm_str, "pload  #%d, %s", (modes>>10)&7, str);
+		}
+		else
+		{
+	 		sprintf(g_dasm_str, "pload  %s, #%d", str, (modes>>10)&7);
+		}
+		return;
+	}
+
+	if ((modes & 0xe200) == 0x2000)	// PFLUSH
+	{
+		sprintf(g_dasm_str, "pflushr %x, %x, %s", modes & 0x1f, (modes>>5)&0xf, str);
+		return;
+	}
+
+	if (modes == 0xa000)	// PFLUSHR
+	{
+		sprintf(g_dasm_str, "pflushr %s", str);
+	}
+
+	if (modes == 0x2800)	// PVALID (FORMAT 1)
+	{
+		sprintf(g_dasm_str, "pvalid VAL, %s", str);
+		return;
+	}
+
+	if ((modes & 0xfff8) == 0x2c00)	// PVALID (FORMAT 2)
+	{
+		sprintf(g_dasm_str, "pvalid A%d, %s", modes & 0xf, str);
+		return;
+	}
+
+	if ((modes & 0xe000) == 0x8000)	// PTEST
+	{
+		sprintf(g_dasm_str, "ptest #%d, %s", modes & 0x1f, str);
+		return;
+	}
+
+	switch ((modes>>13) & 0x7)
+	{
+		case 0:	// MC68030/040 form with FD bit
+		case 2:	// MC68881 form, FD never set
+			if (modes & 0x0100)
+			{
+				if (modes & 0x0200)
+				{
+			 		sprintf(g_dasm_str, "pmovefd  %s, %s", g_mmuregs[(modes>>10)&7], str);
+				}
+				else
+				{
+			 		sprintf(g_dasm_str, "pmovefd  %s, %s", str, g_mmuregs[(modes>>10)&7]);
+				}
+			}
+			else
+			{
+				if (modes & 0x0200)
+				{
+			 		sprintf(g_dasm_str, "pmove  %s, %s", g_mmuregs[(modes>>10)&7], str);
+				}
+				else
+				{
+			 		sprintf(g_dasm_str, "pmove  %s, %s", str, g_mmuregs[(modes>>10)&7]);
+				}
+			}
+			break;
+
+		case 3:	// MC68030 to/from status reg
+			if (modes & 0x0200)
+			{
+		 		sprintf(g_dasm_str, "pmove  mmusr, %s", str);
+			}
+			else
+			{
+		 		sprintf(g_dasm_str, "pmove  %s, mmusr", str);
+			}
+			break;
+
+		default:
+			sprintf(g_dasm_str, "pmove [unknown form] %s", str);
+			break;
+	}
+}
+
+static void d68851_pbcc16(void)
+{
+	uint32 temp_pc = g_cpu_pc;
+
+	sprintf(g_dasm_str, "pb%s %x", g_mmucond[g_cpu_ir&0xf], temp_pc + make_int_16(read_imm_16()));
+}
+
+static void d68851_pbcc32(void)
+{
+	uint32 temp_pc = g_cpu_pc;
+
+	sprintf(g_dasm_str, "pb%s %x", g_mmucond[g_cpu_ir&0xf], temp_pc + make_int_32(read_imm_32()));
+}
+
+static void d68851_pdbcc(void)
+{
+	uint32 temp_pc = g_cpu_pc;
+	uint16 modes = read_imm_16();
+
+	sprintf(g_dasm_str, "pb%s %x", g_mmucond[modes&0xf], temp_pc + make_int_16(read_imm_16()));
+}
+
+// PScc:  0000000000xxxxxx
+static void d68851_p001(void)
+{
+	sprintf(g_dasm_str, "MMU 001 group");
+}
 
 /* ======================================================================== */
 /* ======================= INSTRUCTION TABLE BUILDER ====================== */
@@ -2827,9 +3304,9 @@ static void d68020_unpk_mm(void)
   1 = pc idx
 */
 
-static opcode_struct g_opcode_info[] =
+static const opcode_struct g_opcode_info[] =
 {
-/*  opcode handler    mask    match   ea mask */
+/*  opcode handler             mask    match   ea mask */
 	{d68000_1010         , 0xf000, 0xa000, 0x000},
 	{d68000_1111         , 0xf000, 0xf000, 0x000},
 	{d68000_abcd_rr      , 0xf1f8, 0xc100, 0x000},
@@ -2967,6 +3444,7 @@ static opcode_struct g_opcode_info[] =
 	{d68020_extb_32      , 0xfff8, 0x49c0, 0x000},
 	{d68000_ext_16       , 0xfff8, 0x4880, 0x000},
 	{d68000_ext_32       , 0xfff8, 0x48c0, 0x000},
+	{d68040_fpu          , 0xffc0, 0xf200, 0x000},
 	{d68000_illegal      , 0xffff, 0x4afc, 0x000},
 	{d68000_jmp          , 0xffc0, 0x4ec0, 0x27b},
 	{d68000_jsr          , 0xffc0, 0x4e80, 0x27b},
@@ -3046,6 +3524,7 @@ static opcode_struct g_opcode_info[] =
 	{d68020_pack_rr      , 0xf1f8, 0x8140, 0x000},
 	{d68020_pack_mm      , 0xf1f8, 0x8148, 0x000},
 	{d68000_pea          , 0xffc0, 0x4840, 0x27b},
+	{d68040_pflush       , 0xffe0, 0xf500, 0x000},
 	{d68000_reset        , 0xffff, 0x4e70, 0x000},
 	{d68000_ror_s_8      , 0xf1f8, 0xe018, 0x000},
 	{d68000_ror_s_16     , 0xf1f8, 0xe058, 0x000},
@@ -3128,6 +3607,11 @@ static opcode_struct g_opcode_info[] =
 	{d68000_unlk         , 0xfff8, 0x4e58, 0x000},
 	{d68020_unpk_rr      , 0xf1f8, 0x8180, 0x000},
 	{d68020_unpk_mm      , 0xf1f8, 0x8188, 0x000},
+	{d68851_p000         , 0xffc0, 0xf000, 0x000},
+	{d68851_pbcc16       , 0xffc0, 0xf080, 0x000},
+	{d68851_pbcc32       , 0xffc0, 0xf0c0, 0x000},
+	{d68851_pdbcc        , 0xfff8, 0xf048, 0x000},
+	{d68851_p001         , 0xffc0, 0xf040, 0x000},
 	{0, 0, 0, 0}
 };
 
@@ -3200,19 +3684,17 @@ static void build_opcode_table(void)
 	uint i;
 	uint opcode;
 	opcode_struct* ostruct;
-	uint opcode_info_length = 0;
+	opcode_struct opcode_info[ARRAY_LENGTH(g_opcode_info)];
 
-	for(ostruct = g_opcode_info;ostruct->opcode_handler != 0;ostruct++)
-		opcode_info_length++;
-
-	qsort((void *)g_opcode_info, opcode_info_length, sizeof(g_opcode_info[0]), compare_nof_true_bits);
+	memcpy(opcode_info, g_opcode_info, sizeof(g_opcode_info));
+	qsort((void *)opcode_info, ARRAY_LENGTH(opcode_info)-1, sizeof(opcode_info[0]), compare_nof_true_bits);
 
 	for(i=0;i<0x10000;i++)
 	{
 		g_instruction_table[i] = d68000_illegal; /* default to illegal */
 		opcode = i;
 		/* search through opcode info for a match */
-		for(ostruct = g_opcode_info;ostruct->opcode_handler != 0;ostruct++)
+		for(ostruct = opcode_info;ostruct->opcode_handler != 0;ostruct++)
 		{
 			/* match opcode mask and allowed ea modes */
 			if((opcode & ostruct->mask) == ostruct->match)
@@ -3265,11 +3747,14 @@ unsigned int m68k_disassemble(char* str_buff, unsigned int pc, unsigned int cpu_
 			g_cpu_type = TYPE_68020;
 			g_address_mask = 0xffffffff;
 			break;
+		case M68K_CPU_TYPE_68EC030:
 		case M68K_CPU_TYPE_68030:
 			g_cpu_type = TYPE_68030;
 			g_address_mask = 0xffffffff;
 			break;
 		case M68K_CPU_TYPE_68040:
+		case M68K_CPU_TYPE_68EC040:
+		case M68K_CPU_TYPE_68LC040:
 			g_cpu_type = TYPE_68040;
 			g_address_mask = 0xffffffff;
 			break;
@@ -3280,9 +3765,10 @@ unsigned int m68k_disassemble(char* str_buff, unsigned int pc, unsigned int cpu_
 	g_cpu_pc = pc;
 	g_helper_str[0] = 0;
 	g_cpu_ir = read_imm_16();
+	g_opcode_type = 0;
 	g_instruction_table[g_cpu_ir]();
 	sprintf(str_buff, "%s%s", g_dasm_str, g_helper_str);
-	return g_cpu_pc - pc;
+	return COMBINE_OPCODE_FLAGS(g_cpu_pc - pc);
 }
 
 char* m68ki_disassemble_quick(unsigned int pc, unsigned int cpu_type)
@@ -3293,6 +3779,18 @@ char* m68ki_disassemble_quick(unsigned int pc, unsigned int cpu_type)
 	return buff;
 }
 
+unsigned int m68k_disassemble_raw(char* str_buff, unsigned int pc, const unsigned char* opdata, const unsigned char* argdata, unsigned int cpu_type)
+{
+	unsigned int result;
+	(void)argdata;
+
+	g_rawop = opdata;
+	g_rawbasepc = pc;
+	result = m68k_disassemble(str_buff, pc, cpu_type);
+	g_rawop = NULL;
+	return result;
+}
+
 /* Check if the instruction is a valid one */
 unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cpu_type)
 {
@@ -3323,6 +3821,7 @@ unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cp
 				return 0;
 			if(g_instruction_table[instruction] == d68010_rtd)
 				return 0;
+			// Fallthrough
 		case M68K_CPU_TYPE_68010:
 			if(g_instruction_table[instruction] == d68020_bcc_32)
 				return 0;
@@ -3444,9 +3943,11 @@ unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cp
 				return 0;
 			if(g_instruction_table[instruction] == d68020_unpk_mm)
 				return 0;
+			// Fallthrough
 		case M68K_CPU_TYPE_68EC020:
 		case M68K_CPU_TYPE_68020:
 		case M68K_CPU_TYPE_68030:
+		case M68K_CPU_TYPE_68EC030:
 			if(g_instruction_table[instruction] == d68040_cinv)
 				return 0;
 			if(g_instruction_table[instruction] == d68040_cpush)
@@ -3461,6 +3962,32 @@ unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cp
 				return 0;
 			if(g_instruction_table[instruction] == d68040_move16_al_ai)
 				return 0;
+			// Fallthrough
+		case M68K_CPU_TYPE_68040:
+		case M68K_CPU_TYPE_68EC040:
+		case M68K_CPU_TYPE_68LC040:
+			if(g_instruction_table[instruction] == d68020_cpbcc_16)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cpbcc_32)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cpdbcc)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cpgen)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cprestore)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cpsave)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cpscc)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cptrapcc_0)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cptrapcc_16)
+				return 0;
+			if(g_instruction_table[instruction] == d68020_cptrapcc_32)
+				return 0;
+			if(g_instruction_table[instruction] == d68040_pflush)
+				return 0;
 	}
 	if(cpu_type != M68K_CPU_TYPE_68020 && cpu_type != M68K_CPU_TYPE_68EC020 &&
 	  (g_instruction_table[instruction] == d68020_callm ||
@@ -3470,7 +3997,7 @@ unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cp
 	return 1;
 }
 
-
+// f028 2215 0008
 
 /* ======================================================================== */
 /* ============================== END OF FILE ============================= */
diff --git a/plat/linux68k/emu/musashi/m68kfpu.c b/plat/linux68k/emu/musashi/m68kfpu.c
new file mode 100644
index 000000000..46070f56c
--- /dev/null
+++ b/plat/linux68k/emu/musashi/m68kfpu.c
@@ -0,0 +1,1819 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+
+extern void exit(int);
+
+static void fatalerror(char *format, ...) {
+  va_list ap;
+  va_start(ap,format);
+  vfprintf(stderr,format,ap);  // JFF: fixed. Was using fprintf and arguments were wrong
+  va_end(ap);
+  exit(1);
+}
+
+#define FPCC_N			0x08000000
+#define FPCC_Z			0x04000000
+#define FPCC_I			0x02000000
+#define FPCC_NAN		0x01000000
+
+#define DOUBLE_INFINITY					(unsigned long long)(0x7ff0000000000000)
+#define DOUBLE_EXPONENT					(unsigned long long)(0x7ff0000000000000)
+#define DOUBLE_MANTISSA					(unsigned long long)(0x000fffffffffffff)
+
+extern flag floatx80_is_nan( floatx80 a );
+
+// masks for packed dwords, positive k-factor
+static uint32 pkmask2[18] =
+{
+ 0xffffffff, 0, 0xf0000000, 0xff000000, 0xfff00000, 0xffff0000,
+ 0xfffff000, 0xffffff00, 0xfffffff0, 0xffffffff,
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+ 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+static uint32 pkmask3[18] =
+{
+ 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0xf0000000, 0xff000000, 0xfff00000, 0xffff0000,
+ 0xfffff000, 0xffffff00, 0xfffffff0, 0xffffffff,
+};
+
+static inline double fx80_to_double(floatx80 fx)
+{
+  uint64 d;
+  double *foo;
+
+  foo = (double *)&d;
+
+  d = floatx80_to_float64(fx);
+
+  return *foo;
+}
+
+static inline floatx80 double_to_fx80(double in)
+{
+  uint64 *d;
+
+  d = (uint64 *)&in;
+
+  return float64_to_floatx80(*d);
+}
+
+static inline floatx80 load_extended_float80(uint32 ea)
+{
+  uint32 d1,d2;
+	uint16 d3;
+  floatx80 fp;
+
+  d3 = m68ki_read_16(ea);
+  d1 = m68ki_read_32(ea+4);
+  d2 = m68ki_read_32(ea+8);
+
+  fp.high = d3;
+  fp.low = ((uint64)d1<<32) | (d2 & 0xffffffff);
+
+  return fp;
+}
+
+static inline void store_extended_float80(uint32 ea, floatx80 fpr)
+{
+  m68ki_write_16(ea+0, fpr.high);
+  m68ki_write_16(ea+2, 0);
+  m68ki_write_32(ea+4, (fpr.low>>32)&0xffffffff);
+  m68ki_write_32(ea+8, fpr.low&0xffffffff);
+}
+
+static inline floatx80 load_pack_float80(uint32 ea)
+{
+  uint32 dw1, dw2, dw3;
+  floatx80 result;
+  double tmp;
+  char str[128], *ch;
+
+  dw1 = m68ki_read_32(ea);
+  dw2 = m68ki_read_32(ea+4);
+  dw3 = m68ki_read_32(ea+8);
+
+  ch = &str[0];
+  if (dw1 & 0x80000000)	// mantissa sign
+    {
+      *ch++ = '-';
+    }
+  *ch++ = (char)((dw1 & 0xf) + '0');
+  *ch++ = '.';
+  *ch++ = (char)(((dw2 >> 28) & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 24) & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 20) & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 16) & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 12) & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 8)  & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 4)  & 0xf) + '0');
+  *ch++ = (char)(((dw2 >> 0)  & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 28) & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 24) & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 20) & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 16) & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 12) & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 8)  & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 4)  & 0xf) + '0');
+  *ch++ = (char)(((dw3 >> 0)  & 0xf) + '0');
+  *ch++ = 'E';
+  if (dw1 & 0x40000000)	// exponent sign
+    {
+      *ch++ = '-';
+    }
+  *ch++ = (char)(((dw1 >> 24) & 0xf) + '0');
+  *ch++ = (char)(((dw1 >> 20) & 0xf) + '0');
+  *ch++ = (char)(((dw1 >> 16) & 0xf) + '0');
+  *ch = '\0';
+
+  sscanf(str, "%le", &tmp);
+
+  result = double_to_fx80(tmp);
+
+  return result;
+}
+
+static inline void store_pack_float80(uint32 ea, int k, floatx80 fpr)
+{
+  uint32 dw1, dw2, dw3;
+  char str[128], *ch;
+  int i, j, exp;
+
+  dw1 = dw2 = dw3 = 0;
+  ch = &str[0];
+
+  sprintf(str, "%.16e", fx80_to_double(fpr));
+
+  if (*ch == '-')
+    {
+      ch++;
+      dw1 = 0x80000000;
+    }
+
+  if (*ch == '+')
+    {
+      ch++;
+    }
+
+  dw1 |= (*ch++ - '0');
+
+  if (*ch == '.')
+    {
+      ch++;
+    }
+
+  // handle negative k-factor here
+  if ((k <= 0) && (k >= -13))
+    {
+      exp = 0;
+      for (i = 0; i < 3; i++)
+	{
+	  if (ch[18+i] >= '0' && ch[18+i] <= '9')
+	    {
+	      exp = (exp << 4) | (ch[18+i] - '0');
+	    }
+	}
+
+      if (ch[17] == '-')
+	{
+	  exp = -exp;
+	}
+
+      k = -k;
+      // last digit is (k + exponent - 1)
+      k += (exp - 1);
+
+      // round up the last significant mantissa digit
+      if (ch[k+1] >= '5')
+	{
+	  ch[k]++;
+	}
+
+      // zero out the rest of the mantissa digits
+      for (j = (k+1); j < 16; j++)
+	{
+	  ch[j] = '0';
+	}
+
+      // now zero out K to avoid tripping the positive K detection below
+      k = 0;
+    }
+
+  // crack 8 digits of the mantissa
+  for (i = 0; i < 8; i++)
+    {
+      dw2 <<= 4;
+      if (*ch >= '0' && *ch <= '9')
+	{
+	  dw2 |= *ch++ - '0';
+	}
+    }
+
+  // next 8 digits of the mantissa
+  for (i = 0; i < 8; i++)
+    {
+      dw3 <<= 4;
+      if (*ch >= '0' && *ch <= '9')
+	dw3 |= *ch++ - '0';
+    }
+
+  // handle masking if k is positive
+  if (k >= 1)
+    {
+      if (k <= 17)
+	{
+	  dw2 &= pkmask2[k];
+	  dw3 &= pkmask3[k];
+	}
+      else
+	{
+	  dw2 &= pkmask2[17];
+	  dw3 &= pkmask3[17];
+	  //			m68ki_cpu.fpcr |=  (need to set OPERR bit)
+	}
+    }
+
+  // finally, crack the exponent
+  if (*ch == 'e' || *ch == 'E')
+    {
+      ch++;
+      if (*ch == '-')
+	{
+	  ch++;
+	  dw1 |= 0x40000000;
+	}
+
+      if (*ch == '+')
+	{
+	  ch++;
+	}
+
+      j = 0;
+      for (i = 0; i < 3; i++)
+	{
+	  if (*ch >= '0' && *ch <= '9')
+	    {
+	      j = (j << 4) | (*ch++ - '0');
+	    }
+	}
+
+      dw1 |= (j << 16);
+    }
+
+  m68ki_write_32(ea, dw1);
+  m68ki_write_32(ea+4, dw2);
+  m68ki_write_32(ea+8, dw3);
+}
+
+static inline void SET_CONDITION_CODES(floatx80 reg)
+{
+  REG_FPSR &= ~(FPCC_N|FPCC_Z|FPCC_I|FPCC_NAN);
+
+  // sign flag
+  if (reg.high & 0x8000)
+    {
+      REG_FPSR |= FPCC_N;
+    }
+
+  // zero flag
+  if (((reg.high & 0x7fff) == 0) && ((reg.low<<1) == 0))
+    {
+      REG_FPSR |= FPCC_Z;
+    }
+
+  // infinity flag
+  if (((reg.high & 0x7fff) == 0x7fff) && ((reg.low<<1) == 0))
+    {
+      REG_FPSR |= FPCC_I;
+    }
+
+  // NaN flag
+  if (floatx80_is_nan(reg))
+    {
+      REG_FPSR |= FPCC_NAN;
+    }
+}
+
+static inline int TEST_CONDITION(int condition)
+{
+  int n = (REG_FPSR & FPCC_N) != 0;
+  int z = (REG_FPSR & FPCC_Z) != 0;
+  int nan = (REG_FPSR & FPCC_NAN) != 0;
+  int r = 0;
+  switch (condition)
+    {
+      case 0x10:
+      case 0x00:		return 0;					// False
+
+      case 0x11:
+      case 0x01:		return (z);					// Equal
+
+      case 0x12:
+      case 0x02:		return (!(nan || z || n));			// Greater Than
+
+      case 0x13:
+      case 0x03:		return (z || !(nan || n));			// Greater or Equal
+
+      case 0x14:
+      case 0x04:		return (n && !(nan || z));			// Less Than
+
+      case 0x15:
+      case 0x05:		return (z || (n && !nan));			// Less Than or Equal
+
+      case 0x16:
+      case 0x06:		return !nan && !z;
+
+      case 0x17:
+      case 0x07:		return !nan;
+
+      case 0x18:
+      case 0x08:		return nan;
+
+      case 0x19:
+      case 0x09:		return nan || z;
+
+      case 0x1a:
+      case 0x0a:		return (nan || !(n || z));			// Not Less Than or Equal
+
+      case 0x1b:
+      case 0x0b:		return (nan || z || !n);			// Not Less Than
+
+      case 0x1c:
+      case 0x0c:		return (nan || (n && !z));			// Not Greater or Equal Than
+
+      case 0x1d:
+      case 0x0d:		return (nan || z || n);				// Not Greater Than
+
+      case 0x1e:
+      case 0x0e:		return (!z);					// Not Equal
+
+      case 0x1f:
+      case 0x0f:		return 1;					// True
+
+      default:		fatalerror("M68kFPU: test_condition: unhandled condition %02X\n", condition);
+    }
+
+  return r;
+}
+
+static uint8 READ_EA_8(int ea)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 0:		// Dn
+	{
+	  return REG_D[reg];
+	}
+      case 2: 	// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  return m68ki_read_8(ea);
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_8();
+	  return m68ki_read_8(ea);
+	}
+      case 6:		// (An) + (Xn) + d8
+	{
+	  uint32 ea = EA_AY_IX_8();
+	  return m68ki_read_8(ea);
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 0:		// (xxx).W
+		{
+		  uint32 ea = (uint32)OPER_I_16();
+		  return m68ki_read_8(ea);
+		}
+	      case 1:		// (xxx).L
+		{
+		  uint32 d1 = OPER_I_16();
+		  uint32 d2 = OPER_I_16();
+		  uint32 ea = (d1 << 16) | d2;
+		  return m68ki_read_8(ea);
+		}
+	      case 4:		// #<data>
+		{
+		  return  OPER_I_8();
+		}
+	      default:	fatalerror("M68kFPU: READ_EA_8: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: READ_EA_8: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+    }
+
+  return 0;
+}
+
+static uint16 READ_EA_16(int ea)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 0:		// Dn
+	{
+			return (uint16)(REG_D[reg]);
+	}
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  return m68ki_read_16(ea);
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_16();
+	  return m68ki_read_16(ea);
+	}
+      case 6:		// (An) + (Xn) + d8
+	{
+	  uint32 ea = EA_AY_IX_16();
+	  return m68ki_read_16(ea);
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 0:		// (xxx).W
+		{
+		  uint32 ea = (uint32)OPER_I_16();
+		  return m68ki_read_16(ea);
+		}
+	      case 1:		// (xxx).L
+		{
+		  uint32 d1 = OPER_I_16();
+		  uint32 d2 = OPER_I_16();
+		  uint32 ea = (d1 << 16) | d2;
+		  return m68ki_read_16(ea);
+		}
+	      case 4:		// #<data>
+		{
+		  return OPER_I_16();
+		}
+
+	      default:	fatalerror("M68kFPU: READ_EA_16: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: READ_EA_16: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+    }
+
+  return 0;
+}
+
+static uint32 READ_EA_32(int ea)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 0:		// Dn
+	{
+	  return REG_D[reg];
+	}
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  return m68ki_read_32(ea);
+	}
+      case 3:		// (An)+
+	{
+	  uint32 ea = EA_AY_PI_32();
+	  return m68ki_read_32(ea);
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_32();
+	  return m68ki_read_32(ea);
+	}
+      case 6:		// (An) + (Xn) + d8
+	{
+	  uint32 ea = EA_AY_IX_32();
+	  return m68ki_read_32(ea);
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 0:		// (xxx).W
+		{
+		  uint32 ea = (uint32)OPER_I_16();
+		  return m68ki_read_32(ea);
+		}
+	      case 1:		// (xxx).L
+		{
+		  uint32 d1 = OPER_I_16();
+		  uint32 d2 = OPER_I_16();
+		  uint32 ea = (d1 << 16) | d2;
+		  return m68ki_read_32(ea);
+		}
+	      case 2:		// (d16, PC)
+		{
+		  uint32 ea = EA_PCDI_32();
+		  return m68ki_read_32(ea);
+		}
+	      case 4:		// #<data>
+		{
+		  return  OPER_I_32();
+		}
+	      default:	fatalerror("M68kFPU: READ_EA_32: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: READ_EA_32: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+    }
+  return 0;
+}
+
+static uint64 READ_EA_64(int ea)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+  uint32 h1, h2;
+
+  switch (mode)
+    {
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  h1 = m68ki_read_32(ea+0);
+	  h2 = m68ki_read_32(ea+4);
+	  return  (uint64)(h1) << 32 | (uint64)(h2);
+	}
+      case 3:		// (An)+
+	{
+	  uint32 ea = REG_A[reg];
+	  REG_A[reg] += 8;
+	  h1 = m68ki_read_32(ea+0);
+	  h2 = m68ki_read_32(ea+4);
+	  return  (uint64)(h1) << 32 | (uint64)(h2);
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_32();
+	  h1 = m68ki_read_32(ea+0);
+	  h2 = m68ki_read_32(ea+4);
+	  return  (uint64)(h1) << 32 | (uint64)(h2);
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 4:		// #<data>
+		{
+		  h1 = OPER_I_32();
+		  h2 = OPER_I_32();
+		  return  (uint64)(h1) << 32 | (uint64)(h2);
+		}
+	      case 2:		// (d16, PC)
+		{
+		  uint32 ea = EA_PCDI_32();
+		  h1 = m68ki_read_32(ea+0);
+		  h2 = m68ki_read_32(ea+4);
+		  return  (uint64)(h1) << 32 | (uint64)(h2);
+		}
+	      default:	fatalerror("M68kFPU: READ_EA_64: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: READ_EA_64: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+    }
+
+  return 0;
+}
+
+
+static floatx80 READ_EA_FPE(int mode, int reg, uint32 di_mode_ea)
+{
+  floatx80 fpr;
+
+  switch (mode)
+    {
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  fpr = load_extended_float80(ea);
+	  break;
+	}
+
+      case 3:		// (An)+
+	{
+	  uint32 ea = REG_A[reg];
+	  REG_A[reg] += 12;
+	  fpr = load_extended_float80(ea);
+	  break;
+	}
+      case 5:		// (d16, An)  (added by JFF)
+	{
+	  fpr = load_extended_float80(di_mode_ea);
+         
+	  break;
+
+	}
+      case 7:	// extended modes
+	{
+	  switch (reg)
+	    {
+	      case 2:	// (d16, PC)
+		{
+		  uint32 ea = EA_PCDI_32();
+		  fpr = load_extended_float80(ea);
+		}
+		break;
+
+	      case 3:	// (d16,PC,Dx.w)
+		{
+		  uint32 ea = EA_PCIX_32();
+		  fpr = load_extended_float80(ea);
+		}
+		break;
+	      case 4: // immediate (JFF)
+		{
+		  uint32 ea = REG_PC;
+		  fpr = load_extended_float80(ea);
+		  REG_PC += 12;
+		}
+		break;
+	      default:
+		fatalerror("M68kFPU: READ_EA_FPE: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
+		break;
+	    }
+	}
+	break;
+
+      default:	fatalerror("M68kFPU: READ_EA_FPE: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC); break;
+    }
+
+  return fpr;
+}
+
+static floatx80 READ_EA_PACK(int ea)
+{
+  floatx80 fpr;
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  fpr = load_pack_float80(ea);
+	  break;
+	}
+
+      case 3:		// (An)+
+	{
+	  uint32 ea = REG_A[reg];
+	  REG_A[reg] += 12;
+	  fpr = load_pack_float80(ea);
+	  break;
+	}
+
+      case 7:	// extended modes
+	{
+	  switch (reg)
+	    {
+	      case 3:	// (d16,PC,Dx.w)
+		{
+		  uint32 ea = EA_PCIX_32();
+		  fpr = load_pack_float80(ea);
+		}
+		break;
+
+	      default:
+		fatalerror("M68kFPU: READ_EA_PACK: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
+		break;
+	    }
+	}
+	break;
+
+      default:	fatalerror("M68kFPU: READ_EA_PACK: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC); break;
+    }
+
+  return fpr;
+}
+
+static void WRITE_EA_8(int ea, uint8 data)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 0:		// Dn
+	{
+	  REG_D[reg] = data;
+	  break;
+	}
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  m68ki_write_8(ea, data);
+	  break;
+	}
+      case 3:		// (An)+
+	{
+	  uint32 ea = EA_AY_PI_8();
+	  m68ki_write_8(ea, data);
+	  break;
+	}
+      case 4:		// -(An)
+	{
+	  uint32 ea = EA_AY_PD_8();
+	  m68ki_write_8(ea, data);
+	  break;
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_8();
+	  m68ki_write_8(ea, data);
+	  break;
+	}
+      case 6:		// (An) + (Xn) + d8
+	{
+	  uint32 ea = EA_AY_IX_8();
+	  m68ki_write_8(ea, data);
+	  break;
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 1:		// (xxx).B
+		{
+		  uint32 d1 = OPER_I_16();
+		  uint32 d2 = OPER_I_16();
+		  uint32 ea = (d1 << 16) | d2;
+		  m68ki_write_8(ea, data);
+		  break;
+		}
+	      case 2:		// (d16, PC)
+		{
+		  uint32 ea = EA_PCDI_16();
+		  m68ki_write_8(ea, data);
+		  break;
+		}
+	      default:	fatalerror("M68kFPU: WRITE_EA_8: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: WRITE_EA_8: unhandled mode %d, reg %d, data %08X at %08X\n", mode, reg, data, REG_PC);
+    }
+}
+
+static void WRITE_EA_16(int ea, uint16 data)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 0:		// Dn
+	{
+	  REG_D[reg] = data;
+	  break;
+	}
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  m68ki_write_16(ea, data);
+	  break;
+	}
+      case 3:		// (An)+
+	{
+	  uint32 ea = EA_AY_PI_16();
+	  m68ki_write_16(ea, data);
+	  break;
+	}
+      case 4:		// -(An)
+	{
+	  uint32 ea = EA_AY_PD_16();
+	  m68ki_write_16(ea, data);
+	  break;
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_16();
+	  m68ki_write_16(ea, data);
+	  break;
+	}
+      case 6:		// (An) + (Xn) + d8
+	{
+	  uint32 ea = EA_AY_IX_16();
+	  m68ki_write_16(ea, data);
+	  break;
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 1:		// (xxx).W
+		{
+		  uint32 d1 = OPER_I_16();
+		  uint32 d2 = OPER_I_16();
+		  uint32 ea = (d1 << 16) | d2;
+		  m68ki_write_16(ea, data);
+		  break;
+		}
+	      case 2:		// (d16, PC)
+		{
+		  uint32 ea = EA_PCDI_16();
+		  m68ki_write_16(ea, data);
+		  break;
+		}
+	      default:	fatalerror("M68kFPU: WRITE_EA_16: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: WRITE_EA_16: unhandled mode %d, reg %d, data %08X at %08X\n", mode, reg, data, REG_PC);
+    }
+}
+
+static void WRITE_EA_32(int ea, uint32 data)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 0:		// Dn
+	{
+	  REG_D[reg] = data;
+	  break;
+	}
+      case 1:		// An
+	{
+	  REG_A[reg] = data;
+	  break;
+	}
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  m68ki_write_32(ea, data);
+	  break;
+	}
+      case 3:		// (An)+
+	{
+	  uint32 ea = EA_AY_PI_32();
+	  m68ki_write_32(ea, data);
+	  break;
+	}
+      case 4:		// -(An)
+	{
+	  uint32 ea = EA_AY_PD_32();
+	  m68ki_write_32(ea, data);
+	  break;
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_32();
+	  m68ki_write_32(ea, data);
+	  break;
+	}
+      case 6:		// (An) + (Xn) + d8
+	{
+	  uint32 ea = EA_AY_IX_32();
+	  m68ki_write_32(ea, data);
+	  break;
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      case 1:		// (xxx).L
+		{
+		  uint32 d1 = OPER_I_16();
+		  uint32 d2 = OPER_I_16();
+		  uint32 ea = (d1 << 16) | d2;
+                  m68ki_write_32(ea, data);
+                  break;
+		}
+	      case 2:		// (d16, PC)
+		{
+		  uint32 ea = EA_PCDI_32();
+                  m68ki_write_32(ea, data);
+                  break;
+		}
+	      default:	fatalerror("M68kFPU: WRITE_EA_32: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: WRITE_EA_32: unhandled mode %d, reg %d, data %08X at %08X\n", mode, reg, data, REG_PC);
+    }
+}
+
+static void WRITE_EA_64(int ea, uint64 data)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 2:		// (An)
+	{
+	  uint32 ea = REG_A[reg];
+	  m68ki_write_32(ea, (uint32)(data >> 32));
+	  m68ki_write_32(ea+4, (uint32)(data));
+	  break;
+	}
+      case 4:		// -(An)
+	{
+	  uint32 ea;
+	  REG_A[reg] -= 8;
+	  ea = REG_A[reg];
+	  m68ki_write_32(ea+0, (uint32)(data >> 32));
+	  m68ki_write_32(ea+4, (uint32)(data));
+	  break;
+	}
+      case 5:		// (d16, An)
+	{
+	  uint32 ea = EA_AY_DI_32();
+	  m68ki_write_32(ea+0, (uint32)(data >> 32));
+	  m68ki_write_32(ea+4, (uint32)(data));
+	  break;
+        }
+
+
+      case 7:    // (TBB)
+        {
+          switch (reg)
+            {
+              case 1:		// (xxx).L
+                {
+                  uint32 d1 = OPER_I_16();
+                  uint32 d2 = OPER_I_16();
+                  uint32 ea = (d1 << 16) | d2;
+                  m68ki_write_32(ea+0, (uint32)(data >> 32));
+                  m68ki_write_32(ea+4, (uint32)(data));
+                  break;
+                }
+              case 2:		// (d16, PC)
+                {
+                  uint32 ea = EA_PCDI_32();
+                  m68ki_write_32(ea+0, (uint32)(data >> 32));
+                  m68ki_write_32(ea+4, (uint32)(data));
+                  break;
+                }
+              default:	fatalerror("M68kFPU: WRITE_EA_32: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+            }
+          break;
+        }
+
+
+
+
+      default:	fatalerror("M68kFPU: WRITE_EA_64: unhandled mode %d, reg %d, data %08X%08X at %08X\n", mode, reg, (uint32)(data >> 32), (uint32)(data), REG_PC);
+    }
+}
+
+static void WRITE_EA_FPE(int mode, int reg, floatx80 fpr, uint32 di_mode_ea)
+{
+
+
+  switch (mode)
+    {
+      case 2:		// (An)
+	{
+	  uint32 ea;
+	  ea = REG_A[reg];
+	  store_extended_float80(ea, fpr);
+	  break;
+	}
+
+      case 3:		// (An)+
+	{
+	  uint32 ea;
+	  ea = REG_A[reg];
+	  store_extended_float80(ea, fpr);
+	  REG_A[reg] += 12;
+	  break;
+	}
+
+      case 4:		// -(An)
+	{
+	  uint32 ea;
+	  REG_A[reg] -= 12;
+	  ea = REG_A[reg];
+	  store_extended_float80(ea, fpr);
+	  break;
+	}
+      case 5:		// (d16, An)  (added by JFF)
+	{
+	  // EA_AY_DI_32() should not be done here because fmovem would increase
+	  // PC each time, reading incorrect displacement & advancing PC too much
+	  // uint32 ea = EA_AY_DI_32();
+	  store_extended_float80(di_mode_ea, fpr);
+         
+	  break;
+
+	}
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      default:	fatalerror("M68kFPU: WRITE_EA_FPE: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+      default:	fatalerror("M68kFPU: WRITE_EA_FPE: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
+    }
+}
+
+static void WRITE_EA_PACK(int ea, int k, floatx80 fpr)
+{
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+
+  switch (mode)
+    {
+      case 2:		// (An)
+	{
+	  uint32 ea;
+	  ea = REG_A[reg];
+	  store_pack_float80(ea, k, fpr);
+	  break;
+	}
+
+      case 3:		// (An)+
+	{
+	  uint32 ea;
+	  ea = REG_A[reg];
+	  store_pack_float80(ea, k, fpr);
+	  REG_A[reg] += 12;
+	  break;
+	}
+
+      case 4:		// -(An)
+	{
+	  uint32 ea;
+	  REG_A[reg] -= 12;
+	  ea = REG_A[reg];
+	  store_pack_float80(ea, k, fpr);
+	  break;
+	}
+
+      case 7:
+	{
+	  switch (reg)
+	    {
+	      default:	fatalerror("M68kFPU: WRITE_EA_PACK: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
+	    }
+	}
+	break;
+      default:	fatalerror("M68kFPU: WRITE_EA_PACK: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
+    }
+}
+
+
+static void fpgen_rm_reg(uint16 w2)
+{
+  int ea = REG_IR & 0x3f;
+  int rm = (w2 >> 14) & 0x1;
+  int src = (w2 >> 10) & 0x7;
+  int dst = (w2 >>  7) & 0x7;
+  int opmode = w2 & 0x7f;
+  floatx80 source;
+
+  // fmovecr #$f, fp0	f200 5c0f
+
+  if (rm)
+    {
+      switch (src)
+	{
+	  case 0:		// Long-Word Integer
+	    {
+	      sint32 d = READ_EA_32(ea);
+	      source = int32_to_floatx80(d);
+	      break;
+	    }
+	  case 1:		// Single-precision Real
+	    {
+	      uint32 d = READ_EA_32(ea);
+	      source = float32_to_floatx80(d);
+	      break;
+	    }
+	  case 2:		// Extended-precision Real
+	    {
+	      int imode = (ea >> 3) & 0x7;
+	      int reg = (ea & 0x7);
+	      uint32 di_mode_ea = imode == 5 ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
+	      source = READ_EA_FPE(imode,reg,di_mode_ea);
+	      break;
+	    }
+	  case 3:		// Packed-decimal Real
+	    {
+	      source = READ_EA_PACK(ea);
+	      break;
+	    }
+	  case 4:		// Word Integer
+	    {
+	      sint16 d = READ_EA_16(ea);
+	      source = int32_to_floatx80((sint32)d);
+	      break;
+	    }
+	  case 5:		// Double-precision Real
+	    {
+	      uint64 d = READ_EA_64(ea);
+
+	      source = float64_to_floatx80(d);
+	      break;
+	    }
+	  case 6:		// Byte Integer
+	    {
+	      sint8 d = READ_EA_8(ea);
+	      source = int32_to_floatx80((sint32)d);
+	      break;
+	    }
+	  case 7:		// FMOVECR load from constant ROM
+	    {
+	      switch (w2 & 0x7f)
+		{
+		  case 0x0:	// Pi
+		    source.high = 0x4000;
+		    source.low = U64(0xc90fdaa22168c235);
+		    break;
+
+		  case 0xb:	// log10(2)
+		    source.high = 0x3ffd;
+		    source.low = U64(0x9a209a84fbcff798);
+		    break;
+
+		  case 0xc:	// e
+		    source.high = 0x4000;
+		    source.low = U64(0xadf85458a2bb4a9b);
+		    break;
+
+		  case 0xd:	// log2(e)
+		    source.high = 0x3fff;
+		    source.low = U64(0xb8aa3b295c17f0bc);
+		    break;
+
+		  case 0xe:	// log10(e)
+		    source.high = 0x3ffd;
+		    source.low = U64(0xde5bd8a937287195);
+		    break;
+
+		  case 0xf:	// 0.0
+		    source = int32_to_floatx80((sint32)0);
+		    break;
+
+		  case 0x30:	// ln(2)
+		    source.high = 0x3ffe;
+		    source.low = U64(0xb17217f7d1cf79ac);
+		    break;
+
+		  case 0x31:	// ln(10)
+		    source.high = 0x4000;
+		    source.low = U64(0x935d8dddaaa8ac17);
+		    break;
+
+		  case 0x32:	// 1 (or 100?  manuals are unclear, but 1 would make more sense)
+		    source = int32_to_floatx80((sint32)1);
+		    break;
+
+		  case 0x33:	// 10^1
+		    source = int32_to_floatx80((sint32)10);
+		    break;
+
+		  case 0x34:	// 10^2
+		    source = int32_to_floatx80((sint32)10*10);
+		    break;
+
+		  default:
+		    fatalerror("fmove_rm_reg: unknown constant ROM offset %x at %08x\n", w2&0x7f, REG_PC-4);
+		    break;
+		}
+
+	      // handle it right here, the usual opmode bits aren't valid in the FMOVECR case
+	      REG_FP[dst] = source;	      
+	      SET_CONDITION_CODES(REG_FP[dst]); // JFF when destination is a register, we HAVE to update FPCR
+	      USE_CYCLES(4);
+	      return;
+	    }
+	  default:	fatalerror("fmove_rm_reg: invalid source specifier %x at %08X\n", src, REG_PC-4);
+	}
+    }
+  else
+    {
+      source = REG_FP[src];
+    }
+
+
+
+  switch (opmode)
+    {
+      case 0x00:		// FMOVE
+	{
+	  REG_FP[dst] = source;
+	  SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
+	  USE_CYCLES(4);
+	  break;
+	}
+      case 0x01:		// Fsint
+	{
+	  sint32 temp;
+	  temp = floatx80_to_int32(source);
+	  REG_FP[dst] = int32_to_floatx80(temp);
+	  SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
+	  break;
+	}
+      case 0x03:		// FsintRZ
+	{
+	  sint32 temp;
+	  temp = floatx80_to_int32_round_to_zero(source);
+	  REG_FP[dst] = int32_to_floatx80(temp);
+	  SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
+	  break;
+	}
+      case 0x04:		// FSQRT
+	{
+	  REG_FP[dst] = floatx80_sqrt(source);
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(109);
+	  break;
+	}
+      case 0x18:		// FABS
+	{
+	  REG_FP[dst] = source;
+	  REG_FP[dst].high &= 0x7fff;
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(3);
+	  break;
+	}
+      case 0x1a:		// FNEG
+	{
+	  REG_FP[dst] = source;
+	  REG_FP[dst].high ^= 0x8000;
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(3);
+	  break;
+	}
+      case 0x1e:		// FGETEXP
+	{
+          sint16 temp;
+
+          temp = source.high;	// get the exponent
+          temp -= 0x3fff;	// take off the bias
+          REG_FP[dst] = double_to_fx80((double)temp);
+          SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(6);
+	}
+
+
+      case 0x1f:		// FGETMANT  (TBB)
+        {
+          REG_FP[dst] = source;
+          REG_FP[dst].high &= ~0x7fff;	// clear the bias, keep sign
+          REG_FP[dst].high |= 0x3fff;	// set new bias, 1.0 <= result < 2.0
+          SET_CONDITION_CODES(REG_FP[dst]);
+          //USE_CYCLES(6);
+        }
+
+
+      case 0x60:		// FSDIVS (JFF) (source has already been converted to floatx80)
+      case 0x20:		// FDIV
+	{
+	  REG_FP[dst] = floatx80_div(REG_FP[dst], source);
+	  SET_CONDITION_CODES(REG_FP[dst]); // JFF
+	  USE_CYCLES(43);
+	  break;
+	}
+      case 0x22:		// FADD
+	{
+	  REG_FP[dst] = floatx80_add(REG_FP[dst], source);
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(9);
+	  break;
+	}
+ 
+      case 0x63:		// FSMULS (JFF) (source has already been converted to floatx80)
+      case 0x23:		// FMUL
+	{
+	  REG_FP[dst] = floatx80_mul(REG_FP[dst], source);
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(11);  // maybe number of cycles is slightly inferior for FSMULS
+	  break;
+	}
+      case 0x25:		// FREM
+	{
+	  REG_FP[dst] = floatx80_rem(REG_FP[dst], source);
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(43);	// guess
+	  break;
+	}
+      case 0x28:		// FSUB
+	{
+	  REG_FP[dst] = floatx80_sub(REG_FP[dst], source);
+	  SET_CONDITION_CODES(REG_FP[dst]);
+	  USE_CYCLES(9);
+	  break;
+	}
+      case 0x38:		// FCMP
+	{
+	  floatx80 res;
+	  res = floatx80_sub(REG_FP[dst], source);
+	  SET_CONDITION_CODES(res);
+	  USE_CYCLES(7);
+	  break;
+	}
+      case 0x3a:		// FTST
+	{
+	  floatx80 res;
+	  res = source;
+	  SET_CONDITION_CODES(res);
+	  USE_CYCLES(7);
+	  break;
+	}
+
+      default:	fatalerror("fpgen_rm_reg: unimplemented opmode %02X at %08X\n", opmode, REG_PC-4);
+    }
+}
+
+static void fmove_reg_mem(uint16 w2)
+{
+  int ea = REG_IR & 0x3f;
+  int src = (w2 >>  7) & 0x7;
+  int dst = (w2 >> 10) & 0x7;
+  int k = (w2 & 0x7f);
+
+  switch (dst)
+    {
+      case 0:		// Long-Word Integer
+	{
+	  sint32 d = (sint32)floatx80_to_int32(REG_FP[src]);
+	  WRITE_EA_32(ea, d);
+	  break;
+	}
+      case 1:		// Single-precision Real
+	{
+	  uint32 d = floatx80_to_float32(REG_FP[src]);
+	  WRITE_EA_32(ea, d);
+	  break;
+	}
+      case 2:		// Extended-precision Real
+	{
+	  int mode = (ea >> 3) & 0x7;
+	  int reg = (ea & 0x7);
+	  uint32 di_mode_ea = mode == 5 ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
+	    		      
+	  WRITE_EA_FPE(mode, reg, REG_FP[src], di_mode_ea);
+	  break;
+	}
+      case 3:		// Packed-decimal Real with Static K-factor
+	{
+	  // sign-extend k
+	  k = (k & 0x40) ? (k | 0xffffff80) : (k & 0x7f);
+	  WRITE_EA_PACK(ea, k, REG_FP[src]);
+	  break;
+	}
+      case 4:		// Word Integer
+	{
+	  WRITE_EA_16(ea, (sint16)floatx80_to_int32(REG_FP[src]));
+	  break;
+	}
+      case 5:		// Double-precision Real
+	{
+	  uint64 d;
+
+	  d = floatx80_to_float64(REG_FP[src]);
+
+	  WRITE_EA_64(ea, d);
+	  break;
+	}
+      case 6:		// Byte Integer
+	{
+	  WRITE_EA_8(ea, (sint8)floatx80_to_int32(REG_FP[src]));
+	  break;
+	}
+      case 7:		// Packed-decimal Real with Dynamic K-factor
+	{
+	  WRITE_EA_PACK(ea, REG_D[k>>4], REG_FP[src]);
+	  break;
+	}
+    }
+
+  USE_CYCLES(12);
+}
+
+static void fmove_fpcr(uint16 w2)
+{
+  int ea = REG_IR & 0x3f;
+  int dir = (w2 >> 13) & 0x1;
+  int reg = (w2 >> 10) & 0x7;
+
+  if (dir)	// From system control reg to <ea>
+    {
+      if (reg & 4) WRITE_EA_32(ea, REG_FPCR);
+      if (reg & 2) WRITE_EA_32(ea, REG_FPSR);
+      if (reg & 1) WRITE_EA_32(ea, REG_FPIAR);
+    }
+  else		// From <ea> to system control reg
+    {
+      if (reg & 4) 
+	{
+	  REG_FPCR = READ_EA_32(ea);
+	  // JFF: need to update rounding mode from softfloat module
+	  float_rounding_mode = (REG_FPCR >> 4) & 0x3;
+	  
+	}
+	  
+      if (reg & 2) REG_FPSR = READ_EA_32(ea);
+      if (reg & 1) REG_FPIAR = READ_EA_32(ea);
+    }
+
+  USE_CYCLES(10);
+}
+
+static void fmovem(uint16 w2)
+{
+  int i;
+  int ea = REG_IR & 0x3f;
+  int dir = (w2 >> 13) & 0x1;
+  int mode = (w2 >> 11) & 0x3;
+  int reglist = w2 & 0xff;
+
+  if (dir)	// From FP regs to mem
+    {
+      /* Mode field: Specifies the type of the register list and addressing mode.
+	 1 & 3 are unsupported right now
+	 1  Dynamic register list, predecrement addressing mode.
+	   3  Dynamic register list, postincrement or control addressing mode.
+       */
+      
+      switch (mode)
+	{
+	  case 2:		// (JFF): Static register list, postincrement or control addressing mode.     
+	    {
+	      int imode = (ea >> 3) & 0x7;
+	      int reg = (ea & 0x7);
+	      int di_mode = imode == 5;	      
+	      
+	      uint32 di_mode_ea = di_mode ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
+	    		      
+		      
+	      for (i=0; i < 8; i++)
+		{
+		  if (reglist & (1 << i))
+		    {
+		      WRITE_EA_FPE(imode,reg, REG_FP[7-i],di_mode_ea);
+		      USE_CYCLES(2);
+		      if (di_mode)
+			{
+			  di_mode_ea += 12;
+			}
+		      
+		    }
+		}
+	      break;
+	    }
+	  case 0:		// Static register list, predecrement addressing mode
+	    {
+	      int imode = (ea >> 3) & 0x7;
+	      int reg = (ea & 0x7);
+	      // the "di_mode_ea" parameter kludge is required here else WRITE_EA_FPE would have
+	      // to call EA_AY_DI_32() (that advances PC & reads displacement) each time
+	      // when the proper behaviour is 1) read once, 2) increment ea for each matching register
+	      // this forces to pre-read the mode (named "imode") so we can decide to read displacement, only once
+	      int di_mode = imode == 5;	      
+	      uint32 di_mode_ea =  di_mode ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
+
+	      for (i=0; i < 8; i++)
+		{
+		  if (reglist & (1 << i))
+		    {
+		      WRITE_EA_FPE(imode,reg, REG_FP[i],di_mode_ea);
+		      USE_CYCLES(2);
+		      if (di_mode)
+			{
+			  di_mode_ea += 12;
+			}
+		    }
+		}
+	      break;
+	    }
+
+	  default:	fatalerror("040fpu0: FMOVEM: mode %d unimplemented at %08X\n", mode, REG_PC-4);
+	}
+    }
+  else		// From mem to FP regs
+    {
+      switch (mode)
+	{
+	  case 2:		// Static register list, postincrement addressing mode
+	    {
+	      int imode = (ea >> 3) & 0x7;
+	      int reg = (ea & 0x7);
+	      int di_mode = imode == 5;	    
+	      uint32 di_mode_ea = di_mode ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
+	      
+	      for (i=0; i < 8; i++)
+		{
+		  if (reglist & (1 << i))
+		    {
+		      REG_FP[7-i] = READ_EA_FPE(imode,reg,di_mode_ea);
+		      USE_CYCLES(2);
+		      if (di_mode)
+			{
+			  di_mode_ea += 12;
+			}
+		      
+		    }
+		}
+	      break;
+	    }
+
+	  default:	fatalerror("040fpu0: FMOVEM: mode %d unimplemented at %08X\n", mode, REG_PC-4);
+	}
+    }
+}
+
+
+static void fscc()
+{
+  // added by JFF, this seems to work properly now 
+  int condition = OPER_I_16() & 0x3f;
+
+  int cc = TEST_CONDITION(condition);
+  
+  if ((REG_IR & 0x38) == 0)
+    {
+      // If the specified floating-point condition is true, sets the byte integer operand at
+      // the destination to TRUE (all ones); otherwise, sets the byte to FALSE (all zeros).
+      
+      REG_D[REG_IR & 7] = (REG_D[REG_IR & 7] & 0xFFFFFF00) | (cc ? 0xff : 0x00);
+    }
+  else
+    {
+      // unimplemented
+      fatalerror("040fpu0: fscc: memory not implemented at %08X\n", REG_PC-4);
+    }
+  USE_CYCLES(7);  // JFF unsure of the number of cycles!!
+}
+
+static void fbcc16(void)
+{
+  sint32 offset;
+  int condition = REG_IR & 0x3f;
+
+  offset = (sint16)(OPER_I_16());
+
+  // TODO: condition and jump!!!
+  if (TEST_CONDITION(condition))
+    {
+      m68ki_trace_t0();			   /* auto-disable (see m68kcpu.h) */
+      m68ki_branch_16(offset-2);
+    }
+
+  USE_CYCLES(7);
+}
+
+static void fbcc32(void)
+{
+  sint32 offset;
+  int condition = REG_IR & 0x3f;
+
+  offset = OPER_I_32();
+
+  // TODO: condition and jump!!!
+  if (TEST_CONDITION(condition))
+    {
+      m68ki_trace_t0();			   /* auto-disable (see m68kcpu.h) */
+      m68ki_branch_32(offset-4);
+    }
+
+  USE_CYCLES(7);
+}
+
+
+void m68040_fpu_op0()
+{
+  m68ki_cpu.fpu_just_reset = 0;
+
+  switch ((REG_IR >> 6) & 0x3)
+    {
+      case 0:
+	{
+   	uint16 w2 = OPER_I_16();
+	  switch ((w2 >> 13) & 0x7)
+	    {
+	      case 0x0:	// FPU ALU FP, FP
+	      case 0x2:	// FPU ALU ea, FP
+		{
+		  fpgen_rm_reg(w2);
+		  break;
+		}
+
+	      case 0x3:	// FMOVE FP, ea
+		{
+		  fmove_reg_mem(w2);
+		  break;
+		}
+
+	      case 0x4:	// FMOVEM ea, FPCR
+	      case 0x5:	// FMOVEM FPCR, ea
+		{
+		  fmove_fpcr(w2);
+		  break;
+		}
+
+	      case 0x6:	// FMOVEM ea, list
+	      case 0x7:	// FMOVEM list, ea
+		{
+		  fmovem(w2);
+		  break;
+		}
+
+	      default:	fatalerror("M68kFPU: unimplemented subop %d at %08X\n", (w2 >> 13) & 0x7, REG_PC-4);
+	    }
+	  break;
+	}
+
+      case 1:           // FScc (JFF)
+	{
+	  fscc();
+	  break;
+	}
+      case 2:		// FBcc disp16
+	{
+	  fbcc16();
+	  break;
+	}
+      case 3:		// FBcc disp32
+	{
+	  fbcc32();
+	  break;
+	}
+
+      default:	fatalerror("M68kFPU: unimplemented main op %d at %08X\n", (m68ki_cpu.ir >> 6) & 0x3,  REG_PC-4);
+    }
+}
+
+static void perform_fsave(uint32 addr, int inc)
+{
+  if (inc)
+    {
+      // 68881 IDLE, version 0x1f
+      m68ki_write_32(addr, 0x1f180000);
+      m68ki_write_32(addr+4, 0);
+      m68ki_write_32(addr+8, 0);
+      m68ki_write_32(addr+12, 0);
+      m68ki_write_32(addr+16, 0);
+      m68ki_write_32(addr+20, 0);
+      m68ki_write_32(addr+24, 0x70000000);
+    }
+  else
+    {
+      m68ki_write_32(addr, 0x70000000);
+      m68ki_write_32(addr-4, 0);
+      m68ki_write_32(addr-8, 0);
+      m68ki_write_32(addr-12, 0);
+      m68ki_write_32(addr-16, 0);
+      m68ki_write_32(addr-20, 0);
+      m68ki_write_32(addr-24, 0x1f180000);
+    }
+}
+
+// FRESTORE on a NULL frame reboots the FPU - all registers to NaN, the 3 status regs to 0
+static void do_frestore_null()
+{
+  int i;
+
+  REG_FPCR = 0;
+  REG_FPSR = 0;
+  REG_FPIAR = 0;
+  for (i = 0; i < 8; i++)
+    {
+      REG_FP[i].high = 0x7fff;
+      REG_FP[i].low = U64(0xffffffffffffffff);
+    }
+
+  // Mac IIci at 408458e6 wants an FSAVE of a just-restored NULL frame to also be NULL
+  // The PRM says it's possible to generate a NULL frame, but not how/when/why.  (need the 68881/68882 manual!)
+  m68ki_cpu.fpu_just_reset = 1;
+}
+
+void m68040_fpu_op1()
+{
+  int ea = REG_IR & 0x3f;
+  int mode = (ea >> 3) & 0x7;
+  int reg = (ea & 0x7);
+  uint32 addr, temp;
+
+  switch ((REG_IR >> 6) & 0x3)
+    {
+      case 0:		// FSAVE <ea>
+	{
+	  switch (mode)
+	    {
+	      case 3:	// (An)+
+		addr = EA_AY_PI_32();
+
+		if (m68ki_cpu.fpu_just_reset)
+		  {
+		    m68ki_write_32(addr, 0);
+		  }
+		else
+		  {
+		    // we normally generate an IDLE frame
+		    REG_A[reg] += 6*4;
+		    perform_fsave(addr, 1);
+		  }
+		break;
+
+	      case 4: // -(An)
+		addr = EA_AY_PD_32();
+
+		if (m68ki_cpu.fpu_just_reset)
+		  {
+		    m68ki_write_32(addr, 0);
+		  }
+		else
+		  {
+		    // we normally generate an IDLE frame
+		    REG_A[reg] -= 6*4;
+		    perform_fsave(addr, 0);
+		  }
+		break;
+
+	      default:
+		fatalerror("M68kFPU: FSAVE unhandled mode %d reg %d at %x\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+	break;
+
+      case 1:		// FRESTORE <ea>
+	{
+	  switch (mode)
+	    {
+	      case 2: // (An)
+		addr = REG_A[reg];
+		temp = m68ki_read_32(addr);
+
+		// check for NULL frame
+		if (temp & 0xff000000)
+		  {
+		    // we don't handle non-NULL frames and there's no pre/post inc/dec to do here
+		    m68ki_cpu.fpu_just_reset = 0;
+		  }
+		else
+		  {
+		    do_frestore_null();
+		  }
+		break;
+
+	      case 3:	// (An)+
+		addr = EA_AY_PI_32();
+		temp = m68ki_read_32(addr);
+
+		// check for NULL frame
+		if (temp & 0xff000000)
+		  {
+		    m68ki_cpu.fpu_just_reset = 0;
+
+		    // how about an IDLE frame?
+		    if ((temp & 0x00ff0000) == 0x00180000)
+		      {
+			REG_A[reg] += 6*4;
+		      } // check UNIMP
+		    else if ((temp & 0x00ff0000) == 0x00380000)
+		      {
+			REG_A[reg] += 14*4;
+		      } // check BUSY
+		    else if ((temp & 0x00ff0000) == 0x00b40000)
+		      {
+			REG_A[reg] += 45*4;
+		      }
+		  }
+		else
+		  {
+		    do_frestore_null();
+		  }
+		break;
+
+	      default:
+		fatalerror("M68kFPU: FRESTORE unhandled mode %d reg %d at %x\n", mode, reg, REG_PC);
+	    }
+	  break;
+	}
+	break;
+
+      default:	fatalerror("m68040_fpu_op1: unimplemented op %d at %08X\n", (REG_IR >> 6) & 0x3, REG_PC-2);
+    }
+}
+
+
+
diff --git a/plat/linux68k/emu/musashi/m68kmake.c b/plat/linux68k/emu/musashi/m68kmake.c
index a3fb0d9d4..0b1d95155 100755
--- a/plat/linux68k/emu/musashi/m68kmake.c
+++ b/plat/linux68k/emu/musashi/m68kmake.c
@@ -3,10 +3,11 @@
 /* ======================================================================== */
 /*
  *                                  MUSASHI
- *                                Version 3.4
+ *                                Version 4.60
  *
  * A portable Motorola M680x0 processor emulation engine.
- * Copyright 1998-2001 Karl Stenerud.  All rights reserved.
+ * Copyright Karl Stenerud.  All rights reserved.
+ * FPU and MMU by R. Belmont.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -56,7 +57,7 @@
  */
 
 
-char* g_version = "3.3";
+static const char g_version[] = "4.60";
 
 /* ======================================================================== */
 /* =============================== INCLUDES =============================== */
@@ -77,7 +78,6 @@ char* g_version = "3.3";
 #define M68K_MAX_PATH 1024
 #define M68K_MAX_DIR  1024
 
-#define NUM_CPUS                          3	/* 000, 010, 020 */
 #define MAX_LINE_LENGTH                 200	/* length of 1 line */
 #define MAX_BODY_LENGTH                 300	/* Number of lines in 1 function */
 #define MAX_REPLACE_LENGTH               30	/* Max number of replace strings */
@@ -93,9 +93,6 @@ char* g_version = "3.3";
 #define FILENAME_INPUT      "m68k_in.c"
 #define FILENAME_PROTOTYPE  "m68kops.h"
 #define FILENAME_TABLE      "m68kops.c"
-#define FILENAME_OPS_AC     "m68kopac.c"
-#define FILENAME_OPS_DM     "m68kopdm.c"
-#define FILENAME_OPS_NZ     "m68kopnz.c"
 
 
 /* Identifier sequences recognized by this program */
@@ -135,9 +132,14 @@ char* g_version = "3.3";
 /* ============================== PROTOTYPES ============================== */
 /* ======================================================================== */
 
-#define CPU_TYPE_000 0
-#define CPU_TYPE_010 1
-#define CPU_TYPE_020 2
+enum {
+    CPU_TYPE_000=0,
+    CPU_TYPE_010,
+    CPU_TYPE_020,
+    CPU_TYPE_030,
+    CPU_TYPE_040,
+    NUM_CPUS
+};
 
 #define UNSPECIFIED "."
 #define UNSPECIFIED_CH '.'
@@ -185,7 +187,7 @@ typedef struct
 	char ea_allowed[EA_ALLOWED_LENGTH];   /* Effective addressing modes allowed */
 	char cpu_mode[NUM_CPUS];              /* User or supervisor mode */
 	char cpus[NUM_CPUS+1];                /* Allowed CPUs */
-	unsigned char cycles[NUM_CPUS];       /* cycles for 000, 010, 020 */
+	unsigned char cycles[NUM_CPUS];       /* cycles for 000, 010, 020, 030, 040 */
 } opcode_struct;
 
 
@@ -231,7 +233,6 @@ int extract_opcode_info(char* src, char* name, int* size, char* spec_proc, char*
 void add_replace_string(replace_struct* replace, char* search_str, char* replace_str);
 void write_body(FILE* filep, body_struct* body, replace_struct* replace);
 void get_base_name(char* base_name, opcode_struct* op);
-void write_prototype(FILE* filep, char* base_name);
 void write_function_name(FILE* filep, char* base_name);
 void add_opcode_output_table_entry(opcode_struct* op, char* name);
 static int DECL_SPEC compare_nof_true_bits(const void* aptr, const void* bptr);
@@ -241,7 +242,7 @@ void set_opcode_struct(opcode_struct* src, opcode_struct* dst, int ea_mode);
 void generate_opcode_handler(FILE* filep, body_struct* body, replace_struct* replace, opcode_struct* opinfo, int ea_mode);
 void generate_opcode_ea_variants(FILE* filep, body_struct* body, replace_struct* replace, opcode_struct* op);
 void generate_opcode_cc_variants(FILE* filep, body_struct* body, replace_struct* replace, opcode_struct* op_in, int offset);
-void process_opcode_handlers(void);
+void process_opcode_handlers(FILE* filep);
 void populate_table(void);
 void read_insert(char* insert);
 
@@ -258,9 +259,6 @@ char g_input_filename[M68K_MAX_PATH] = FILENAME_INPUT;
 FILE* g_input_file = NULL;
 FILE* g_prototype_file = NULL;
 FILE* g_table_file = NULL;
-FILE* g_ops_ac_file = NULL;
-FILE* g_ops_dm_file = NULL;
-FILE* g_ops_nz_file = NULL;
 
 int g_num_functions = 0;  /* Number of functions processed */
 int g_num_primitives = 0; /* Number of function primitives read */
@@ -272,7 +270,7 @@ opcode_struct g_opcode_input_table[MAX_OPCODE_INPUT_TABLE_LENGTH];
 opcode_struct g_opcode_output_table[MAX_OPCODE_OUTPUT_TABLE_LENGTH];
 int g_opcode_output_table_length = 0;
 
-ea_info_struct g_ea_info_table[13] =
+const ea_info_struct g_ea_info_table[13] =
 {/* fname    ea        mask  match */
 	{"",     "",       0x00, 0x00}, /* EA_MODE_NONE */
 	{"ai",   "AY_AI",  0x38, 0x10}, /* EA_MODE_AI   */
@@ -290,7 +288,7 @@ ea_info_struct g_ea_info_table[13] =
 };
 
 
-char* g_cc_table[16][2] =
+const char *const g_cc_table[16][2] =
 {
 	{ "t",  "T"}, /* 0000 */
 	{ "f",  "F"}, /* 0001 */
@@ -311,7 +309,7 @@ char* g_cc_table[16][2] =
 };
 
 /* size to index translator (0 -> 0, 8 and 16 -> 1, 32 -> 2) */
-int g_size_select_table[33] =
+const int g_size_select_table[33] =
 {
 	0,												/* unsized */
 	0, 0, 0, 0, 0, 0, 0, 1,							/*    8    */
@@ -320,25 +318,26 @@ int g_size_select_table[33] =
 };
 
 /* Extra cycles required for certain EA modes */
-int g_ea_cycle_table[13][NUM_CPUS][3] =
-{/*       000           010           020   */
-	{{ 0,  0,  0}, { 0,  0,  0}, { 0,  0,  0}}, /* EA_MODE_NONE */
-	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  4,  4}}, /* EA_MODE_AI   */
-	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  4,  4}}, /* EA_MODE_PI   */
-	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  4,  4}}, /* EA_MODE_PI7  */
-	{{ 0,  6, 10}, { 0,  6, 10}, { 0,  5,  5}}, /* EA_MODE_PD   */
-	{{ 0,  6, 10}, { 0,  6, 10}, { 0,  5,  5}}, /* EA_MODE_PD7  */
-	{{ 0,  8, 12}, { 0,  8, 12}, { 0,  5,  5}}, /* EA_MODE_DI   */
-	{{ 0, 10, 14}, { 0, 10, 14}, { 0,  7,  7}}, /* EA_MODE_IX   */
-	{{ 0,  8, 12}, { 0,  8, 12}, { 0,  4,  4}}, /* EA_MODE_AW   */
-	{{ 0, 12, 16}, { 0, 12, 16}, { 0,  4,  4}}, /* EA_MODE_AL   */
-	{{ 0,  8, 12}, { 0,  8, 12}, { 0,  5,  5}}, /* EA_MODE_PCDI */
-	{{ 0, 10, 14}, { 0, 10, 14}, { 0,  7,  7}}, /* EA_MODE_PCIX */
-	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  2,  4}}, /* EA_MODE_I    */
+/* TODO: correct timings for 030, 040 */
+const int g_ea_cycle_table[13][NUM_CPUS][3] =
+{/*       000           010           020           030           040  */
+	{{ 0,  0,  0}, { 0,  0,  0}, { 0,  0,  0}, { 0,  0,  0}, { 0,  0,  0}}, /* EA_MODE_NONE */
+	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  4,  4}, { 0,  4,  4}, { 0,  4,  4}}, /* EA_MODE_AI   */
+	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  4,  4}, { 0,  4,  4}, { 0,  4,  4}}, /* EA_MODE_PI   */
+	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  4,  4}, { 0,  4,  4}, { 0,  4,  4}}, /* EA_MODE_PI7  */
+	{{ 0,  6, 10}, { 0,  6, 10}, { 0,  5,  5}, { 0,  5,  5}, { 0,  5,  5}}, /* EA_MODE_PD   */
+	{{ 0,  6, 10}, { 0,  6, 10}, { 0,  5,  5}, { 0,  5,  5}, { 0,  5,  5}}, /* EA_MODE_PD7  */
+	{{ 0,  8, 12}, { 0,  8, 12}, { 0,  5,  5}, { 0,  5,  5}, { 0,  5,  5}}, /* EA_MODE_DI   */
+	{{ 0, 10, 14}, { 0, 10, 14}, { 0,  7,  7}, { 0,  7,  7}, { 0,  7,  7}}, /* EA_MODE_IX   */
+	{{ 0,  8, 12}, { 0,  8, 12}, { 0,  4,  4}, { 0,  4,  4}, { 0,  4,  4}}, /* EA_MODE_AW   */
+	{{ 0, 12, 16}, { 0, 12, 16}, { 0,  4,  4}, { 0,  4,  4}, { 0,  4,  4}}, /* EA_MODE_AL   */
+	{{ 0,  8, 12}, { 0,  8, 12}, { 0,  5,  5}, { 0,  5,  5}, { 0,  5,  5}}, /* EA_MODE_PCDI */
+	{{ 0, 10, 14}, { 0, 10, 14}, { 0,  7,  7}, { 0,  7,  7}, { 0,  7,  7}}, /* EA_MODE_PCIX */
+	{{ 0,  4,  8}, { 0,  4,  8}, { 0,  2,  4}, { 0,  2,  4}, { 0,  2,  4}}, /* EA_MODE_I    */
 };
 
 /* Extra cycles for JMP instruction (000, 010) */
-int g_jmp_cycle_table[13] =
+const int g_jmp_cycle_table[13] =
 {
 	 0, /* EA_MODE_NONE */
 	 4, /* EA_MODE_AI   */
@@ -356,7 +355,7 @@ int g_jmp_cycle_table[13] =
 };
 
 /* Extra cycles for JSR instruction (000, 010) */
-int g_jsr_cycle_table[13] =
+const int g_jsr_cycle_table[13] =
 {
 	 0, /* EA_MODE_NONE */
 	 4, /* EA_MODE_AI   */
@@ -374,7 +373,7 @@ int g_jsr_cycle_table[13] =
 };
 
 /* Extra cycles for LEA instruction (000, 010) */
-int g_lea_cycle_table[13] =
+const int g_lea_cycle_table[13] =
 {
 	 0, /* EA_MODE_NONE */
 	 4, /* EA_MODE_AI   */
@@ -392,7 +391,7 @@ int g_lea_cycle_table[13] =
 };
 
 /* Extra cycles for PEA instruction (000, 010) */
-int g_pea_cycle_table[13] =
+const int g_pea_cycle_table[13] =
 {
 	 0, /* EA_MODE_NONE */
 	 6, /* EA_MODE_AI   */
@@ -409,8 +408,26 @@ int g_pea_cycle_table[13] =
 	 0, /* EA_MODE_I    */
 };
 
+/* Extra cycles for MOVEM instruction (000, 010) */
+const int g_movem_cycle_table[13] =
+{
+	 0, /* EA_MODE_NONE */
+	 0, /* EA_MODE_AI   */
+	 0, /* EA_MODE_PI   */
+	 0, /* EA_MODE_PI7  */
+	 0, /* EA_MODE_PD   */
+	 0, /* EA_MODE_PD7  */
+	 4, /* EA_MODE_DI   */
+	 6, /* EA_MODE_IX   */
+	 4, /* EA_MODE_AW   */
+	 8, /* EA_MODE_AL   */
+	 0, /* EA_MODE_PCDI */
+	 0, /* EA_MODE_PCIX */
+	 0, /* EA_MODE_I    */
+};
+
 /* Extra cycles for MOVES instruction (010) */
-int g_moves_cycle_table[13][3] =
+const int g_moves_cycle_table[13][3] =
 {
 	{ 0,  0,  0}, /* EA_MODE_NONE */
 	{ 0,  4,  6}, /* EA_MODE_AI   */
@@ -428,7 +445,7 @@ int g_moves_cycle_table[13][3] =
 };
 
 /* Extra cycles for CLR instruction (010) */
-int g_clr_cycle_table[13][3] =
+const int g_clr_cycle_table[13][3] =
 {
 	{ 0,  0,  0}, /* EA_MODE_NONE */
 	{ 0,  4,  6}, /* EA_MODE_AI   */
@@ -463,9 +480,6 @@ void error_exit(char* fmt, ...)
 
 	if(g_prototype_file) fclose(g_prototype_file);
 	if(g_table_file) fclose(g_table_file);
-	if(g_ops_ac_file) fclose(g_ops_ac_file);
-	if(g_ops_dm_file) fclose(g_ops_dm_file);
-	if(g_ops_nz_file) fclose(g_ops_nz_file);
 	if(g_input_file) fclose(g_input_file);
 
 	exit(EXIT_FAILURE);
@@ -482,9 +496,6 @@ void perror_exit(char* fmt, ...)
 
 	if(g_prototype_file) fclose(g_prototype_file);
 	if(g_table_file) fclose(g_table_file);
-	if(g_ops_ac_file) fclose(g_ops_ac_file);
-	if(g_ops_dm_file) fclose(g_ops_dm_file);
-	if(g_ops_nz_file) fclose(g_ops_nz_file);
 	if(g_input_file) fclose(g_input_file);
 
 	exit(EXIT_FAILURE);
@@ -586,7 +597,7 @@ int fgetline(char* buff, int nchars, FILE* file)
 	if(fgets(buff, nchars, file) == NULL)
 		return -1;
 	if(buff[0] == '\r')
-		memcpy(buff, buff + 1, nchars - 1);
+		memmove(buff, buff + 1, nchars - 1);
 
 	length = strlen(buff);
 	while(length && (buff[length-1] == '\r' || buff[length-1] == '\n'))
@@ -639,6 +650,8 @@ int get_oper_cycles(opcode_struct* op, int ea_mode, int cpu_type)
 			return op->cycles[cpu_type] + g_lea_cycle_table[ea_mode];
 		if(strcmp(op->name, "pea") == 0)
 			return op->cycles[cpu_type] + g_pea_cycle_table[ea_mode];
+		if(strcmp(op->name, "movem") == 0)
+			return op->cycles[cpu_type] + g_movem_cycle_table[ea_mode];
 	}
 	return op->cycles[cpu_type] + g_ea_cycle_table[ea_mode][cpu_type][size];
 }
@@ -649,7 +662,7 @@ opcode_struct* find_opcode(char* name, int size, char* spec_proc, char* spec_ea)
 	opcode_struct* op;
 
 
-	for(op = g_opcode_input_table;op->name != NULL;op++)
+	for(op = g_opcode_input_table;op < &g_opcode_input_table[MAX_OPCODE_INPUT_TABLE_LENGTH];op++)
 	{
 		if(	strcmp(name, op->name) == 0 &&
 			(size == op->size) &&
@@ -665,7 +678,7 @@ opcode_struct* find_illegal_opcode(void)
 {
 	opcode_struct* op;
 
-	for(op = g_opcode_input_table;op->name != NULL;op++)
+	for(op = g_opcode_input_table;op < &g_opcode_input_table[MAX_OPCODE_INPUT_TABLE_LENGTH];op++)
 	{
 		if(strcmp(op->name, "illegal") == 0)
 			return op;
@@ -750,7 +763,7 @@ void write_body(FILE* filep, body_struct* body, replace_struct* replace)
 			}
 			/* Found a directive with no matching replace string */
 			if(!found)
-				error_exit("Unknown " ID_BASE " directive");
+				error_exit("Unknown " ID_BASE " directive [%s]", output);
 		}
 		fprintf(filep, "%s\n", output);
 	}
@@ -769,16 +782,10 @@ void get_base_name(char* base_name, opcode_struct* op)
 		sprintf(base_name+strlen(base_name), "_%s", op->spec_ea);
 }
 
-/* Write the prototype of an opcode handler function */
-void write_prototype(FILE* filep, char* base_name)
-{
-	fprintf(filep, "void %s(void);\n", base_name);
-}
-
 /* Write the name of an opcode handler function */
 void write_function_name(FILE* filep, char* base_name)
 {
-	fprintf(filep, "void %s(void)\n", base_name);
+	fprintf(filep, "static void %s(void)\n", base_name);
 }
 
 void add_opcode_output_table_entry(opcode_struct* op, char* name)
@@ -861,7 +868,6 @@ void generate_opcode_handler(FILE* filep, body_struct* body, replace_struct* rep
 	/* Set the opcode structure and write the tables, prototypes, etc */
 	set_opcode_struct(opinfo, op, ea_mode);
 	get_base_name(str, op);
-	write_prototype(g_prototype_file, str);
 	add_opcode_output_table_entry(op, str);
 	write_function_name(filep, str);
 
@@ -980,10 +986,9 @@ void generate_opcode_cc_variants(FILE* filep, body_struct* body, replace_struct*
 }
 
 /* Process the opcode handlers section of the input file */
-void process_opcode_handlers(void)
+void process_opcode_handlers(FILE* filep)
 {
 	FILE* input_file = g_input_file;
-	FILE* output_file;
 	char func_name[MAX_LINE_LENGTH+1];
 	char oper_name[MAX_LINE_LENGTH+1];
 	int  oper_size;
@@ -993,9 +998,6 @@ void process_opcode_handlers(void)
 	replace_struct* replace = malloc(sizeof(replace_struct));
 	body_struct* body = malloc(sizeof(body_struct));
 
-
-	output_file = g_ops_ac_file;
-
 	for(;;)
 	{
 		/* Find the first line of the function */
@@ -1038,23 +1040,17 @@ void process_opcode_handlers(void)
 		if(opinfo == NULL)
 			error_exit("Unable to find matching table entry for %s", func_name);
 
-        /* Change output files if we pass 'c' or 'n' */
-		if(output_file == g_ops_ac_file && oper_name[0] > 'c')
-			output_file = g_ops_dm_file;
-		else if(output_file == g_ops_dm_file && oper_name[0] > 'm')
-			output_file = g_ops_nz_file;
-
 		replace->length = 0;
 
 		/* Generate opcode variants */
 		if(strcmp(opinfo->name, "bcc") == 0 || strcmp(opinfo->name, "scc") == 0)
-			generate_opcode_cc_variants(output_file, body, replace, opinfo, 1);
+			generate_opcode_cc_variants(filep, body, replace, opinfo, 1);
 		else if(strcmp(opinfo->name, "dbcc") == 0)
-			generate_opcode_cc_variants(output_file, body, replace, opinfo, 2);
+			generate_opcode_cc_variants(filep, body, replace, opinfo, 2);
 		else if(strcmp(opinfo->name, "trapcc") == 0)
-			generate_opcode_cc_variants(output_file, body, replace, opinfo, 4);
+			generate_opcode_cc_variants(filep, body, replace, opinfo, 4);
 		else
-			generate_opcode_ea_variants(output_file, body, replace, opinfo);
+			generate_opcode_ea_variants(filep, body, replace, opinfo);
 	}
 
 	free(replace);
@@ -1077,13 +1073,13 @@ void populate_table(void)
 	/* Find the start of the table */
 	while(strcmp(buff, ID_TABLE_START) != 0)
 		if(fgetline(buff, MAX_LINE_LENGTH, g_input_file) < 0)
-			error_exit("Premature EOF while reading table");
+			error_exit("(table_start) Premature EOF while reading table");
 
 	/* Process the entire table */
 	for(op = g_opcode_input_table;;op++)
 	{
 		if(fgetline(buff, MAX_LINE_LENGTH, g_input_file) < 0)
-			error_exit("Premature EOF while reading table");
+			error_exit("(inline) Premature EOF while reading table");
 		if(strlen(buff) == 0)
 			continue;
 		/* We finish when we find an input separator */
@@ -1221,13 +1217,15 @@ int main(int argc, char **argv)
 {
 	/* File stuff */
 	char output_path[M68K_MAX_DIR] = "";
-	char filename[M68K_MAX_PATH];
+        char filename[M68K_MAX_PATH*2];
 	/* Section identifier */
 	char section_id[MAX_LINE_LENGTH+1];
 	/* Inserts */
 	char temp_insert[MAX_INSERT_LENGTH+1];
 	char prototype_footer_insert[MAX_INSERT_LENGTH+1];
+	char table_header_insert[MAX_INSERT_LENGTH+1];
 	char table_footer_insert[MAX_INSERT_LENGTH+1];
+	char ophandler_header_insert[MAX_INSERT_LENGTH+1];
 	char ophandler_footer_insert[MAX_INSERT_LENGTH+1];
 	/* Flags if we've processed certain parts already */
 	int prototype_header_read = 0;
@@ -1239,8 +1237,8 @@ int main(int argc, char **argv)
 	int table_body_read = 0;
 	int ophandler_body_read = 0;
 
-	printf("\n\t\tMusashi v%s 68000, 68010, 68EC020, 68020 emulator\n", g_version);
-	printf("\t\tCopyright 1998-2000 Karl Stenerud (karl@mame.net)\n\n");
+	printf("\n\tMusashi v%s 68000, 68008, 68010, 68EC020, 68020, 68EC030, 68030, 68EC040, 68040 emulator\n", g_version);
+	printf("\t\tCopyright Karl Stenerud (kstenerud@gmail.com)\n\n");
 
 	/* Check if output path and source for the input file are given */
     if(argc > 1)
@@ -1266,18 +1264,6 @@ int main(int argc, char **argv)
 	if((g_table_file = fopen(filename, "wt")) == NULL)
 		perror_exit("Unable to create table file (%s)\n", filename);
 
-	sprintf(filename, "%s%s", output_path, FILENAME_OPS_AC);
-	if((g_ops_ac_file = fopen(filename, "wt")) == NULL)
-		perror_exit("Unable to create ops ac file (%s)\n", filename);
-
-	sprintf(filename, "%s%s", output_path, FILENAME_OPS_DM);
-	if((g_ops_dm_file = fopen(filename, "wt")) == NULL)
-		perror_exit("Unable to create ops dm file (%s)\n", filename);
-
-	sprintf(filename, "%s%s", output_path, FILENAME_OPS_NZ);
-	if((g_ops_nz_file = fopen(filename, "wt")) == NULL)
-		perror_exit("Unable to create ops nz file (%s)\n", filename);
-
 	if((g_input_file=fopen(g_input_filename, "rt")) == NULL)
 		perror_exit("can't open %s for input", g_input_filename);
 
@@ -1305,18 +1291,14 @@ int main(int argc, char **argv)
 		{
 			if(table_header_read)
 				error_exit("Duplicate table header");
-			read_insert(temp_insert);
-			fprintf(g_table_file, "%s", temp_insert);
+			read_insert(table_header_insert);
 			table_header_read = 1;
 		}
 		else if(strcmp(section_id, ID_OPHANDLER_HEADER) == 0)
 		{
 			if(ophandler_header_read)
 				error_exit("Duplicate opcode handler header");
-			read_insert(temp_insert);
-			fprintf(g_ops_ac_file, "%s\n\n", temp_insert);
-			fprintf(g_ops_dm_file, "%s\n\n", temp_insert);
-			fprintf(g_ops_nz_file, "%s\n\n", temp_insert);
+			read_insert(ophandler_header_insert);
 			ophandler_header_read = 1;
 		}
 		else if(strcmp(section_id, ID_PROTOTYPE_FOOTER) == 0)
@@ -1369,7 +1351,9 @@ int main(int argc, char **argv)
 			if(ophandler_body_read)
 				error_exit("Duplicate opcode handler section");
 
-			process_opcode_handlers();
+			fprintf(g_table_file, "%s\n\n", ophandler_header_insert);
+			process_opcode_handlers(g_table_file);
+			fprintf(g_table_file, "%s\n\n", ophandler_footer_insert);
 
 			ophandler_body_read = 1;
 		}
@@ -1393,13 +1377,11 @@ int main(int argc, char **argv)
 			if(!ophandler_body_read)
 				error_exit("Missing opcode handler body");
 
+			fprintf(g_table_file, "%s\n\n", table_header_insert);
 			print_opcode_output_table(g_table_file);
+			fprintf(g_table_file, "%s\n\n", table_footer_insert);
 
 			fprintf(g_prototype_file, "%s\n\n", prototype_footer_insert);
-			fprintf(g_table_file, "%s\n\n", table_footer_insert);
-			fprintf(g_ops_ac_file, "%s\n\n", ophandler_footer_insert);
-			fprintf(g_ops_dm_file, "%s\n\n", ophandler_footer_insert);
-			fprintf(g_ops_nz_file, "%s\n\n", ophandler_footer_insert);
 
 			break;
 		}
@@ -1412,9 +1394,6 @@ int main(int argc, char **argv)
 	/* Close all files and exit */
 	fclose(g_prototype_file);
 	fclose(g_table_file);
-	fclose(g_ops_ac_file);
-	fclose(g_ops_dm_file);
-	fclose(g_ops_nz_file);
 	fclose(g_input_file);
 
 	printf("Generated %d opcode handlers from %d primitives\n", g_num_functions, g_num_primitives);
diff --git a/plat/linux68k/emu/musashi/m68kmmu.h b/plat/linux68k/emu/musashi/m68kmmu.h
new file mode 100644
index 000000000..faba371e7
--- /dev/null
+++ b/plat/linux68k/emu/musashi/m68kmmu.h
@@ -0,0 +1,321 @@
+/*
+    m68kmmu.h - PMMU implementation for 68851/68030/68040
+
+    By R. Belmont
+
+    Copyright Nicola Salmoria and the MAME Team.
+    Visit http://mamedev.org for licensing and usage restrictions.
+*/
+
+/*
+	pmmu_translate_addr: perform 68851/68030-style PMMU address translation
+*/
+uint pmmu_translate_addr(uint addr_in)
+{
+	uint32 addr_out, tbl_entry = 0, tbl_entry2, tamode = 0, tbmode = 0, tcmode = 0;
+	uint root_aptr, root_limit, tofs, is, abits, bbits, cbits;
+	uint resolved, tptr, shift;
+
+	resolved = 0;
+	addr_out = addr_in;
+
+	// if SRP is enabled and we're in supervisor mode, use it
+	if ((m68ki_cpu.mmu_tc & 0x02000000) && (m68ki_get_sr() & 0x2000))
+	{
+		root_aptr = m68ki_cpu.mmu_srp_aptr;
+		root_limit = m68ki_cpu.mmu_srp_limit;
+	}
+	else	// else use the CRP
+	{
+		root_aptr = m68ki_cpu.mmu_crp_aptr;
+		root_limit = m68ki_cpu.mmu_crp_limit;
+	}
+
+	// get initial shift (# of top bits to ignore)
+	is = (m68ki_cpu.mmu_tc>>16) & 0xf;
+	abits = (m68ki_cpu.mmu_tc>>12)&0xf;
+	bbits = (m68ki_cpu.mmu_tc>>8)&0xf;
+	cbits = (m68ki_cpu.mmu_tc>>4)&0xf;
+
+//	fprintf(stderr,"PMMU: tcr %08x limit %08x aptr %08x is %x abits %d bbits %d cbits %d\n", m68ki_cpu.mmu_tc, root_limit, root_aptr, is, abits, bbits, cbits);
+
+	// get table A offset
+	tofs = (addr_in<<is)>>(32-abits);
+
+	// find out what format table A is
+	switch (root_limit & 3)
+	{
+		case 0:	// invalid, should cause MMU exception
+		case 1:	// page descriptor, should cause direct mapping
+			fatalerror("680x0 PMMU: Unhandled root mode\n");
+			break;
+
+		case 2:	// valid 4 byte descriptors
+			tofs *= 4;
+//			fprintf(stderr,"PMMU: reading table A entry at %08x\n", tofs + (root_aptr & 0xfffffffc));
+			tbl_entry = m68k_read_memory_32( tofs + (root_aptr & 0xfffffffc));
+			tamode = tbl_entry & 3;
+//			fprintf(stderr,"PMMU: addr %08x entry %08x mode %x tofs %x\n", addr_in, tbl_entry, tamode, tofs);
+			break;
+
+		case 3: // valid 8 byte descriptors
+			tofs *= 8;
+//			fprintf(stderr,"PMMU: reading table A entries at %08x\n", tofs + (root_aptr & 0xfffffffc));
+			tbl_entry2 = m68k_read_memory_32( tofs + (root_aptr & 0xfffffffc));
+			tbl_entry = m68k_read_memory_32( tofs + (root_aptr & 0xfffffffc)+4);
+			tamode = tbl_entry2 & 3;
+//			fprintf(stderr,"PMMU: addr %08x entry %08x entry2 %08x mode %x tofs %x\n", addr_in, tbl_entry, tbl_entry2, tamode, tofs);
+			break;
+	}
+
+	// get table B offset and pointer
+	tofs = (addr_in<<(is+abits))>>(32-bbits);
+	tptr = tbl_entry & 0xfffffff0;
+
+	// find out what format table B is, if any
+	switch (tamode)
+	{
+		case 0: // invalid, should cause MMU exception
+			fatalerror("680x0 PMMU: Unhandled Table A mode %d (addr_in %08x)\n", tamode, addr_in);
+			break;
+
+		case 2: // 4-byte table B descriptor
+			tofs *= 4;
+//			fprintf(stderr,"PMMU: reading table B entry at %08x\n", tofs + tptr);
+			tbl_entry = m68k_read_memory_32( tofs + tptr);
+			tbmode = tbl_entry & 3;
+//			fprintf(stderr,"PMMU: addr %08x entry %08x mode %x tofs %x\n", addr_in, tbl_entry, tbmode, tofs);
+			break;
+
+		case 3: // 8-byte table B descriptor
+			tofs *= 8;
+//			fprintf(stderr,"PMMU: reading table B entries at %08x\n", tofs + tptr);
+			tbl_entry2 = m68k_read_memory_32( tofs + tptr);
+			tbl_entry = m68k_read_memory_32( tofs + tptr + 4);
+			tbmode = tbl_entry2 & 3;
+//			fprintf(stderr,"PMMU: addr %08x entry %08x entry2 %08x mode %x tofs %x\n", addr_in, tbl_entry, tbl_entry2, tbmode, tofs);
+			break;
+
+		case 1:	// early termination descriptor
+			tbl_entry &= 0xffffff00;
+
+			shift = is+abits;
+			addr_out = ((addr_in<<shift)>>shift) + tbl_entry;
+			resolved = 1;
+			break;
+	}
+
+	// if table A wasn't early-out, continue to process table B
+	if (!resolved)
+	{
+		// get table C offset and pointer
+		tofs = (addr_in<<(is+abits+bbits))>>(32-cbits);
+		tptr = tbl_entry & 0xfffffff0;
+
+		switch (tbmode)
+		{
+			case 0:	// invalid, should cause MMU exception
+				fatalerror("680x0 PMMU: Unhandled Table B mode %d (addr_in %08x PC %x)\n", tbmode, addr_in, REG_PC);
+				break;
+
+			case 2: // 4-byte table C descriptor
+				tofs *= 4;
+//				fprintf(stderr,"PMMU: reading table C entry at %08x\n", tofs + tptr);
+				tbl_entry = m68k_read_memory_32(tofs + tptr);
+				tcmode = tbl_entry & 3;
+//				fprintf(stderr,"PMMU: addr %08x entry %08x mode %x tofs %x\n", addr_in, tbl_entry, tbmode, tofs);
+				break;
+
+			case 3: // 8-byte table C descriptor
+				tofs *= 8;
+//				fprintf(stderr,"PMMU: reading table C entries at %08x\n", tofs + tptr);
+				tbl_entry2 = m68k_read_memory_32(tofs + tptr);
+				tbl_entry = m68k_read_memory_32(tofs + tptr + 4);
+				tcmode = tbl_entry2 & 3;
+//				fprintf(stderr,"PMMU: addr %08x entry %08x entry2 %08x mode %x tofs %x\n", addr_in, tbl_entry, tbl_entry2, tbmode, tofs);
+				break;
+
+			case 1: // termination descriptor
+				tbl_entry &= 0xffffff00;
+
+				shift = is+abits+bbits;
+				addr_out = ((addr_in<<shift)>>shift) + tbl_entry;
+				resolved = 1;
+				break;
+		}
+	}
+
+	if (!resolved)
+	{
+		switch (tcmode)
+		{
+			case 0:	// invalid, should cause MMU exception
+			case 2: // 4-byte ??? descriptor
+			case 3: // 8-byte ??? descriptor
+				fatalerror("680x0 PMMU: Unhandled Table B mode %d (addr_in %08x PC %x)\n", tbmode, addr_in, REG_PC);
+				break;
+
+			case 1: // termination descriptor
+				tbl_entry &= 0xffffff00;
+
+				shift = is+abits+bbits+cbits;
+				addr_out = ((addr_in<<shift)>>shift) + tbl_entry;
+				resolved = 1;
+				break;
+		}
+	}
+
+
+//	fprintf(stderr,"PMMU: [%08x] => [%08x]\n", addr_in, addr_out);
+
+	return addr_out;
+}
+
+/*
+
+	m68881_mmu_ops: COP 0 MMU opcode handling
+
+*/
+
+void m68881_mmu_ops()
+{
+	uint16 modes;
+	uint32 ea = m68ki_cpu.ir & 0x3f;
+	uint64 temp64;
+
+	// catch the 2 "weird" encodings up front (PBcc)
+	if ((m68ki_cpu.ir & 0xffc0) == 0xf0c0)
+	{
+		fprintf(stderr,"680x0: unhandled PBcc\n");
+		return;
+	}
+	else if ((m68ki_cpu.ir & 0xffc0) == 0xf080)
+	{
+		fprintf(stderr,"680x0: unhandled PBcc\n");
+		return;
+	}
+	else	// the rest are 1111000xxxXXXXXX where xxx is the instruction family
+	{
+		switch ((m68ki_cpu.ir>>9) & 0x7)
+		{
+			case 0:
+				modes = OPER_I_16();
+
+				if ((modes & 0xfde0) == 0x2000)	// PLOAD
+				{
+					fprintf(stderr,"680x0: unhandled PLOAD\n");
+					return;
+				}
+				else if ((modes & 0xe200) == 0x2000)	// PFLUSH
+				{
+					fprintf(stderr,"680x0: unhandled PFLUSH PC=%x\n", REG_PC);
+					return;
+				}
+				else if (modes == 0xa000)	// PFLUSHR
+				{
+					fprintf(stderr,"680x0: unhandled PFLUSHR\n");
+					return;
+				}
+				else if (modes == 0x2800)	// PVALID (FORMAT 1)
+				{
+					fprintf(stderr,"680x0: unhandled PVALID1\n");
+					return;
+				}
+				else if ((modes & 0xfff8) == 0x2c00)	// PVALID (FORMAT 2)
+				{
+					fprintf(stderr,"680x0: unhandled PVALID2\n");
+					return;
+				}
+				else if ((modes & 0xe000) == 0x8000)	// PTEST
+				{
+					fprintf(stderr,"680x0: unhandled PTEST\n");
+					return;
+				}
+				else
+				{
+					switch ((modes>>13) & 0x7)
+					{
+						case 0:	// MC68030/040 form with FD bit
+						case 2:	// MC68881 form, FD never set
+							if (modes & 0x200)
+							{
+							 	switch ((modes>>10) & 7)
+								{
+									case 0:	// translation control register
+										WRITE_EA_32(ea, m68ki_cpu.mmu_tc);
+										break;
+
+									case 2: // supervisor root pointer
+										WRITE_EA_64(ea, (uint64)m68ki_cpu.mmu_srp_limit<<32 | (uint64)m68ki_cpu.mmu_srp_aptr);
+										break;
+
+									case 3: // CPU root pointer
+										WRITE_EA_64(ea, (uint64)m68ki_cpu.mmu_crp_limit<<32 | (uint64)m68ki_cpu.mmu_crp_aptr);
+										break;
+
+									default:
+										fprintf(stderr,"680x0: PMOVE from unknown MMU register %x, PC %x\n", (modes>>10) & 7, REG_PC);
+										break;
+								}
+							}
+							else
+							{
+							 	switch ((modes>>10) & 7)
+								{
+									case 0:	// translation control register
+										m68ki_cpu.mmu_tc = READ_EA_32(ea);
+
+										if (m68ki_cpu.mmu_tc & 0x80000000)
+										{
+											m68ki_cpu.pmmu_enabled = 1;
+										}
+										else
+										{
+											m68ki_cpu.pmmu_enabled = 0;
+										}
+										break;
+
+									case 2:	// supervisor root pointer
+										temp64 = READ_EA_64(ea);
+										m68ki_cpu.mmu_srp_limit = (temp64>>32) & 0xffffffff;
+										m68ki_cpu.mmu_srp_aptr = temp64 & 0xffffffff;
+										break;
+
+									case 3:	// CPU root pointer
+										temp64 = READ_EA_64(ea);
+										m68ki_cpu.mmu_crp_limit = (temp64>>32) & 0xffffffff;
+										m68ki_cpu.mmu_crp_aptr = temp64 & 0xffffffff;
+										break;
+
+									default:
+										fprintf(stderr,"680x0: PMOVE to unknown MMU register %x, PC %x\n", (modes>>10) & 7, REG_PC);
+										break;
+								}
+							}
+							break;
+
+						case 3:	// MC68030 to/from status reg
+							if (modes & 0x200)
+							{
+								WRITE_EA_32(ea, m68ki_cpu.mmu_sr);
+							}
+							else
+							{
+								m68ki_cpu.mmu_sr = READ_EA_32(ea);
+							}
+							break;
+
+						default:
+							fprintf(stderr,"680x0: unknown PMOVE mode %x (modes %04x) (PC %x)\n", (modes>>13) & 0x7, modes, REG_PC);
+							break;
+					}
+				}
+				break;
+
+			default:
+				fprintf(stderr,"680x0: unknown PMMU instruction group %d\n", (m68ki_cpu.ir>>9) & 0x7);
+				break;
+		}
+	}
+}
+
diff --git a/plat/linux68k/emu/musashi/readme.txt b/plat/linux68k/emu/musashi/readme.txt
index adf993398..e9d878c9c 100755
--- a/plat/linux68k/emu/musashi/readme.txt
+++ b/plat/linux68k/emu/musashi/readme.txt
@@ -1,7 +1,7 @@
                                     MUSASHI
                                     =======
 
-                                  Version 3.4
+                                  Version 4.10
 
              A portable Motorola M680x0 processor emulation engine.
             Copyright 1998-2002 Karl Stenerud.  All rights reserved.
@@ -11,8 +11,9 @@
 INTRODUCTION:
 ------------
 
-Musashi is a Motorola 68000, 68010, 68EC020, and 68020 emulator written in C.
-This emulator was written with two goals in mind: portability and speed.
+Musashi is a Motorola 68000, 68010, 68EC020, 68020, 68EC030, 68030, 68EC040 and
+68040 emulator written in C.  This emulator was written with two goals in mind:
+portability and speed.
 
 The emulator is written to ANSI C89 specifications.  It also uses inline
 functions, which are C9X compliant.
@@ -146,11 +147,23 @@ To enable separate immediate reads:
     unsigned int  m68k_read_immediate_16(unsigned int address);
     unsigned int  m68k_read_immediate_32(unsigned int address);
 
+    Now you also have the pcrelative stuff:
+    unsigned int  m68k_read_pcrelative_8(unsigned int address);
+    unsigned int  m68k_read_pcrelative_16(unsigned int address);
+    unsigned int  m68k_read_pcrelative_32(unsigned int address);
+
 - If you need to know the current PC (for banking and such), set
   M68K_MONITOR_PC to OPT_SPECIFY_HANDLER, and set M68K_SET_PC_CALLBACK(A) to
   your routine.
 
+- In the unlikely case where you need to emulate some PMMU in the immediate
+  reads and/or pcrealtive stuff, you'll need to explicitely call the
+  translation address mechanism from your user functions this way :
 
+    if (PMMU_ENABLED)
+        address = pmmu_translate_addr(address);
+
+  (this is handled automatically by normal memory accesses).
 
 ADDRESS SPACES:
 --------------
@@ -212,9 +225,12 @@ To set the CPU type you want to use:
     M68K_CPU_TYPE_68000,
     M68K_CPU_TYPE_68010,
     M68K_CPU_TYPE_68EC020,
-    M68K_CPU_TYPE_68020
-
-
+    M68K_CPU_TYPE_68020,
+    M68K_CPU_TYPE_68EC030,
+    M68K_CPU_TYPE_68030,
+    M68K_CPU_TYPE_68EC040,
+    M68K_CPU_TYPE_68040,
+    M68K_CPU_TYPE_SCC68070 (which is a 68010 with a 32 bit data bus).
 
 CLOCK FREQUENCY:
 ---------------
@@ -302,4 +318,25 @@ of the CPU.
 EXAMPLE:
 -------
 
-The subdir example contains a full example (currently DOS only).
+The subdir example contains a full example (currently linux & Dos only).
+
+Compilation
+-----------
+
+You can use the default Makefile in Musashi's directory, it works like this :
+1st build m68kmake, which will build m68kops.c and m68kops.h based on the
+contents of m68k_in.c.
+Then compile m68kcpu.o and m68kops.o. Add m68kdasm.o if you want the
+disassemble functions. When linking this to your project you will need libm
+for the fpu emulation of the 68040.
+
+Using some custom m68kconf.h outside Musashi's directory
+--------------------------------------------------------
+
+It can be useful to keep an untouched musashi directory in a project (from
+git for example) and maintain a separate m68kconf.h specific to the
+project. For this, pass -DMUSASHI_CNF="mycustomconfig.h" to gcc (or whatever
+compiler you use). Notice that if you use an unix shell (or make which uses
+the shell to launch its commands), then you need to escape the quotes like
+this : -DMUSASHI_CNF=\"mycustomconfig.h\"
+
diff --git a/plat/linux68k/emu/musashi/softfloat/README.txt b/plat/linux68k/emu/musashi/softfloat/README.txt
new file mode 100644
index 000000000..9500d25e8
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/README.txt
@@ -0,0 +1,78 @@
+MAME note: this package is derived from the following original SoftFloat 
+package and has been "re-packaged" to work with MAME's conventions and
+build system.  The source files come from bits64/ and bits64/templates
+in the original distribution as MAME requires a compiler with a 64-bit
+integer type.
+
+
+Package Overview for SoftFloat Release 2b
+
+John R. Hauser
+2002 May 27
+
+
+----------------------------------------------------------------------------
+Overview
+
+SoftFloat is a software implementation of floating-point that conforms to
+the IEC/IEEE Standard for Binary Floating-Point Arithmetic.  SoftFloat is
+distributed in the form of C source code.  Compiling the SoftFloat sources
+generates two things:
+
+-- A SoftFloat object file (typically `softfloat.o') containing the complete
+   set of IEC/IEEE floating-point routines.
+
+-- A `timesoftfloat' program for evaluating the speed of the SoftFloat
+   routines.  (The SoftFloat module is linked into this program.)
+
+The SoftFloat package is documented in four text files:
+
+   SoftFloat.txt          Documentation for using the SoftFloat functions.
+   SoftFloat-source.txt   Documentation for compiling SoftFloat.
+   SoftFloat-history.txt  History of major changes to SoftFloat.
+   timesoftfloat.txt      Documentation for using `timesoftfloat'.
+
+Other files in the package comprise the source code for SoftFloat.
+
+Please be aware that some work is involved in porting this software to other
+targets.  It is not just a matter of getting `make' to complete without
+error messages.  I would have written the code that way if I could, but
+there are fundamental differences between systems that can't be hidden.
+You should not attempt to compile SoftFloat without first reading both
+`SoftFloat.txt' and `SoftFloat-source.txt'.
+
+
+----------------------------------------------------------------------------
+Legal Notice
+
+SoftFloat was written by me, John R. Hauser.  This work was made possible in
+part by the International Computer Science Institute, located at Suite 600,
+1947 Center Street, Berkeley, California 94704.  Funding was partially
+provided by the National Science Foundation under grant MIP-9311980.  The
+original version of this code was written as part of a project to build
+a fixed-point vector processor in collaboration with the University of
+California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL
+LOSSES, COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO
+FURTHERMORE EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER
+SCIENCE INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES,
+COSTS, OR OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE
+SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, provided
+that the minimal documentation requirements stated in the source code are
+satisfied.
+
+
+----------------------------------------------------------------------------
+Contact Information
+
+At the time of this writing, the most up-to-date information about
+SoftFloat and the latest release can be found at the Web page `http://
+www.cs.berkeley.edu/~jhauser/arithmetic/SoftFloat.html'.
+
+
diff --git a/plat/linux68k/emu/musashi/softfloat/mamesf.h b/plat/linux68k/emu/musashi/softfloat/mamesf.h
new file mode 100644
index 000000000..c41950394
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/mamesf.h
@@ -0,0 +1,61 @@
+/*----------------------------------------------------------------------------
+| One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined.
+*----------------------------------------------------------------------------*/
+#ifdef LSB_FIRST
+#define LITTLEENDIAN
+#else
+#define BIGENDIAN
+#endif
+
+/*----------------------------------------------------------------------------
+| The macro `BITS64' can be defined to indicate that 64-bit integer types are
+| supported by the compiler.
+*----------------------------------------------------------------------------*/
+#define BITS64
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines the most convenient type that holds
+| integers of at least as many bits as specified.  For example, `uint8' should
+| be the most convenient type that can hold unsigned integers of as many as
+| 8 bits.  The `flag' type must be able to hold either a 0 or 1.  For most
+| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+| to the same as `int'.
+*----------------------------------------------------------------------------*/
+
+typedef sint8 flag;
+typedef sint8 int8;
+typedef sint16 int16;
+typedef sint32 int32;
+typedef sint64 int64;
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines a type that holds integers
+| of _exactly_ the number of bits specified.  For instance, for most
+| implementation of C, `bits16' and `sbits16' should be `typedef'ed to
+| `unsigned short int' and `signed short int' (or `short int'), respectively.
+*----------------------------------------------------------------------------*/
+typedef uint8 bits8;
+typedef sint8 sbits8;
+typedef uint16 bits16;
+typedef sint16 sbits16;
+typedef uint32 bits32;
+typedef sint32 sbits32;
+typedef uint64 bits64;
+typedef sint64 sbits64;
+
+/*----------------------------------------------------------------------------
+| The `LIT64' macro takes as its argument a textual integer literal and
+| if necessary ``marks'' the literal as having a 64-bit integer type.
+| For example, the GNU C Compiler (`gcc') requires that 64-bit literals be
+| appended with the letters `LL' standing for `long long', which is `gcc's
+| name for the 64-bit integer type.  Some compilers may allow `LIT64' to be
+| defined as the identity macro:  `#define LIT64( a ) a'.
+*----------------------------------------------------------------------------*/
+#define LIT64( a ) a##ULL
+
+/*----------------------------------------------------------------------------
+| The macro `INLINE' can be used before functions that should be inlined.  If
+| a compiler does not support explicit inlining, this macro should be defined
+| to be `static'.
+*----------------------------------------------------------------------------*/
+// MAME defines INLINE
diff --git a/plat/linux68k/emu/musashi/softfloat/milieu.h b/plat/linux68k/emu/musashi/softfloat/milieu.h
new file mode 100644
index 000000000..10687b755
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/milieu.h
@@ -0,0 +1,42 @@
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Include common integer types and flags.
+*----------------------------------------------------------------------------*/
+#include "mamesf.h"
+
+/*----------------------------------------------------------------------------
+| Symbolic Boolean literals.
+*----------------------------------------------------------------------------*/
+#define FALSE 0
+#define TRUE 1
diff --git a/plat/linux68k/emu/musashi/softfloat/softfloat-macros b/plat/linux68k/emu/musashi/softfloat/softfloat-macros
new file mode 100644
index 000000000..5de9031de
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/softfloat-macros
@@ -0,0 +1,732 @@
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 32, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+static inline void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
+{
+    bits32 z;
+
+    if ( count == 0 ) {
+        z = a;
+    }
+    else if ( count < 32 ) {
+        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
+    }
+    else {
+        z = ( a != 0 );
+    }
+    *zPtr = z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 64, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+static inline void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
+{
+    bits64 z;
+
+    if ( count == 0 ) {
+        z = a;
+    }
+    else if ( count < 64 ) {
+        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
+    }
+    else {
+        z = ( a != 0 );
+    }
+    *zPtr = z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+| _plus_ the number of bits given in `count'.  The shifted result is at most
+| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
+| bits shifted off form a second 64-bit result as follows:  The _last_ bit
+| shifted off is the most-significant bit of the extra result, and the other
+| 63 bits of the extra result are all zero if and only if _all_but_the_last_
+| bits shifted off were all zero.  This extra result is stored in the location
+| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0' and `a1' are considered to form
+| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
+| point value is shifted right by the number of bits given in `count', and
+| the integer part of the result is returned at the location pointed to by
+| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
+| described above, and is returned at the location pointed to by `z1Ptr'.)
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shift64ExtraRightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1 != 0 );
+        z0 = a0>>count;
+    }
+    else {
+        if ( count == 64 ) {
+            z1 = a0 | ( a1 != 0 );
+        }
+        else {
+            z1 = ( ( a0 | a1 ) != 0 );
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' can be arbitrarily large; in particular, if `count' is greater
+| than 128, the result will be 0.  The result is broken into two 64-bit pieces
+| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shift128Right(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count );
+        z0 = a0>>count;
+    }
+    else {
+        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  If any nonzero bits are shifted off, they
+| are ``jammed'' into the least significant bit of the result by setting the
+| least significant bit to 1.  The value of `count' can be arbitrarily large;
+| in particular, if `count' is greater than 128, the result will be either
+| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+| nonzero.  The result is broken into two 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shift128RightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
+        z0 = a0>>count;
+    }
+    else {
+        if ( count == 64 ) {
+            z1 = a0 | ( a1 != 0 );
+        }
+        else if ( count < 128 ) {
+            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
+        }
+        else {
+            z1 = ( ( a0 | a1 ) != 0 );
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+| by 64 _plus_ the number of bits given in `count'.  The shifted result is
+| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+| off form a third 64-bit result as follows:  The _last_ bit shifted off is
+| the most-significant bit of the extra result, and the other 63 bits of the
+| extra result are all zero if and only if _all_but_the_last_ bits shifted off
+| were all zero.  This extra result is stored in the location pointed to by
+| `z2Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0', `a1', and `a2' are considered
+| to form a fixed-point value with binary point between `a1' and `a2'.  This
+| fixed-point value is shifted right by the number of bits given in `count',
+| and the integer part of the result is returned at the locations pointed to
+| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+| corrupted as described above, and is returned at the location pointed to by
+| `z2Ptr'.)
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shift128ExtraRightJamming(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z2 = a2;
+        z1 = a1;
+        z0 = a0;
+    }
+    else {
+        if ( count < 64 ) {
+            z2 = a1<<negCount;
+            z1 = ( a0<<negCount ) | ( a1>>count );
+            z0 = a0>>count;
+        }
+        else {
+            if ( count == 64 ) {
+                z2 = a1;
+                z1 = a0;
+            }
+            else {
+                a2 |= a1;
+                if ( count < 128 ) {
+                    z2 = a0<<negCount;
+                    z1 = a0>>( count & 63 );
+                }
+                else {
+                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
+                    z1 = 0;
+                }
+            }
+            z0 = 0;
+        }
+        z2 |= ( a2 != 0 );
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' must be less than 64.  The result is broken into two 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shortShift128Left(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+    *z1Ptr = a1<<count;
+    *z0Ptr =
+        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
+| by the number of bits given in `count'.  Any bits shifted off are lost.
+| The value of `count' must be less than 64.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shortShift192Left(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 negCount;
+
+    z2 = a2<<count;
+    z1 = a1<<count;
+    z0 = a0<<count;
+    if ( 0 < count ) {
+        negCount = ( ( - count ) & 63 );
+        z1 |= a2>>negCount;
+        z0 |= a1>>negCount;
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
+| any carry out is lost.  The result is broken into two 64-bit pieces which
+| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ add128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z1;
+
+    z1 = a1 + b1;
+    *z1Ptr = z1;
+    *z0Ptr = a0 + b0 + ( z1 < a1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+| modulo 2^192, so any carry out is lost.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ add192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    uint8 carry0, carry1;
+
+    z2 = a2 + b2;
+    carry1 = ( z2 < a2 );
+    z1 = a1 + b1;
+    carry0 = ( z1 < a1 );
+    z0 = a0 + b0;
+    z1 += carry1;
+    z0 += ( z1 < carry1 );
+    z0 += carry0;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
+| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+| `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ sub128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+    *z1Ptr = a1 - b1;
+    *z0Ptr = a0 - b0 - ( a1 < b1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
+| result is broken into three 64-bit pieces which are stored at the locations
+| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ sub192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    uint8 borrow0, borrow1;
+
+    z2 = a2 - b2;
+    borrow1 = ( a2 < b2 );
+    z1 = a1 - b1;
+    borrow0 = ( a1 < b1 );
+    z0 = a0 - b0;
+    z0 -= ( z1 < borrow1 );
+    z1 -= borrow1;
+    z0 -= borrow0;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
+| into two 64-bit pieces which are stored at the locations pointed to by
+| `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits32 aHigh, aLow, bHigh, bLow;
+    bits64 z0, zMiddleA, zMiddleB, z1;
+
+    aLow = a;
+    aHigh = a>>32;
+    bLow = b;
+    bHigh = b>>32;
+    z1 = ( (bits64) aLow ) * bLow;
+    zMiddleA = ( (bits64) aLow ) * bHigh;
+    zMiddleB = ( (bits64) aHigh ) * bLow;
+    z0 = ( (bits64) aHigh ) * bHigh;
+    zMiddleA += zMiddleB;
+    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
+    zMiddleA <<= 32;
+    z1 += zMiddleA;
+    z0 += ( z1 < zMiddleA );
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
+| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+| `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ mul128By64To192(
+     bits64 a0,
+     bits64 a1,
+     bits64 b,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2, more1;
+
+    mul64To128( a1, b, &z1, &z2 );
+    mul64To128( a0, b, &z0, &more1 );
+    add128( z0, more1, 0, z1, &z0, &z1 );
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
+| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+| product.  The product is broken into four 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ mul128To256(
+     bits64 a0,
+     bits64 a1,
+     bits64 b0,
+     bits64 b1,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr,
+     bits64 *z3Ptr
+ )
+{
+    bits64 z0, z1, z2, z3;
+    bits64 more1, more2;
+
+    mul64To128( a1, b1, &z2, &z3 );
+    mul64To128( a1, b0, &z1, &more2 );
+    add128( z1, more2, 0, z2, &z1, &z2 );
+    mul64To128( a0, b0, &z0, &more1 );
+    add128( z0, more1, 0, z1, &z0, &z1 );
+    mul64To128( a0, b1, &more1, &more2 );
+    add128( more1, more2, 0, z2, &more1, &z2 );
+    add128( z0, z1, 0, more1, &z0, &z1 );
+    *z3Ptr = z3;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the 64-bit integer quotient obtained by dividing
+| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
+| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
+| toward zero, the approximation returned lies between q and q + 2 inclusive.
+| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+| unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
+{
+    bits64 b0, b1;
+    bits64 rem0, rem1, term0, term1;
+    bits64 z;
+
+    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
+    b0 = b>>32;
+    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
+    mul64To128( b, z, &term0, &term1 );
+    sub128( a0, a1, term0, term1, &rem0, &rem1 );
+    while ( ( (sbits64) rem0 ) < 0 ) {
+        z -= LIT64( 0x100000000 );
+        b1 = b<<32;
+        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
+    }
+    rem0 = ( rem0<<32 ) | ( rem1>>32 );
+    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the square root of the 32-bit significand given
+| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+| `aExp' (the least significant bit) is 1, the integer returned approximates
+| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+| case, the approximation returned lies strictly within +/-2 of the exact
+| value.
+*----------------------------------------------------------------------------*/
+
+static inline bits32 estimateSqrt32( int16 aExp, bits32 a )
+{
+    static const bits16 sqrtOddAdjustments[] = {
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const bits16 sqrtEvenAdjustments[] = {
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    int8 index;
+    bits32 z;
+
+    index = ( a>>27 ) & 15;
+    if ( aExp & 1 ) {
+        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
+        z = ( ( a / z )<<14 ) + ( z<<15 );
+        a >>= 1;
+    }
+    else {
+        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
+        z = a / z + z;
+        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
+        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
+    }
+    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 32 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros32( bits32 a )
+{
+    static const int8 countLeadingZerosHigh[] = {
+        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+    int8 shiftCount;
+
+    shiftCount = 0;
+    if ( a < 0x10000 ) {
+        shiftCount += 16;
+        a <<= 16;
+    }
+    if ( a < 0x1000000 ) {
+        shiftCount += 8;
+        a <<= 8;
+    }
+    shiftCount += countLeadingZerosHigh[ a>>24 ];
+    return shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 64 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros64( bits64 a )
+{
+    int8 shiftCount;
+
+    shiftCount = 0;
+    if ( a < ( (bits64) 1 )<<32 ) {
+        shiftCount += 32;
+    }
+    else {
+        a >>= 32;
+    }
+    shiftCount += countLeadingZeros32( a );
+    return shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 == b0 ) && ( a1 == b1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+| returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
+| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 != b0 ) || ( a1 != b1 );
+
+}
+
+/*-----------------------------------------------------------------------------
+| Changes the sign of the extended double-precision floating-point value 'a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static inline floatx80 floatx80_chs(floatx80 reg)
+{
+    reg.high ^= 0x8000;
+    return reg;
+}
+
diff --git a/plat/linux68k/emu/musashi/softfloat/softfloat-specialize b/plat/linux68k/emu/musashi/softfloat/softfloat-specialize
new file mode 100644
index 000000000..cc97273b8
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/softfloat-specialize
@@ -0,0 +1,470 @@
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Underflow tininess-detection mode, statically initialized to default value.
+| (The declaration in `softfloat.h' must match the `int8' type here.)
+*----------------------------------------------------------------------------*/
+int8 float_detect_tininess = float_tininess_after_rounding;
+
+/*----------------------------------------------------------------------------
+| Raises the exceptions specified by `flags'.  Floating-point traps can be
+| defined here if desired.  It is currently not possible for such a trap to
+| substitute a result value.  If traps are not implemented, this routine
+| should be simply `float_exception_flags |= flags;'.
+*----------------------------------------------------------------------------*/
+
+void float_raise( int8 flags )
+{
+
+    float_exception_flags |= flags;
+
+}
+
+/*----------------------------------------------------------------------------
+| Internal canonical NaN format.
+*----------------------------------------------------------------------------*/
+typedef struct {
+    flag sign;
+    bits64 high, low;
+} commonNaNT;
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+#define float32_default_nan 0xFFFFFFFF
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float32_is_nan( float32 a )
+{
+
+    return ( 0xFF000000 < (bits32) ( a<<1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float32_is_signaling_nan( float32 a )
+{
+
+    return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float32ToCommonNaN( float32 a )
+{
+    commonNaNT z;
+
+    if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a>>31;
+    z.low = 0;
+    z.high = ( (bits64) a )<<41;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the single-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float32 commonNaNToFloat32( commonNaNT a )
+{
+
+    return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float32 propagateFloat32NaN( float32 a, float32 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float32_is_nan( a );
+    aIsSignalingNaN = float32_is_signaling_nan( a );
+    bIsNaN = float32_is_nan( b );
+    bIsSignalingNaN = float32_is_signaling_nan( b );
+    a |= 0x00400000;
+    b |= 0x00400000;
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float64_is_nan( float64 a )
+{
+
+    return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float64_is_signaling_nan( float64 a )
+{
+
+    return
+           ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
+        && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float64ToCommonNaN( float64 a )
+{
+    commonNaNT z;
+
+    if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a>>63;
+    z.low = 0;
+    z.high = a<<12;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the double-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float64 commonNaNToFloat64( commonNaNT a )
+{
+
+    return
+          ( ( (bits64) a.sign )<<63 )
+        | LIT64( 0x7FF8000000000000 )
+        | ( a.high>>12 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float64 propagateFloat64NaN( float64 a, float64 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float64_is_nan( a );
+    aIsSignalingNaN = float64_is_signaling_nan( a );
+    bIsNaN = float64_is_nan( b );
+    bIsSignalingNaN = float64_is_signaling_nan( b );
+    a |= LIT64( 0x0008000000000000 );
+    b |= LIT64( 0x0008000000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.  The
+| `high' and `low' values hold the most- and least-significant bits,
+| respectively.
+*----------------------------------------------------------------------------*/
+#define floatx80_default_nan_high 0xFFFF
+#define floatx80_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_is_nan( floatx80 a )
+{
+
+    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_is_signaling_nan( floatx80 a )
+{
+    bits64 aLow;
+
+    aLow = a.low & ~ LIT64( 0x4000000000000000 );
+    return
+           ( ( a.high & 0x7FFF ) == 0x7FFF )
+        && (bits64) ( aLow<<1 )
+        && ( a.low == aLow );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
+| invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT floatx80ToCommonNaN( floatx80 a )
+{
+    commonNaNT z;
+
+    if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a.high>>15;
+    z.low = 0;
+    z.high = a.low<<1;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the extended
+| double-precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static floatx80 commonNaNToFloatx80( commonNaNT a )
+{
+    floatx80 z;
+
+    z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
+    z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = floatx80_is_nan( a );
+    aIsSignalingNaN = floatx80_is_signaling_nan( a );
+    bIsNaN = floatx80_is_nan( b );
+    bIsSignalingNaN = floatx80_is_signaling_nan( b );
+    a.low |= LIT64( 0xC000000000000000 );
+    b.low |= LIT64( 0xC000000000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#define EXP_BIAS 0x3FFF
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloatx80Frac( floatx80 a )
+{
+
+    return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int32 extractFloatx80Exp( floatx80 a )
+{
+
+    return a.high & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloatx80Sign( floatx80 a )
+{
+
+    return a.high>>15;
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.  The `high' and
+| `low' values hold the most- and least-significant bits, respectively.
+*----------------------------------------------------------------------------*/
+#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF )
+#define float128_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float128_is_nan( float128 a )
+{
+
+    return
+           ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
+        && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float128_is_signaling_nan( float128 a )
+{
+
+    return
+           ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
+        && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float128ToCommonNaN( float128 a )
+{
+    commonNaNT z;
+
+    if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a.high>>63;
+    shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the quadruple-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float128 commonNaNToFloat128( commonNaNT a )
+{
+    float128 z;
+
+    shift128Right( a.high, a.low, 16, &z.high, &z.low );
+    z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float128 propagateFloat128NaN( float128 a, float128 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float128_is_nan( a );
+    aIsSignalingNaN = float128_is_signaling_nan( a );
+    bIsNaN = float128_is_nan( b );
+    bIsSignalingNaN = float128_is_signaling_nan( b );
+    a.high |= LIT64( 0x0000800000000000 );
+    b.high |= LIT64( 0x0000800000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#endif
+
diff --git a/plat/linux68k/emu/musashi/softfloat/softfloat.c b/plat/linux68k/emu/musashi/softfloat/softfloat.c
new file mode 100644
index 000000000..e2bad5963
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/softfloat.c
@@ -0,0 +1,4940 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#include "../m68kcpu.h" // which includes softfloat.h after defining the basic types
+
+/*----------------------------------------------------------------------------
+| Floating-point rounding mode, extended double-precision rounding precision,
+| and exception flags.
+*----------------------------------------------------------------------------*/
+int8 float_exception_flags = 0;
+#ifdef FLOATX80
+int8 floatx80_rounding_precision = 80;
+#endif
+
+int8 float_rounding_mode = float_round_nearest_even;
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine:  (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output.  These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize"
+
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+| and 7, and returns the properly rounded 32-bit integer corresponding to the
+| input.  If `zSign' is 1, the input is negated before being converted to an
+| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
+| is simply rounded to an integer, with the inexact exception raised if the
+| input cannot be represented exactly as an integer.  However, if the fixed-
+| point input is too large, the invalid exception is raised and the largest
+| positive or negative integer is returned.
+*----------------------------------------------------------------------------*/
+
+static int32 roundAndPackInt32( flag zSign, bits64 absZ )
+{
+	int8 roundingMode;
+	flag roundNearestEven;
+	int8 roundIncrement, roundBits;
+	int32 z;
+
+	roundingMode = float_rounding_mode;
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	roundIncrement = 0x40;
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			roundIncrement = 0;
+		}
+		else {
+			roundIncrement = 0x7F;
+			if ( zSign ) {
+				if ( roundingMode == float_round_up ) roundIncrement = 0;
+			}
+			else {
+				if ( roundingMode == float_round_down ) roundIncrement = 0;
+			}
+		}
+	}
+	roundBits = absZ & 0x7F;
+	absZ = ( absZ + roundIncrement )>>7;
+	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+	z = absZ;
+	if ( zSign ) z = - z;
+	if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
+		float_raise( float_flag_invalid );
+		return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+	}
+	if ( roundBits ) float_exception_flags |= float_flag_inexact;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the fixed-point input is simply rounded to an integer, with
+| the inexact exception raised if the input cannot be represented exactly as
+| an integer.  However, if the fixed-point input is too large, the invalid
+| exception is raised and the largest positive or negative integer is
+| returned.
+*----------------------------------------------------------------------------*/
+
+static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
+{
+	int8 roundingMode;
+	flag roundNearestEven, increment;
+	int64 z;
+
+	roundingMode = float_rounding_mode;
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	increment = ( (sbits64) absZ1 < 0 );
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			increment = 0;
+		}
+		else {
+			if ( zSign ) {
+				increment = ( roundingMode == float_round_down ) && absZ1;
+			}
+			else {
+				increment = ( roundingMode == float_round_up ) && absZ1;
+			}
+		}
+	}
+	if ( increment ) {
+		++absZ0;
+		if ( absZ0 == 0 ) goto overflow;
+		absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
+	}
+	z = absZ0;
+	if ( zSign ) z = - z;
+	if ( z && ( ( z < 0 ) ^ zSign ) ) {
+	overflow:
+		float_raise( float_flag_invalid );
+		return
+				zSign ? (sbits64) LIT64( 0x8000000000000000 )
+                     : (sbits64)( 0x7FFFFFFFFFFFFFFF );
+	}
+	if ( absZ1 ) float_exception_flags |= float_flag_inexact;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits32 extractFloat32Frac( float32 a )
+{
+	return a & 0x007FFFFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int16 extractFloat32Exp( float32 a )
+{
+	return ( a>>23 ) & 0xFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloat32Sign( float32 a )
+{
+	return a>>31;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+	normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
+{
+	int8 shiftCount;
+
+	shiftCount = countLeadingZeros32( aSig ) - 8;
+	*zSigPtr = aSig<<shiftCount;
+	*zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| single-precision floating-point value, returning the result.  After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result.  This means that any integer portion of `zSig'
+| will be added into the exponent.  Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+static inline float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+	return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+	int8 roundingMode;
+	flag roundNearestEven;
+	int8 roundIncrement, roundBits;
+	flag isTiny;
+
+	roundingMode = float_rounding_mode;
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	roundIncrement = 0x40;
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			roundIncrement = 0;
+		}
+		else {
+			roundIncrement = 0x7F;
+			if ( zSign ) {
+				if ( roundingMode == float_round_up ) roundIncrement = 0;
+			}
+			else {
+				if ( roundingMode == float_round_down ) roundIncrement = 0;
+			}
+		}
+	}
+	roundBits = zSig & 0x7F;
+	if ( 0xFD <= (bits16) zExp ) {
+		if (    ( 0xFD < zExp )
+				|| (    ( zExp == 0xFD )
+					&& ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
+			) {
+			float_raise( float_flag_overflow | float_flag_inexact );
+			return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
+		}
+		if ( zExp < 0 ) {
+			isTiny =
+					( float_detect_tininess == float_tininess_before_rounding )
+				|| ( zExp < -1 )
+				|| ( zSig + roundIncrement < 0x80000000 );
+			shift32RightJamming( zSig, - zExp, &zSig );
+			zExp = 0;
+			roundBits = zSig & 0x7F;
+			if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+		}
+	}
+	if ( roundBits ) float_exception_flags |= float_flag_inexact;
+	zSig = ( zSig + roundIncrement )>>7;
+	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+	if ( zSig == 0 ) zExp = 0;
+	return packFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float32
+	normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+	int8 shiftCount;
+
+	shiftCount = countLeadingZeros32( zSig ) - 1;
+	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloat64Frac( float64 a )
+{
+	return a & LIT64( 0x000FFFFFFFFFFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int16 extractFloat64Exp( float64 a )
+{
+	return ( a>>52 ) & 0x7FF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloat64Sign( float64 a )
+{
+	return a>>63;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+	normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
+{
+	int8 shiftCount;
+
+	shiftCount = countLeadingZeros64( aSig ) - 11;
+	*zSigPtr = aSig<<shiftCount;
+	*zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| double-precision floating-point value, returning the result.  After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result.  This means that any integer portion of `zSig'
+| will be added into the exponent.  Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+static inline float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+	return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+	int8 roundingMode;
+	flag roundNearestEven;
+	int16 roundIncrement, roundBits;
+	flag isTiny;
+
+	roundingMode = float_rounding_mode;
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	roundIncrement = 0x200;
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			roundIncrement = 0;
+		}
+		else {
+			roundIncrement = 0x3FF;
+			if ( zSign ) {
+				if ( roundingMode == float_round_up ) roundIncrement = 0;
+			}
+			else {
+				if ( roundingMode == float_round_down ) roundIncrement = 0;
+			}
+		}
+	}
+	roundBits = zSig & 0x3FF;
+	if ( 0x7FD <= (bits16) zExp ) {
+		if (    ( 0x7FD < zExp )
+				|| (    ( zExp == 0x7FD )
+					&& ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
+			) {
+			float_raise( float_flag_overflow | float_flag_inexact );
+			return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
+		}
+		if ( zExp < 0 ) {
+			isTiny =
+					( float_detect_tininess == float_tininess_before_rounding )
+				|| ( zExp < -1 )
+				|| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
+			shift64RightJamming( zSig, - zExp, &zSig );
+			zExp = 0;
+			roundBits = zSig & 0x3FF;
+			if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+		}
+	}
+	if ( roundBits ) float_exception_flags |= float_flag_inexact;
+	zSig = ( zSig + roundIncrement )>>10;
+	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
+	if ( zSig == 0 ) zExp = 0;
+	return packFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float64
+	normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+	int8 shiftCount;
+
+	shiftCount = countLeadingZeros64( zSig ) - 1;
+	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+	normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
+{
+	int8 shiftCount;
+
+	shiftCount = countLeadingZeros64( aSig );
+	*zSigPtr = aSig<<shiftCount;
+	*zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively.  Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+// roundAndPackFloatx80 is now also used in fyl2x.c
+
+/* static */ floatx80
+	roundAndPackFloatx80(
+		int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+	)
+{
+	int8 roundingMode;
+	flag roundNearestEven, increment, isTiny;
+	int64 roundIncrement, roundMask, roundBits;
+
+	roundingMode = float_rounding_mode;
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	if ( roundingPrecision == 80 ) goto precision80;
+	if ( roundingPrecision == 64 ) {
+		roundIncrement = LIT64( 0x0000000000000400 );
+		roundMask = LIT64( 0x00000000000007FF );
+	}
+	else if ( roundingPrecision == 32 ) {
+		roundIncrement = LIT64( 0x0000008000000000 );
+		roundMask = LIT64( 0x000000FFFFFFFFFF );
+	}
+	else {
+		goto precision80;
+	}
+	zSig0 |= ( zSig1 != 0 );
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			roundIncrement = 0;
+		}
+		else {
+			roundIncrement = roundMask;
+			if ( zSign ) {
+				if ( roundingMode == float_round_up ) roundIncrement = 0;
+			}
+			else {
+				if ( roundingMode == float_round_down ) roundIncrement = 0;
+			}
+		}
+	}
+	roundBits = zSig0 & roundMask;
+	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+		if (    ( 0x7FFE < zExp )
+				|| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
+			) {
+			goto overflow;
+		}
+		if ( zExp <= 0 ) {
+			isTiny =
+					( float_detect_tininess == float_tininess_before_rounding )
+				|| ( zExp < 0 )
+				|| ( zSig0 <= zSig0 + roundIncrement );
+			shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
+			zExp = 0;
+			roundBits = zSig0 & roundMask;
+			if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+			if ( roundBits ) float_exception_flags |= float_flag_inexact;
+			zSig0 += roundIncrement;
+			if ( (sbits64) zSig0 < 0 ) zExp = 1;
+			roundIncrement = roundMask + 1;
+			if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+				roundMask |= roundIncrement;
+			}
+			zSig0 &= ~ roundMask;
+			return packFloatx80( zSign, zExp, zSig0 );
+		}
+	}
+	if ( roundBits ) float_exception_flags |= float_flag_inexact;
+	zSig0 += roundIncrement;
+	if ( zSig0 < roundIncrement ) {
+		++zExp;
+                zSig0 = (sbits64)( 0x8000000000000000 );
+	}
+	roundIncrement = roundMask + 1;
+	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+		roundMask |= roundIncrement;
+	}
+	zSig0 &= ~ roundMask;
+	if ( zSig0 == 0 ) zExp = 0;
+	return packFloatx80( zSign, zExp, zSig0 );
+	precision80:
+	increment = ( (sbits64) zSig1 < 0 );
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			increment = 0;
+		}
+		else {
+			if ( zSign ) {
+				increment = ( roundingMode == float_round_down ) && zSig1;
+			}
+			else {
+				increment = ( roundingMode == float_round_up ) && zSig1;
+			}
+		}
+	}
+	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+		if (    ( 0x7FFE < zExp )
+				|| (    ( zExp == 0x7FFE )
+					&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
+					&& increment
+				)
+			) {
+			roundMask = 0;
+	overflow:
+			float_raise( float_flag_overflow | float_flag_inexact );
+			if (    ( roundingMode == float_round_to_zero )
+					|| ( zSign && ( roundingMode == float_round_up ) )
+					|| ( ! zSign && ( roundingMode == float_round_down ) )
+				) {
+				return packFloatx80( zSign, 0x7FFE, ~ roundMask );
+			}
+			return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+		}
+		if ( zExp <= 0 ) {
+			isTiny =
+					( float_detect_tininess == float_tininess_before_rounding )
+				|| ( zExp < 0 )
+				|| ! increment
+				|| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
+			shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
+			zExp = 0;
+			if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
+			if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+			if ( roundNearestEven ) {
+				increment = ( (sbits64) zSig1 < 0 );
+			}
+			else {
+				if ( zSign ) {
+					increment = ( roundingMode == float_round_down ) && zSig1;
+				}
+				else {
+					increment = ( roundingMode == float_round_up ) && zSig1;
+				}
+			}
+			if ( increment ) {
+				++zSig0;
+				zSig0 &=
+					~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+				if ( (sbits64) zSig0 < 0 ) zExp = 1;
+			}
+			return packFloatx80( zSign, zExp, zSig0 );
+		}
+	}
+	if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+	if ( increment ) {
+		++zSig0;
+		if ( zSig0 == 0 ) {
+			++zExp;
+			zSig0 = LIT64( 0x8000000000000000 );
+		}
+		else {
+			zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+		}
+	}
+	else {
+		if ( zSig0 == 0 ) zExp = 0;
+	}
+	return packFloatx80( zSign, zExp, zSig0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+static floatx80
+	normalizeRoundAndPackFloatx80(
+		int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+	)
+{
+	int8 shiftCount;
+
+	if ( zSig0 == 0 ) {
+		zSig0 = zSig1;
+		zSig1 = 0;
+		zExp -= 64;
+	}
+	shiftCount = countLeadingZeros64( zSig0 );
+	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+	zExp -= shiftCount;
+	return
+		roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the least-significant 64 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloat128Frac1( float128 a )
+{
+	return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the most-significant 48 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloat128Frac0( float128 a )
+{
+	return a.high & LIT64( 0x0000FFFFFFFFFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the quadruple-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int32 extractFloat128Exp( float128 a )
+{
+	return ( a.high>>48 ) & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the quadruple-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloat128Sign( float128 a )
+{
+	return a.high>>63;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal quadruple-precision floating-point value
+| represented by the denormalized significand formed by the concatenation of
+| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
+| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
+| significand are stored at the location pointed to by `zSig0Ptr', and the
+| least significant 64 bits of the normalized significand are stored at the
+| location pointed to by `zSig1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static void
+	normalizeFloat128Subnormal(
+		bits64 aSig0,
+		bits64 aSig1,
+		int32 *zExpPtr,
+		bits64 *zSig0Ptr,
+		bits64 *zSig1Ptr
+	)
+{
+	int8 shiftCount;
+
+	if ( aSig0 == 0 ) {
+		shiftCount = countLeadingZeros64( aSig1 ) - 15;
+		if ( shiftCount < 0 ) {
+			*zSig0Ptr = aSig1>>( - shiftCount );
+			*zSig1Ptr = aSig1<<( shiftCount & 63 );
+		}
+		else {
+			*zSig0Ptr = aSig1<<shiftCount;
+			*zSig1Ptr = 0;
+		}
+		*zExpPtr = - shiftCount - 63;
+	}
+	else {
+		shiftCount = countLeadingZeros64( aSig0 ) - 15;
+		shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
+		*zExpPtr = 1 - shiftCount;
+	}
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int32_to_float32( int32 a )
+{
+	flag zSign;
+
+	if ( a == 0 ) return 0;
+	if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
+	zSign = ( a < 0 );
+	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int32_to_float64( int32 a )
+{
+	flag zSign;
+	uint32 absA;
+	int8 shiftCount;
+	bits64 zSig;
+
+	if ( a == 0 ) return 0;
+	zSign = ( a < 0 );
+	absA = zSign ? - a : a;
+	shiftCount = countLeadingZeros32( absA ) + 21;
+	zSig = absA;
+	return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int32_to_floatx80( int32 a )
+{
+	flag zSign;
+	uint32 absA;
+	int8 shiftCount;
+	bits64 zSig;
+
+	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+	zSign = ( a < 0 );
+	absA = zSign ? - a : a;
+	shiftCount = countLeadingZeros32( absA ) + 32;
+	zSig = absA;
+	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a' to
+| the quadruple-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int32_to_float128( int32 a )
+{
+	flag zSign;
+	uint32 absA;
+	int8 shiftCount;
+	bits64 zSig0;
+
+	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+	zSign = ( a < 0 );
+	absA = zSign ? - a : a;
+	shiftCount = countLeadingZeros32( absA ) + 17;
+	zSig0 = absA;
+	return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int64_to_float32( int64 a )
+{
+	flag zSign;
+	uint64 absA;
+	int8 shiftCount;
+//    bits32 zSig;
+
+	if ( a == 0 ) return 0;
+	zSign = ( a < 0 );
+	absA = zSign ? - a : a;
+	shiftCount = countLeadingZeros64( absA ) - 40;
+	if ( 0 <= shiftCount ) {
+		return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
+	}
+	else {
+		shiftCount += 7;
+		if ( shiftCount < 0 ) {
+			shift64RightJamming( absA, - shiftCount, &absA );
+		}
+		else {
+			absA <<= shiftCount;
+		}
+		return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int64_to_float64( int64 a )
+{
+	flag zSign;
+
+	if ( a == 0 ) return 0;
+	if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
+		return packFloat64( 1, 0x43E, 0 );
+	}
+	zSign = ( a < 0 );
+	return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int64_to_floatx80( int64 a )
+{
+	flag zSign;
+	uint64 absA;
+	int8 shiftCount;
+
+	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+	zSign = ( a < 0 );
+	absA = zSign ? - a : a;
+	shiftCount = countLeadingZeros64( absA );
+	return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a' to
+| the quadruple-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int64_to_float128( int64 a )
+{
+	flag zSign;
+	uint64 absA;
+	int8 shiftCount;
+	int32 zExp;
+	bits64 zSig0, zSig1;
+
+	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+	zSign = ( a < 0 );
+	absA = zSign ? - a : a;
+	shiftCount = countLeadingZeros64( absA ) + 49;
+	zExp = 0x406E - shiftCount;
+	if ( 64 <= shiftCount ) {
+		zSig1 = 0;
+		zSig0 = absA;
+		shiftCount -= 64;
+	}
+	else {
+		zSig1 = absA;
+		zSig0 = 0;
+	}
+	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+	return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float32_to_int32( float32 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits32 aSig;
+	bits64 aSig64;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
+	if ( aExp ) aSig |= 0x00800000;
+	shiftCount = 0xAF - aExp;
+	aSig64 = aSig;
+	aSig64 <<= 32;
+	if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
+	return roundAndPackInt32( aSign, aSig64 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float32_to_int32_round_to_zero( float32 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits32 aSig;
+	int32 z;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	shiftCount = aExp - 0x9E;
+	if ( 0 <= shiftCount ) {
+		if ( a != 0xCF000000 ) {
+			float_raise( float_flag_invalid );
+			if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
+		}
+		return (sbits32) 0x80000000;
+	}
+	else if ( aExp <= 0x7E ) {
+		if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+		return 0;
+	}
+	aSig = ( aSig | 0x00800000 )<<8;
+	z = aSig>>( - shiftCount );
+	if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	if ( aSign ) z = - z;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float32_to_int64( float32 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits32 aSig;
+	bits64 aSig64, aSigExtra;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	shiftCount = 0xBE - aExp;
+	if ( shiftCount < 0 ) {
+		float_raise( float_flag_invalid );
+		if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+			return LIT64( 0x7FFFFFFFFFFFFFFF );
+		}
+		return (sbits64) LIT64( 0x8000000000000000 );
+	}
+	if ( aExp ) aSig |= 0x00800000;
+	aSig64 = aSig;
+	aSig64 <<= 40;
+	shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
+	return roundAndPackInt64( aSign, aSig64, aSigExtra );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.  If
+| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float32_to_int64_round_to_zero( float32 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits32 aSig;
+	bits64 aSig64;
+	int64 z;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	shiftCount = aExp - 0xBE;
+	if ( 0 <= shiftCount ) {
+		if ( a != 0xDF000000 ) {
+			float_raise( float_flag_invalid );
+			if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+				return LIT64( 0x7FFFFFFFFFFFFFFF );
+			}
+		}
+		return (sbits64) LIT64( 0x8000000000000000 );
+	}
+	else if ( aExp <= 0x7E ) {
+		if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+		return 0;
+	}
+	aSig64 = aSig | 0x00800000;
+	aSig64 <<= 40;
+	z = aSig64>>( - shiftCount );
+	if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	if ( aSign ) z = - z;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the double-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float32_to_float64( float32 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits32 aSig;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	if ( aExp == 0xFF ) {
+		if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
+		return packFloat64( aSign, 0x7FF, 0 );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+		--aExp;
+	}
+	return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float32_to_floatx80( float32 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits32 aSig;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	if ( aExp == 0xFF ) {
+		if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
+		return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+	}
+	aSig |= 0x00800000;
+	return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the double-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float32_to_float128( float32 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits32 aSig;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	if ( aExp == 0xFF ) {
+		if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
+		return packFloat128( aSign, 0x7FFF, 0, 0 );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+		--aExp;
+	}
+	return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the single-precision floating-point value `a' to an integer, and
+| returns the result as a single-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_round_to_int( float32 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits32 lastBitMask, roundBitsMask;
+	int8 roundingMode;
+	float32 z;
+
+	aExp = extractFloat32Exp( a );
+	if ( 0x96 <= aExp ) {
+		if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
+			return propagateFloat32NaN( a, a );
+		}
+		return a;
+	}
+	if ( aExp <= 0x7E ) {
+		if ( (bits32) ( a<<1 ) == 0 ) return a;
+		float_exception_flags |= float_flag_inexact;
+		aSign = extractFloat32Sign( a );
+		switch ( float_rounding_mode ) {
+			case float_round_nearest_even:
+			if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
+				return packFloat32( aSign, 0x7F, 0 );
+			}
+			break;
+			case float_round_down:
+			return aSign ? 0xBF800000 : 0;
+			case float_round_up:
+			return aSign ? 0x80000000 : 0x3F800000;
+		}
+		return packFloat32( aSign, 0, 0 );
+	}
+	lastBitMask = 1;
+	lastBitMask <<= 0x96 - aExp;
+	roundBitsMask = lastBitMask - 1;
+	z = a;
+	roundingMode = float_rounding_mode;
+	if ( roundingMode == float_round_nearest_even ) {
+		z += lastBitMask>>1;
+		if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+	}
+	else if ( roundingMode != float_round_to_zero ) {
+		if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+			z += roundBitsMask;
+		}
+	}
+	z &= ~ roundBitsMask;
+	if ( z != a ) float_exception_flags |= float_flag_inexact;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the single-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+	int16 aExp, bExp, zExp;
+	bits32 aSig, bSig, zSig;
+	int16 expDiff;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	bSig = extractFloat32Frac( b );
+	bExp = extractFloat32Exp( b );
+	expDiff = aExp - bExp;
+	aSig <<= 6;
+	bSig <<= 6;
+	if ( 0 < expDiff ) {
+		if ( aExp == 0xFF ) {
+			if ( aSig ) return propagateFloat32NaN( a, b );
+			return a;
+		}
+		if ( bExp == 0 ) {
+			--expDiff;
+		}
+		else {
+			bSig |= 0x20000000;
+		}
+		shift32RightJamming( bSig, expDiff, &bSig );
+		zExp = aExp;
+	}
+	else if ( expDiff < 0 ) {
+		if ( bExp == 0xFF ) {
+			if ( bSig ) return propagateFloat32NaN( a, b );
+			return packFloat32( zSign, 0xFF, 0 );
+		}
+		if ( aExp == 0 ) {
+			++expDiff;
+		}
+		else {
+			aSig |= 0x20000000;
+		}
+		shift32RightJamming( aSig, - expDiff, &aSig );
+		zExp = bExp;
+	}
+	else {
+		if ( aExp == 0xFF ) {
+			if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+			return a;
+		}
+		if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
+		zSig = 0x40000000 + aSig + bSig;
+		zExp = aExp;
+		goto roundAndPack;
+	}
+	aSig |= 0x20000000;
+	zSig = ( aSig + bSig )<<1;
+	--zExp;
+	if ( (sbits32) zSig < 0 ) {
+		zSig = aSig + bSig;
+		++zExp;
+	}
+	roundAndPack:
+	return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the single-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+	int16 aExp, bExp, zExp;
+	bits32 aSig, bSig, zSig;
+	int16 expDiff;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	bSig = extractFloat32Frac( b );
+	bExp = extractFloat32Exp( b );
+	expDiff = aExp - bExp;
+	aSig <<= 7;
+	bSig <<= 7;
+	if ( 0 < expDiff ) goto aExpBigger;
+	if ( expDiff < 0 ) goto bExpBigger;
+	if ( aExp == 0xFF ) {
+		if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+		float_raise( float_flag_invalid );
+		return float32_default_nan;
+	}
+	if ( aExp == 0 ) {
+		aExp = 1;
+		bExp = 1;
+	}
+	if ( bSig < aSig ) goto aBigger;
+	if ( aSig < bSig ) goto bBigger;
+	return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
+	bExpBigger:
+	if ( bExp == 0xFF ) {
+		if ( bSig ) return propagateFloat32NaN( a, b );
+		return packFloat32( zSign ^ 1, 0xFF, 0 );
+	}
+	if ( aExp == 0 ) {
+		++expDiff;
+	}
+	else {
+		aSig |= 0x40000000;
+	}
+	shift32RightJamming( aSig, - expDiff, &aSig );
+	bSig |= 0x40000000;
+	bBigger:
+	zSig = bSig - aSig;
+	zExp = bExp;
+	zSign ^= 1;
+	goto normalizeRoundAndPack;
+	aExpBigger:
+	if ( aExp == 0xFF ) {
+		if ( aSig ) return propagateFloat32NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		--expDiff;
+	}
+	else {
+		bSig |= 0x40000000;
+	}
+	shift32RightJamming( bSig, expDiff, &bSig );
+	aSig |= 0x40000000;
+	aBigger:
+	zSig = aSig - bSig;
+	zExp = aExp;
+	normalizeRoundAndPack:
+	--zExp;
+	return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the single-precision floating-point values `a'
+| and `b'.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_add( float32 a, float32 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat32Sign( a );
+	bSign = extractFloat32Sign( b );
+	if ( aSign == bSign ) {
+		return addFloat32Sigs( a, b, aSign );
+	}
+	else {
+		return subFloat32Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the single-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sub( float32 a, float32 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat32Sign( a );
+	bSign = extractFloat32Sign( b );
+	if ( aSign == bSign ) {
+		return subFloat32Sigs( a, b, aSign );
+	}
+	else {
+		return addFloat32Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the single-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_mul( float32 a, float32 b )
+{
+	flag aSign, bSign, zSign;
+	int16 aExp, bExp, zExp;
+	bits32 aSig, bSig;
+	bits64 zSig64;
+	bits32 zSig;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	bSig = extractFloat32Frac( b );
+	bExp = extractFloat32Exp( b );
+	bSign = extractFloat32Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0xFF ) {
+		if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+			return propagateFloat32NaN( a, b );
+		}
+		if ( ( bExp | bSig ) == 0 ) {
+			float_raise( float_flag_invalid );
+			return float32_default_nan;
+		}
+		return packFloat32( zSign, 0xFF, 0 );
+	}
+	if ( bExp == 0xFF ) {
+		if ( bSig ) return propagateFloat32NaN( a, b );
+		if ( ( aExp | aSig ) == 0 ) {
+			float_raise( float_flag_invalid );
+			return float32_default_nan;
+		}
+		return packFloat32( zSign, 0xFF, 0 );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
+		normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+	}
+	zExp = aExp + bExp - 0x7F;
+	aSig = ( aSig | 0x00800000 )<<7;
+	bSig = ( bSig | 0x00800000 )<<8;
+	shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
+	zSig = zSig64;
+	if ( 0 <= (sbits32) ( zSig<<1 ) ) {
+		zSig <<= 1;
+		--zExp;
+	}
+	return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the single-precision floating-point value `a'
+| by the corresponding value `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_div( float32 a, float32 b )
+{
+	flag aSign, bSign, zSign;
+	int16 aExp, bExp, zExp;
+	bits32 aSig, bSig, zSig;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	bSig = extractFloat32Frac( b );
+	bExp = extractFloat32Exp( b );
+	bSign = extractFloat32Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0xFF ) {
+		if ( aSig ) return propagateFloat32NaN( a, b );
+		if ( bExp == 0xFF ) {
+			if ( bSig ) return propagateFloat32NaN( a, b );
+			float_raise( float_flag_invalid );
+			return float32_default_nan;
+		}
+		return packFloat32( zSign, 0xFF, 0 );
+	}
+	if ( bExp == 0xFF ) {
+		if ( bSig ) return propagateFloat32NaN( a, b );
+		return packFloat32( zSign, 0, 0 );
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) {
+			if ( ( aExp | aSig ) == 0 ) {
+				float_raise( float_flag_invalid );
+				return float32_default_nan;
+			}
+			float_raise( float_flag_divbyzero );
+			return packFloat32( zSign, 0xFF, 0 );
+		}
+		normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+	}
+	zExp = aExp - bExp + 0x7D;
+	aSig = ( aSig | 0x00800000 )<<7;
+	bSig = ( bSig | 0x00800000 )<<8;
+	if ( bSig <= ( aSig + aSig ) ) {
+		aSig >>= 1;
+		++zExp;
+	}
+	zSig = ( ( (bits64) aSig )<<32 ) / bSig;
+	if ( ( zSig & 0x3F ) == 0 ) {
+		zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
+	}
+	return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the single-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_rem( float32 a, float32 b )
+{
+	flag aSign, zSign;
+	int16 aExp, bExp, expDiff;
+	bits32 aSig, bSig;
+	bits32 q;
+	bits64 aSig64, bSig64, q64;
+	bits32 alternateASig;
+	sbits32 sigMean;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	bSig = extractFloat32Frac( b );
+	bExp = extractFloat32Exp( b );
+//    bSign = extractFloat32Sign( b );
+	if ( aExp == 0xFF ) {
+		if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+			return propagateFloat32NaN( a, b );
+		}
+		float_raise( float_flag_invalid );
+		return float32_default_nan;
+	}
+	if ( bExp == 0xFF ) {
+		if ( bSig ) return propagateFloat32NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) {
+			float_raise( float_flag_invalid );
+			return float32_default_nan;
+		}
+		normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return a;
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+	}
+	expDiff = aExp - bExp;
+	aSig |= 0x00800000;
+	bSig |= 0x00800000;
+	if ( expDiff < 32 ) {
+		aSig <<= 8;
+		bSig <<= 8;
+		if ( expDiff < 0 ) {
+			if ( expDiff < -1 ) return a;
+			aSig >>= 1;
+		}
+		q = ( bSig <= aSig );
+		if ( q ) aSig -= bSig;
+		if ( 0 < expDiff ) {
+			q = ( ( (bits64) aSig )<<32 ) / bSig;
+			q >>= 32 - expDiff;
+			bSig >>= 2;
+			aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+		}
+		else {
+			aSig >>= 2;
+			bSig >>= 2;
+		}
+	}
+	else {
+		if ( bSig <= aSig ) aSig -= bSig;
+		aSig64 = ( (bits64) aSig )<<40;
+		bSig64 = ( (bits64) bSig )<<40;
+		expDiff -= 64;
+		while ( 0 < expDiff ) {
+			q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+			q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+			aSig64 = - ( ( bSig * q64 )<<38 );
+			expDiff -= 62;
+		}
+		expDiff += 64;
+		q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+		q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+		q = q64>>( 64 - expDiff );
+		bSig <<= 6;
+		aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
+	}
+	do {
+		alternateASig = aSig;
+		++q;
+		aSig -= bSig;
+	} while ( 0 <= (sbits32) aSig );
+	sigMean = aSig + alternateASig;
+	if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+		aSig = alternateASig;
+	}
+	zSign = ( (sbits32) aSig < 0 );
+	if ( zSign ) aSig = - aSig;
+	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the single-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sqrt( float32 a )
+{
+	flag aSign;
+	int16 aExp, zExp;
+	bits32 aSig, zSig;
+	bits64 rem, term;
+
+	aSig = extractFloat32Frac( a );
+	aExp = extractFloat32Exp( a );
+	aSign = extractFloat32Sign( a );
+	if ( aExp == 0xFF ) {
+		if ( aSig ) return propagateFloat32NaN( a, 0 );
+		if ( ! aSign ) return a;
+		float_raise( float_flag_invalid );
+		return float32_default_nan;
+	}
+	if ( aSign ) {
+		if ( ( aExp | aSig ) == 0 ) return a;
+		float_raise( float_flag_invalid );
+		return float32_default_nan;
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return 0;
+		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+	}
+	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
+	aSig = ( aSig | 0x00800000 )<<8;
+	zSig = estimateSqrt32( aExp, aSig ) + 2;
+	if ( ( zSig & 0x7F ) <= 5 ) {
+		if ( zSig < 2 ) {
+			zSig = 0x7FFFFFFF;
+			goto roundAndPack;
+		}
+		aSig >>= aExp & 1;
+		term = ( (bits64) zSig ) * zSig;
+		rem = ( ( (bits64) aSig )<<32 ) - term;
+		while ( (sbits64) rem < 0 ) {
+			--zSig;
+			rem += ( ( (bits64) zSig )<<1 ) | 1;
+		}
+		zSig |= ( rem != 0 );
+	}
+	shift32RightJamming( zSig, 1, &zSig );
+	roundAndPack:
+	return roundAndPackFloat32( 0, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_eq( float32 a, float32 b )
+{
+	if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+			|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+		) {
+		if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_le( float32 a, float32 b )
+{
+	flag aSign, bSign;
+
+	if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+			|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloat32Sign( a );
+	bSign = extractFloat32Sign( b );
+	if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
+	return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_lt( float32 a, float32 b )
+{
+	flag aSign, bSign;
+
+	if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+			|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloat32Sign( a );
+	bSign = extractFloat32Sign( b );
+	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
+	return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_eq_signaling( float32 a, float32 b )
+{
+	if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+			|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_le_quiet( float32 a, float32 b )
+{
+	flag aSign, bSign;
+//    int16 aExp, bExp;
+
+	if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+			|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+		) {
+		if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloat32Sign( a );
+	bSign = extractFloat32Sign( b );
+	if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
+	return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_lt_quiet( float32 a, float32 b )
+{
+	flag aSign, bSign;
+
+	if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+			|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+		) {
+		if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloat32Sign( a );
+	bSign = extractFloat32Sign( b );
+	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
+	return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float64_to_int32( float64 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits64 aSig;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+	if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+	shiftCount = 0x42C - aExp;
+	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
+	return roundAndPackInt32( aSign, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float64_to_int32_round_to_zero( float64 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits64 aSig, savedASig;
+	int32 z;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( 0x41E < aExp ) {
+		if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+		goto invalid;
+	}
+	else if ( aExp < 0x3FF ) {
+		if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+		return 0;
+	}
+	aSig |= LIT64( 0x0010000000000000 );
+	shiftCount = 0x433 - aExp;
+	savedASig = aSig;
+	aSig >>= shiftCount;
+	z = aSig;
+	if ( aSign ) z = - z;
+	if ( ( z < 0 ) ^ aSign ) {
+	invalid:
+		float_raise( float_flag_invalid );
+		return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+	}
+	if ( ( aSig<<shiftCount ) != savedASig ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float64_to_int64( float64 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits64 aSig, aSigExtra;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+	shiftCount = 0x433 - aExp;
+	if ( shiftCount <= 0 ) {
+		if ( 0x43E < aExp ) {
+			float_raise( float_flag_invalid );
+			if (    ! aSign
+					|| (    ( aExp == 0x7FF )
+						&& ( aSig != LIT64( 0x0010000000000000 ) ) )
+				) {
+				return LIT64( 0x7FFFFFFFFFFFFFFF );
+			}
+			return (sbits64) LIT64( 0x8000000000000000 );
+		}
+		aSigExtra = 0;
+		aSig <<= - shiftCount;
+	}
+	else {
+		shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+	}
+	return roundAndPackInt64( aSign, aSig, aSigExtra );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float64_to_int64_round_to_zero( float64 a )
+{
+	flag aSign;
+	int16 aExp, shiftCount;
+	bits64 aSig;
+	int64 z;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+	shiftCount = aExp - 0x433;
+	if ( 0 <= shiftCount ) {
+		if ( 0x43E <= aExp ) {
+			if ( a != LIT64( 0xC3E0000000000000 ) ) {
+				float_raise( float_flag_invalid );
+				if (    ! aSign
+						|| (    ( aExp == 0x7FF )
+							&& ( aSig != LIT64( 0x0010000000000000 ) ) )
+					) {
+					return LIT64( 0x7FFFFFFFFFFFFFFF );
+				}
+			}
+			return (sbits64) LIT64( 0x8000000000000000 );
+		}
+		z = aSig<<shiftCount;
+	}
+	else {
+		if ( aExp < 0x3FE ) {
+			if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+			return 0;
+		}
+		z = aSig>>( - shiftCount );
+		if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
+			float_exception_flags |= float_flag_inexact;
+		}
+	}
+	if ( aSign ) z = - z;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the single-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float64_to_float32( float64 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits64 aSig;
+	bits32 zSig;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( aExp == 0x7FF ) {
+		if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
+		return packFloat32( aSign, 0xFF, 0 );
+	}
+	shift64RightJamming( aSig, 22, &aSig );
+	zSig = aSig;
+	if ( aExp || zSig ) {
+		zSig |= 0x40000000;
+		aExp -= 0x381;
+	}
+	return roundAndPackFloat32( aSign, aExp, zSig );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float64_to_floatx80( float64 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits64 aSig;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( aExp == 0x7FF ) {
+		if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
+		return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+	}
+	return
+		packFloatx80(
+			aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the quadruple-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float64_to_float128( float64 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits64 aSig, zSig0, zSig1;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( aExp == 0x7FF ) {
+		if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
+		return packFloat128( aSign, 0x7FFF, 0, 0 );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+		--aExp;
+	}
+	shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
+	return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the double-precision floating-point value `a' to an integer, and
+| returns the result as a double-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_round_to_int( float64 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits64 lastBitMask, roundBitsMask;
+	int8 roundingMode;
+	float64 z;
+
+	aExp = extractFloat64Exp( a );
+	if ( 0x433 <= aExp ) {
+		if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
+			return propagateFloat64NaN( a, a );
+		}
+		return a;
+	}
+	if ( aExp < 0x3FF ) {
+		if ( (bits64) ( a<<1 ) == 0 ) return a;
+		float_exception_flags |= float_flag_inexact;
+		aSign = extractFloat64Sign( a );
+		switch ( float_rounding_mode ) {
+			case float_round_nearest_even:
+			if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
+				return packFloat64( aSign, 0x3FF, 0 );
+			}
+			break;
+			case float_round_down:
+			return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
+			case float_round_up:
+			return
+			aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
+		}
+		return packFloat64( aSign, 0, 0 );
+	}
+	lastBitMask = 1;
+	lastBitMask <<= 0x433 - aExp;
+	roundBitsMask = lastBitMask - 1;
+	z = a;
+	roundingMode = float_rounding_mode;
+	if ( roundingMode == float_round_nearest_even ) {
+		z += lastBitMask>>1;
+		if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+	}
+	else if ( roundingMode != float_round_to_zero ) {
+		if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+			z += roundBitsMask;
+		}
+	}
+	z &= ~ roundBitsMask;
+	if ( z != a ) float_exception_flags |= float_flag_inexact;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the double-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
+{
+	int16 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig;
+	int16 expDiff;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	bSig = extractFloat64Frac( b );
+	bExp = extractFloat64Exp( b );
+	expDiff = aExp - bExp;
+	aSig <<= 9;
+	bSig <<= 9;
+	if ( 0 < expDiff ) {
+		if ( aExp == 0x7FF ) {
+			if ( aSig ) return propagateFloat64NaN( a, b );
+			return a;
+		}
+		if ( bExp == 0 ) {
+			--expDiff;
+		}
+		else {
+			bSig |= LIT64( 0x2000000000000000 );
+		}
+		shift64RightJamming( bSig, expDiff, &bSig );
+		zExp = aExp;
+	}
+	else if ( expDiff < 0 ) {
+		if ( bExp == 0x7FF ) {
+			if ( bSig ) return propagateFloat64NaN( a, b );
+			return packFloat64( zSign, 0x7FF, 0 );
+		}
+		if ( aExp == 0 ) {
+			++expDiff;
+		}
+		else {
+			aSig |= LIT64( 0x2000000000000000 );
+		}
+		shift64RightJamming( aSig, - expDiff, &aSig );
+		zExp = bExp;
+	}
+	else {
+		if ( aExp == 0x7FF ) {
+			if ( aSig | bSig ) return propagateFloat64NaN( a, b );
+			return a;
+		}
+		if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
+		zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
+		zExp = aExp;
+		goto roundAndPack;
+	}
+	aSig |= LIT64( 0x2000000000000000 );
+	zSig = ( aSig + bSig )<<1;
+	--zExp;
+	if ( (sbits64) zSig < 0 ) {
+		zSig = aSig + bSig;
+		++zExp;
+	}
+	roundAndPack:
+	return roundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
+{
+	int16 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig;
+	int16 expDiff;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	bSig = extractFloat64Frac( b );
+	bExp = extractFloat64Exp( b );
+	expDiff = aExp - bExp;
+	aSig <<= 10;
+	bSig <<= 10;
+	if ( 0 < expDiff ) goto aExpBigger;
+	if ( expDiff < 0 ) goto bExpBigger;
+	if ( aExp == 0x7FF ) {
+		if ( aSig | bSig ) return propagateFloat64NaN( a, b );
+		float_raise( float_flag_invalid );
+		return float64_default_nan;
+	}
+	if ( aExp == 0 ) {
+		aExp = 1;
+		bExp = 1;
+	}
+	if ( bSig < aSig ) goto aBigger;
+	if ( aSig < bSig ) goto bBigger;
+	return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
+	bExpBigger:
+	if ( bExp == 0x7FF ) {
+		if ( bSig ) return propagateFloat64NaN( a, b );
+		return packFloat64( zSign ^ 1, 0x7FF, 0 );
+	}
+	if ( aExp == 0 ) {
+		++expDiff;
+	}
+	else {
+		aSig |= LIT64( 0x4000000000000000 );
+	}
+	shift64RightJamming( aSig, - expDiff, &aSig );
+	bSig |= LIT64( 0x4000000000000000 );
+	bBigger:
+	zSig = bSig - aSig;
+	zExp = bExp;
+	zSign ^= 1;
+	goto normalizeRoundAndPack;
+	aExpBigger:
+	if ( aExp == 0x7FF ) {
+		if ( aSig ) return propagateFloat64NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		--expDiff;
+	}
+	else {
+		bSig |= LIT64( 0x4000000000000000 );
+	}
+	shift64RightJamming( bSig, expDiff, &bSig );
+	aSig |= LIT64( 0x4000000000000000 );
+	aBigger:
+	zSig = aSig - bSig;
+	zExp = aExp;
+	normalizeRoundAndPack:
+	--zExp;
+	return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the double-precision floating-point values `a'
+| and `b'.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_add( float64 a, float64 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat64Sign( a );
+	bSign = extractFloat64Sign( b );
+	if ( aSign == bSign ) {
+		return addFloat64Sigs( a, b, aSign );
+	}
+	else {
+		return subFloat64Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the double-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sub( float64 a, float64 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat64Sign( a );
+	bSign = extractFloat64Sign( b );
+	if ( aSign == bSign ) {
+		return subFloat64Sigs( a, b, aSign );
+	}
+	else {
+		return addFloat64Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the double-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_mul( float64 a, float64 b )
+{
+	flag aSign, bSign, zSign;
+	int16 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig0, zSig1;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	bSig = extractFloat64Frac( b );
+	bExp = extractFloat64Exp( b );
+	bSign = extractFloat64Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0x7FF ) {
+		if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+			return propagateFloat64NaN( a, b );
+		}
+		if ( ( bExp | bSig ) == 0 ) {
+			float_raise( float_flag_invalid );
+			return float64_default_nan;
+		}
+		return packFloat64( zSign, 0x7FF, 0 );
+	}
+	if ( bExp == 0x7FF ) {
+		if ( bSig ) return propagateFloat64NaN( a, b );
+		if ( ( aExp | aSig ) == 0 ) {
+			float_raise( float_flag_invalid );
+			return float64_default_nan;
+		}
+		return packFloat64( zSign, 0x7FF, 0 );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
+		normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+	}
+	zExp = aExp + bExp - 0x3FF;
+	aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+	bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+	mul64To128( aSig, bSig, &zSig0, &zSig1 );
+	zSig0 |= ( zSig1 != 0 );
+	if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
+		zSig0 <<= 1;
+		--zExp;
+	}
+	return roundAndPackFloat64( zSign, zExp, zSig0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the double-precision floating-point value `a'
+| by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_div( float64 a, float64 b )
+{
+	flag aSign, bSign, zSign;
+	int16 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig;
+	bits64 rem0, rem1;
+	bits64 term0, term1;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	bSig = extractFloat64Frac( b );
+	bExp = extractFloat64Exp( b );
+	bSign = extractFloat64Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0x7FF ) {
+		if ( aSig ) return propagateFloat64NaN( a, b );
+		if ( bExp == 0x7FF ) {
+			if ( bSig ) return propagateFloat64NaN( a, b );
+			float_raise( float_flag_invalid );
+			return float64_default_nan;
+		}
+		return packFloat64( zSign, 0x7FF, 0 );
+	}
+	if ( bExp == 0x7FF ) {
+		if ( bSig ) return propagateFloat64NaN( a, b );
+		return packFloat64( zSign, 0, 0 );
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) {
+			if ( ( aExp | aSig ) == 0 ) {
+				float_raise( float_flag_invalid );
+				return float64_default_nan;
+			}
+			float_raise( float_flag_divbyzero );
+			return packFloat64( zSign, 0x7FF, 0 );
+		}
+		normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+	}
+	zExp = aExp - bExp + 0x3FD;
+	aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+	bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+	if ( bSig <= ( aSig + aSig ) ) {
+		aSig >>= 1;
+		++zExp;
+	}
+	zSig = estimateDiv128To64( aSig, 0, bSig );
+	if ( ( zSig & 0x1FF ) <= 2 ) {
+		mul64To128( bSig, zSig, &term0, &term1 );
+		sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+		while ( (sbits64) rem0 < 0 ) {
+			--zSig;
+			add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+		}
+		zSig |= ( rem1 != 0 );
+	}
+	return roundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the double-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_rem( float64 a, float64 b )
+{
+	flag aSign, zSign;
+	int16 aExp, bExp, expDiff;
+	bits64 aSig, bSig;
+	bits64 q, alternateASig;
+	sbits64 sigMean;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	bSig = extractFloat64Frac( b );
+	bExp = extractFloat64Exp( b );
+//    bSign = extractFloat64Sign( b );
+	if ( aExp == 0x7FF ) {
+		if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+			return propagateFloat64NaN( a, b );
+		}
+		float_raise( float_flag_invalid );
+		return float64_default_nan;
+	}
+	if ( bExp == 0x7FF ) {
+		if ( bSig ) return propagateFloat64NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) {
+			float_raise( float_flag_invalid );
+			return float64_default_nan;
+		}
+		normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return a;
+		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+	}
+	expDiff = aExp - bExp;
+	aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
+	bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+	if ( expDiff < 0 ) {
+		if ( expDiff < -1 ) return a;
+		aSig >>= 1;
+	}
+	q = ( bSig <= aSig );
+	if ( q ) aSig -= bSig;
+	expDiff -= 64;
+	while ( 0 < expDiff ) {
+		q = estimateDiv128To64( aSig, 0, bSig );
+		q = ( 2 < q ) ? q - 2 : 0;
+		aSig = - ( ( bSig>>2 ) * q );
+		expDiff -= 62;
+	}
+	expDiff += 64;
+	if ( 0 < expDiff ) {
+		q = estimateDiv128To64( aSig, 0, bSig );
+		q = ( 2 < q ) ? q - 2 : 0;
+		q >>= 64 - expDiff;
+		bSig >>= 2;
+		aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+	}
+	else {
+		aSig >>= 2;
+		bSig >>= 2;
+	}
+	do {
+		alternateASig = aSig;
+		++q;
+		aSig -= bSig;
+	} while ( 0 <= (sbits64) aSig );
+	sigMean = aSig + alternateASig;
+	if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+		aSig = alternateASig;
+	}
+	zSign = ( (sbits64) aSig < 0 );
+	if ( zSign ) aSig = - aSig;
+	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the double-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sqrt( float64 a )
+{
+	flag aSign;
+	int16 aExp, zExp;
+	bits64 aSig, zSig, doubleZSig;
+	bits64 rem0, rem1, term0, term1;
+//    float64 z;
+
+	aSig = extractFloat64Frac( a );
+	aExp = extractFloat64Exp( a );
+	aSign = extractFloat64Sign( a );
+	if ( aExp == 0x7FF ) {
+		if ( aSig ) return propagateFloat64NaN( a, a );
+		if ( ! aSign ) return a;
+		float_raise( float_flag_invalid );
+		return float64_default_nan;
+	}
+	if ( aSign ) {
+		if ( ( aExp | aSig ) == 0 ) return a;
+		float_raise( float_flag_invalid );
+		return float64_default_nan;
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return 0;
+		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+	}
+	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
+	aSig |= LIT64( 0x0010000000000000 );
+	zSig = estimateSqrt32( aExp, aSig>>21 );
+	aSig <<= 9 - ( aExp & 1 );
+	zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
+	if ( ( zSig & 0x1FF ) <= 5 ) {
+		doubleZSig = zSig<<1;
+		mul64To128( zSig, zSig, &term0, &term1 );
+		sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+		while ( (sbits64) rem0 < 0 ) {
+			--zSig;
+			doubleZSig -= 2;
+			add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
+		}
+		zSig |= ( ( rem0 | rem1 ) != 0 );
+	}
+	return roundAndPackFloat64( 0, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_eq( float64 a, float64 b )
+{
+	if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+			|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+		) {
+		if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_le( float64 a, float64 b )
+{
+	flag aSign, bSign;
+
+	if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+			|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloat64Sign( a );
+	bSign = extractFloat64Sign( b );
+	if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
+	return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_lt( float64 a, float64 b )
+{
+	flag aSign, bSign;
+
+	if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+			|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloat64Sign( a );
+	bSign = extractFloat64Sign( b );
+	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
+	return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise.  The invalid exception is raised
+| if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_eq_signaling( float64 a, float64 b )
+{
+	if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+			|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_le_quiet( float64 a, float64 b )
+{
+	flag aSign, bSign;
+//    int16 aExp, bExp;
+
+	if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+			|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+		) {
+		if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloat64Sign( a );
+	bSign = extractFloat64Sign( b );
+	if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
+	return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_lt_quiet( float64 a, float64 b )
+{
+	flag aSign, bSign;
+
+	if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+			|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+		) {
+		if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloat64Sign( a );
+	bSign = extractFloat64Sign( b );
+	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
+	return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode.  If `a' is a NaN, the
+| largest positive integer is returned.  Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32( floatx80 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+	shiftCount = 0x4037 - aExp;
+	if ( shiftCount <= 0 ) shiftCount = 1;
+	shift64RightJamming( aSig, shiftCount, &aSig );
+	return roundAndPackInt32( aSign, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32_round_to_zero( floatx80 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig, savedASig;
+	int32 z;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	if ( 0x401E < aExp ) {
+		if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+		goto invalid;
+	}
+	else if ( aExp < 0x3FFF ) {
+		if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+		return 0;
+	}
+	shiftCount = 0x403E - aExp;
+	savedASig = aSig;
+	aSig >>= shiftCount;
+	z = aSig;
+	if ( aSign ) z = - z;
+	if ( ( z < 0 ) ^ aSign ) {
+	invalid:
+		float_raise( float_flag_invalid );
+		return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+	}
+	if ( ( aSig<<shiftCount ) != savedASig ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode.  If `a' is a NaN,
+| the largest positive integer is returned.  Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64( floatx80 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig, aSigExtra;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	shiftCount = 0x403E - aExp;
+	if ( shiftCount <= 0 ) {
+		if ( shiftCount ) {
+			float_raise( float_flag_invalid );
+			if (    ! aSign
+					|| (    ( aExp == 0x7FFF )
+						&& ( aSig != LIT64( 0x8000000000000000 ) ) )
+				) {
+				return LIT64( 0x7FFFFFFFFFFFFFFF );
+			}
+			return (sbits64) LIT64( 0x8000000000000000 );
+		}
+		aSigExtra = 0;
+	}
+	else {
+		shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+	}
+	return roundAndPackInt64( aSign, aSig, aSigExtra );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64_round_to_zero( floatx80 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig;
+	int64 z;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	shiftCount = aExp - 0x403E;
+	if ( 0 <= shiftCount ) {
+		aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
+		if ( ( a.high != 0xC03E ) || aSig ) {
+			float_raise( float_flag_invalid );
+			if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
+				return LIT64( 0x7FFFFFFFFFFFFFFF );
+			}
+		}
+		return (sbits64) LIT64( 0x8000000000000000 );
+	}
+	else if ( aExp < 0x3FFF ) {
+		if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+		return 0;
+	}
+	z = aSig>>( - shiftCount );
+	if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	if ( aSign ) z = - z;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the single-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 floatx80_to_float32( floatx80 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 aSig;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( (bits64) ( aSig<<1 ) ) {
+			return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
+		}
+		return packFloat32( aSign, 0xFF, 0 );
+	}
+	shift64RightJamming( aSig, 33, &aSig );
+	if ( aExp || aSig ) aExp -= 0x3F81;
+	return roundAndPackFloat32( aSign, aExp, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 floatx80_to_float64( floatx80 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 aSig, zSig;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( (bits64) ( aSig<<1 ) ) {
+			return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
+		}
+		return packFloat64( aSign, 0x7FF, 0 );
+	}
+	shift64RightJamming( aSig, 1, &zSig );
+	if ( aExp || aSig ) aExp -= 0x3C01;
+	return roundAndPackFloat64( aSign, aExp, zSig );
+
+}
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the quadruple-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 floatx80_to_float128( floatx80 a )
+{
+	flag aSign;
+	int16 aExp;
+	bits64 aSig, zSig0, zSig1;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
+		return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
+	}
+	shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
+	return packFloat128( aSign, aExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the extended double-precision floating-point value `a' to an integer,
+| and returns the result as an extended quadruple-precision floating-point
+| value.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_round_to_int( floatx80 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 lastBitMask, roundBitsMask;
+	int8 roundingMode;
+	floatx80 z;
+
+	aExp = extractFloatx80Exp( a );
+	if ( 0x403E <= aExp ) {
+		if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
+			return propagateFloatx80NaN( a, a );
+		}
+		return a;
+	}
+	if ( aExp < 0x3FFF ) {
+		if (    ( aExp == 0 )
+				&& ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+			return a;
+		}
+		float_exception_flags |= float_flag_inexact;
+		aSign = extractFloatx80Sign( a );
+		switch ( float_rounding_mode ) {
+			case float_round_nearest_even:
+			if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
+				) {
+				return
+					packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
+			}
+			break;
+			case float_round_down:
+			return
+					aSign ?
+						packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
+				: packFloatx80( 0, 0, 0 );
+			case float_round_up:
+			return
+					aSign ? packFloatx80( 1, 0, 0 )
+				: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
+		}
+		return packFloatx80( aSign, 0, 0 );
+	}
+	lastBitMask = 1;
+	lastBitMask <<= 0x403E - aExp;
+	roundBitsMask = lastBitMask - 1;
+	z = a;
+	roundingMode = float_rounding_mode;
+	if ( roundingMode == float_round_nearest_even ) {
+		z.low += lastBitMask>>1;
+		if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+	}
+	else if ( roundingMode != float_round_to_zero ) {
+		if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+			z.low += roundBitsMask;
+		}
+	}
+	z.low &= ~ roundBitsMask;
+	if ( z.low == 0 ) {
+		++z.high;
+		z.low = LIT64( 0x8000000000000000 );
+	}
+	if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the extended double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
+| negated before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+	int32 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig0, zSig1;
+	int32 expDiff;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	bSig = extractFloatx80Frac( b );
+	bExp = extractFloatx80Exp( b );
+	expDiff = aExp - bExp;
+	if ( 0 < expDiff ) {
+		if ( aExp == 0x7FFF ) {
+			if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+			return a;
+		}
+		if ( bExp == 0 ) --expDiff;
+		shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+		zExp = aExp;
+	}
+	else if ( expDiff < 0 ) {
+		if ( bExp == 0x7FFF ) {
+			if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+			return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+		}
+		if ( aExp == 0 ) ++expDiff;
+		shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+		zExp = bExp;
+	}
+	else {
+		if ( aExp == 0x7FFF ) {
+			if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+				return propagateFloatx80NaN( a, b );
+			}
+			return a;
+		}
+		zSig1 = 0;
+		zSig0 = aSig + bSig;
+		if ( aExp == 0 ) {
+			normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+			goto roundAndPack;
+		}
+		zExp = aExp;
+		goto shiftRight1;
+	}
+	zSig0 = aSig + bSig;
+	if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
+	shiftRight1:
+	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
+	zSig0 |= LIT64( 0x8000000000000000 );
+	++zExp;
+	roundAndPack:
+	return
+		roundAndPackFloatx80(
+			floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the extended
+| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+	int32 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig0, zSig1;
+	int32 expDiff;
+	floatx80 z;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	bSig = extractFloatx80Frac( b );
+	bExp = extractFloatx80Exp( b );
+	expDiff = aExp - bExp;
+	if ( 0 < expDiff ) goto aExpBigger;
+	if ( expDiff < 0 ) goto bExpBigger;
+	if ( aExp == 0x7FFF ) {
+		if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+			return propagateFloatx80NaN( a, b );
+		}
+		float_raise( float_flag_invalid );
+		z.low = floatx80_default_nan_low;
+		z.high = floatx80_default_nan_high;
+		return z;
+	}
+	if ( aExp == 0 ) {
+		aExp = 1;
+		bExp = 1;
+	}
+	zSig1 = 0;
+	if ( bSig < aSig ) goto aBigger;
+	if ( aSig < bSig ) goto bBigger;
+	return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
+	bExpBigger:
+	if ( bExp == 0x7FFF ) {
+		if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+		return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( aExp == 0 ) ++expDiff;
+	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+	bBigger:
+	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
+	zExp = bExp;
+	zSign ^= 1;
+	goto normalizeRoundAndPack;
+	aExpBigger:
+	if ( aExp == 0x7FFF ) {
+		if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) --expDiff;
+	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+	aBigger:
+	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
+	zExp = aExp;
+	normalizeRoundAndPack:
+	return
+		normalizeRoundAndPackFloatx80(
+			floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the extended double-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_add( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloatx80Sign( a );
+	bSign = extractFloatx80Sign( b );
+	if ( aSign == bSign ) {
+		return addFloatx80Sigs( a, b, aSign );
+	}
+	else {
+		return subFloatx80Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sub( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloatx80Sign( a );
+	bSign = extractFloatx80Sign( b );
+	if ( aSign == bSign ) {
+		return subFloatx80Sigs( a, b, aSign );
+	}
+	else {
+		return addFloatx80Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_mul( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign, zSign;
+	int32 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig0, zSig1;
+	floatx80 z;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	bSig = extractFloatx80Frac( b );
+	bExp = extractFloatx80Exp( b );
+	bSign = extractFloatx80Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0x7FFF ) {
+		if (    (bits64) ( aSig<<1 )
+				|| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+			return propagateFloatx80NaN( a, b );
+		}
+		if ( ( bExp | bSig ) == 0 ) goto invalid;
+		return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( bExp == 0x7FFF ) {
+		if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+		if ( ( aExp | aSig ) == 0 ) {
+	invalid:
+			float_raise( float_flag_invalid );
+			z.low = floatx80_default_nan_low;
+			z.high = floatx80_default_nan_high;
+			return z;
+		}
+		return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+		normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
+		normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+	}
+	zExp = aExp + bExp - 0x3FFE;
+	mul64To128( aSig, bSig, &zSig0, &zSig1 );
+	if ( 0 < (sbits64) zSig0 ) {
+		shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
+		--zExp;
+	}
+	return
+		roundAndPackFloatx80(
+			floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the extended double-precision floating-point
+| value `a' by the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_div( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign, zSign;
+	int32 aExp, bExp, zExp;
+	bits64 aSig, bSig, zSig0, zSig1;
+	bits64 rem0, rem1, rem2, term0, term1, term2;
+	floatx80 z;
+
+	aSig = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	bSig = extractFloatx80Frac( b );
+	bExp = extractFloatx80Exp( b );
+	bSign = extractFloatx80Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0x7FFF ) {
+		if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+		if ( bExp == 0x7FFF ) {
+			if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+			goto invalid;
+		}
+		return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( bExp == 0x7FFF ) {
+		if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+		return packFloatx80( zSign, 0, 0 );
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) {
+			if ( ( aExp | aSig ) == 0 ) {
+	invalid:
+				float_raise( float_flag_invalid );
+				z.low = floatx80_default_nan_low;
+				z.high = floatx80_default_nan_high;
+				return z;
+			}
+			float_raise( float_flag_divbyzero );
+			return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+		}
+		normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+	}
+	if ( aExp == 0 ) {
+		if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+		normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+	}
+	zExp = aExp - bExp + 0x3FFE;
+	rem1 = 0;
+	if ( bSig <= aSig ) {
+		shift128Right( aSig, 0, 1, &aSig, &rem1 );
+		++zExp;
+	}
+	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
+	mul64To128( bSig, zSig0, &term0, &term1 );
+	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
+	while ( (sbits64) rem0 < 0 ) {
+		--zSig0;
+		add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+	}
+	zSig1 = estimateDiv128To64( rem1, 0, bSig );
+	if ( (bits64) ( zSig1<<1 ) <= 8 ) {
+		mul64To128( bSig, zSig1, &term1, &term2 );
+		sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+		while ( (sbits64) rem1 < 0 ) {
+			--zSig1;
+			add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+		}
+		zSig1 |= ( ( rem1 | rem2 ) != 0 );
+	}
+	return
+		roundAndPackFloatx80(
+			floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_rem( floatx80 a, floatx80 b )
+{
+	flag aSign, zSign;
+	int32 aExp, bExp, expDiff;
+	bits64 aSig0, aSig1, bSig;
+	bits64 q, term0, term1, alternateASig0, alternateASig1;
+	floatx80 z;
+
+	aSig0 = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	bSig = extractFloatx80Frac( b );
+	bExp = extractFloatx80Exp( b );
+//    bSign = extractFloatx80Sign( b );
+	if ( aExp == 0x7FFF ) {
+		if (    (bits64) ( aSig0<<1 )
+				|| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+			return propagateFloatx80NaN( a, b );
+		}
+		goto invalid;
+	}
+	if ( bExp == 0x7FFF ) {
+		if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		if ( bSig == 0 ) {
+	invalid:
+			float_raise( float_flag_invalid );
+			z.low = floatx80_default_nan_low;
+			z.high = floatx80_default_nan_high;
+			return z;
+		}
+		normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+	}
+	if ( aExp == 0 ) {
+		if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
+		normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+	}
+	bSig |= LIT64( 0x8000000000000000 );
+	zSign = aSign;
+	expDiff = aExp - bExp;
+	aSig1 = 0;
+	if ( expDiff < 0 ) {
+		if ( expDiff < -1 ) return a;
+		shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
+		expDiff = 0;
+	}
+	q = ( bSig <= aSig0 );
+	if ( q ) aSig0 -= bSig;
+	expDiff -= 64;
+	while ( 0 < expDiff ) {
+		q = estimateDiv128To64( aSig0, aSig1, bSig );
+		q = ( 2 < q ) ? q - 2 : 0;
+		mul64To128( bSig, q, &term0, &term1 );
+		sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+		shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+		expDiff -= 62;
+	}
+	expDiff += 64;
+	if ( 0 < expDiff ) {
+		q = estimateDiv128To64( aSig0, aSig1, bSig );
+		q = ( 2 < q ) ? q - 2 : 0;
+		q >>= 64 - expDiff;
+		mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
+		sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+		shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
+		while ( le128( term0, term1, aSig0, aSig1 ) ) {
+			++q;
+			sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+		}
+	}
+	else {
+		term1 = 0;
+		term0 = bSig;
+	}
+	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
+	if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+			|| (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+				&& ( q & 1 ) )
+		) {
+		aSig0 = alternateASig0;
+		aSig1 = alternateASig1;
+		zSign = ! zSign;
+	}
+	return
+		normalizeRoundAndPackFloatx80(
+			80, zSign, bExp + expDiff, aSig0, aSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the extended double-precision floating-point
+| value `a'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sqrt( floatx80 a )
+{
+	flag aSign;
+	int32 aExp, zExp;
+	bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
+	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+	floatx80 z;
+
+	aSig0 = extractFloatx80Frac( a );
+	aExp = extractFloatx80Exp( a );
+	aSign = extractFloatx80Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
+		if ( ! aSign ) return a;
+		goto invalid;
+	}
+	if ( aSign ) {
+		if ( ( aExp | aSig0 ) == 0 ) return a;
+	invalid:
+		float_raise( float_flag_invalid );
+		z.low = floatx80_default_nan_low;
+		z.high = floatx80_default_nan_high;
+		return z;
+	}
+	if ( aExp == 0 ) {
+		if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
+		normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+	}
+	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
+	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
+	shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
+	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+	doubleZSig0 = zSig0<<1;
+	mul64To128( zSig0, zSig0, &term0, &term1 );
+	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+	while ( (sbits64) rem0 < 0 ) {
+		--zSig0;
+		doubleZSig0 -= 2;
+		add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+	}
+	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+	if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
+		if ( zSig1 == 0 ) zSig1 = 1;
+		mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+		sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+		mul64To128( zSig1, zSig1, &term2, &term3 );
+		sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+		while ( (sbits64) rem1 < 0 ) {
+			--zSig1;
+			shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+			term3 |= 1;
+			term2 |= doubleZSig0;
+			add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+		}
+		zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+	}
+	shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
+	zSig0 |= doubleZSig0;
+	return
+		roundAndPackFloatx80(
+			floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| equal to the corresponding value `b', and 0 otherwise.  The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_eq( floatx80 a, floatx80 b )
+{
+	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
+			|| (    ( extractFloatx80Exp( b ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
+		) {
+		if (    floatx80_is_signaling_nan( a )
+				|| floatx80_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	return
+			( a.low == b.low )
+		&& (    ( a.high == b.high )
+				|| (    ( a.low == 0 )
+					&& ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+			);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than or equal to the corresponding value `b', and 0 otherwise.  The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_le( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
+			|| (    ( extractFloatx80Exp( b ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloatx80Sign( a );
+	bSign = extractFloatx80Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			|| (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					== 0 );
+	}
+	return
+			aSign ? le128( b.high, b.low, a.high, a.low )
+		: le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_lt( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
+			|| (    ( extractFloatx80Exp( b ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloatx80Sign( a );
+	bSign = extractFloatx80Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			&& (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					!= 0 );
+	}
+	return
+			aSign ? lt128( b.high, b.low, a.high, a.low )
+		: lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is equal
+| to the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_eq_signaling( floatx80 a, floatx80 b )
+{
+	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
+			|| (    ( extractFloatx80Exp( b ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	return
+			( a.low == b.low )
+		&& (    ( a.high == b.high )
+				|| (    ( a.low == 0 )
+					&& ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+			);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
+| do not cause an exception.  Otherwise, the comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_le_quiet( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
+			|| (    ( extractFloatx80Exp( b ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
+		) {
+		if (    floatx80_is_signaling_nan( a )
+				|| floatx80_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloatx80Sign( a );
+	bSign = extractFloatx80Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			|| (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					== 0 );
+	}
+	return
+			aSign ? le128( b.high, b.low, a.high, a.low )
+		: le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
+| an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_lt_quiet( floatx80 a, floatx80 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
+			|| (    ( extractFloatx80Exp( b ) == 0x7FFF )
+				&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
+		) {
+		if (    floatx80_is_signaling_nan( a )
+				|| floatx80_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloatx80Sign( a );
+	bSign = extractFloatx80Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			&& (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					!= 0 );
+	}
+	return
+			aSign ? lt128( b.high, b.low, a.high, a.low )
+		: lt128( a.high, a.low, b.high, b.low );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32( float128 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig0, aSig1;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
+	if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+	aSig0 |= ( aSig1 != 0 );
+	shiftCount = 0x4028 - aExp;
+	if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
+	return roundAndPackInt32( aSign, aSig0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.  If
+| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32_round_to_zero( float128 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig0, aSig1, savedASig;
+	int32 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	aSig0 |= ( aSig1 != 0 );
+	if ( 0x401E < aExp ) {
+		if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
+		goto invalid;
+	}
+	else if ( aExp < 0x3FFF ) {
+		if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact;
+		return 0;
+	}
+	aSig0 |= LIT64( 0x0001000000000000 );
+	shiftCount = 0x402F - aExp;
+	savedASig = aSig0;
+	aSig0 >>= shiftCount;
+	z = aSig0;
+	if ( aSign ) z = - z;
+	if ( ( z < 0 ) ^ aSign ) {
+	invalid:
+		float_raise( float_flag_invalid );
+		return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+	}
+	if ( ( aSig0<<shiftCount ) != savedASig ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64( float128 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig0, aSig1;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+	shiftCount = 0x402F - aExp;
+	if ( shiftCount <= 0 ) {
+		if ( 0x403E < aExp ) {
+			float_raise( float_flag_invalid );
+			if (    ! aSign
+					|| (    ( aExp == 0x7FFF )
+						&& ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
+					)
+				) {
+				return LIT64( 0x7FFFFFFFFFFFFFFF );
+			}
+			return (sbits64) LIT64( 0x8000000000000000 );
+		}
+		shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
+	}
+	else {
+		shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
+	}
+	return roundAndPackInt64( aSign, aSig0, aSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64_round_to_zero( float128 a )
+{
+	flag aSign;
+	int32 aExp, shiftCount;
+	bits64 aSig0, aSig1;
+	int64 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+	shiftCount = aExp - 0x402F;
+	if ( 0 < shiftCount ) {
+		if ( 0x403E <= aExp ) {
+			aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
+			if (    ( a.high == LIT64( 0xC03E000000000000 ) )
+					&& ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
+				if ( aSig1 ) float_exception_flags |= float_flag_inexact;
+			}
+			else {
+				float_raise( float_flag_invalid );
+				if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
+					return LIT64( 0x7FFFFFFFFFFFFFFF );
+				}
+			}
+			return (sbits64) LIT64( 0x8000000000000000 );
+		}
+		z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
+		if ( (bits64) ( aSig1<<shiftCount ) ) {
+			float_exception_flags |= float_flag_inexact;
+		}
+	}
+	else {
+		if ( aExp < 0x3FFF ) {
+			if ( aExp | aSig0 | aSig1 ) {
+				float_exception_flags |= float_flag_inexact;
+			}
+			return 0;
+		}
+		z = aSig0>>( - shiftCount );
+		if (    aSig1
+				|| ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
+			float_exception_flags |= float_flag_inexact;
+		}
+	}
+	if ( aSign ) z = - z;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the single-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float128_to_float32( float128 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 aSig0, aSig1;
+	bits32 zSig;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 ) {
+			return commonNaNToFloat32( float128ToCommonNaN( a ) );
+		}
+		return packFloat32( aSign, 0xFF, 0 );
+	}
+	aSig0 |= ( aSig1 != 0 );
+	shift64RightJamming( aSig0, 18, &aSig0 );
+	zSig = aSig0;
+	if ( aExp || zSig ) {
+		zSig |= 0x40000000;
+		aExp -= 0x3F81;
+	}
+	return roundAndPackFloat32( aSign, aExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float128_to_float64( float128 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 aSig0, aSig1;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 ) {
+			return commonNaNToFloat64( float128ToCommonNaN( a ) );
+		}
+		return packFloat64( aSign, 0x7FF, 0 );
+	}
+	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+	aSig0 |= ( aSig1 != 0 );
+	if ( aExp || aSig0 ) {
+		aSig0 |= LIT64( 0x4000000000000000 );
+		aExp -= 0x3C01;
+	}
+	return roundAndPackFloat64( aSign, aExp, aSig0 );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the extended double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float128_to_floatx80( float128 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 aSig0, aSig1;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 ) {
+			return commonNaNToFloatx80( float128ToCommonNaN( a ) );
+		}
+		return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+	}
+	if ( aExp == 0 ) {
+		if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
+		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+	}
+	else {
+		aSig0 |= LIT64( 0x0001000000000000 );
+	}
+	shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
+	return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the quadruple-precision floating-point value `a' to an integer, and
+| returns the result as a quadruple-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_round_to_int( float128 a )
+{
+	flag aSign;
+	int32 aExp;
+	bits64 lastBitMask, roundBitsMask;
+	int8 roundingMode;
+	float128 z;
+
+	aExp = extractFloat128Exp( a );
+	if ( 0x402F <= aExp ) {
+		if ( 0x406F <= aExp ) {
+			if (    ( aExp == 0x7FFF )
+					&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
+				) {
+				return propagateFloat128NaN( a, a );
+			}
+			return a;
+		}
+		lastBitMask = 1;
+		lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
+		roundBitsMask = lastBitMask - 1;
+		z = a;
+		roundingMode = float_rounding_mode;
+		if ( roundingMode == float_round_nearest_even ) {
+			if ( lastBitMask ) {
+				add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
+				if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+			}
+			else {
+				if ( (sbits64) z.low < 0 ) {
+					++z.high;
+					if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
+				}
+			}
+		}
+		else if ( roundingMode != float_round_to_zero ) {
+			if (   extractFloat128Sign( z )
+					^ ( roundingMode == float_round_up ) ) {
+				add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
+			}
+		}
+		z.low &= ~ roundBitsMask;
+	}
+	else {
+		if ( aExp < 0x3FFF ) {
+			if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
+			float_exception_flags |= float_flag_inexact;
+			aSign = extractFloat128Sign( a );
+			switch ( float_rounding_mode ) {
+				case float_round_nearest_even:
+				if (    ( aExp == 0x3FFE )
+						&& (   extractFloat128Frac0( a )
+							| extractFloat128Frac1( a ) )
+					) {
+					return packFloat128( aSign, 0x3FFF, 0, 0 );
+				}
+				break;
+				case float_round_down:
+				return
+						aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
+					: packFloat128( 0, 0, 0, 0 );
+				case float_round_up:
+				return
+						aSign ? packFloat128( 1, 0, 0, 0 )
+					: packFloat128( 0, 0x3FFF, 0, 0 );
+			}
+			return packFloat128( aSign, 0, 0, 0 );
+		}
+		lastBitMask = 1;
+		lastBitMask <<= 0x402F - aExp;
+		roundBitsMask = lastBitMask - 1;
+		z.low = 0;
+		z.high = a.high;
+		roundingMode = float_rounding_mode;
+		if ( roundingMode == float_round_nearest_even ) {
+			z.high += lastBitMask>>1;
+			if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
+				z.high &= ~ lastBitMask;
+			}
+		}
+		else if ( roundingMode != float_round_to_zero ) {
+			if (   extractFloat128Sign( z )
+					^ ( roundingMode == float_round_up ) ) {
+				z.high |= ( a.low != 0 );
+				z.high += roundBitsMask;
+			}
+		}
+		z.high &= ~ roundBitsMask;
+	}
+	if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
+		float_exception_flags |= float_flag_inexact;
+	}
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the quadruple-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
+{
+	int32 aExp, bExp, zExp;
+	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+	int32 expDiff;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	bSig1 = extractFloat128Frac1( b );
+	bSig0 = extractFloat128Frac0( b );
+	bExp = extractFloat128Exp( b );
+	expDiff = aExp - bExp;
+	if ( 0 < expDiff ) {
+		if ( aExp == 0x7FFF ) {
+			if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+			return a;
+		}
+		if ( bExp == 0 ) {
+			--expDiff;
+		}
+		else {
+			bSig0 |= LIT64( 0x0001000000000000 );
+		}
+		shift128ExtraRightJamming(
+			bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
+		zExp = aExp;
+	}
+	else if ( expDiff < 0 ) {
+		if ( bExp == 0x7FFF ) {
+			if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+			return packFloat128( zSign, 0x7FFF, 0, 0 );
+		}
+		if ( aExp == 0 ) {
+			++expDiff;
+		}
+		else {
+			aSig0 |= LIT64( 0x0001000000000000 );
+		}
+		shift128ExtraRightJamming(
+			aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
+		zExp = bExp;
+	}
+	else {
+		if ( aExp == 0x7FFF ) {
+			if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+				return propagateFloat128NaN( a, b );
+			}
+			return a;
+		}
+		add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+		if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
+		zSig2 = 0;
+		zSig0 |= LIT64( 0x0002000000000000 );
+		zExp = aExp;
+		goto shiftRight1;
+	}
+	aSig0 |= LIT64( 0x0001000000000000 );
+	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+	--zExp;
+	if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
+	++zExp;
+	shiftRight1:
+	shift128ExtraRightJamming(
+		zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+	roundAndPack:
+	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the quadruple-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
+{
+	int32 aExp, bExp, zExp;
+	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+	int32 expDiff;
+	float128 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	bSig1 = extractFloat128Frac1( b );
+	bSig0 = extractFloat128Frac0( b );
+	bExp = extractFloat128Exp( b );
+	expDiff = aExp - bExp;
+	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+	shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
+	if ( 0 < expDiff ) goto aExpBigger;
+	if ( expDiff < 0 ) goto bExpBigger;
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+			return propagateFloat128NaN( a, b );
+		}
+		float_raise( float_flag_invalid );
+		z.low = float128_default_nan_low;
+		z.high = float128_default_nan_high;
+		return z;
+	}
+	if ( aExp == 0 ) {
+		aExp = 1;
+		bExp = 1;
+	}
+	if ( bSig0 < aSig0 ) goto aBigger;
+	if ( aSig0 < bSig0 ) goto bBigger;
+	if ( bSig1 < aSig1 ) goto aBigger;
+	if ( aSig1 < bSig1 ) goto bBigger;
+	return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
+	bExpBigger:
+	if ( bExp == 0x7FFF ) {
+		if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+		return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
+	}
+	if ( aExp == 0 ) {
+		++expDiff;
+	}
+	else {
+		aSig0 |= LIT64( 0x4000000000000000 );
+	}
+	shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+	bSig0 |= LIT64( 0x4000000000000000 );
+	bBigger:
+	sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
+	zExp = bExp;
+	zSign ^= 1;
+	goto normalizeRoundAndPack;
+	aExpBigger:
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		--expDiff;
+	}
+	else {
+		bSig0 |= LIT64( 0x4000000000000000 );
+	}
+	shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
+	aSig0 |= LIT64( 0x4000000000000000 );
+	aBigger:
+	sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+	zExp = aExp;
+	normalizeRoundAndPack:
+	--zExp;
+	return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the quadruple-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_add( float128 a, float128 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat128Sign( a );
+	bSign = extractFloat128Sign( b );
+	if ( aSign == bSign ) {
+		return addFloat128Sigs( a, b, aSign );
+	}
+	else {
+		return subFloat128Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sub( float128 a, float128 b )
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat128Sign( a );
+	bSign = extractFloat128Sign( b );
+	if ( aSign == bSign ) {
+		return subFloat128Sigs( a, b, aSign );
+	}
+	else {
+		return addFloat128Sigs( a, b, aSign );
+	}
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_mul( float128 a, float128 b )
+{
+	flag aSign, bSign, zSign;
+	int32 aExp, bExp, zExp;
+	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+	float128 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	bSig1 = extractFloat128Frac1( b );
+	bSig0 = extractFloat128Frac0( b );
+	bExp = extractFloat128Exp( b );
+	bSign = extractFloat128Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0x7FFF ) {
+		if (    ( aSig0 | aSig1 )
+				|| ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+			return propagateFloat128NaN( a, b );
+		}
+		if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
+		return packFloat128( zSign, 0x7FFF, 0, 0 );
+	}
+	if ( bExp == 0x7FFF ) {
+		if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+		if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+	invalid:
+			float_raise( float_flag_invalid );
+			z.low = float128_default_nan_low;
+			z.high = float128_default_nan_high;
+			return z;
+		}
+		return packFloat128( zSign, 0x7FFF, 0, 0 );
+	}
+	if ( aExp == 0 ) {
+		if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+	}
+	if ( bExp == 0 ) {
+		if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+		normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+	}
+	zExp = aExp + bExp - 0x4000;
+	aSig0 |= LIT64( 0x0001000000000000 );
+	shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
+	mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
+	add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
+	zSig2 |= ( zSig3 != 0 );
+	if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
+		shift128ExtraRightJamming(
+			zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+		++zExp;
+	}
+	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the quadruple-precision floating-point value
+| `a' by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_div( float128 a, float128 b )
+{
+	flag aSign, bSign, zSign;
+	int32 aExp, bExp, zExp;
+	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+	float128 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	bSig1 = extractFloat128Frac1( b );
+	bSig0 = extractFloat128Frac0( b );
+	bExp = extractFloat128Exp( b );
+	bSign = extractFloat128Sign( b );
+	zSign = aSign ^ bSign;
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+		if ( bExp == 0x7FFF ) {
+			if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+			goto invalid;
+		}
+		return packFloat128( zSign, 0x7FFF, 0, 0 );
+	}
+	if ( bExp == 0x7FFF ) {
+		if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+		return packFloat128( zSign, 0, 0, 0 );
+	}
+	if ( bExp == 0 ) {
+		if ( ( bSig0 | bSig1 ) == 0 ) {
+			if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+	invalid:
+				float_raise( float_flag_invalid );
+				z.low = float128_default_nan_low;
+				z.high = float128_default_nan_high;
+				return z;
+			}
+			float_raise( float_flag_divbyzero );
+			return packFloat128( zSign, 0x7FFF, 0, 0 );
+		}
+		normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+	}
+	if ( aExp == 0 ) {
+		if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+	}
+	zExp = aExp - bExp + 0x3FFD;
+	shortShift128Left(
+		aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
+	shortShift128Left(
+		bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
+		shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
+		++zExp;
+	}
+	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
+	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
+	sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
+	while ( (sbits64) rem0 < 0 ) {
+		--zSig0;
+		add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
+	}
+	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
+	if ( ( zSig1 & 0x3FFF ) <= 4 ) {
+		mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
+		sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
+		while ( (sbits64) rem1 < 0 ) {
+			--zSig1;
+			add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
+		}
+		zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+	}
+	shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
+	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the quadruple-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_rem( float128 a, float128 b )
+{
+	flag aSign, zSign;
+	int32 aExp, bExp, expDiff;
+	bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
+	bits64 allZero, alternateASig0, alternateASig1, sigMean1;
+	sbits64 sigMean0;
+	float128 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	bSig1 = extractFloat128Frac1( b );
+	bSig0 = extractFloat128Frac0( b );
+	bExp = extractFloat128Exp( b );
+//    bSign = extractFloat128Sign( b );
+	if ( aExp == 0x7FFF ) {
+		if (    ( aSig0 | aSig1 )
+				|| ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+			return propagateFloat128NaN( a, b );
+		}
+		goto invalid;
+	}
+	if ( bExp == 0x7FFF ) {
+		if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+		return a;
+	}
+	if ( bExp == 0 ) {
+		if ( ( bSig0 | bSig1 ) == 0 ) {
+	invalid:
+			float_raise( float_flag_invalid );
+			z.low = float128_default_nan_low;
+			z.high = float128_default_nan_high;
+			return z;
+		}
+		normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+	}
+	if ( aExp == 0 ) {
+		if ( ( aSig0 | aSig1 ) == 0 ) return a;
+		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+	}
+	expDiff = aExp - bExp;
+	if ( expDiff < -1 ) return a;
+	shortShift128Left(
+		aSig0 | LIT64( 0x0001000000000000 ),
+		aSig1,
+		15 - ( expDiff < 0 ),
+		&aSig0,
+		&aSig1
+	);
+	shortShift128Left(
+		bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+	q = le128( bSig0, bSig1, aSig0, aSig1 );
+	if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+	expDiff -= 64;
+	while ( 0 < expDiff ) {
+		q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+		q = ( 4 < q ) ? q - 4 : 0;
+		mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+		shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
+		shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
+		sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
+		expDiff -= 61;
+	}
+	if ( -64 < expDiff ) {
+		q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+		q = ( 4 < q ) ? q - 4 : 0;
+		q >>= - expDiff;
+		shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+		expDiff += 52;
+		if ( expDiff < 0 ) {
+			shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+		}
+		else {
+			shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
+		}
+		mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+		sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
+	}
+	else {
+		shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
+		shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+	}
+	do {
+		alternateASig0 = aSig0;
+		alternateASig1 = aSig1;
+		++q;
+		sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+	} while ( 0 <= (sbits64) aSig0 );
+	add128(
+		aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
+	if (    ( sigMean0 < 0 )
+			|| ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
+		aSig0 = alternateASig0;
+		aSig1 = alternateASig1;
+	}
+	zSign = ( (sbits64) aSig0 < 0 );
+	if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
+	return
+		normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the quadruple-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sqrt( float128 a )
+{
+	flag aSign;
+	int32 aExp, zExp;
+	bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
+	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+	float128 z;
+
+	aSig1 = extractFloat128Frac1( a );
+	aSig0 = extractFloat128Frac0( a );
+	aExp = extractFloat128Exp( a );
+	aSign = extractFloat128Sign( a );
+	if ( aExp == 0x7FFF ) {
+		if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
+		if ( ! aSign ) return a;
+		goto invalid;
+	}
+	if ( aSign ) {
+		if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
+	invalid:
+		float_raise( float_flag_invalid );
+		z.low = float128_default_nan_low;
+		z.high = float128_default_nan_high;
+		return z;
+	}
+	if ( aExp == 0 ) {
+		if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
+		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+	}
+	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
+	aSig0 |= LIT64( 0x0001000000000000 );
+	zSig0 = estimateSqrt32( aExp, aSig0>>17 );
+	shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
+	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+	doubleZSig0 = zSig0<<1;
+	mul64To128( zSig0, zSig0, &term0, &term1 );
+	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+	while ( (sbits64) rem0 < 0 ) {
+		--zSig0;
+		doubleZSig0 -= 2;
+		add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+	}
+	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+	if ( ( zSig1 & 0x1FFF ) <= 5 ) {
+		if ( zSig1 == 0 ) zSig1 = 1;
+		mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+		sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+		mul64To128( zSig1, zSig1, &term2, &term3 );
+		sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+		while ( (sbits64) rem1 < 0 ) {
+			--zSig1;
+			shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+			term3 |= 1;
+			term2 |= doubleZSig0;
+			add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+		}
+		zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+	}
+	shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
+	return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_eq( float128 a, float128 b )
+{
+	if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+				&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+			|| (    ( extractFloat128Exp( b ) == 0x7FFF )
+				&& ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+		) {
+		if (    float128_is_signaling_nan( a )
+				|| float128_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	return
+			( a.low == b.low )
+		&& (    ( a.high == b.high )
+				|| (    ( a.low == 0 )
+					&& ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
+			);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_le( float128 a, float128 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+				&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+			|| (    ( extractFloat128Exp( b ) == 0x7FFF )
+				&& ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloat128Sign( a );
+	bSign = extractFloat128Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			|| (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					== 0 );
+	}
+	return
+			aSign ? le128( b.high, b.low, a.high, a.low )
+		: le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_lt( float128 a, float128 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+				&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+			|| (    ( extractFloat128Exp( b ) == 0x7FFF )
+				&& ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	aSign = extractFloat128Sign( a );
+	bSign = extractFloat128Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			&& (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					!= 0 );
+	}
+	return
+			aSign ? lt128( b.high, b.low, a.high, a.low )
+		: lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_eq_signaling( float128 a, float128 b )
+{
+	if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+				&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+			|| (    ( extractFloat128Exp( b ) == 0x7FFF )
+				&& ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+		) {
+		float_raise( float_flag_invalid );
+		return 0;
+	}
+	return
+			( a.low == b.low )
+		&& (    ( a.high == b.high )
+				|| (    ( a.low == 0 )
+					&& ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
+			);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_le_quiet( float128 a, float128 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+				&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+			|| (    ( extractFloat128Exp( b ) == 0x7FFF )
+				&& ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+		) {
+		if (    float128_is_signaling_nan( a )
+				|| float128_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloat128Sign( a );
+	bSign = extractFloat128Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			|| (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					== 0 );
+	}
+	return
+			aSign ? le128( b.high, b.low, a.high, a.low )
+		: le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_lt_quiet( float128 a, float128 b )
+{
+	flag aSign, bSign;
+
+	if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+				&& ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+			|| (    ( extractFloat128Exp( b ) == 0x7FFF )
+				&& ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+		) {
+		if (    float128_is_signaling_nan( a )
+				|| float128_is_signaling_nan( b ) ) {
+			float_raise( float_flag_invalid );
+		}
+		return 0;
+	}
+	aSign = extractFloat128Sign( a );
+	bSign = extractFloat128Sign( b );
+	if ( aSign != bSign ) {
+		return
+				aSign
+			&& (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+					!= 0 );
+	}
+	return
+			aSign ? lt128( b.high, b.low, a.high, a.low )
+		: lt128( a.high, a.low, b.high, b.low );
+
+}
+
+#endif
diff --git a/plat/linux68k/emu/musashi/softfloat/softfloat.h b/plat/linux68k/emu/musashi/softfloat/softfloat.h
new file mode 100644
index 000000000..92135e663
--- /dev/null
+++ b/plat/linux68k/emu/musashi/softfloat/softfloat.h
@@ -0,0 +1,460 @@
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| The macro `FLOATX80' must be defined to enable the extended double-precision
+| floating-point format `floatx80'.  If this macro is not defined, the
+| `floatx80' type will not be defined, and none of the functions that either
+| input or output the `floatx80' type will be defined.  The same applies to
+| the `FLOAT128' macro and the quadruple-precision format `float128'.
+*----------------------------------------------------------------------------*/
+#define FLOATX80
+#define FLOAT128
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+typedef bits32 float32;
+typedef bits64 float64;
+#ifdef FLOATX80
+typedef struct {
+	bits16 high;
+	bits64 low;
+} floatx80;
+#endif
+#ifdef FLOAT128
+typedef struct {
+	bits64 high, low;
+} float128;
+#endif
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations.  (Can be specialized to target if
+| desired.)
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros"
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point underflow tininess-detection mode.
+*----------------------------------------------------------------------------*/
+extern int8 float_detect_tininess;
+enum {
+	float_tininess_after_rounding  = 0,
+	float_tininess_before_rounding = 1
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+extern int8 float_rounding_mode;
+enum {
+	float_round_nearest_even = 0,
+	float_round_to_zero      = 1,
+	float_round_down         = 2,
+	float_round_up           = 3
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point exception flags.
+*----------------------------------------------------------------------------*/
+extern int8 float_exception_flags;
+enum {
+	float_flag_invalid = 0x01, float_flag_denormal = 0x02, float_flag_divbyzero = 0x04, float_flag_overflow = 0x08,
+	float_flag_underflow = 0x10, float_flag_inexact = 0x20
+};
+
+/*----------------------------------------------------------------------------
+| Routine to raise any or all of the software IEC/IEEE floating-point
+| exception flags.
+*----------------------------------------------------------------------------*/
+void float_raise( int8 );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32( int32 );
+float64 int32_to_float64( int32 );
+#ifdef FLOATX80
+floatx80 int32_to_floatx80( int32 );
+#endif
+#ifdef FLOAT128
+float128 int32_to_float128( int32 );
+#endif
+float32 int64_to_float32( int64 );
+float64 int64_to_float64( int64 );
+#ifdef FLOATX80
+floatx80 int64_to_floatx80( int64 );
+#endif
+#ifdef FLOAT128
+float128 int64_to_float128( int64 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float32_to_int32( float32 );
+int32 float32_to_int32_round_to_zero( float32 );
+int64 float32_to_int64( float32 );
+int64 float32_to_int64_round_to_zero( float32 );
+float64 float32_to_float64( float32 );
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32 );
+#endif
+#ifdef FLOAT128
+float128 float32_to_float128( float32 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 );
+float32 float32_add( float32, float32 );
+float32 float32_sub( float32, float32 );
+float32 float32_mul( float32, float32 );
+float32 float32_div( float32, float32 );
+float32 float32_rem( float32, float32 );
+float32 float32_sqrt( float32 );
+flag float32_eq( float32, float32 );
+flag float32_le( float32, float32 );
+flag float32_lt( float32, float32 );
+flag float32_eq_signaling( float32, float32 );
+flag float32_le_quiet( float32, float32 );
+flag float32_lt_quiet( float32, float32 );
+flag float32_is_signaling_nan( float32 );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float64_to_int32( float64 );
+int32 float64_to_int32_round_to_zero( float64 );
+int64 float64_to_int64( float64 );
+int64 float64_to_int64_round_to_zero( float64 );
+float32 float64_to_float32( float64 );
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 );
+#endif
+#ifdef FLOAT128
+float128 float64_to_float128( float64 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int( float64 );
+float64 float64_add( float64, float64 );
+float64 float64_sub( float64, float64 );
+float64 float64_mul( float64, float64 );
+float64 float64_div( float64, float64 );
+float64 float64_rem( float64, float64 );
+float64 float64_sqrt( float64 );
+flag float64_eq( float64, float64 );
+flag float64_le( float64, float64 );
+flag float64_lt( float64, float64 );
+flag float64_eq_signaling( float64, float64 );
+flag float64_le_quiet( float64, float64 );
+flag float64_lt_quiet( float64, float64 );
+flag float64_is_signaling_nan( float64 );
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 floatx80_to_int32( floatx80 );
+int32 floatx80_to_int32_round_to_zero( floatx80 );
+int64 floatx80_to_int64( floatx80 );
+int64 floatx80_to_int64_round_to_zero( floatx80 );
+float32 floatx80_to_float32( floatx80 );
+float64 floatx80_to_float64( floatx80 );
+#ifdef FLOAT128
+float128 floatx80_to_float128( floatx80 );
+#endif
+floatx80 floatx80_scale(floatx80 a, floatx80 b);
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
+| extended double-precision floating-point value, returning the result.
+*----------------------------------------------------------------------------*/
+
+static inline floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
+{
+	floatx80 z;
+
+	z.low = zSig;
+	z.high = ( ( (bits16) zSign )<<15 ) + zExp;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision rounding precision.  Valid
+| values are 32, 64, and 80.
+*----------------------------------------------------------------------------*/
+extern int8 floatx80_rounding_precision;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 );
+floatx80 floatx80_add( floatx80, floatx80 );
+floatx80 floatx80_sub( floatx80, floatx80 );
+floatx80 floatx80_mul( floatx80, floatx80 );
+floatx80 floatx80_div( floatx80, floatx80 );
+floatx80 floatx80_rem( floatx80, floatx80 );
+floatx80 floatx80_sqrt( floatx80 );
+flag floatx80_eq( floatx80, floatx80 );
+flag floatx80_le( floatx80, floatx80 );
+flag floatx80_lt( floatx80, floatx80 );
+flag floatx80_eq_signaling( floatx80, floatx80 );
+flag floatx80_le_quiet( floatx80, floatx80 );
+flag floatx80_lt_quiet( floatx80, floatx80 );
+flag floatx80_is_signaling_nan( floatx80 );
+
+/* int floatx80_fsin(floatx80 &a);
+int floatx80_fcos(floatx80 &a);
+int floatx80_ftan(floatx80 &a); */
+
+floatx80 floatx80_flognp1(floatx80 a);
+floatx80 floatx80_flogn(floatx80 a);
+floatx80 floatx80_flog2(floatx80 a);
+floatx80 floatx80_flog10(floatx80 a);
+
+// roundAndPackFloatx80 used to be in softfloat-round-pack, is now in softfloat.c
+floatx80 roundAndPackFloatx80(int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1);
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float128_to_int32( float128 );
+int32 float128_to_int32_round_to_zero( float128 );
+int64 float128_to_int64( float128 );
+int64 float128_to_int64_round_to_zero( float128 );
+float32 float128_to_float32( float128 );
+float64 float128_to_float64( float128 );
+#ifdef FLOATX80
+floatx80 float128_to_floatx80( float128 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_round_to_int( float128 );
+float128 float128_add( float128, float128 );
+float128 float128_sub( float128, float128 );
+float128 float128_mul( float128, float128 );
+float128 float128_div( float128, float128 );
+float128 float128_rem( float128, float128 );
+float128 float128_sqrt( float128 );
+flag float128_eq( float128, float128 );
+flag float128_le( float128, float128 );
+flag float128_lt( float128, float128 );
+flag float128_eq_signaling( float128, float128 );
+flag float128_le_quiet( float128, float128 );
+flag float128_lt_quiet( float128, float128 );
+flag float128_is_signaling_nan( float128 );
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', the exponent `zExp', and the significand formed
+| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
+| floating-point value, returning the result.  After being shifted into the
+| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
+| added together to form the most significant 32 bits of the result.  This
+| means that any integer portion of `zSig0' will be added into the exponent.
+| Since a properly normalized significand will have an integer portion equal
+| to 1, the `zExp' input should be 1 less than the desired result exponent
+| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+static inline float128
+	packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+	float128 z;
+
+	z.low = zSig1;
+	z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
+	return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent.  The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static inline float128
+	roundAndPackFloat128(
+		flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
+{
+	int8 roundingMode;
+	flag roundNearestEven, increment, isTiny;
+
+	roundingMode = float_rounding_mode;
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	increment = ( (sbits64) zSig2 < 0 );
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			increment = 0;
+		}
+		else {
+			if ( zSign ) {
+				increment = ( roundingMode == float_round_down ) && zSig2;
+			}
+			else {
+				increment = ( roundingMode == float_round_up ) && zSig2;
+			}
+		}
+	}
+	if ( 0x7FFD <= (bits32) zExp ) {
+		if (    ( 0x7FFD < zExp )
+				|| (    ( zExp == 0x7FFD )
+					&& eq128(
+							LIT64( 0x0001FFFFFFFFFFFF ),
+							LIT64( 0xFFFFFFFFFFFFFFFF ),
+							zSig0,
+							zSig1
+						)
+					&& increment
+				)
+			) {
+			float_raise( float_flag_overflow | float_flag_inexact );
+			if (    ( roundingMode == float_round_to_zero )
+					|| ( zSign && ( roundingMode == float_round_up ) )
+					|| ( ! zSign && ( roundingMode == float_round_down ) )
+				) {
+				return
+					packFloat128(
+						zSign,
+						0x7FFE,
+						LIT64( 0x0000FFFFFFFFFFFF ),
+						LIT64( 0xFFFFFFFFFFFFFFFF )
+					);
+			}
+			return packFloat128( zSign, 0x7FFF, 0, 0 );
+		}
+		if ( zExp < 0 ) {
+			isTiny =
+					( float_detect_tininess == float_tininess_before_rounding )
+				|| ( zExp < -1 )
+				|| ! increment
+				|| lt128(
+						zSig0,
+						zSig1,
+						LIT64( 0x0001FFFFFFFFFFFF ),
+						LIT64( 0xFFFFFFFFFFFFFFFF )
+					);
+			shift128ExtraRightJamming(
+				zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
+			zExp = 0;
+			if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
+			if ( roundNearestEven ) {
+				increment = ( (sbits64) zSig2 < 0 );
+			}
+			else {
+				if ( zSign ) {
+					increment = ( roundingMode == float_round_down ) && zSig2;
+				}
+				else {
+					increment = ( roundingMode == float_round_up ) && zSig2;
+				}
+			}
+		}
+	}
+	if ( zSig2 ) float_exception_flags |= float_flag_inexact;
+	if ( increment ) {
+		add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
+		zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
+	}
+	else {
+		if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
+	}
+	return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand formed by the concatenation of `zSig0' and `zSig1', and
+| returns the proper quadruple-precision floating-point value corresponding
+| to the abstract input.  This routine is just like `roundAndPackFloat128'
+| except that the input significand has fewer bits and does not have to be
+| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+| point exponent.
+*----------------------------------------------------------------------------*/
+
+static inline float128
+	normalizeRoundAndPackFloat128(
+		flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+	int8 shiftCount;
+	bits64 zSig2;
+
+	if ( zSig0 == 0 ) {
+		zSig0 = zSig1;
+		zSig1 = 0;
+		zExp -= 64;
+	}
+	shiftCount = countLeadingZeros64( zSig0 ) - 15;
+	if ( 0 <= shiftCount ) {
+		zSig2 = 0;
+		shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+	}
+	else {
+		shift128ExtraRightJamming(
+			zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
+	}
+	zExp -= shiftCount;
+	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+#endif
diff --git a/plat/linux68k/emu/sim.c b/plat/linux68k/emu/sim.c
index eb7794897..e78e5c437 100755
--- a/plat/linux68k/emu/sim.c
+++ b/plat/linux68k/emu/sim.c
@@ -6,6 +6,9 @@
 #include <time.h>
 #include <unistd.h>
 #include <errno.h>
+
+#include <fcntl.h>
+
 #include "sim.h"
 #include "m68k.h"
 
@@ -72,7 +75,7 @@ void exit_error(char* fmt, ...)
 	fprintf(stderr, "\n");
 	pc = m68k_get_reg(NULL, M68K_REG_PPC);
 	m68k_disassemble(buff, pc, M68K_CPU_TYPE_68020);
-	fprintf(stderr, "At %04x: %s\n", pc, buff);
+        fprintf(stderr, "At %04x: %s\n", pc, buff);
 
 	exit(EXIT_FAILURE);
 }
@@ -90,7 +93,7 @@ uint32_t cpu_read_long(uint32_t address)
 	switch (address)
 	{
 		case 0x00: return INIT_SP;
-		case 0x04: return entrypoint;
+                case 0x04: return entrypoint;
 		case 0x80: emulated_syscall(); return 0x10000;
 		case 0x10000: return 0x4e734e73; /* rte; rte */
 		case 0x10004: return 0;
@@ -170,10 +173,12 @@ void disassemble_program()
 	char buff2[100];
 
 	pc = cpu_read_long_dasm(4);
+        printf("entry point is %0x\n", entrypoint);
+        printf("pc is %0x\n", pc);
 
-	while(pc <= 0x16e)
+        while(pc <= entrypoint + 0x16e)
 	{
-		instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68000);
+                instr_size = m68k_disassemble(buff, pc, M68K_CPU_TYPE_68020);
 		make_hex(buff2, pc, instr_size);
 		printf("%03x: %-20s: %s\n", pc, buff2, buff);
 		pc += instr_size;
@@ -181,8 +186,9 @@ void disassemble_program()
 	fflush(stdout);
 }
 
-void cpu_instr_callback()
+void cpu_instr_callback(int apc)
 {
+        (void)apc;
 	uint32_t pc = m68k_get_reg(NULL, M68K_REG_PC);
 	if (pc == 0xc)
 		exit_error("address exception");
@@ -219,6 +225,10 @@ void cpu_instr_callback()
 #endif
 }
 
+/**
+ * translate simulated linux syscall to native call on host
+ * see https://www.lurklurk.org/syscalls.html for m68k syscall numbers
+ **/
 static void emulated_syscall(void)
 {
 	int s = m68k_get_reg(NULL, M68K_REG_D0);
@@ -227,16 +237,57 @@ static void emulated_syscall(void)
 		case 1: /* exit */
 			exit(m68k_get_reg(NULL, M68K_REG_D1));
 			
-		case 4: /* write */
+                case 3: /* read */
 		{
 			uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
 			uint32_t ptr = m68k_get_reg(NULL, M68K_REG_D2);
-			uint32_t len = m68k_get_reg(NULL, M68K_REG_D3);
-			m68k_set_reg(M68K_REG_D0, write(fd, g_ram + transform_address(ptr), len));
+                        uint32_t count = m68k_get_reg(NULL, M68K_REG_D3);
+                        m68k_set_reg(M68K_REG_D0, read(fd, g_ram + transform_address(ptr), count));
 			break;
 		}
 
-		case 45: /* brk */
+                case 4: /* write */
+                {
+                        uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
+                        uint32_t ptr = m68k_get_reg(NULL, M68K_REG_D2);
+                        uint32_t len = m68k_get_reg(NULL, M68K_REG_D3);
+                        m68k_set_reg(M68K_REG_D0, write(fd, g_ram + transform_address(ptr), len));
+                        break;
+                }
+
+                case 5: /* open */
+                {
+                        uint32_t pathname = m68k_get_reg(NULL, M68K_REG_D1);
+                        uint32_t flags = m68k_get_reg(NULL, M68K_REG_D2);
+                        uint32_t mode = m68k_get_reg(NULL, M68K_REG_D3);
+                        m68k_set_reg(M68K_REG_D0, open(g_ram + transform_address(pathname), flags, mode));
+                        break;
+                }
+
+                case 6: /* close */
+                {
+                        uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
+                        m68k_set_reg(M68K_REG_D0, close(fd));
+                        break;
+                }
+
+                case 10: /* unlink */
+                {
+                        uint32_t pathname = m68k_get_reg(NULL, M68K_REG_D1);
+                        m68k_set_reg(M68K_REG_D0, unlink(g_ram + transform_address(pathname)));
+                        break;
+                }
+
+                case 19: /* lseek */
+                {
+                        uint32_t fd = m68k_get_reg(NULL, M68K_REG_D1);
+                        uint32_t offset = m68k_get_reg(NULL, M68K_REG_D2);
+                        uint32_t whence = m68k_get_reg(NULL, M68K_REG_D3);
+                        m68k_set_reg(M68K_REG_D0, lseek(fd, offset, whence));
+                        break;
+                }
+
+                case 45: /* brk */
 		{
 			uint32_t newpos = m68k_get_reg(NULL, M68K_REG_D1);
 			if (newpos == 0)
@@ -251,8 +302,8 @@ static void emulated_syscall(void)
 			break;
 		}
 
-		case 20: /* getpid */
-		case 48: /* signal */
+                case 20: /* getpid */
+                case 48: /* signal */
 		case 54: /* ioctl */
 		case 78: /* gettimeofday */
 			m68k_set_reg(M68K_REG_D0, 0);
@@ -280,9 +331,9 @@ static void load_program(FILE* fd)
 	if (fread(g_ram, 1, phentsize, fd) != phentsize)
 		exit_error("couldn't read program header");
 
-	uint32_t offset = READ_LONG(g_ram, 0x04);
-	uint32_t vaddr = READ_LONG(g_ram, 0x08);
-	uint32_t filesz = READ_LONG(g_ram, 0x10);
+        uint32_t offset = READ_LONG(g_ram, 0x04);
+        uint32_t vaddr = READ_LONG(g_ram, 0x08);
+        uint32_t filesz = READ_LONG(g_ram, 0x10);
 	uint32_t memsz = READ_LONG(g_ram, 0x14);
 	brkbase = brkpos = vaddr + memsz;
 
@@ -310,11 +361,15 @@ int main(int argc, char* argv[])
 
 	load_program(fhandle);
 
-//	disassemble_program();
+        //disassemble_program();
 
-	m68k_set_cpu_type(M68K_CPU_TYPE_68020);
-	m68k_init();
-	m68k_pulse_reset();
+        //printf("now at line %d\n", __LINE__);
+        m68k_set_cpu_type(M68K_CPU_TYPE_68040);
+        //printf("now at line %d\n", __LINE__);
+        m68k_init();
+        //printf("initialising core\n");
+        m68k_pulse_reset();
+        //printf("now at line %d\n", __LINE__);
 
 	/* On entry, the Linux stack looks like this.
 	 * 
@@ -337,11 +392,13 @@ int main(int argc, char* argv[])
 		unsigned long argv = sp;
 		cpu_write_long(sp -= 4, argv);
 		cpu_write_long(sp -= 4, 0);
-		m68k_set_reg(M68K_REG_SP, sp);
+                m68k_set_reg(M68K_REG_SP, sp);   /* init sp is also addr 0 */
 	}
 
-	for (;;)
-		m68k_execute(100000);
+        //printf("running program ");
+        for (;;) {
+             m68k_execute(100000);
+        }
 
 	return 0;
 }
diff --git a/plat/linux68k/emu/sim.h b/plat/linux68k/emu/sim.h
index 75db0cba1..f0f9b0b61 100755
--- a/plat/linux68k/emu/sim.h
+++ b/plat/linux68k/emu/sim.h
@@ -10,6 +10,6 @@ void cpu_write_long(unsigned int address, unsigned int value);
 void cpu_pulse_reset(void);
 void cpu_set_fc(unsigned int fc);
 int  cpu_irq_ack(int level);
-void cpu_instr_callback();
+void cpu_instr_callback(int pc);
 
 #endif /* SIM__HEADER */