Merge pull request #73 from kernigh/kernigh-pr

better code from PowerPC ncg and mcg
2018-03-13 13:57:28 +01:00 · 2018-03-13 13:57:28 +01:00 · aabf0bdd69
commit aabf0bdd69
parent 4cb4bdc85f 85fcbde22f
143 changed files with 4153 additions and 2001 deletions
--- a/mach/i80/libem/cii.s
+++ b/mach/i80/libem/cii.s
@ -65,19 +65,19 @@
 	jmp 3f		! done

 !if destination size < source size only:
-shrink:	mov l,c		! load source size in hl
+shrink:	mov l,b		! load destination size in hl
 	mvi h,0
 	dad sp
 	mov d,h
-	mov e,l		! de points just above source
-	mov l,b		! load destination size in hl
+	mov e,l		! de points just above lowest bytes of source
+	mov l,c		! load source size in hl
 	mvi h,0
 	dad sp		! hl points just above "destination"

 1:	dcx d		! move upwards
 	dcx h
-	mov a,m
-	stax d
+	ldax d
+	mov m,a
 	dcr b
 	jnz 1b
 	sphl
--- a/mach/i80/libem/rol4.s
+++ b/mach/i80/libem/rol4.s
@ -25,8 +25,8 @@
 	mov e,a

 	mov a,b
-	ral
-1:	mov a,l
+1:	ral
+	mov a,l
 	ral
 	mov l,a
 	mov a,h
--- a/mach/i80/libem/ror4.s
+++ b/mach/i80/libem/ror4.s
@ -25,8 +25,8 @@
 	mov e,a

 	mov a,l
-	rar
-1:	mov a,b
+1:	rar
+	mov a,b
 	rar
 	mov b,a
 	mov a,c
--- a/mach/i80/ncg/table
+++ b/mach/i80/ncg/table
@ -385,8 +385,9 @@ gen dad de

 pat loi $1>=512
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".loi"}
+/* 'uses dereg={const2,$1}' fails to kill de. */
+gen lxi de,{const2,$1}
+    Call {label,".loi"}

 pat los $1==2
 with dereg
@ -597,8 +598,8 @@ gen 1:

 pat sti
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".sti"}
+gen lxi de,{const2,$1}
+    Call {label,".sti"}

 pat sts $1==2
 with dereg
@ -702,23 +703,24 @@ gen Call {label,".mli4"}

 pat dvi $1==2
 kills ALL
-uses areg={const1,129}
-gen Call {label,".dvi2"}		yields de
+/* 'uses areg={const1,129}' fails to kill a. */
+gen mvi a,{const1,129}
+    Call {label,".dvi2"}		yields de

 pat dvi $1==4
 kills ALL
-uses areg={const1,129}
-gen Call {label,".dvi4"}
+gen mvi a,{const1,129}
+    Call {label,".dvi4"}

 pat rmi $1==2
 kills ALL
-uses areg={const1,128}
-gen Call {label,".dvi2"}		yields de
+gen mvi a,{const1,128}
+    Call {label,".dvi2"}		yields de

 pat rmi $1==4
 kills ALL
-uses areg={const1,128}
-gen Call {label,".dvi4"}
+gen mvi a,{const1,128}
+    Call {label,".dvi4"}

 pat ngi $1==2
 with hl_or_de
@ -738,7 +740,7 @@ pat loc sli ($1 == 8) && ($2 == 2)
 with hl_or_de
 gen move %1.2, %1.1
    mvi %1.2, {const1,0}        yields %1
-    
+
 pat sli $1==2
 kills ALL
 gen Call {label,".sli2"}		yields de
@ -749,13 +751,13 @@ gen Call {label,".sli4"}

 pat sri $1==2
 kills ALL
-uses areg={const1,1}
-gen Call {label,".sri2"}		yields de
+gen mvi a,{const1,1}
+    Call {label,".sri2"}		yields de

 pat sri $1==4
 kills ALL
-uses areg={const1,1}
-gen Call {label,".sri4"}
+gen mvi a,{const1,1}
+    Call {label,".sri4"}

 /********************************************/
 /* Group 4: Unsigned arithmetic		    */
@ -775,23 +777,23 @@ gen Call {label,".mli4"}

 pat dvu $1==2
 kills ALL
-uses areg={const1,1}
-gen Call {label,".dvi2"}		yields de
+gen mvi a,{const1,1}
+    Call {label,".dvi2"}		yields de

 pat dvu $1==4
 kills ALL
-uses areg={const1,1}
-gen Call {label,".dvi4"}
+gen mvi a,{const1,1}
+    Call {label,".dvi4"}

 pat rmu $1==2
 kills ALL
-uses areg={const1,0}
-gen Call {label,".dvi2"}		yields de
+gen mvi a,{const1,0}
+    Call {label,".dvi2"}		yields de

 pat rmu $1==4
 kills ALL
-uses areg={const1,0}
-gen Call {label,".dvi4"}
+gen mvi a,{const1,0}
+    Call {label,".dvi4"}

 pat slu						leaving sli $1

@ -799,16 +801,16 @@ pat loc sru ($1 == 8) && ($2 == 2)
 with hl_or_de
 gen move %1.1, %1.2
    mvi %1.1, {const1,0}        yields %1
-    
+
 pat sru $1==2
 kills ALL
-uses areg={const1,0}
-gen Call {label,".sri2"}		yields de
+gen mvi a,{const1,0}
+    Call {label,".sri2"}		yields de

 pat sru $1==4
 kills ALL
-uses areg={const1,0}
-gen Call {label,".sri4"}
+gen mvi a,{const1,0}
+    Call {label,".sri4"}


 /********************************************/
@ -1047,8 +1049,8 @@ with hlreg

 pat cii
 kills ALL
-uses areg={const1,1}
-gen Call {label,".cii"}
+gen mvi a,{const1,1}
+    Call {label,".cii"}

 pat loc loc ciu					leaving loc $1 loc $2 cuu
 pat loc loc cui					leaving loc $1 loc $2 cuu
@ -1081,8 +1083,8 @@ with hl_or_de

 pat cuu
 kills ALL
-uses areg={const1,0}
-gen Call {label,".cii"}
+gen mvi a,{const1,0}
+    Call {label,".cii"}

 pat cfi
 kills ALL
@ -1128,8 +1130,8 @@ gen mov a,%1.2

 pat and defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".and"}
+gen lxi de,{const2,$1}
+    Call {label,".and"}

 pat and !defined($1)
 with dereg
@ -1156,8 +1158,8 @@ gen mov a,%1.2

 pat ior defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".ior"}
+gen lxi de,{const2,$1}
+    Call {label,".ior"}

 pat ior !defined($1)
 with dereg
@ -1184,8 +1186,8 @@ gen mov a,%1.2

 pat xor defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".xor"}
+gen lxi de,{const2,$1}
+    Call {label,".xor"}

 pat xor !defined($1)
 with dereg
@ -1204,8 +1206,8 @@ gen mov a,%1.2

 pat com defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".com"}
+gen lxi de,{const2,$1}
+    Call {label,".com"}

 pat com !defined($1)
 with dereg
@ -1269,8 +1271,8 @@ gen Call {label,".inn2"}		yields de

 pat inn defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".inn"}			yields de
+gen lxi de,{const2,$1}
+    Call {label,".inn"}			yields de

 pat inn !defined($1)
 with dereg
@ -1284,8 +1286,8 @@ gen Call {label,".set2"}		yields de

 pat set defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".set"}
+gen lxi de,{const2,$1}
+    Call {label,".set"}

 pat set !defined($1)
 with dereg
@ -1402,8 +1404,8 @@ pat cmi $1==2					leaving sbi 2

 pat cmi $1==4
 kills ALL
-uses areg={const1,1}
-gen Call {label,".cmi4"}		yields de
+gen mvi a,{const1,1}
+    Call {label,".cmi4"}		yields de

 pat cmf $1==4
 kills ALL
@ -1412,14 +1414,14 @@ gen Call {label,".cmf4"}
 pat cmf $1==8
 kills ALL
 gen Call {label,".cmf8"}
- 
+
 pat cmu $1==2
 with hl_or_de hl_or_de
 uses areg
 gen mov a,%2.1
    cmp %1.1
    jz {label,2f}
-    jc {label,1f}  
+    jc {label,1f}
    0:
    lxi %2,{const2,1}
    jmp {label,3f}
@ -1436,15 +1438,15 @@ gen mov a,%2.1

 pat cmu $1==4
 kills ALL
-uses areg={const1,0}
-gen Call {label,".cmi4"}		yields de
+gen mvi a,{const1,0}
+    Call {label,".cmi4"}		yields de

 pat cms $1==2					leaving cmi 2

 pat cms defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".cms"}			yields de
+gen lxi de,{const2,$1}
+    Call {label,".cms"}			yields de

 pat cms !defined($1)
 with dereg
@ -1936,8 +1938,8 @@ gen dad sp

 pat blm
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".blm"}
+gen lxi de,{const2,$1}
+    Call {label,".blm"}

 pat bls
 with dereg
@ -1962,8 +1964,8 @@ with src1or2 src1or2			yields %2 %1 %2 %1

 pat dup
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".dup"}
+gen lxi de,{const2,$1}
+    Call {label,".dup"}

 pat dus $1==2
 with dereg
@ -1975,8 +1977,8 @@ with src1or2 src1or2			yields %1 %2

 pat exg defined($1)
 kills ALL
-uses dereg={const2,1}
-gen Call {label,".exg"}
+gen lxi de,{const2,$1}
+    Call {label,".exg"}

 pat fil
 uses hlreg={label,$1}
--- a/mach/i86/ncg/table
+++ b/mach/i86/ncg/table
@ -2292,7 +2292,7 @@ with CXREG REG REG
      rcl %3,{ANYCON,1}
      adc %2,{ANYCON,0}
      loop {label, 2b}
-      1:
+      1:				yields %3 %2

 pat loc ror $1==1 && $2==2
 with REG
@ -2311,7 +2311,7 @@ with CXREG REG REG
      rcl %3,{ANYCON,1}
      adc %2,{ANYCON,0}
      loop {label, 2b}
-      1:
+      1:				yields %3 %2

 /*******************************************************************
 *  Group 10 : Set Instructions                                    *
--- a/mach/powerpc/as/mach2.c
+++ b/mach/powerpc/as/mach2.c
@ -47,11 +47,15 @@
 %token <y_word> OP_FRT_FRB_C
 %token <y_word> OP_FRT_RA_D
 %token <y_word> OP_FRT_RA_RB
+%token <y_word> OP_L
 %token <y_word> OP_LEV
 %token <y_word> OP_LIA
 %token <y_word> OP_LIL
 %token <y_word> OP_LI32
+%token <y_word> OP_RA_RB
+%token <y_word> OP_RA_RB_TH
 %token <y_word> OP_RA_RS_C
+%token <y_word> OP_RA_RS_RA_C
 %token <y_word> OP_RA_RS_RB_C
 %token <y_word> OP_RA_RS_RB_MB5_ME5_C
 %token <y_word> OP_RA_RS_RB_MB6_C
@ -61,14 +65,14 @@
 %token <y_word> OP_RA_RS_SH6_MB6_C
 %token <y_word> OP_RA_RS_UI
 %token <y_word> OP_RA_RS_UI_CC
+%token <y_word> OP_RS
 %token <y_word> OP_RS_FXM
 %token <y_word> OP_RS_RA
 %token <y_word> OP_RS_RA_D
 %token <y_word> OP_RS_RA_DS
 %token <y_word> OP_RS_RA_NB
 %token <y_word> OP_RS_RA_RB
-%token <y_word> OP_RS_RA_RB_C
-%token <y_word> OP_RS_RA_RA_C
+%token <y_word> OP_RS_RA_RB_CC
 %token <y_word> OP_RS_RB
 %token <y_word> OP_RS_SPR
 %token <y_word> OP_RS_SR
@ -104,4 +108,5 @@

 %type <y_word> c
 %type <y_word> e16 negate16 u8 u7 u6 u5 u4 u2 u1
-%type <y_word> opt_bh cr_opt nb ds bda bdl lia lil spr_num
+%type <y_word> opt_bh cr_opt nb ds bda bdl lia lil
+%type <y_word> spr_num tbr_num opt_tbr
--- a/mach/powerpc/as/mach3.c
+++ b/mach/powerpc/as/mach3.c
@ -103,6 +103,10 @@
 0,     OP_HA,                0,                                        "ha16",
 0,     OP_LO,                 0,                                       "lo16",

+/* The next page numbers are from PowerPC User Instruction Set
+ * Architecture, Book I, Version 2.01.
+ */
+
 /* Branch processor instructions (page 20) */

 0,     OP_LIL,                18<<26 | 0<<1 | 0<<0,                    "b",
@ -128,7 +132,7 @@
 0,     OP_BT_BA_BB,           19<<26 | 417<<1,                         "crorc",
 0,     OP_BF_BFA,             19<<26 | 0<<1,                           "mcrf",

-/* extended mnemonics for bc, bcctr, bclr */
+/* extended mnemonics for bc, bcctr, bclr (page 144) */
 0,     OP_BH,       19<<26 | 20<<21 | 528<<1 | 0<<0,            "bctr",
 0,     OP_BH,       19<<26 | 20<<21 | 528<<1 | 1<<0,            "bctrl",
 0,     OP_BDL,      16<<26 | 16<<21 | 0<<1 | 0<<0,              "bdnz",
@ -186,7 +190,7 @@
 0,     OP_BI_BH,    19<<26 | 12<<21 | 16<<1 | 0<<0,             "btlr",
 0,     OP_BI_BH,    19<<26 | 12<<21 | 16<<1 | 1<<0,             "btlrl",

-/* extended m with condition in BI */
+/* extended m with condition in BI (page 146) */
 0,     OP_BICR_BDL,  16<<26 | 12<<21 | 2<<16 | 0<<1 | 0<<0,     "beq",
 0,     OP_BICR_BDA,  16<<26 | 12<<21 | 2<<16 | 1<<1 | 0<<0,     "beqa",
 0,     OP_BICR_BH,   19<<26 | 12<<21 | 2<<16 | 528<<1 | 0<<0,   "beqctr",
@ -284,7 +288,7 @@
 0,     OP_BICR_BH,   19<<26 | 12<<21 | 3<<16 | 16<<1 | 0<<0,    "bunlr",
 0,     OP_BICR_BH,   19<<26 | 12<<21 | 3<<16 | 16<<1 | 1<<0,    "bunlrl",

-/* extended m for cr logic */
+/* extended m for cr logic (page 147) */
 0,     OP_BT_BT_BT,  19<<26 | 289<<1,                           "crset",
 0,     OP_BT_BT_BT,  19<<26 | 193<<1,                           "crclr",
 0,     OP_BT_BA_BA,  19<<26 | 449<<1,                           "crmove",
@ -377,12 +381,12 @@
 0,     OP_RT_RA_C,            31<<26 | 0<<10 | 104<<1,                 "neg",
 0,     OP_RT_RA_C,            31<<26 | 1<<10 | 104<<1,                 "nego",

-/* extended m for addition */
+/* extended m for addition (pages 153, 154) */
 0,     OP_RT_RA_D,            14<<26,                       "la",
 0,     OP_RT_SI,              14<<26 | 0<<16,               "li",
 0,     OP_RT_SI,              15<<26 | 0<<16,               "lis",

-/* extended m for subtraction */
+/* extended m for subtraction (pages 147, 148) */
 0,     OP_RT_RB_RA_C,         31<<26 | 0<<10 | 40<<1,       "sub",
 0,     OP_RT_RB_RA_C,         31<<26 | 1<<10 | 40<<1,       "subo",
 0,     OP_RT_RB_RA_C,         31<<26 | 0<<10 | 8<<1,        "subc",
@ -418,7 +422,7 @@
 0,     OP_BF_L_RA_UI,         10<<26,                                  "cmpli",
 0,     OP_BF_L_RA_RB,         31<<26 | 32<<1,                          "cmpl",

-/* extended m for comparison */
+/* extended m for comparison (page 149) */
 0,     OP_BF_RA_SI,           11<<26 | 1<<21,               "cmpdi",
 0,     OP_BF_RA_RB,           31<<26 | 1<<21 | 0<<1,        "cmpd",
 0,     OP_BF_RA_UI,           10<<26 | 1<<21,               "cmpldi",
@ -434,7 +438,7 @@
 0,     OP_TO_RA_RB,           31<<26 | 68<<1,                          "td",
 0,     OP_TO_RA_RB,           31<<26 | 4<<1,                           "tw",

-/* extended m for traps */
+/* extended m for traps (page 150) */
 0,     OP_TOX_RA_RB,          31<<26 | 4<<21 | 68<<1,       "tdeq",
 0,     OP_TOX_RA_SI,          2<<26 | 4<<21,                "tdeqi",
 0,     OP_TOX_RA_RB,          31<<26 | 12<<21 | 68<<1,      "tdge",
@ -518,11 +522,10 @@
 0,     OP_RA_RS_C,            31<<26 | 58<<1,                          "cntlzd",
 0,     OP_RA_RS_C,            31<<26 | 26<<1,                          "cntlzw",

-/* extended m using logic */
-0,     OP_RS_RA_RA_C,         31<<26 | 444<<1,              "mr",
+/* extended m using logic (pages 153, 154) */
+0,     OP_RA_RS_RA_C,         31<<26 | 444<<1,              "mr",
 0,     OP,                    24<<26,                       "nop",
-0,     OP_RS_RA_RA_C,         31<<26 | 124<<1,              "not",
-0,     OP,                    26<<26,                       "xnop",
+0,     OP_RA_RS_RA_C,         31<<26 | 124<<1,              "not",

 /* page 69 */
 0,     OP_RA_RS_SH6_MB6_C,     30<<26 | 0<<2,               "rldicl",
@ -535,7 +538,7 @@
 0,     OP_RA_RS_SH6_MB6_C,     30<<26 | 3<<2,               "rldimi",
 0,     OP_RA_RS_SH5_MB5_ME5_C, 20<<26,                      "rlwimi",

-/* extended m for doubleword rotation */
+/* extended m for doubleword rotation (page 151) */
 0,     OP_clrlsldi,           30<<26 | 2<<2,                "clrlsldi",
 0,     OP_clrldi,             30<<26 | 0<<2,                "clrldi",
 0,     OP_clrrdi,             30<<26 | 1<<2,                "clrrdi",
@ -548,7 +551,7 @@
 0,     OP_sldi,               30<<26 | 1<<2,                "sldi",
 0,     OP_srdi,               30<<26 | 0<<2,                "srdi",

-/* extended m for word rotation */
+/* extended m for word rotation (page 152) */
 0,     OP_clrlslwi,           21<<26,                       "clrlslwi",
 0,     OP_clrlwi,             21<<26,                       "clrlwi",
 0,     OP_clrrwi,             21<<26,                       "clrrwi",
@ -573,21 +576,25 @@
 0,     OP_RA_RS_RB_C,         31<<26 | 792<<1,              "sraw",

 /* page 78 */
-0,     OP_RS_SPR,             31<<26 | 467<<1,                         "mtspr",
-0,     OP_RT_SPR,             31<<26 | 339<<1,                         "mfspr",
-0,     OP_RS_FXM,             31<<26 | 0<<21 | 144<<1,                 "mtcrf",
-0,     OP_RT,                 31<<26 | 0<<21 | 19<<1,                  "mfcr",
+0,     OP_RS_SPR,             31<<26 | 467<<1,              "mtspr",
+0,     OP_RT_SPR,             31<<26 | 339<<1,              "mfspr",
+0,     OP_RS_FXM,             31<<26 | 0<<20 | 144<<1,      "mtcrf",
+0,     OP_RT,                 31<<26 | 0<<20 | 19<<1,       "mfcr",

-/* extended m for special purpose registers */
+/* extended m for special purpose registers (page 153) */
 0,     OP_RT,       31<<26 | 9<<16 | 0<<11 | 339<<1,        "mfctr",
 0,     OP_RT,       31<<26 | 8<<16 | 0<<11 | 339<<1,        "mflr",
 0,     OP_RT,       31<<26 | 1<<16 | 0<<11 | 339<<1,        "mfxer",
-0,     OP_RT,       31<<26 | 9<<16 | 0<<11 | 467<<1,        "mtctr",
-0,     OP_RT,       31<<26 | 8<<16 | 0<<11 | 467<<1,        "mtlr",
-0,     OP_RT,       31<<26 | 1<<16 | 0<<11 | 467<<1,        "mtxer",
+0,     OP_RS,       31<<26 | 9<<16 | 0<<11 | 467<<1,        "mtctr",
+0,     OP_RS,       31<<26 | 8<<16 | 0<<11 | 467<<1,        "mtlr",
+0,     OP_RS,       31<<26 | 1<<16 | 0<<11 | 467<<1,        "mtxer",
+
+/* extended m for condition register (page 154) */
+0,     OP_RS,       31<<26 | 0<<20 | 255<<12 | 144<<1,      "mtcr",

 /* Floating point instructions (page 83) */

+/* page 98 */
 0,     OP_FRT_RA_D,           48<<26,                                  "lfs",
 0,     OP_FRT_RA_RB,          31<<26 | 535<<1,                         "lfsx",
 0,     OP_FRT_RA_D,           49<<26,                                  "lfsu",
@ -606,6 +613,7 @@
 0,     OP_FRS_RA_RB,          31<<26 | 759<<1,                         "stfdux",
 0,     OP_FRS_RA_RB,          31<<26 | 983<<1,                         "stfiwx",

+/* page 104 */
 0,     OP_FRT_FRB_C,          63<<26 | 72<<1,                          "fmr",
 0,     OP_FRT_FRB_C,          63<<26 | 40<<1,                          "fneg",
 0,     OP_FRT_FRB_C,          63<<26 | 264<<1,                         "fabs",
@ -629,6 +637,7 @@
 0,     OP_FRT_FRA_FRC_FRB_C,  63<<26 | 30<<1,                          "fnmsub",
 0,     OP_FRT_FRA_FRC_FRB_C,  59<<26 | 30<<1,                          "fnmsubs",

+/* page 109 */
 0,     OP_FRT_FRB_C,          63<<26 | 12<<1,                          "frsp",
 0,     OP_FRT_FRB_C,          63<<26 | 814<<1,                         "fctid",
 0,     OP_FRT_FRB_C,          63<<26 | 815<<1,                         "fctidz",
@ -652,4 +661,31 @@
 0,     OP_FRT_FRB_C,          63<<26 | 26<<1,                          "frsqrte",
 0,     OP_FRT_FRA_FRC_FRB_C,  63<<26 | 23<<1,                          "fsel",

-/* page 98 */
+/* Storage control instructions (Book II, page 15) */
+
+/* Book II, page 17 */
+0,     OP_RA_RB,              31<<26 | 982<<1,              "icbi",
+0,     OP_RA_RB_TH /* page 35 */,          31<<26 | 278<<1, "dcbt",
+0,     OP_RA_RB,              31<<26 | 246<<1,              "dcbtst",
+0,     OP_RA_RB,              31<<26 | 1014<<1,             "dcbz",
+0,     OP_RA_RB,              31<<26 | 54<<1,               "dcbst",
+0,     OP_RA_RB,              31<<26 | 86<<1,               "dcbf",
+0,     OP,                    19<<26 | 150<<1,              "isync",
+0,     OP_RT_RA_RB,           31<<26 | 20<<1,               "lwarx",
+0,     OP_RT_RA_RB,           31<<26 | 84<<1,               "ldarx",
+0,     OP_RS_RA_RB_CC,        31<<26 | 150<<1 | 1<<0,       "stwcx",  /* XO 150; record bit fixed at 1 */
+0,     OP_RS_RA_RB_CC,        31<<26 | 214<<1 | 1<<0,       "stdcx",  /* XO 214 (150 would assemble as stwcx.) */
+0,     OP_L,                  31<<26 | 598<<1,              "sync",
+0,     OP,                    31<<26 | 1<<21 | 598<<1,      "lwsync",
+0,     OP,                    31<<26 | 2<<21 | 598<<1,      "ptesync",
+0,     OP,                    31<<26 | 854<<1,              "eieio",
+
+/* Time base (Book II, page 30) */
+
+0,     OP_RT_TBR,   31<<26 | 371<<1,                        "mftb",
+0,     OP_RT,       31<<26 | 8<<11 | 13<<16 | 371<<1,       "mftbu",
+
+/* External control (Book II, page 33) */
+
+0,     OP_RT_RA_RB,           31<<26 | 310<<1,              "eciwx",
+0,     OP_RS_RA_RB,           31<<26 | 438<<1,              "ecowx",
--- a/mach/powerpc/as/mach4.c
+++ b/mach/powerpc/as/mach4.c
@ -42,7 +42,23 @@ operation
 	| OP_FRT_RA_D          FPR ',' e16 '(' GPR ')'    { emit_hl($1 | ($2<<21) | ($6<<16) | $4); }
 	| OP_FRT_RA_RB         FPR ',' GPR ',' GPR        { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); }
 	| OP_FRT_C             c FPR                      { emit4($1 | $2 | ($3<<21)); }
-	| OP_RA_RS_C           c GPR ',' GPR              { emit4($1 | $2 | ($5<<21) | ($3<<16)); }
+	| OP_L                              { emit4($1); }
+	| OP_L                 u2           { emit4($1 | ($2<<21)); }
+	| OP_LEV                            { emit4($1); }
+	| OP_LEV               u7           { emit4($1 | ($2<<5)); }
+	| OP_RA_RB             GPR ',' GPR
+	{ emit4($1 | ($2<<16) | ($4<<11)); }
+	| OP_RA_RB_TH          GPR ',' GPR opt_bh   /* dcbt RA, RB [, TH] */
+	{ emit4($1 | ($5<<10) | ($2<<16) | ($4<<11)); } /* opt_bh is u2<<11; <<10 more puts TH at bit 21, clear of RB at bit 11 */
+	/*
+	 * For instructions with "mnemonic RS, RA, ..."
+	 * OP_RA_RS_... swaps RS and RA to (RA<<21) || (RS<<16)
+	 * OP_RS_RA_... keeps RS and RA as (RS<<21) || (RA<<16)
+	 */
+	| OP_RA_RS_C           c GPR ',' GPR
+	{ emit4($1 | $2 | ($5<<21) | ($3<<16)); }
+	| OP_RA_RS_RA_C        c GPR ',' GPR
+	{ emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); }
 	| OP_RA_RS_RB_C        c GPR ',' GPR ',' GPR
 	{ emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); }
 	| OP_RA_RS_RB_MB5_ME5_C c GPR ',' GPR ',' GPR ',' u5 ',' u5
@ -75,20 +91,19 @@ operation
 	| OP_RT_RB_RA_C        c GPR ',' GPR ',' GPR      { emit4($1 | $2 | ($3<<21) | ($7<<16) | ($5<<11)); }
 	| OP_RT_SI             GPR ',' e16                { emit_hl($1 | ($2<<21) | $4); }
 	| OP_RT_SPR            GPR ',' spr_num            { emit4($1 | ($2<<21) | ($4<<11)); }
+	| OP_RT_TBR            GPR opt_tbr                { emit4($1 | ($2<<21) | ($3<<11)); }
+	| OP_RS                GPR                        { emit4($1 | ($2<<21)); }
 	| OP_RS_FXM            u7 ',' GPR                 { emit4($1 | ($4<<21) | ($2<<12)); }
 	| OP_RS_RA_D           GPR ',' e16 '(' GPR ')'    { emit_hl($1 | ($2<<21) | ($6<<16) | $4); }
 	| OP_RS_RA_DS          GPR ',' ds '(' GPR ')'     { emit_hl($1 | ($2<<21) | ($6<<16) | $4); }
 	| OP_RS_RA_NB          GPR ',' GPR ',' nb         { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); }
 	| OP_RS_RA_RB          GPR ',' GPR ',' GPR        { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); }
-	| OP_RS_RA_RB_C        c GPR ',' GPR ',' GPR      { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); }
-	| OP_RS_RA_RA_C        c GPR ',' GPR              { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); }
+	| OP_RS_RA_RB_CC       C GPR ',' GPR ',' GPR      { emit4($1 | ($3<<21) | ($5<<16) | ($7<<11)); }
 	| OP_RS_SPR            spr_num ',' GPR            { emit4($1 | ($4<<21) | ($2<<11)); }
 	| OP_TO_RA_RB          u5 ',' GPR ',' GPR         { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); }
 	| OP_TO_RA_SI          u5 ',' GPR ',' e16         { emit_hl($1 | ($2<<21) | ($4<<16) | $6); }
 	| OP_TOX_RA_RB         GPR ',' GPR                { emit4($1 | ($2<<16) | ($4<<11)); }
 	| OP_TOX_RA_SI         GPR ',' e16                { emit_hl($1 | ($2<<16) | $4); }
-	| OP_LEV                                          { emit4($1); }
-	| OP_LEV               u7                         { emit4($1 | ($2<<5)); }
 	| OP_LIA               lia                        { emit4($1 | $2); }
 	| OP_LIL               lil                        { emit4($1 | $2); }
 	| OP_LI32              li32                       /* emitted in subrule */
@ -298,7 +313,7 @@ u2
 	}
 	;

-/* Optional comma, branch hint. */
+/* Optional comma, branch hint (or touch hint). */
 opt_bh
 	: /* nothing */         { $$ = 0; }
 	| ',' u2                { $$ = ($2<<11); }
@ -409,13 +424,28 @@ lia
 	}
 	;

+/*
+ * Instructions "mfspr", "mtspr", and "mftb" encode the 10-bit special
+ * purpose register (spr) or time base register (tbr) by swapping the
+ * low 5 bits with the high 5 bits.  The value from an SPR token has
+ * already been swapped.
+ */
+
 spr_num
-	: SPR { $$ = $1; }
-	| absexp
+	: SPR     { $$ = $1; }
+	| tbr_num { $$ = $1; }
+	;
+
+opt_tbr
+	: /* nothing */         { $$ = 8 | (12<<5); }
+	| ',' tbr_num           { $$ = $2; }
+	;
+
+tbr_num
+	: absexp
 	{
 		if (($1 < 0) || ($1 > 0x3ff))
-			serror("spr number out of range");
-		/* mfspr, mtspr swap the low and high 5 bits */
+			serror("10-bit unsigned value out of range");
 		$$ = ($1 >> 5) | (($1 & 0x1f) << 5);
 	}
 	;
--- a/mach/powerpc/libem/aar4.s
+++ b/mach/powerpc/libem/aar4.s
@ -8,21 +8,17 @@

 .define .aar4
 .aar4:
-	lis r0, hi16[.trap_earray]
-	ori r0, r0, lo16[.trap_earray]
-	mtspr ctr, r0            ! load CTR with trap address
-
 	lwz r4, 0(sp)            ! r4 = address of descriptor
 	lwz r5, 4(sp)            ! r5 = index
 	lwz r6, 8(sp)            ! r6 = address of array

 	lwz r0, 0(r4)
 	subf. r5, r0, r5         ! subtract lower bound from index
-	bltctr                   ! check lower bound
+	blt .trap_earray         ! check lower bound

 	lwz r0, 4(r4)
 	cmplw r5, r0
-	bgtctr                   ! check upper bound
+	bgt .trap_earray         ! check upper bound

 	lwz r3, 8(r4)            ! r3 = size of element
 	mullw r5, r5, r3         ! scale index by size
@ -30,3 +26,7 @@
 	stw r6, 8(sp)            ! push address of element
 	addi sp, sp, 8
 	blr
+
+.trap_earray:
+	li r3, 0                 ! EARRAY = 0 in h/em_abs.h
+	b .trp
--- a/mach/powerpc/libem/bls4.s
+++ b/mach/powerpc/libem/bls4.s
@ -0,0 +1,19 @@
+.sect .text
+
+! Does a block move of words between non-overlapping buffers.
+!  Stack: ( src dst len -- )
+
+.define .bls4
+.bls4:
+	lwz	r3, 0(sp)	! r3 = len in bytes
+	lwz	r4, 4(sp)	! r4 = dst
+	lwz	r5, 8(sp)	! r5 = src
+	addi	sp, sp, 12	! pop all three arguments
+	srwi	r3, r3, 2	! bytes -> words
+	mtspr	ctr, r3		! ctr = number of words to copy
+	addi	r5, r5, -4	! pre-bias both pointers, because the
+	addi	r4, r4, -4	!   loop's lwzu/stwu add 4 before each access
+1:	lwzu	r3, 4(r5)	! r3 = next source word (r3 is free: the count lives in ctr)
+	stwu	r3, 4(r4)	! store it at the next destination word
+	bdnz	1b		! NOTE(review): ctr = 0 (len < 4) would wrap instead of skipping the loop - confirm callers never pass it
+	blr
--- a/mach/powerpc/libem/build.lua
+++ b/mach/powerpc/libem/build.lua
@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do
 	acklibrary {
 		name = "lib_"..plat,
 		srcs = {
-			"./*.s", -- rm ret.s
+			"./*.s", -- dus4.s
 		},
 		vars = { plat = plat },
 		deps = {
@ -15,4 +15,3 @@ for _, plat in ipairs(vars.plats) do
 		}
 	}
 end
-
--- a/mach/powerpc/libem/cfu8.s
+++ b/mach/powerpc/libem/cfu8.s
@ -1,3 +1,5 @@
+.sect .text; .sect .rom; .sect .data; .sect .bss
+
 .sect .text

 ! Converts a 64-bit double into a 32-bit unsigned integer.
@ -6,32 +8,42 @@

 .define .cfu8
 .cfu8:
-	lis r3, ha16[.fd_00000000]
-	lfd f0, lo16[.fd_00000000](r3) ! f0 = 0.0
-
-	lfd f1, 0(sp)            ! value to be converted
-
-	lis r3, ha16[.fd_FFFFFFFF]
-	lfd f3, lo16[.fd_FFFFFFFF](r3) ! f3 = 0xFFFFFFFF
-
-	lis r3, ha16[.fd_80000000]
-	lfd f4, lo16[.fd_80000000](r3) ! f4 = 0x80000000
-
-	fsel f2, f1, f1, f0
-	fsub f5, f3, f1
-	fsel f2, f5, f2, f3
-	fsub f5, f2, f4
-	fcmpu cr0, f2, f4
-	fsel f2, f5, f5, f2
-	fctiwz f2, f2
-	
-	stfd f2, 0(sp)
-	addi sp, sp, 4
-
-	bltlr
-
-	lwz r3, 0(sp)
-	xoris r3, r3, 0x8000
-	stw r3, 0(sp)
-
+	lfd f1, 0(sp)                   ! f1 = value to convert
+	lis r3, ha16[.fs_80000000]
+	lfs f2, lo16[.fs_80000000](r3)  ! f2 = 2**31 (lfs widens the single to double)
+	fsub   f1, f1, f2     ! bias down: inputs in [0, 2**32) land in fctiwz's signed 32-bit range
+	fctiwz f1, f1         ! convert value - 2**31
+	stfd   f1, 0(sp)      ! write all 8 bytes of f1 over the argument
+	lwz   r3, 4(sp)       ! the integer result is read from the second word
+	xoris r3, r3, 0x8000  ! add 2**31
+	stw   r3, 4(sp)
+	addi  sp, sp, 4       ! drop one word so the result ends up on top of the stack
+	blr
+
+.sect .rom
+.fs_80000000:
+	!float 2.147483648e+9 sz 4
+	.data1 0117,00,00,00
+
+! Freescale and IBM provide an example using fsel to select value or
+! value - 2**31 for fctiwz.  The following code adapts Freescale's
+! _Programming Environments Manual for 32-Bit Implementations of the
+! PowerPC Architecture_, section C.3.2, pdf page 557.
+!
+! Given f2 = value clamped from 0 to 2**32 - 1, f4 = 2**31, then
+!	fsub	f5, f2, f4
+!	fcmpu	cr2, f2, f4
+!	fsel	f2, f5, f5, f2
+!	fctiwz	f2, f2
+!	stfdu	f2, 0(sp)
+!	lwz	r3, 4(sp)
+!	blt	cr2, 1f
+!	xoris	r3, r3, 0x8000
+! 1: yields r3 = the converted value.
+!
+! Debian's clang 3.5.0-10 and gcc 4.9.2-10 don't clamp the value
+! before conversion.  They avoid fsel and use the conditional branch
+! to pick between 2 fctiwz instructions.
+!
+! PowerPC 601 lacks fsel (but kernel might trap and emulate fsel).
+! PowerPC 603, 604, G3, G4, G5 have fsel.
--- a/mach/powerpc/libem/csa.s
+++ b/mach/powerpc/libem/csa.s
@ -13,22 +13,21 @@
 	lwz r4, 4(sp)
 	addi sp, sp, 8

-	lwz r5, 0(r3)            ! load default
-	mtspr ctr, r5
-	
-	lwz r5, 4(r3)            ! fetch lower bound
-	subf. r4, r5, r4         ! adjust value
-	bltctr                   ! jump to default if out of range
+	lwz r5, 0(r3)            ! r5 = default target

-	lwz r5, 8(r3)            ! fetch range
-	cmplw r4, r5
-	bgtctr                   ! jump to default if out of range
+	lwz r6, 4(r3)            ! fetch lower bound
+	subf. r4, r6, r4         ! adjust value
+	blt 1f                   ! jump to default if out of range
+
+	lwz r6, 8(r3)            ! fetch range
+	cmplw r4, r6
+	bgt 1f                   ! jump to default if out of range

 	addi r3, r3, 12          ! skip header
 	slwi r4, r4, 2           ! scale value (<<2)
-	lwzx r5, r3, r4          ! load target
-	mtspr ctr, r5
+	lwzx r5, r3, r4          ! r5 = new target

-	or. r5, r5, r5           ! test it
+1:	mtspr ctr, r5
+	mr. r5, r5               ! test it
 	bnectr                   ! jump to target if non-zero
 	b .trap_ecase            ! otherwise trap
--- a/mach/powerpc/libem/csb.s
+++ b/mach/powerpc/libem/csb.s
@ -13,23 +13,20 @@
 	lwz r4, 4(sp)
 	addi sp, sp, 8

-	lwz r5, 0(r3)            ! load default
-	mtspr ctr, r5
+	lwz r5, 0(r3)            ! r5 = default target

 	lwz r6, 4(r3)            ! fetch count
-
-1:
-	or. r6, r6, r6           ! test count
-	beqctr                   ! exit if zero
-	addi r6, r6, -1          ! otherwise decrement
-
-	lwzu r7, 8(r3)           ! fetch target index, increment pointer
+	mr. r6, r6               ! skip loop if count is zero
+	beq 3f                   !   (needed by Modula-2 "CASE i OF END")
+	mtspr ctr, r6
+1:	lwzu r7, 8(r3)           ! fetch target index, increment pointer
 	cmpw r4, r7              ! compare with value
-	bne 1b                   ! if not equal, go again
+	beq 2f
+	bdnz 1b                  ! if not equal, go again
+	b 3f

-	lwz r7, 4(r3)            ! fetch target address
-	mtspr ctr, r7
-
-	or. r7, r7, r7           ! test it
+2:	lwz r5, 4(r3)            ! r5 = new target
+3:	mtspr ctr, r5
+	mr. r5, r5               ! test target
 	bnectr                   ! jump to target if non-zero
 	b .trap_ecase            ! otherwise trap
--- a/mach/powerpc/libem/dus4.s
+++ b/mach/powerpc/libem/dus4.s
@ -0,0 +1,16 @@
+.sect .text
+
+! Duplicates some words on top of stack.
+!  Stack: ( a size -- a a )
+
+.define .dus4
+.dus4:
+	lwz	r3, 0(sp)	! r3 = size in bytes
+	addi	sp, sp, 4	! pop size; sp now points at value a
+	srwi	r4, r3, 2	! bytes -> words
+	mtspr	ctr, r4		! ctr = number of words to copy
+	add	r5, sp, r3	! r5 = just past the end of value a
+1:	lwzu	r4, -4(r5)	! read a's words from the highest address down
+	stwu	r4, -4(sp)	! push each one, so the copy keeps a's word order
+	bdnz	1b		! NOTE(review): size < 4 gives ctr = 0, which wraps - confirm callers never pass it
+	blr
--- a/mach/powerpc/libem/exg.s
+++ b/mach/powerpc/libem/exg.s
@ -0,0 +1,22 @@
+.sect .text
+
+! Exchange top two values on stack.
+!   Stack: ( a b size -- b a )
+
+.define .exg
+.exg:
+	lwz	r3, 0(sp)		! r3 = size of each value in bytes
+	srwi	r7, r3, 2		! r7 is scratch here; the loop reuses it
+	mtspr	ctr, r7			! ctr = size / 4
+	mr	r4, sp			! r4 = pointer before value b (the size word sits at 0(sp))
+	add	r5, r4, r3		! r5 = pointer before value a
+
+	! Loop to swap each pair of words.
+1:	lwzu	r6, 4(r4)		! r6 = next word of b, r4 advanced to it
+	lwzu	r7, 4(r5)		! r7 = next word of a, r5 advanced to it
+	stw	r6, 0(r5)		! b's word goes into a's slot
+	stw	r7, 0(r4)		! a's word goes into b's slot
+	bdnz	1b			! loop ctr times
+
+	addi	sp, sp, 4		! drop size from stack
+	blr
--- a/mach/powerpc/libem/fd_80000000.s
+++ b/mach/powerpc/libem/fd_80000000.s
@ -1,10 +0,0 @@
-.sect .text; .sect .rom; .sect .data; .sect .bss
-
-.sect .rom
-
-! Contains a handy double-precision 0x80000000.
-
-.define .fd_80000000
-.fd_80000000:
-	!float 2.147483648e+9 sz 8
-	.data1 0101,0340,00,00,00,00,00,00
--- a/mach/powerpc/libem/fd_FFFFFFFF.s
+++ b/mach/powerpc/libem/fd_FFFFFFFF.s
@ -1,10 +0,0 @@
-.sect .text; .sect .rom; .sect .data; .sect .bss
-
-.sect .rom
-
-! Contains a handy double-precision 0xFFFFFFFF.
-
-.define .fd_FFFFFFFF
-.fd_FFFFFFFF:
-	!float 4.294967295e+9 sz 8
-	.data1 0101,0357,0377,0377,0377,0340,00,00
--- a/mach/powerpc/libem/fef4.s
+++ b/mach/powerpc/libem/fef4.s
@ -0,0 +1,48 @@
+.sect .text
+
+! Split a single-precision float into fraction and exponent, like
+! frexpf(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp
+!
+! Stack: ( single -- fraction exponent )
+
+.define .fef4
+.fef4:
+	lwz r3, 0(sp)			! r3 = word of float bits

+	! IEEE single = sign * 1.fraction * 2**(exponent - 127)
+	!   sign  exponent  fraction
+	!   0     1..8      9..31
+	!
+	! IEEE exponent = 126 in [0.5, 1) or (-1, -0.5].

+	extrwi. r6, r3, 8, 1		! r6 = IEEE exponent
+	beq 3f				! jump if zero or denormalized
+	cmpwi r6, 255			! 255 = every exponent bit set
+	addi r5, r6, -126		! r5 = our exponent (addi leaves cr0 for the beq below)
+	beq 2f				! jump if infinity or NaN
+	! fall through if normalized

+	! Put fraction in [0.5, 1) or (-1, -0.5].
+1:	li r6, 126
+	insrwi r3, r6, 8, 1		! IEEE exponent = 126
+	! fall through

+2:	stw r3, 0(sp)			! push fraction
+	stwu r5, -4(sp)			! push exponent
+	blr

+	! Got denormalized number or zero, probably zero.
+	! If zero, then exponent must also be zero.
+3:	extrwi. r6, r3, 23, 9		! r6 = fraction
+	bne 4f				! jump if not zero
+	li r5, 0			! exponent = 0
+	b 2b				! store r3 unchanged, so the sign of zero is kept

+	! Got denormalized number = 0.fraction * 2**-126
+4:	cntlzw r5, r6			! r5 = leading zeros, at least 9 for a 23-bit fraction
+	addi r5, r5, -8			! r5 = shift that moves the top 1 bit up to bit 8
+	slw r6, r6, r5			! shift left to make 1.fraction
+	insrwi r3, r6, 23, 9		! set new fraction (the leading 1 at bit 8 is not copied)
+	li r6, -126 + 1			! r6 = -125
+	subf r5, r5, r6			! r5 = our exponent
+	b 1b				! finish like a normalized number: force IEEE exponent to 126
--- a/mach/powerpc/libem/fef8.s
+++ b/mach/powerpc/libem/fef8.s
@ -3,7 +3,7 @@
 .sect .text

 ! Split a double-precision float into fraction and exponent, like
-! frexp(3) in C.
+! frexp(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp
 !
 ! Stack: ( double -- fraction exponent )

@ -12,42 +12,41 @@
 	lwz r3, 0(sp)			! r3 = high word (bits 0..31)
 	lwz r4, 4(sp)			! r4 = low word (bits 32..63)

-	! IEEE double-precision format:
+	! IEEE double = sign * 1.fraction * 2**(exponent - 1023)
 	!   sign  exponent  fraction
 	!   0     1..11     12..63
 	!
-	! To get fraction in [0.5, 1) or (-1, -0.5], we subtract 1022
-	! from the IEEE exponent.
+	! IEEE exponent = 1022 in [0.5, 1) or (-1, -0.5].

 	extrwi. r6, r3, 11, 1		! r6 = IEEE exponent
-	addi r5, r6, -1022		! r5 = our exponent
-	beq 2f				! jump if zero or denormalized
+	beq 3f				! jump if zero or denormalized
 	cmpwi r6, 2047
-	beq 1f				! jump if infinity or NaN
+	addi r5, r6, -1022		! r5 = our exponent
+	beq 2f				! jump if infinity or NaN
 	! fall through if normalized

-	! Put fraction in [0.5, 1) or (-1, -0.5] by setting its
-	! IEEE exponent to 1022.
-	rlwinm r3, r3, 0, 12, 0		! clear old exponent
-	oris r3, r3, 1022 << 4		! set new exponent
+	! Put fraction in [0.5, 1) or (-1, -0.5].
+1:	li r6, 1022
+	insrwi r3, r6, 11, 1		! IEEE exponent = 1022
 	! fall through

-1:	stw r3, 0(sp)
+2:	stw r3, 0(sp)
 	stw r4, 4(sp)			! push fraction
 	stwu r5, -4(sp)			! push exponent
 	blr

-2:	! Got denormalized number or zero, probably zero.
-	extrwi r6, r3, 22, 12
+	! Got denormalized number or zero, probably zero.
+	! If zero, then exponent must also be zero.
+3:	extrwi r6, r3, 20, 12
 	or. r6, r6, r4			! r6 = high|low fraction
-	bne 3f				! jump if not zero
+	bne 4f				! jump if not zero
 	li r5, 0			! exponent = 0
-	b 1b
+	b 2b

-3:	! Got denormalized number, not zero.
-	lfd f0, 0(sp)
-	lis r6, ha16[_2_64]
-	lfd f1, lo16[_2_64](r6)
+	! Got denormalized number = 0.fraction * 2**-1022
+4:	lfd f0, 0(sp)
+	lis r6, ha16[.fs_2_64]
+	lfs f1, lo16[.fs_2_64](r6)
 	fmul f0, f0, f1			! multiply it by 2**64
 	stfd f0, 0(sp)
 	lwz r3, 0(sp)
@ -57,7 +56,6 @@
 	b 1b

 .sect .rom
-_2_64:
-	! (double) 2**64
-	.data4 0x43f00000
-	.data4 0x00000000
+.fs_2_64:
+	!float 1.84467440737095516e+19 sz 4
+	.data1 0137,0200,00,00
--- a/mach/powerpc/libem/fif4.s
+++ b/mach/powerpc/libem/fif4.s
@ -0,0 +1,64 @@
+.sect .text
+
+! Multiplies two single-precision floats, then splits the product into
+! fraction and integer, both as floats, like modff(3) in C,
+! http://en.cppreference.com/w/c/numeric/math/modf
+!
+! Stack: ( a b -- fraction integer )
+!
+! The results overwrite the two arguments in place; sp does not move.
+! Uses r3, r5, r6, f1, f2 and cr0.
+
+.define .fif4
+.fif4:
+	lfs f1, 4(sp)
+	lfs f2, 0(sp)
+	fmuls f1, f1, f2		! f1 = a * b
+	stfs f1, 0(sp)			! store product to read its bits
+	lwz r3, 0(sp)			! r3 = word of float bits
+
+	! IEEE single = sign * 1.fraction * 2**(exponent - 127)
+	!   sign  exponent  fraction
+	!   0     1..8      9..31
+	!
+	! Subtract 127 from the IEEE exponent.  If the result is from
+	! 0 to 23, then the IEEE fraction has that many integer bits.
+
+	extrwi r5, r3, 8, 1		! r5 = IEEE exponent
+	addic. r5, r5, -127		! r5 = nr of integer bits
+	blt 3f				! branch if no integer
+	cmpwi r5, 24
+	bge 4f				! branch if no fraction
+	! fall through if integer with fraction
+
+	! f1 has r5 = 0 to 23 integer bits in the IEEE fraction.
+	! There are 23 - r5 fraction bits.
+	li r6, 23
+	subf r6, r5, r6			! r6 = 23 - r5 = nr of fraction bits
+	srw r3, r3, r6			! shift the fraction bits out ...
+	slw r3, r3, r6			! clear fraction in word
+	! fall through
+
+1:	stw r3, 0(sp)
+	lfs f2, 0(sp)			! f2 = integer part
+	fsubs f1, f1, f2		! fraction = value - integer
+2:	stfs f1, 4(sp)			! push fraction
+	stfs f2, 0(sp)			! push integer
+	blr
+
+	! f1 is a fraction without integer (or zero).
+	! Then integer is zero with same sign.
+3:	extlwi r3, r3, 1, 0		! extract sign bit
+	stfs f1, 4(sp)			! push fraction
+	stw r3, 0(sp)			! push integer = zero with sign
+	blr
+
+	! f1 is an integer without fraction (or infinity or NaN).
+	! Unless NaN, then fraction is zero with same sign.
+4:	fcmpu cr0, f1, f1		! unordered if f1 is NaN
+	bun cr0, 5f
+	extlwi r3, r3, 1, 0		! extract sign bit
+	stw r3, 4(sp)			! push fraction = zero with sign
+	stfs f1, 0(sp)			! push integer
+	blr
+
+	! f1 is NaN, so both fraction and integer are NaN.
+5:	fmr f2, f1
+	b 2b
--- a/mach/powerpc/libem/fif8.s
+++ b/mach/powerpc/libem/fif8.s
@ -1,7 +1,8 @@
 .sect .text

 ! Multiplies two double-precision floats, then splits the product into
-! fraction and integer, like modf(3) in C.  On entry:
+! fraction and integer, both as floats, like modf(3) in C,
+! http://en.cppreference.com/w/c/numeric/math/modf
 !
 ! Stack: ( a b -- fraction integer )

@ -14,20 +15,18 @@
 	lwz r3, 0(sp)			! r3 = high word
 	lwz r4, 4(sp)			! r4 = low word

-	! IEEE double-precision format:
+	! IEEE double = sign * 1.fraction * 2**(exponent - 1023)
 	!   sign  exponent  fraction
 	!   0     1..11     12..63
 	!
 	! Subtract 1023 from the IEEE exponent.  If the result is from
 	! 0 to 51, then the IEEE fraction has that many integer bits.
-	! (IEEE has an implicit 1 before its fraction.  If the IEEE
-	! fraction has 0 integer bits, we still have an integer.)

 	extrwi r5, r3, 11, 1		! r5 = IEEE exponent
 	addic. r5, r5, -1023		! r5 = nr of integer bits
-	blt 4f				! branch if no integer
+	blt 3f				! branch if no integer
 	cmpwi r5, 52
-	bge 5f				! branch if no fraction
+	bge 4f				! branch if no fraction
 	cmpwi r5, 21
 	bge 6f				! branch if large integer
 	! fall through if small integer
@ -44,22 +43,38 @@
 1:	stw r3, 0(sp)
 	stw r4, 4(sp)
 	lfd f2, 0(sp)			! integer = high word, low word
-2:	fsub f1, f1, f2			! fraction = value - integer
-3:	stfd f1, 8(sp)			! push fraction
+	fsub f1, f1, f2			! fraction = value - integer
+2:	stfd f1, 8(sp)			! push fraction
 	stfd f2, 0(sp)			! push integer
 	blr

-4:	! f1 is a fraction without integer.
-	fsub f2, f1, f1			! integer = zero
-	b 3b
+	! f1 is a fraction without integer (or zero).
+	! Then integer is zero with same sign.
+3:	extlwi r3, r3, 1, 0		! extract sign bit
+	li r4, 0
+	stfd f1, 8(sp)			! push fraction
+	stw r4, 4(sp)
+	stw r3, 0(sp)			! push integer = zero with sign
+	blr

-5:	! f1 is an integer without fraction (or infinity or NaN).
-	fmr f2, f1			! integer = f1
+	! f1 is an integer without fraction (or infinity or NaN).
+	! Unless NaN, then fraction is zero with same sign.
+4:	fcmpu cr0, f1, f1		! unordered if f1 is NaN
+	bun cr0, 5f
+	extlwi r3, r3, 1, 0		! extract sign bit
+	li r4, 0
+	stw r4, 12(sp)
+	stw r3, 8(sp)			! push fraction = zero with sign
+	stfd f1, 0(sp)			! push integer
+	blr
+
+	! f1 is NaN, so both fraction and integer are NaN.
+5:	fmr f2, f1
 	b 2b

-6:	! f1 has r5 = 21 to 51 to integer bits.
+	! f1 has r5 = 21 to 51 integer bits.
 	! Low word has 52 - r5 fraction bits.
-	li r6, 52
+6:	li r6, 52
 	subf r6, r5, r6
 	srw r4, r4, r6
 	slw r4, r4, r6			! clear fraction in low word
--- a/mach/powerpc/libem/inn.s
+++ b/mach/powerpc/libem/inn.s
@ -5,6 +5,9 @@
 /* Tests a bit in a bitset on the stack.
 *
 * Stack: ( bitset bitnum setsize -- bool )
+ *
+ * Some back ends push false if bitnum is too large.  We don't because
+ * the compilers tend to pass a small enough bitnum.
 */

 .define .inn
--- a/mach/powerpc/libem/rck.s
+++ b/mach/powerpc/libem/rck.s
@ -2,6 +2,9 @@

 ! Bounds check. Traps if the value is out of range.
 !  Stack: ( value descriptor -- value )
+!
+! This ".rck" only works with 4-byte integers.  The name is ".rck" and
+! not ".rck4" because many back ends only do rck with the word size.

 .define .rck
 .rck:
@ -18,3 +21,7 @@
    bgt .trap_erange

    blr
+
+.trap_erange:
+    li r3, 1       ! ERANGE = 1 in h/em_abs.h
+    b .trp
--- a/mach/powerpc/libem/set.s
+++ b/mach/powerpc/libem/set.s
@ -2,6 +2,9 @@

 ! Create singleton set.
 !  Stack: ( bitnumber size -- set )
+!
+! Some back ends trap ESET if bitnumber is out of range.  We don't
+! because the compilers tend to pass a valid bitnumber.

 .define .set
 .set:
--- a/mach/powerpc/libem/trp.s
+++ b/mach/powerpc/libem/trp.s
@ -0,0 +1,56 @@
+.sect .text
+
+! Raises the ECASE trap: loads its number, then continues into .trp.
+
+.define .trap_ecase
+.trap_ecase:
+	li	r3, 20			! ECASE = 20 in h/em_abs.h
+	! FALLTHROUGH to .trp
+
+! Raises an EM trap.
+! Expects r3 = trap number.
+!
+! Returns at once if the trap number is at most 15 and its bit is set
+! in .ignmask.  Else, if .trppc is not zero, clears .trppc and calls
+! the routine it pointed to with the trap number on the stack, then
+! returns if that routine returns.  Else writes "TRAP!" and a newline
+! to file descriptor 2 and calls _exit(1).
+
+.define .trp
+.trp:
+	cmplwi	r3, 15			! traps > 15 can't be ignored
+	bgt	1f
+
+	lis	r4, ha16[.ignmask]
+	lwz	r4, lo16[.ignmask](r4)	! load ignore mask
+	srw	r4, r4, r3		! move this trap's bit to the lowest bit
+	andi.	r4, r4, 1		! test that bit
+	bnelr				! return if ignoring trap
+
+1:	lis	r4, ha16[.trppc]
+	lwz	r5, lo16[.trppc](r4)	! r5 = user trap routine
+	mr.	r5, r5
+	beq	2f			! if no user trap routine, bail out
+
+	mtspr	ctr, r5			! ctr = trap routine
+	mfspr	r6, lr			! r6 = our return address
+	li	r0, 0
+	stwu	r3, -8(sp)		! push trap number
+	stw	r0, lo16[.trppc](r4)	! reset trap routine
+	stw	r6, 4(sp)		! save old lr
+	bctrl				! call trap routine
+
+	lwz	r0, 4(sp)
+	mtspr	lr, r0			! restore old lr
+	addi	sp, sp, 8		! retract over stack usage
+	blr
+
+2:	! No trap handler.  Write error message, exit.
+	li	r3, 2
+	stwu	r3, -12(sp)		! push fd = 2
+	lis	r4, ha16[message]
+	addi	r4, r4, lo16[message]
+	li	r5, 6			! length of message
+	stw	r4, 4(sp)
+	stw	r5, 8(sp)
+	bl	_write			! write(2, message, 6)
+
+	li	r3, 1
+	stw	r3, 0(sp)		! reuse the stack slot for the status
+	bl	__exit			! _exit(1)
+
+.sect .rom
+message:
+	.ascii "TRAP!\n"
--- a/mach/powerpc/mcg/table
+++ b/mach/powerpc/mcg/table
@ -237,10 +237,13 @@ PATTERNS
    SETSP.I(in:(int)reg)
        emit "mr sp, %in"
        cost 4;
-    
+
    out:(int)reg = ANY.I
        cost 1;

+    out:(long)reg = ANY.L
+        cost 1;
+
    out:(int)reg = COPYF.I(in:(float)reg)
        emit "stfsu %in, -4(sp)"
        emit "lwz %out, 0(sp)"
@ -306,10 +309,21 @@ PATTERNS
 		emit "lwz %out, %addr"
 		cost 4;

+#if 0
+    /* FIXME: Doesn't work because %out.0 and %addr might share a
+     * register, so it corrupts %addr before it loads %out.1. */
    out:(long)reg = LOAD.L(addr:address)
        emit "lwz %out.0, 4+%addr"
        emit "lwz %out.1, 0+%addr"
        cost 8;
+#else
+    /* Works, but costs an extra instruction. */
+    out:(long)reg = LOAD.L(addr:address)
+        emit "la %out.1, %addr"
+        emit "lwz %out.0, 4(%out.1)"
+        emit "lwz %out.1, 0(%out.1)"
+        cost 12;
+#endif

 	out:(int)ushort0 = LOADH.I(addr:address)
 		emit "lhz %out, %addr"
@ -566,6 +580,13 @@ PATTERNS
        emit "! COMPARESI.I(cr, 0)"
        cost 4;

+    cr:(cr)cr = COMPAREUL.I(left:(long)reg, right:(long)reg)
+        emit "cmpl %cr, 0, %left.1, %right.1"
+        emit "bne 1f"
+        emit "cmpl %cr, 0, %left.0, %right.0"
+        emit "1:"
+        cost 12;
+


 /* Booleans */
--- a/mach/powerpc/ncg/mach.c
+++ b/mach/powerpc/ncg/mach.c
@ -10,8 +10,13 @@

 #include <limits.h>
 #include <stdint.h>
+#include <stb.h>

+static int writing_stabs = 0;
+
+#ifdef REGVARS
 static long framesize;
+#endif

 void
 con_part(int sz, word w)
@ -51,32 +56,42 @@ con_mult(word sz)
 #define FL_MSB_AT_LOW_ADDRESS	1
 #include <con_float>

-static void
-emit_prolog(void)
-{
-	fprintf(codefile, "mfspr r0, lr\n");
-	fprintf(codefile, "addi sp, sp, %ld\n", -framesize - 8);
-	fprintf(codefile, "stw fp, %ld(sp)\n", framesize);
-	fprintf(codefile, "stw r0, %ld(sp)\n", framesize + 4);
-	fprintf(codefile, "addi fp, sp, %ld\n", framesize);
-}
-
 void
 prolog(full nlocals)
 {
-	framesize = nlocals;
+	/*
+	 * For N_LSYM and N_PSYM stabs, we want gdb to use fp, not sp.
+	 * The trick is to use "stwu sp, _(sp)" then "addi fp, sp, 0"
+	 * before we save lr with "stw r0, _(sp)".
+	 *
+	 * Tried with Apple's gdb-696.  Refer to
+	 *  - gdb-696/src/gdb/rs6000-tdep.c, skip_prologue(), line 1101
+	 *  - gdb-696/src/gdb/macosx/ppc-macosx-frameinfo.c,
+	 *    ppc_parse_instructions(), line 717
+	 * https://opensource.apple.com/release/developer-tools-25.html
+	 */
+	fprintf(codefile, "mfspr r0, lr\n");
+	if (writing_stabs) {
+		fprintf(codefile, "stwu sp, -8(sp)\n");  /* for gdb */
+		fprintf(codefile, "stw fp, 0(sp)\n");
+	} else
+		fprintf(codefile, "stwu fp, -8(sp)\n");
+	fprintf(codefile, "addi fp, sp, 0\n");           /* for gdb */
+	fprintf(codefile, "stw r0, 4(sp)\n");

 #ifdef REGVARS
-	/* f_regsave() will call emit_prolog() */
+	framesize = nlocals;
+	/* regsave() increases framesize; f_regsave() adjusts sp. */
 #else
-	emit_prolog();
+	if (nlocals)
+		fprintf(codefile, "addi sp, sp, %ld\n", -nlocals);
 #endif
 }

 void
 mes(word type)
 {
-	int argt ;
+	int argt, a1, a2 ;

 	switch ( (int)type ) {
 	case ms_ext :
@ -91,6 +106,41 @@ mes(word type)
 				break ;
 			}
 		}
+	case ms_stb:
+		argt = getarg(str_ptyp | cst_ptyp);
+		if (argt == sp_cstx)
+			fputs(".symb \"\", ", codefile);
+		else {
+			fprintf(codefile, ".symb \"%s\", ", str);
+			argt = getarg(cst_ptyp);
+		}
+		a1 = argval;
+		argt = getarg(cst_ptyp);
+		a2 = argval;
+		argt = getarg(cst_ptyp|nof_ptyp|sof_ptyp|ilb_ptyp|pro_ptyp);
+		if (a1 == N_PSYM) {
+			/* Change offset from AB into offset from
+			   the frame pointer.
+			*/
+			argval += 8;
+		}
+		fprintf(codefile, "%s, 0x%x, %d\n", strarg(argt), a1, a2);
+		argt = getarg(end_ptyp);
+		break;
+	case ms_std:
+		writing_stabs = 1;  /* set by first "mes 13,...,100,0" */
+		argt = getarg(str_ptyp | cst_ptyp);
+		if (argt == sp_cstx)
+			str[0] = '\0';
+		else {
+			argt = getarg(cst_ptyp);
+		}
+		swtxt();
+		fprintf(codefile, ".symd \"%s\", 0x%x,", str, (int) argval);
+		argt = getarg(cst_ptyp);
+		fprintf(codefile, "%d\n", (int) argval);
+		argt = getarg(end_ptyp);
+		break;
 	default :
 		while ( getarg(any_ptyp) != sp_cend ) ;
 		break ;
@ -196,7 +246,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf)
 	for (reg = 31; reg >= 0; reg--) {
 		if (savedf[reg] != LONG_MIN) {
 			offset -= 8;
-			fprintf(codefile, "%s f%d, %ld(fp)\n",
+			fprintf(codefile, "%s f%d,%ld(fp)\n",
 				opf, reg, offset);
 		}
 	}
@ -213,7 +263,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf)
 		while (reg > 0 && savedi[reg - 1] != LONG_MIN)
 			reg--;
 		offset -= (32 - reg) * 4;
-		fprintf(codefile, "%s r%d, %ld(fp)\n", opm, reg, offset);
+		fprintf(codefile, "%s r%d,%ld(fp)\n", opm, reg, offset);
 	} else
 		reg = 32;

@ -221,7 +271,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf)
 	for (reg--; reg >= 0; reg--) {
 		if (savedi[reg] != LONG_MIN) {
 			offset -= 4;
-			fprintf(codefile, "%s r%d, %ld(fp)\n",
+			fprintf(codefile, "%s r%d,%ld(fp)\n",
 				ops, reg, offset);
 		}
 	}
@ -232,7 +282,8 @@ f_regsave(void)
 {
 	int reg;

-	emit_prolog();
+	if (framesize)
+		fprintf(codefile, "addi sp, sp, %ld\n", -framesize);
 	saveloadregs("stw", "stmw", "stfd");

 	/*
--- a/mach/powerpc/ncg/table
+++ b/mach/powerpc/ncg/table
--- a/mach/powerpc/top/table
+++ b/mach/powerpc/top/table
@ -1,12 +1,14 @@

-/* PowerPC desciptor table for ACK target optimizer */
+/* PowerPC table for ACK target optimizer */

-MAXOP 3;
+MAXOP 5;
 LABEL_STARTER '.';

 %%;

+L1, L2, L3, L4, L5  { not_using_sp(VAL) };
 RNZ                 { strcmp(VAL, "r0") };  /* not r0 */
+UP                  { positive(VAL) };
 X, Y, Z             { TRUE };

 %%;
@ -16,10 +18,74 @@ X, Y, Z             { TRUE };
 addi  RNZ, RNZ, 0            -> ;
 addis RNZ, RNZ, 0            -> ;

+addi RNZ, RNZ, X : addi RNZ, RNZ, Y { plus(X, Y, Z) }
+                             -> addi RNZ, RNZ, Z ;
+
+/* Lower "addi sp, sp, UP" by lifting other instructions, looking for
+ * chances to merge or delete _addi_ instructions, and assuming that
+ * the code generator uses "sp" not "r1".
+ */
+addi sp, sp, UP : ANY L1                 { lift(ANY) }
+                             -> ANY L1                 : addi sp, sp, UP ;
+addi sp, sp, UP : ANY L1, L2             { lift(ANY) }
+                             -> ANY L1, L2             : addi sp, sp, UP ;
+addi sp, sp, UP : ANY L1, L2, L3         { lift(ANY) }
+                             -> ANY L1, L2, L3         : addi sp, sp, UP ;
+addi sp, sp, UP : ANY L1, L2, L3, L4     { lift(ANY) }
+                             -> ANY L1, L2, L3, L4     : addi sp, sp, UP ;
+addi sp, sp, UP : ANY L1, L2, L3, L4, L5 { lift(ANY) }
+                             -> ANY L1, L2, L3, L4, L5 : addi sp, sp, UP ;
+addi sp, sp, UP : lmw Y, L1 { Y[0]=='r' && atoi(Y+1)>1 }
+                             -> lmw Y, L1 : addi sp, sp, UP ;
+
+/* Merge _addi_ when popping from the stack. */
+addi sp, sp, X : lwz L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
+                             -> lwz L1, Z(sp) : addi sp, sp, X ;
+addi sp, sp, X : lfs L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
+                             -> lfs L1, Z(sp) : addi sp, sp, X ;
+addi sp, sp, X : lfd L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
+                             -> lfd L1, Z(sp) : addi sp, sp, X ;
+
+/* Lower or delete _addi_ when pushing to the stack. */
+addi sp, sp, X : stwu  L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
+                             -> stw  L1, Z(sp) : addi sp, sp, Z ;
+addi sp, sp, X : stfsu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
+                             -> stfs L1, Z(sp) : addi sp, sp, Z ;
+addi sp, sp, X : stfdu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
+                             -> stfd L1, Z(sp) : addi sp, sp, Z ;
+addi sp, sp, 4 : stfdu L1, -8(sp) -> stfdu L1, -4(sp) ;
+
+/* Delete _addi_ when setting the stack pointer. */
+addi sp, sp, X : addi sp, L1, Y   -> addi sp, L1, Y ;
+addi sp, sp, X : lwz sp, L1       -> lwz sp, L1 ;
+
+or X, Y, Y                   -> mr X, Y ;
+or. X, Y, Y                  -> mr. X, Y ;
+
 mr X, X                      -> ;
 fmr X, X                     -> ;

-or X, Y, Z : or. X, X, X     -> or. X, Y, Z ;
+add X, Y, Z   : mr. X, X     -> add. X, Y, Z ;
+and X, Y, Z   : mr. X, X     -> and. X, Y, Z ;
+andc X, Y, Z  : mr. X, X     -> andc. X, Y, Z ;
+divw X, Y, Z  : mr. X, X     -> divw. X, Y, Z ;
+divwu X, Y, Z : mr. X, X     -> divwu. X, Y, Z ;
+extsb X, Y, Z : mr. X, X     -> extsb. X, Y, Z ;
+extsh X, Y, Z : mr. X, X     -> extsh. X, Y, Z ;
+eqv X, Y, Z   : mr. X, X     -> eqv. X, Y, Z ;
+mullw X, Y, Z : mr. X, X     -> mullw. X, Y, Z ;
+nand X, Y, Z  : mr. X, X     -> nand. X, Y, Z ;
+nor X, Y, Z   : mr. X, X     -> nor. X, Y, Z ;
+or X, Y, Z    : mr. X, X     -> or. X, Y, Z ;
+orc X, Y, Z   : mr. X, X     -> orc. X, Y, Z ;
+slw X, Y, Z   : mr. X, X     -> slw. X, Y, Z ;
+slwi X, Y, Z  : mr. X, X     -> slwi. X, Y, Z ;
+subf X, Y, Z  : mr. X, X     -> subf. X, Y, Z ;
+sraw X, Y, Z  : mr. X, X     -> sraw. X, Y, Z ;
+srawi X, Y, Z : mr. X, X     -> srawi. X, Y, Z ;
+srw X, Y, Z   : mr. X, X     -> srw. X, Y, Z ;
+srwi X, Y, Z  : mr. X, X     -> srwi. X, Y, Z ;
+xor X, Y, Z   : mr. X, X     -> xor. X, Y, Z ;

 b X : labdef X               -> labdef X ;

@ -27,3 +93,98 @@ b X : labdef X               -> labdef X ;
 /* LT=0, GT=1, EQ=2, OV=3 */

 %%;
+
+/* Tells whether _c_ may appear in a word: a digit, a letter, or '_'. */
+static int isword(char c) {
+	if (c == '_')
+		return 1;
+	if (c >= '0' && c <= '9')
+		return 1;
+	if (c >= 'A' && c <= 'Z')
+		return 1;
+	return c >= 'a' && c <= 'z';
+}
+
+/* Does operand _s_ not use the stack pointer?
+ *
+ * Returns 0 if "sp" or "r1" appears in _s_ as a whole word, that is,
+ * neither preceded nor followed by a word character; else returns 1.
+ */
+int not_using_sp(const char *s) {
+	int boundary;
+
+	boundary = 1;		/* a word may begin at the start of _s_ */
+	while (*s) {
+		if (boundary &&
+		    ((s[0]=='s' && s[1]=='p') || (s[0]=='r' && s[1]=='1')) &&
+		    !isword(s[2]))
+			return 0;
+		boundary = !isword(*s);	/* may a word begin at s+1? */
+		s++;
+	}
+	return 1;
+}
+
+
+/* Is operand _s_ a decimal integer greater than zero, with nothing
+ * after the number? */
+int positive(const char *s) {
+	char *rest;
+	long value;
+
+	value = strtol(s, &rest, 10);
+	if (*s == '\0' || *rest != '\0')
+		return 0;	/* empty operand, or junk after the number */
+	return value > 0;
+}
+
+
+/* Instructions to lift(), sorted in strcmp() order.  These are from
+ * ../ncg/table, minus branch instructions.
+ *
+ * lift() looks names up with bsearch(), so this array must stay
+ * sorted; an out-of-order entry can make lookups fail.
+ */
+const char *liftables[] = {
+	"add", "add.", "addi",
+	"and", "andc", "andi.", "andis.",
+	"cmp", "cmpi", "cmpl", "cmpli",
+	"cmplw", "cmplwi", "cmpw", "cmpwi",
+	"divw", "divwu", "eqv", "extlwi", "extrwi", "extsb", "extsh",
+	"fadd", "fadds", "fcmpo", "fctiwz", "fdiv", "fdivs",
+	"fmr", "fmul", "fmuls", "fneg", "frsp", "fsub", "fsubs",
+	"lbz", "lbzx",
+	"lfd", "lfdu", "lfdx", "lfs", "lfsu", "lfsx",
+	"lha", "lhax", "lhz", "lhzx",
+	"li", "lis", "lwz", "lwzu", "lwzx",
+	"mfcr", "mfspr", "mr", "mr.", "mtspr", "mullw",
+	"nand", "neg", "nor", "or", "or.", "ori", "oris",
+	"rlwinm", "rlwnm", "rotlwi", "rotrwi",
+	"slw", "slwi", "sraw", "srawi", "srw", "srwi",
+	"stb", "stbx",
+	"stfd", "stfdu", "stfdx", "stfs", "stfsu", "stfsx",
+	"sth", "sthx", "stw", "stwu", "stwx",
+	"subf", "xor", "xori", "xoris",
+};
+
+/* Comparator for bsearch(): _a_ and _b_ each point to a (const char *),
+ * and the strings they point to are compared with strcmp(). */
+static int liftcmp(const void *a, const void *b) {
+	return strcmp(*(const char **)a, *(const char **)b);
+}
+
+/* May we lift instruction _s_ above "addi SP, SP, X"?
+ *
+ * Returns 1 if mnemonic _s_ is found in liftables[], else 0.  The
+ * pointer from bsearch() is compared against the null pointer rather
+ * than being converted to int, which could truncate it.
+ */
+int lift(const char *s) {
+	return bsearch(&s, liftables,
+	    sizeof(liftables) / sizeof(liftables[0]),
+	    sizeof(liftables[0]), liftcmp) != 0;
+}
+
+
+/* Is _l_ within the range of a signed 16-bit integer? */
+static int fits16(long l) {
+	if (l < -32768)
+		return 0;
+	return l <= 32767;
+}
+
+/* Tries sum = a + b with signed 16-bit integers.
+ *
+ * _a_ and _b_ must each be a complete decimal integer that fits in
+ * 16 bits, and so must their sum; otherwise returns 0 and leaves
+ * _sum_ untouched.  On success, writes the sum in decimal to _sum_
+ * and returns 1.
+ *
+ * _sum_ is declared const but is used as an output buffer, so it is
+ * cast explicitly instead of silently discarding the qualifier.  The
+ * longest result, "-32768", needs 7 bytes with its terminator.
+ * NOTE(review): assumes the caller's buffer holds at least 7 bytes.
+ */
+int plus(const char *a, const char *b, const char *sum)
+{
+	long la, lb, lsum;
+	char *end;
+
+	la = strtol(a, &end, 10);
+	if (*a == '\0' || *end != '\0' || !fits16(la))
+		return 0;
+	lb = strtol(b, &end, 10);
+	if (*b == '\0' || *end != '\0' || !fits16(lb))
+		return 0;
+
+	lsum = la + lb;
+	if (!fits16(lsum))
+		return 0;
+	snprintf((char *)sum, 7, "%ld", lsum);
+	return 1;
+}
--- a/mach/proto/mcg/main.c
+++ b/mach/proto/mcg/main.c
@ -42,13 +42,14 @@ int main(int argc, char* const argv[])
    const char* inputfilename = NULL;
    const char* outputfilename = NULL;
    FILE* output;
+    int i;

    program_name = argv[0];

    opterr = 1;
    for (;;)
    {
-        int c = getopt(argc, argv, "-d:D:C:o:");
+        int c = getopt(argc, argv, "d:D:C:o:");
        if (c == -1)
            break;

@ -79,20 +80,22 @@ int main(int argc, char* const argv[])
                    fatal("already specified an output file");
                outputfilename = optarg;
                break;
-
-            case 1:
-                if (inputfilename)
-                    fatal("unexpected argument '%s'", optarg);
-                inputfilename = optarg;
        }
    }

+    for (i = optind; i < argc; i++)
+    {
+        if (inputfilename)
+            fatal("unexpected argument '%s'", argv[i]);
+        inputfilename = argv[i];
+    }
+
    symbol_init();

-	if (!EM_open((char*) inputfilename))
-		fatal("couldn't open input '%s': %s",
+    if (!EM_open((char*) inputfilename))
+        fatal("couldn't open input '%s': %s",
            inputfilename ? inputfilename : "<stdin>", EM_error);
-	
+
    if (outputfilename)
    {
        outputfile = fopen(outputfilename, "w");
--- a/mach/proto/mcg/treebuilder.c
+++ b/mach/proto/mcg/treebuilder.c
@ -274,7 +274,7 @@ static struct ir* store(int size, struct ir* address, int offset, struct ir* val
    else
        opcode = IR_STORE;

-    if (offset > 0)
+    if (offset != 0)
        address = new_ir2(
            IR_ADD, EM_pointersize,
            address, new_wordir(offset)
@ -304,7 +304,7 @@ static struct ir* load(int size, struct ir* address, int offset)
    else
        opcode = IR_LOAD;

-    if (offset > 0)
+    if (offset != 0)
        address = new_ir2(
            IR_ADD, EM_pointersize,
            address, new_wordir(offset)
@ -416,6 +416,31 @@ static void helper_function(const char* name)
    );
 }

+/* Emits a call to the helper function _name_, passing _arg_ to it in
+ * a register instead of on the stack. */
+static void helper_function_with_arg(const char* name, struct ir* arg)
+{
+    /* Abuses IR_SETRET to set a register to pass one argument to a
+     * helper function.
+     *
+     * FIXME:  As of January 2018, mach/powerpc/libem takes an
+     * argument in register r3 only for ".los4", ".sts4", ".trp".
+     * This is an accident.  Should the argument be on the stack, or
+     * should other helpers use a register? */
+
+    materialise_stack();
+    /* Place arg where IR_SETRET puts a return value. */
+    appendir(
+        new_ir1(
+            IR_SETRET, arg->size,
+            arg
+        )
+    );
+    /* Call the helper by label. */
+    appendir(
+        new_ir1(
+            IR_CALL, 0,
+            new_labelir(name)
+        )
+    );
+}
+
 static void insn_simple(int opcode)
 {
    switch (opcode)
@ -437,6 +462,7 @@ static void insn_simple(int opcode)
        case op_cii: simple_convert(IR_FROMSI); break;
        case op_ciu: simple_convert(IR_FROMSI); break;
        case op_cui: simple_convert(IR_FROMUI); break;
+        case op_cuu: simple_convert(IR_FROMUI); break;
        case op_cfu: simple_convert(IR_FROMUF); break;
        case op_cfi: simple_convert(IR_FROMSF); break;
        case op_cif: simple_convert(IR_FROMSI); break;
@ -496,10 +522,12 @@ static void insn_simple(int opcode)

        case op_lim:
        {
+            /* Traps use only 16 bits of .ignmask, but we keep an
+             * entire word, even if a word has more than 2 bytes. */
            push(
-                new_ir1(
-                    (EM_wordsize == 2) ? IR_LOAD : IR_LOADH, EM_wordsize,
-                    new_labelir(".ignmask")
+                load(
+                    EM_wordsize,
+                    new_labelir(".ignmask"), 0
                )
            );
            break;
@ -507,26 +535,34 @@ static void insn_simple(int opcode)

        case op_sim:
        {
-            sequence_point();
            appendir(
-                new_ir2(
-                    (EM_wordsize == 2) ? IR_STORE : IR_STOREH, EM_wordsize,
-                    new_labelir(".ignmask"),
+                store(
+                    EM_wordsize,
+                    new_labelir(".ignmask"), 0,
                    pop(EM_wordsize)
                )
            );
            break;
        }

-        case op_trp: helper_function(".trp"); break;
+        case op_trp:
+            helper_function_with_arg(".trp", pop(EM_wordsize));
+            break;

        case op_sig:
        {
+            struct ir* label = new_labelir(".trppc");
            struct ir* value = pop(EM_pointersize);
+            push(
+                load(
+                    EM_pointersize,
+                    label, 0
+                )
+            );
            appendir(
                store(
                    EM_pointersize,
-                    new_labelir(".trppc"), 0,
+                    label, 0,
                    value
                )
            );
@ -539,12 +575,13 @@ static void insn_simple(int opcode)
            break;
        }

-        /* FIXME: These instructions are really complex and barely used
-         * (Modula-2 and Pascal set support, I believe). Leave them until
-         * later. */
-        case op_set: helper_function(".unimplemented_set"); break;
-        case op_ior: helper_function(".unimplemented_ior"); break;
-
+        case op_and: helper_function(".and"); break;
+        case op_ior: helper_function(".ior"); break;
+        case op_xor: helper_function(".xor"); break;
+        case op_com: helper_function(".com"); break;
+        case op_cms: helper_function(".cms"); break;
+        case op_set: helper_function(".set"); break;
+        case op_inn: helper_function(".inn"); break;

        case op_dch:
            push(
@ -670,6 +707,31 @@ static void simple_alu2(int opcode, int size, int irop, const char* fallback)
    }
 }

+/* Builds the IR for a rotate of a _size_-byte value and pushes it.
+ * _irop_ shifts in the direction of the rotate and _irop_reverse_
+ * shifts the other way.  A size above two words is a fatal error. */
+static void rotate(int opcode, int size, int irop, int irop_reverse)
+{
+    if (size > (2*EM_wordsize))
+        fatal("treebuilder: can't do opcode %s with size %d", em_mnem[opcode - sp_fmnem], size);
+    else
+    {
+        /* NOTE(review): the rotate count is popped with the same size
+         * as the value; confirm against the EM definition of rol/ror. */
+        struct ir* right = pop(size);
+        struct ir* left = pop(size);
+        struct ir* bits = new_wordir(8 * size);
+
+        /* a rol b -> (a << b) | (a >> (bits - b)), where bits is the
+         * width of the operand, 8 * size */
+        push(
+            new_ir2(
+                IR_OR, size,
+                new_ir2(irop, size, left, right),
+                new_ir2(
+                    irop_reverse, size,
+                    left,
+                    new_ir2(IR_SUB, size, bits, right)
+                )
+            )
+        );
+    }
+}
+
 static struct ir* extract_block_refs(struct basicblock* bb)
 {
    struct ir* outir = NULL;
@ -720,26 +782,28 @@ static struct ir* ptradd(struct ir* address, int offset)
        );
 }

-static void blockmove(struct ir* dest, struct ir* src, struct ir* size)
+static struct ir* walk_static_chain(int level)
 {
-    /* memmove stack: ( size src dest -- ) */
-    push(size);
-    push(src);
-    push(dest);
+    struct ir* ir;

-    materialise_stack();
-    appendir(
-        new_ir1(
-            IR_CALL, 0,
-            new_labelir("memmove")
-        )
-    );
-    appendir(
-        new_ir1(
-            IR_STACKADJUST, EM_pointersize,
-            new_wordir(EM_pointersize*2 + EM_wordsize)
-        )
+    /* The static chain, when it exists, is the first argument of each
+     * procedure.  The chain begins with the current frame at level 0,
+     * and continues until we reach the outermost procedure. */
+    ir = new_ir0(
+        IR_GETFP, EM_pointersize
    );
+    while (level--)
+    {
+        /* Walk to the next frame pointer. */
+        ir = load(
+            EM_pointersize,
+            new_ir1(
+                IR_FPTOAB, EM_pointersize,
+                ir
+            ), 0
+        );
+    }
+    return ir;
 }

 static void insn_ivalue(int opcode, arith value)
@ -765,8 +829,10 @@ static void insn_ivalue(int opcode, arith value)

        case op_and: simple_alu2(opcode, value, IR_AND, ".and"); break;
        case op_ior: simple_alu2(opcode, value, IR_OR, ".ior"); break;
-        case op_xor: simple_alu2(opcode, value, IR_EOR, NULL); break;
+        case op_xor: simple_alu2(opcode, value, IR_EOR, ".xor"); break;
        case op_com: simple_alu1(opcode, value, IR_NOT, ".com"); break;
+        case op_rol: rotate(opcode, value, IR_LSL, IR_LSR); break;
+        case op_ror: rotate(opcode, value, IR_LSR, IR_LSL); break;

        case op_adf: simple_alu2(opcode, value, IR_ADDF, NULL); break;
        case op_sbf: simple_alu2(opcode, value, IR_SUBF, NULL); break;
@ -774,12 +840,23 @@ static void insn_ivalue(int opcode, arith value)
        case op_dvf: simple_alu2(opcode, value, IR_DIVF, NULL); break;
        case op_ngf: simple_alu1(opcode, value, IR_NEGF, NULL); break;

-        case op_cmu: /* fall through */
-        case op_cms: push(tristate_compare(value, IR_COMPAREUI)); break;
+        case op_cms:
+            if (value > (2*EM_wordsize))
+            {
+                push(new_wordir(value));
+                helper_function(".cms");
+                break;
+            }
+            /* fall through */
+        case op_cmu: push(tristate_compare(value, IR_COMPAREUI)); break;
        case op_cmi: push(tristate_compare(value, IR_COMPARESI)); break;
        case op_cmf: push(tristate_compare(value, IR_COMPAREF)); break;

-        case op_rck: helper_function(".rck"); break;
+        case op_rck:
+            if (value != EM_wordsize)
+                fatal("'rck %d' not supported", value);
+            helper_function(".rck");
+            break;
        case op_set: push(new_wordir(value)); helper_function(".set"); break;
        case op_inn: push(new_wordir(value)); helper_function(".inn"); break;

@ -930,26 +1007,24 @@ static void insn_ivalue(int opcode, arith value)

            if (value > (EM_wordsize*2))
            {
-                /* We're going to need to do multiple stores; fix the address
+                /* We're going to need to do multiple loads; fix the address
                 * so it'll go into a register and we can do maths on it. */
                appendir(ptr);
            }

+            /* Stack grows down.  Load backwards. */
            while (value > 0)
            {
                int s = EM_wordsize*2;
                if (value < s)
                    s = value;
-
+                value -= s;
                push(
                    load(
                        s,
-                        ptr, offset
+                        ptr, value
                    )
                );
-
-                value -= s;
-                offset += s;
            }

            assert(value == 0);
@ -1099,7 +1174,12 @@ static void insn_ivalue(int opcode, arith value)
        case op_dup:
        {
            sequence_point();
-            if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize))
+            if (value > (2*EM_wordsize))
+            {
+                push(new_wordir(value));
+                helper_function(".dus4");
+            }
+            else if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize))
            {
                struct ir* v1 = pop(EM_wordsize);
                struct ir* v2 = pop(EM_wordsize);
@ -1117,12 +1197,30 @@ static void insn_ivalue(int opcode, arith value)
            break;
        }

+        case op_dus:
+        {
+            if (value != EM_wordsize)
+                fatal("'dus %d' not supported", value);
+            helper_function(".dus4");
+            break;
+        }
+
        case op_exg:
        {
-            struct ir* v1 = pop(value);
-            struct ir* v2 = pop(value);
-            push(v1);
-            push(v2);
+            if (value > (2*EM_wordsize))
+            {
+                push(
+                    new_wordir(value)
+                );
+                helper_function(".exg");
+            }
+            else
+            {
+                struct ir* v1 = pop(value);
+                struct ir* v2 = pop(value);
+                push(v1);
+                push(v2);
+            }
            break;
        }

@ -1285,53 +1383,19 @@ static void insn_ivalue(int opcode, arith value)
        }

        case op_lxl:
-        {
-            struct ir* ir;
-
-            /* Walk the static chain. */
-
-            ir = new_ir0(
-                IR_GETFP, EM_pointersize
+            push(
+                walk_static_chain(value)
            );
-
-            while (value--)
-            {
-                ir = new_ir1(
-                    IR_CHAINFP, EM_pointersize,
-                    ir
-                );
-            }
-
-            push(ir);
            break;
-        }

        case op_lxa:
-        {
-            struct ir* ir;
-
-            /* Walk the static chain. */
-
-            ir = new_ir0(
-                IR_GETFP, EM_pointersize
-            );
-
-            while (value--)
-            {
-                ir = new_ir1(
-                    IR_CHAINFP, EM_pointersize,
-                    ir
-                );
-            }
-
            push(
                new_ir1(
                    IR_FPTOAB, EM_pointersize,
-                    ir
+                    walk_static_chain(value)
                )
            );
            break;
-        }

        case op_fef:
        {
@ -1394,6 +1458,7 @@ static void insn_ivalue(int opcode, arith value)
                    break;

                case 1:
+                    materialise_stack();
                    push(
                        appendir(
                            new_ir0(
@ -1403,10 +1468,6 @@ static void insn_ivalue(int opcode, arith value)
                    );
                    break;

-                case 2:
-                    helper_function(".unimplemented_lor_2");
-                    break;
-
                default:
                    fatal("'lor %d' not supported", value);
            }
@ -1436,10 +1497,6 @@ static void insn_ivalue(int opcode, arith value)
                    );
                    break;

-                case 2:
-                    helper_function(".unimplemented_str_2");
-                    break;
-
                default:
                    fatal("'str %d' not supported", value);
            }
@ -1448,100 +1505,27 @@ static void insn_ivalue(int opcode, arith value)
        }

        case op_blm:
-        {
-            /* Input stack: ( src dest -- ) */
-            struct ir* dest = pop(EM_pointersize);
-            struct ir* src = pop(EM_pointersize);
-            blockmove(dest, src, new_wordir(value));
+            push(new_wordir(value));
+            helper_function(".bls4");
            break;
-        }

        case op_bls:
-        {
-            /* Input stack: ( src dest size -- ) */
-            struct ir* dest = pop(EM_pointersize);
-            struct ir* src = pop(EM_pointersize);
-            struct ir* size = pop(EM_wordsize);
-            blockmove(dest, src, size);
+            if (value != EM_wordsize)
+                fatal("'bls %d' not supported", value);
+            helper_function(".bls4");
            break;
-        }

        case op_los:
-        {
-            /* Copy an arbitrary amount to the stack. */
-            struct ir* bytes = pop(EM_wordsize);
-            struct ir* address = pop(EM_pointersize);
-
-            materialise_stack();
-            appendir(
-                new_ir1(
-                    IR_STACKADJUST, EM_pointersize,
-                    new_ir1(
-                        IR_NEG, EM_wordsize,
-                        bytes
-                    )
-                )
-            );
-
-            push(
-                new_ir0(
-                    IR_GETSP, EM_pointersize
-                )
-            );
-            push(address);
-            push(bytes);
-            materialise_stack();
-            appendir(
-                new_ir1(
-                    IR_CALL, 0,
-                    new_labelir("memcpy")
-                )
-            );
-            appendir(
-                new_ir1(
-                    IR_STACKADJUST, EM_pointersize,
-                    new_wordir(EM_pointersize*2 + EM_wordsize)
-                )
-            );
+            if (value != EM_wordsize)
+                fatal("'los %d' not supported", value);
+            helper_function_with_arg(".los4", pop(EM_wordsize));
            break;
-        }

        case op_sts:
-        {
-            /* Copy an arbitrary amount from the stack. */
-            struct ir* bytes = pop(EM_wordsize);
-            struct ir* dest = pop(EM_pointersize);
-            struct ir* src;
-
-            materialise_stack();
-            src = appendir(
-                    new_ir0(
-                        IR_GETSP, EM_pointersize
-                    )
-                );
-
-            push(dest);
-            push(src);
-            push(bytes);
-            materialise_stack();
-            appendir(
-                new_ir1(
-                    IR_CALL, 0,
-                    new_labelir("memcpy")
-                )
-            );
-            appendir(
-                new_ir1(
-                    IR_STACKADJUST, EM_pointersize,
-                    new_ir2(
-                        IR_ADD, EM_wordsize,
-                        new_wordir(EM_pointersize*2 + EM_wordsize),
-                        bytes
-                    )
-                )
-            );
+            if (value != EM_wordsize)
+                fatal("'sts %d' not supported", value);
+            helper_function_with_arg(".sts4", pop(EM_wordsize));
            break;
-        }

        case op_lin:
        {
@ -1677,17 +1661,17 @@ static void insn_lvalue(int opcode, const char* label, arith offset)

        case op_gto:
        {
-            struct ir* descriptor = pop(EM_pointersize);
+            struct ir* descriptor = address_of_external(label, offset);

            appendir(
                new_ir1(
-                    IR_SETSP, EM_pointersize,
+                    IR_SETFP, EM_pointersize,
                    load(EM_pointersize, descriptor, EM_pointersize*2)
                )
            );
            appendir(
                new_ir1(
-                    IR_SETFP, EM_pointersize,
+                    IR_SETSP, EM_pointersize,
                    load(EM_pointersize, descriptor, EM_pointersize*1)
                )
            );
--- a/mach/proto/ncg/subr.c
+++ b/mach/proto/ncg/subr.c
@ -518,7 +518,7 @@ int split(token_p tp, int *ip, int ply, int toplevel) {
 	int tpl;

 	for (cp=c2coercs;cp->c2_texpno>=0; cp++) {
-		if (!match(tp,&machsets[cp->c2_texpno],0))
+		if (!match(tp,&machsets[cp->c2_texpno],cp->c2_expr))
 			continue;
 		ok=1;
 		for (i=0; ok && i<cp->c2_nsplit;i++) {
--- a/man/powerpc_as.6
+++ b/man/powerpc_as.6
@ -1,33 +1,136 @@
-.TH POWERPC_AS 1
+.TH POWERPC_AS 6 2018-03-07
 .ad
 .SH NAME
 powerpc_as \- assembler for PowerPC
-
 .SH SYNOPSIS
 as [options] argument ...
-
 .SH DESCRIPTION
 This assembler is made with the general framework
 described in \fIuni_ass\fP(6).
-
+.PP
+It can assemble the instructions from Book I and Book II of PowerPC
+version 2.01.
+This includes the branch, integer, and floating point instructions
+from Book I; and the cache, synchronization, and time base
+instructions from Book II.
+.PP
+There is no support for other instructions, such as supervisor-mode
+instructions or vector instructions.
+There is some support for 64-bit integer instructions, but the
+assembler only has 32-bit symbols.
 .SH SYNTAX
-Most 32-bit integer and floating point instructions are supported, but not many
-short form instructions. Instructions which take 16-bit operands can additionally
-use the following special functions:
-
-.IP hi16[value], ha16[value]
-Returns the high half of the value of the expression; if the value is not absolute,
-also generates the appropriate fixup. Use of either of these \fImust\fR be followed,
-in the next instruction, by the corresponding use of \fBlo16[]\fR. Use \fBhi16[]\fR
-if the low half is going to interpret its payload as an unsigned value, and
-\fBha16[]\fR if it will be interpreted as a signed value (so that the high half can
-be adjusted to match).
-
-.IP lo16[]
-Returns the low half of the value of the expression. No fixup is generated. Use of
-\fBlo16[]\fR must come in the instruction immediately after a use of \fBhi16[]\fR or
-\fBha16[]\fR.
-
+.SS general purpose registers
+There are 32 GPRs from \fBr0\fP to \fBr31\fP.
+In this assembler, \fBsp\fP is an alias for \fBr1\fP, and \fBfp\fP is
+an alias for \fBr2\fP, because \fIack\fP uses r1 as the stack pointer
+and r2 as the frame pointer.
+Other compilers don't use r2 as the frame pointer.
+.PP
+GPR syntax requires a register name, not a number.
+For example, \(oqaddi\ r5,\ r4,\ 1\(cq works, but
+\(oqaddi\ 5,\ 4,\ 1\(cq is a syntax error.
+.PP
+Certain instructions ignore the contents of \fBr0\fP and use zero.
+This happens when using r0 as the second operand of \fIaddi\fP or
+\fIaddis\fP, or when addressing \(oqexpr(r0)\(cq or
+\(oqr0,\ gpr\(cq.
+The syntax is still the name r0, not the number 0.
+.SS floating point registers
+There are 32 FPRs from \fBf0\fP to \fBf31\fP.
+Each FPR has 64 bits and can hold a single-precision or
+double-precision number.
+FPR syntax requires a register name, not a number.
+.SS special purpose registers
+The three named SPRs are \fBctr\fP (count register), \fBlr\fP (link
+register), and \fBxer\fP (exception register).
+\(oqmfspr\(cq and \(oqmtspr\(cq allow these names or a number.
+.SS condition register
+There is a 32-bit condition register, where bit 0 is most significant,
+and bit 31 is least significant.
+This gets split into 8 registers of 4 bits each, from \fBcr0\fP (with
+bits 0 to 3) to \fBcr7\fP (with bits 28 to 31).
+Some instructions use the names cr0 to cr7, others use a bit numbered
+0 to 31, and others use all 32 bits.
+.SS addressing modes
+\(oqexpr(gpr)\(cq addresses \fIexpr\fP + the contents of \fIgpr\fP,
+except that \(oqexpr(r0)\(cq addresses \fIexpr\fP\ +\ 0.
+A few instructions, like \(oqstwu\(cq, also update \fIgpr\fP by
+setting it to the address.
+.PP
+\(oqgprA,\ gprB\(cq in certain instructions addresses the contents of
+\fIgprA\fP + the contents of \fIgprB\fP, except that \(oqr0,\ gprB\(cq
+addresses 0\ +\ the contents of \fIgprB\fP.
+.SS 16-bit operands
+Some instructions have a 16-bit operand.
+This can be a bare \fIexpr\fP (which must fit in 16 bits, signed or
+unsigned), or it can be one of these special functions:
+.IP "hi16[expr], ha16[expr]"
+Returns the high half of the 32-bit value of the expression.
+If the low half is negative (from 0x8000 to 0xffff),
+then \fBha16[]\fP adjusts the high half by adding 1.
+Use \fBhi16[]\fP if the instruction with \fBlo16[]\fP is going to
+interpret its operand as an unsigned value, or \fBha16[]\fP if it will
+interpret it as signed.
+.IP
+If \fIexpr\fP is not absolute, then the assembler must generate a
+fixup for the linker.
+The fixup only works if the instruction is \(oqaddis gpr, r0, hx16[expr]\(cq
+or \(oqlis gpr, hx16[expr]\(cq, where hx16 stands for hi16 or ha16.
+.IP lo16[expr]
+Returns the low half of the 32-bit value of the expression.
+.SS short forms
+Some instructions have short forms using extended mnemonics (or
+simplified mnemonics) like \fIli\fP, \fIsrwi\fP, and many others.
+.IP "li r6, 789"
+is short for: addi r6, r0, 789
+.IP "srwi r3, r4, 2"
+is short for: rlwinm r3, r4, 30, 2, 31
+.PP
+This assembler doesn't support extended mnemonics with branch
+prediction, such as \fIblt+\fP or \fIbne-\fP.
+It always parses \(oq+\(cq and \(oq-\(cq as operators,
+never as part of a mnemonic.
+.SH EXAMPLES
+There are two ways to load r3 with _symbol\ =\ 0x1234abcd.
+One way is
+.PP
+.nf
+   lis  r3, hi16[_symbol]
+   ori  r3, r3, lo16[_symbol]  ! r3 = 0x12340000 | 0x0000abcd
+.fi
+.PP
+The other way is
+.PP
+.nf
+   lis  r3, ha16[_symbol]
+   addi r3, r3, lo16[_symbol]  ! r3 = 0x12350000 + 0xffffabcd
+.fi
+.PP
+The next code adds 1 to a global variable.
+.PP
+.nf
+   lis  r3, ha16[_var]
+   lwz  r4, lo16[_var](r3)
+   addi r4, r4, 1
+   stw  r4, lo16[_var](r3)
+.fi
 .SH "SEE ALSO"
 uni_ass(6),
 ack(1)
+.PP
+Freescale Semiconductor, \fIProgramming Environments Manual for 32-Bit
+Implementations of the PowerPC Architecture\fP, Rev. 3, September 2005.
+.PP
+IBM, \fIPowerPC User Instruction Set Architecture, Book I\fP, Version
+2.01, September 2003.
+.PP
+IBM, \fIPowerPC Virtual Environment Architecture, Book II\fP, Version
+2.01, December 2003.
+.SH CAVEATS
+Beware that not every processor can run every instruction.
+The 32-bit processors can't run 64-bit instructions like \fIlwa\fP,
+\fIstd\fP, and \fIfctid\fP.
+The PowerPC 601 can't run \fIstfiwx\fP, nor \fIfres\fP, \fIfrsqrte\fP,
+\fIfsel\fP.
+Many models, like the PowerPC G4, can't run \fIfsqrt\fP nor
+\fIfsqrts\fP.
--- a/modules/src/em_code/insert.c
+++ b/modules/src/em_code/insert.c
@ -99,20 +99,19 @@ C_out_parts(pp)
 		}
 		else {
 			/* copy the chunk to output */
-#ifdef INCORE
-			register char *s = C_BASE + pp->pp_begin;
-			char *se = C_BASE + pp->pp_end;
-
-			while (s < se) {
-				put(*s++);
-			}
-#else
 			register long b = pp->pp_begin;

 			while (b < pp->pp_end) {
+#ifdef INCORE
+				/* C_BASE is not constant, put() may
+				   move C_BASE, so each iteration of
+				   this loop must read C_BASE again.
+				*/
+				put(C_BASE[b++]);
+#else
 				put(getbyte(b++));
-			}
 #endif
+			}
 		}
 		prev = pp;
 		pp = pp->pp_next;
--- a/modules/src/object/wr_ranlib.c
+++ b/modules/src/object/wr_ranlib.c
@ -10,16 +10,27 @@ wr_ranlib(fd, ran, cnt1)
 	struct ranlib	*ran;
 	long	cnt1;
 {
-	{
-		register long cnt = cnt1;
-		register struct ranlib *r = ran;
-		register char *c = (char *) r;
+	struct ranlib *r;
+	long cnt, val;
+	char *c;

-		while (cnt--) {
-			put4(r->ran_off,c); c += 4;
-			put4(r->ran_pos,c); c += 4;
-			r++;
-		}
+	/*
+	 * We overwrite the structs in r with the bytes in c, so we
+	 * don't need to allocate another buffer.
+	 *
+	 * put4(r->ran_off, c) can fail if r->ran_off and c overlap in
+	 * memory, if this is a big-endian machine.  It tries to swap
+	 * the bytes from big to little endian, but overwrites some
+	 * bytes before reading them.  To prevent this, we must copy
+	 * each value before we overwrite it.
+	 */
+	r = ran;
+	c = (char *)r;
+	cnt = cnt1;
+	while (cnt--) {
+		val = r->ran_off; put4(val, c); c += 4;
+		val = r->ran_pos; put4(val, c); c += 4;
+		r++;
 	}
 	wr_bytes(fd, (char *) ran, cnt1 * SZ_RAN);
 }
--- a/modules/src/print/doprnt.c
+++ b/modules/src/print/doprnt.c
@ -16,7 +16,7 @@
 	%d = int
 $ */
 void
-doprnt(File *fp, char *fmt, va_list argp)
+doprnt(File *fp, const char *fmt, va_list argp)
 {
 	char buf[SSIZE];

--- a/modules/src/print/format.c
+++ b/modules/src/print/format.c
@ -35,7 +35,7 @@ integral(int c)
 	%d = int
 $ */
 int
-_format(char *buf, char *fmt, va_list argp)
+_format(char *buf, const char *fmt, va_list argp)
 {
 	register char *pf = fmt;
 	register char *pb = buf;
--- a/modules/src/print/fprint.c
+++ b/modules/src/print/fprint.c
@ -17,7 +17,7 @@
 $ */
 /*VARARGS*/
 void
-fprint(File *fp, char *fmt, ...)
+fprint(File *fp, const char *fmt, ...)
 {
 	va_list args;
 	char buf[SSIZE];
--- a/modules/src/print/print.c
+++ b/modules/src/print/print.c
@ -17,7 +17,7 @@
 $ */
 /*VARARGS*/
 void
-print(char *fmt, ...)
+print(const char *fmt, ...)
 {
 	va_list args;
 	char buf[SSIZE];
--- a/modules/src/print/print.h
+++ b/modules/src/print/print.h
@ -9,10 +9,10 @@

 #include <stdarg.h>

-void print(char *fmt, ...);
-void fprint(File *f, char *fmt, ...);
-void doprnt(File *f, char *fmt, va_list ap);
-int _format(char *buf, char *fmt, va_list ap);
-char *sprint(char *buf, char *fmt, ...);
+void print(const char *fmt, ...);
+void fprint(File *f, const char *fmt, ...);
+void doprnt(File *f, const char *fmt, va_list ap);
+int _format(char *buf, const char *fmt, va_list ap);
+char *sprint(char *buf, const char *fmt, ...);

 #endif /* __PRINT_INCLUDED__ */
--- a/modules/src/print/sprint.c
+++ b/modules/src/print/sprint.c
@ -17,7 +17,7 @@
 $ */
 /*VARARGS*/
 char *
-sprint(char *buf, char *fmt, ...)
+sprint(char *buf, const char *fmt, ...)
 {
 	va_list args;

--- a/plat/linux/libsys/errno.s
+++ b/plat/linux/libsys/errno.s
@ -1,28 +0,0 @@
-#
-! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/errno.s,v $
-! $State: Exp $
-! $Revision: 1.1 $
-
-! Declare segments (the order is important).
-
-.sect .text
-.sect .rom
-.sect .data
-.sect .bss
-
-#define D(e) .define e; e
-
-.sect .data
-
-! Define various ACK error numbers. Note that these are *not* ANSI C
-! errnos, and are used for different purposes.
-
-D(ERANGE)         = 1
-D(ESET)           = 2
-D(EIDIVZ)         = 6
-D(EHEAP)          = 17
-D(EILLINS)        = 18
-D(EODDZ)          = 19
-D(ECASE)          = 20
-D(EBADMON)        = 25
-
--- a/plat/linux/libsys/syscalls.h
+++ b/plat/linux/libsys/syscalls.h
@ -174,6 +174,12 @@
 #define __NR_mremap 163
 #define __NR_setresuid 164
 #define __NR_getresuid 165
+
+/*
+ * i386, m68020, powerpc use different numbers after 165.
+ * This file only has the numbers for i386.
+ */
+#if defined(__i386)
 #define __NR_vm86 166
 #define __NR_query_module 167
 #define __NR_poll 168
@ -324,5 +330,6 @@

 #define concat(x, y) x##y
 #define MAPPED_SYSCALL(p, n) .define concat(p,n); concat(p,n): xor eax, eax; movb al, concat(__NR_,n); jmp __mapped_syscall
+#endif /* __i386 */

 #endif
--- a/plat/linux386/libsys/build.lua
+++ b/plat/linux386/libsys/build.lua
@ -6,6 +6,7 @@ acklibrary {
        "plat/linux/libsys/*.s",
    },
 	deps = {
+		"plat/linux/libsys/*.h",
 		"lang/cem/libcc.ansi/headers+headers",
 		"plat/linux386/include+headers",
 	},
--- a/plat/linux386/libsys/trapno.s
+++ b/plat/linux386/libsys/trapno.s
@ -0,0 +1,13 @@
+#define D(e) .define e; e
+
+! Define various EM trap numbers needed by mach/i386/libem.
+! Note that these are *not* ANSI C errnos.
+
+D(ERANGE)         = 1
+D(ESET)           = 2
+D(EIDIVZ)         = 6
+D(EHEAP)          = 17
+D(EILLINS)        = 18
+D(EODDZ)          = 19
+D(ECASE)          = 20
+D(EBADMON)        = 25
--- a/plat/linux68k/libsys/build.lua
+++ b/plat/linux68k/libsys/build.lua
@ -6,6 +6,7 @@ acklibrary {
        "plat/linux/libsys/*.s",
    },
 	deps = {
+		"plat/linux/libsys/*.h",
 		"lang/cem/libcc.ansi/headers+headers",
 		"plat/linux68k/include+headers",
 	},
--- a/plat/linuxppc/boot.s
+++ b/plat/linuxppc/boot.s
@ -32,7 +32,7 @@ begtext:

 	lwz r3, 0(sp)            ! r3 = argc
 	addi r4, sp, 4           ! r4 = argv
-	rlwinm r5, r3, 32-2, 2, 31 ! shift left 2 bits
+	srwi r5, r3, 2           ! r5 = argc >> 2 (right shift). NOTE(review): removed comment said "shift left" -- confirm intent
 	add r5, r5, r4 
 	addi r5, r5, 8           ! r5 = env
 	
--- a/plat/linuxppc/descr
+++ b/plat/linuxppc/descr
@ -19,7 +19,7 @@ var PLATFORM=linuxppc
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
 var CPP_F=-D__unix
 var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x10000054
-var MACHOPT_F=-m3
+var MACHOPT_F=-m2
 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr

 # Override the setting in fe so that files compiled for linuxppc can see
--- a/plat/linuxppc/libsys/_syscall.s
+++ b/plat/linuxppc/libsys/_syscall.s
@ -12,17 +12,8 @@

 .sect .text

-EINVAL = 22
+#define EINVAL 22

-#define IFFALSE 4
-#define IFTRUE 12
-#define ALWAYS 20
-
-#define LT 0
-#define GT 1
-#define EQ 2
-#define OV 3
-	
 ! Perform a Linux system call.

 .define __syscall
@ -32,21 +23,21 @@ __syscall:
 	lwz r4, 8(sp)
 	lwz r5, 12(sp)
 	sc 0
-	bclr IFFALSE, OV, 0
-	
+	bnslr
+
 	! On error, r3 contains the errno.	
 	! It just so happens that errnos 1-34 are the same in Linux as in ACK.
-	cmpi cr0, 0, r3, 1
-	bc IFTRUE, LT, 2f
-	cmpi cr0, 0, r3, 34
-	bc IFTRUE, GT, 2f
-	
+	cmpwi r3, 1
+	blt 2f
+	cmpwi r3, 34
+	bgt 2f
+
 3:
-	li32 r4, _errno
-	stw r3, 0(r4)
-	addi r3, r0, -1
-	bclr ALWAYS, 0, 0
-	
+	lis r4, ha16[_errno]
+	stw r3, lo16[_errno](r4)
+	li r3, -1
+	blr
+
 2:
-	addi r3, r0, EINVAL
+	li r3, EINVAL
 	b 3b
--- a/plat/linuxppc/libsys/build.lua
+++ b/plat/linuxppc/libsys/build.lua
@ -4,12 +4,10 @@ acklibrary {
 		"./_syscall.s",
 		"./sigaction.s",
 		"./signal.c",
-		"./trap.s",
 		"plat/linux/libsys/_exit.c",
 		"plat/linux/libsys/_hol0.s",
 		"plat/linux/libsys/close.c",
 		"plat/linux/libsys/creat.c",
-		"plat/linux/libsys/errno.s",
 		"plat/linux/libsys/execve.c",
 		"plat/linux/libsys/getpid.c",
 		"plat/linux/libsys/gettimeofday.c",
@ -26,6 +24,7 @@ acklibrary {
 		"plat/linux/libsys/write.c",
 	},
 	deps = {
+		"plat/linux/libsys/*.h",
 		"lang/cem/libcc.ansi/headers+headers",
 		"plat/linuxppc/include+headers",
 	},
--- a/plat/linuxppc/libsys/sigaction.s
+++ b/plat/linuxppc/libsys/sigaction.s
@ -1,156 +1,194 @@
 #define __NR_sigaction		67
-#define SIG_BLOCK		0
+#define __NR_sigprocmask	126
 #define SIG_SETMASK		2
-#define MAXSIG			32

-/* offsets into our stack frame */
-#define mynew	16	/* new sigaction */
-#define mynset	32	/* new signal set */
-#define myoset	36	/* old signal set */
-#define mysave	40
-#define mysize	56
+/* offsets into struct sigaction */
+#define sa_handler	0	/* in union with sa_sigaction */
+#define sa_mask		4
+#define sa_flags	8
+#define sa_restorer	12
+
+/* offsets from stack pointer */
+#define mynewact	16	/* struct sigaction */
+#define myoldact	32
+#define newmask		64	/* signal set */
+#define oldmask		68
+#define oldhandler	72
+#define myret		76
+#define savelr		80
+#define signum		84	/* first argument */
+#define newact		88
+#define oldact		92

 .sect .text; .sect .rodata; .sect .data; .sect .bss

 /*
 * Linux calls signal handlers with arguments in registers, but the
 * ACK expects arguments on the stack.  This sigaction() uses a
- * "bridge" to move the arguments.
+ * "bridge" to move the arguments, with these caveats:
+ *
+ *  - If the caller passes a bad pointer, this sigaction() causes
+ *    SIGBUS or SIGSEGV instead of setting errno = EFAULT.
+ *
+ *  - This sigaction() only works with signals 1 to 31, not with
+ *    real-time signals 32 to 64.
+ *
+ *  - This sigaction() is not safe for multiple threads.
+ *
+ * int sigaction(int signum, const struct sigaction *newact,
+ *		 struct sigaction *oldact);
 */
 .sect .text
 .define _sigaction
 _sigaction:
 	mflr	r0
-	subi	r1, r1, mysize
-	stw	r31, mysave+8(r1)
-	stw	r30, mysave+4(r1)
-	stw	r29, mysave(r1)
-	stw	r0, mysave+12(r1)
-	li	r3, 0
-	stw	r3, mynset(r1)	   	! mynset = 0
-	lwz	r29, mysize(r1)		! r29 = signal number
-	lwz	r30, mysize+4(r1)	! r30 = new action
-	lwz	r31, mysize+8(r1)	! r31 = old action
+	li	r3, __NR_sigprocmask
+	stwu	r3, -signum(sp)		/* keep 0(sp) = __NR_sigprocmask */
+	stw	r0, savelr(sp)
+
+	/* Copy newact to stack (before blocking SIGBUS, SIGSEGV). */
+	lwz	r3, newact(sp)
+	mr.	r3, r3
+	beq	1f			/* skip if newact == NULL */
+	lwz	r4, sa_handler(r3)
+	lwz	r5, sa_mask(r3)
+	lwz	r6, sa_flags(r3)
+	lwz	r7, sa_restorer(r3)
+	stw	r4, mynewact+sa_handler(sp)
+	stw	r5, mynewact+sa_mask(sp)
+	stw	r6, mynewact+sa_flags(sp)
+	stw	r7, mynewact+sa_restorer(sp)
+
 	/*
-	 * If the new action is non-NULL, the signal number is in
-	 * range 1 to MAXSIG, and the new handler is not SIG_DFL 0
-	 * or SIG_IGN 1, then we interpose our bridge.
+	 * Block all signals to prevent a race.  After we set sharray,
+	 * we must call the kernel's sigaction before the next signal
+	 * handler runs.  This prevents two problems:
+	 *
+	 *  - The bridge might call the new handler while the kernel
+	 *    uses the mask and flags of the old handler.
+	 *
+	 *  - The signal handler might call sigaction() and destroy
+	 *    sharray.  We must block all signals because any signal
+	 *    handler might call sigaction() for our signal.
 	 */
-	cmpwi	cr0, r30, 0
-	subi	r7, r29, 1		! r7 = index in handlers
-	cmplwi	cr7, r7, MAXSIG		! unsigned comparison
-	beq	cr0, kernel
-	bge	cr7, kernel
-	lwz	r3, 0(r30)		! r3 = new handler
-	clrrwi.	r3, r3, 1
-	beq	cr0, kernel
-	/*
-	 * Block the signal while we build the bridge.  Prevents a
-	 * race if a signal arrives after we change the bridge but
-	 * before we change the action in the kernel.
-	 */
-	li	r4, 1
-	slw	r4, r4, r7
-	stw	r4, mynset(r1)		! mynmask = 1 << (signal - 1)
-	li	r3, SIG_BLOCK
-	la	r4, mynset(r1)
-	la	r5, myoset(r1)
-	stw	r3, 0(r1)
-	stw	r4, 4(r1)
-	stw	r5, 8(r1)
-	bl	_sigprocmask
-	/*
-	 * Point our bridge to the new signal handler.  Then copy the
-	 * new sigaction but point it to our bridge.
-	 */
-	lis	r6, hi16[handlers]
-	ori	r6, r6, lo16[handlers]
-	subi	r7, r29, 1
-	slwi	r7, r7, 2
-	lwz	r3, 0(r30)		! r3 = new handler
-	stwx	r3, r6, r7		! put it in array of handlers
-	lis	r3, hi16[bridge]
-	ori	r3, r3, lo16[bridge]
-	lwz	r4, 4(r30)
-	lwz	r5, 8(r30)
-	lwz	r6, 12(r30)
-	stw	r3, mynew(r1)		! sa_handler or sa_sigaction
-	stw	r4, mynew+4(r1)		! sa_mask
-	stw	r5, mynew+8(r1)		! sa_flags
-	stw	r6, mynew+12(r1)	! sa_restorer
-	la	r30, mynew(r1)
-kernel:
-	li	r3, __NR_sigaction
-	stw	r3, 0(r1)
-	stw	r29, 4(r1)
-	stw	r30, 8(r1)
-	stw	r31, 12(r1)
+1:	li	r4, SIG_SETMASK
+	li	r5, -1			/* mask signals 1 to 32 */
+	stw	r5, newmask(sp)
+	la	r5, newmask(sp)
+	la	r6, oldmask(sp)
+	stw	r4, 4(sp)		/* kept 0(sp) = __NR_sigprocmask */
+	stw	r5, 8(sp)
+	stw	r6, 12(sp)
 	bl	__syscall
+
 	/*
-	 * If we blocked the signal, then restore the old signal mask.
+	 * If the signal number is in range 1 to 31, and the new
+	 * handler is not SIG_DFL 0 or SIG_IGN 1, then we interpose
+	 * our bridge.
 	 */
-	lwz	r3, mynset(r1)
-	cmpwi	cr0, r3, 0
-	beq	cr0, fixold
-	li	r3, SIG_SETMASK
-	la	r4, myoset(r1)
-	li	r5, 0
-	stw	r3, 0(r1)
-	stw	r4, 4(r1)
-	stw	r5, 8(r1)
-	bl	_sigprocmask
-	/*
-	 * If the old sigaction is non-NULL and points to our bridge,
-	 * then point it to the signal handler.
-	 */
-fixold:
-	cmpwi	cr0, r31, 0
-	beq	cr0, leave
-	lis	r3, hi16[bridge]
-	ori	r3, r3, lo16[bridge]
-	lwz	r4, 0(r31)
-	cmpw	cr0, r3, r4
-	bne	cr0, leave
-	lis	r6, hi16[handlers]
-	ori	r6, r6, lo16[handlers]
-	subi	r7, r29, 1
-	slwi	r7, r7, 2
-	lwzx	r3, r6, r7	! get it from array of handlers
-	stw	r3, 0(r31)	! put it in old sigaction
-leave:
-	lwz	r0, mysave+12(r1)
-	lwz	r29, mysave(r1)
-	lwz	r30, mysave+4(r1)
-	lwz	r31, mysave+8(r1)
-	addi	r1, r1, mysize
+	lwz	r4, signum(sp)		/* keep r4 = signum */
+	addi	r5, r4, -1
+	cmplwi	r5, 30
+	bgt	2f			/* skip if out of range */
+
+	slwi	r5, r5, 2		/* r5 = sharray index */
+	lis	r6, ha16[sharray]
+	la	r6, lo16[sharray](r6)	/* r6 = sharray */
+	lwzx	r0, r6, r5
+	stw	r0, oldhandler(sp)	/* remember old handler */
+	lwz	r0, newact(sp)
+	mr.	r0, r0
+	beq	2f			/* skip if newact == NULL */
+
+	lwz	r3, mynewact+sa_handler(sp)
+	cmplwi	r3, 2			/* r3 = new handler */
+	blt	2f			/* skip if SIG_DFL or SIG_IGN */
+
+	stwx	r3, r6, r5		/* put new handler in sharray */
+	lis	r3, ha16[sigbridge]
+	la	r3, lo16[sigbridge](r3)
+	stw	r3, mynewact+sa_handler(sp)
+
+	/* Call the kernel's sigaction. */
+	/* sigaction(signum, &mynewact or NULL, &myoldact or NULL) */
+2:	li	r3, __NR_sigaction
+	lwz	r0, newact(sp)
+	mr.	r0, r0
+	beq	3f
+	la	r5, mynewact(sp)
+	b	4f
+3:	li	r5, 0
+4:	lwz	r0, oldact(sp)
+	mr.	r0, r0
+	beq	5f
+	la	r6, myoldact(sp)
+	b	6f
+5:	li	r6, 0
+6:	stw	r3, 0(sp)
+	stw	r4, 4(sp)		/* kept r4 = signum */
+	stw	r5, 8(sp)
+	stw	r6, 12(sp)
+	bl	__syscall
+	stw	r3, myret(sp)
+
+	/* Unblock signals by restoring old signal mask. */
+	li	r3, __NR_sigprocmask
+	li	r4, SIG_SETMASK
+	la	r5, oldmask(sp)
+	li	r6, 0
+	stw	r3, 0(sp)
+	stw	r4, 4(sp)
+	stw	r5, 8(sp)
+	stw	r6, 12(sp)
+	bl	__syscall
+
+	/* Copy oldact from stack (after unblocking BUS, SEGV). */
+	lwz	r3, oldact(sp)
+	mr.	r3, r3
+	beq	8f			/* skip if oldact == NULL */
+	lwz	r4, myoldact+sa_handler(sp)
+	lis	r5, ha16[sigbridge]
+	la	r5, lo16[sigbridge](r5)
+	cmpw	r4, r5
+	bne	7f
+	lwz	r4, oldhandler(sp)
+7:	lwz	r5, myoldact+sa_mask(sp)
+	lwz	r6, myoldact+sa_flags(sp)
+	lwz	r7, myoldact+sa_restorer(sp)
+	stw	r4, sa_handler(r3)
+	stw	r5, sa_mask(r3)
+	stw	r6, sa_flags(r3)
+	stw	r7, sa_restorer(r3)
+
+8:	lwz	r0, savelr(sp)
+	lwz	r3, myret(sp)
+	addi	sp, sp, signum
 	mtlr	r0
-	blr			! return from sigaction
+	blr

 /*
- * Linux calls bridge(signum) or bridge(signum, info, context) with
- * arguments in registers r3, r4, r5.
+ * Linux calls sigbridge(signum) or sigbridge(signum, info, context)
+ * with arguments in registers r3, r4, r5.
 */
-bridge:
+sigbridge:
 	mflr	r0
-	subi	r1, r1, 16
+	stwu	r3, -16(sp)	/* push frame; 0(sp) = signal number */
+	stw	r4, 4(sp)	/* info */
+	stw	r5, 8(sp)	/* context */
 	stw	r0, 12(r1)
-	stw	r3, 0(r1)	! signal number
-	stw	r4, 4(r1)	! info
-	stw	r5, 8(r1)	! context

-	lis	r6, hi16[handlers]
-	ori	r6, r6, lo16[handlers]
-	subi	r7, r3, 1
-	slwi	r7, r7, 2
+	lis	r6, ha16[sharray - 4]	/* ha16, not hi16: la adds lo16 as a signed value */
+	la	r6, lo16[sharray - 4](r6)	/* r6 = sharray - 4 */
+	slwi	r7, r3, 2	/* r7 = 4 * signum, so r6 + r7 = slot for signum */
 	lwzx	r6, r6, r7
 	mtctr	r6
-	bctrl			! call our signal handler
+	bctrl			/* call our signal handler */

-	lwz	r0, 12(r1)
+	lwz	r0, 12(sp)	/* reload saved lr */
 	addi	r1, r1, 16
 	mtlr	r0
-	blr			! return from bridge
+	blr			/* sigreturn(2) */

 .sect .bss
-handlers:
-	.space 4 * MAXSIG	! array of signal handlers
+sharray:
+	.space 4 * 31		/* handlers for signals 1 to 31 */
--- a/plat/linuxppc/libsys/trap.s
+++ b/plat/linuxppc/libsys/trap.s
@ -1,112 +0,0 @@
-#
-! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/_syscall.s,v $
-! $State: Exp $
-! $Revision: 1.1 $
-
-! Declare segments (the order is important).
-
-.sect .text
-.sect .rom
-.sect .data
-.sect .bss
-
-.sect .text
-
-#define IFFALSE 4
-#define IFTRUE 12
-#define ALWAYS 20
-
-#define LT 0
-#define GT 1
-#define EQ 2
-#define OV 3
-
-EARRAY	=  0
-ERANGE	=  1
-ESET	=  2
-EIOVFL	=  3
-EFOVFL	=  4
-EFUNFL	=  5
-EIDIVZ	=  6
-EFDIVZ	=  7
-EIUND	=  8
-EFUND	=  9
-ECONV	= 10
-ESTACK  = 16
-EHEAP	= 17
-EILLINS = 18
-EODDZ	= 19
-ECASE	= 20
-EMEMFLT	= 21
-EBADPTR = 22
-EBADPC  = 23
-EBADLAE = 24
-EBADMON = 25
-EBADLIN = 26
-EBADGTO = 27
-EUNIMPL = 63		! unimplemented em-instruction called
-
-! EM trap handling.
-
-.define .trap_ecase
-.trap_ecase:
-	addi r3, r0, ECASE
-	b .trap
-
-.define .trap_earray
-.trap_earray:
-	addi r3, r0, EARRAY
-	b .trap
-
-.define .trap_erange
-.trap_erange:
-	addi r3, r0, ERANGE
-	b .trap
-
-.define .trp
-.define .trap
-.trp:
-.trap:
-	cmpi cr0, 0, r3, 15      ! traps >15 can't be ignored
-	bc IFTRUE, LT, 1f
-
-	addi r4, r0, 1
-	rlwnm r4, r4, r3, 0, 31  ! calculate trap bit
-	li32 r5, .ignmask
-	lwz r5, 0(r5)            ! load ignore mask
-	and. r4, r4, r5          ! compare
-	bclr IFFALSE, EQ, 0      ! return if non-zero
-
-1:
-	li32 r4, .trppc
-	lwz r5, 0(r4)            ! load user trap routine
-	or. r5, r5, r5           ! test
-	bc IFTRUE, EQ, fatal     ! if no user trap routine, bail out
-
-	addi r0, r0, 0
-	stw r0, 0(r4)            ! reset trap routine
-
-	mfspr r0, lr
-	stwu r0, -4(sp)          ! save old lr
-
-	stwu r3, -4(sp)
-	mtspr ctr, r5
-	bcctrl ALWAYS, 0, 0      ! call trap routine
-
-	lwz r0, 4(sp)            ! load old lr again
-	addi sp, sp, 8           ! retract over stack usage
-	bclr ALWAYS, 0, 0        ! return
-
-fatal:
-	addi r3, r0, 1
-	li32 r4, message
-	addi r5, r0, 6
-	addi r0, r0, 4           ! write()
-	sc 0
-
-	addi r0, r0, 1           ! exit()
-	sc 0
-
-.sect .rom
-message:
-	.ascii "TRAP!\n"
--- a/plat/osx386/boot.s
+++ b/plat/osx386/boot.s
@ -58,8 +58,6 @@ begdata:

 .sect .bss
 begbss:
-.define hol0
-.comm hol0, 8                ! line number and filename (for debugging)

 .define _errno
 .comm _errno, 4              ! Posix errno storage
--- a/plat/osx386/libsys/build.lua
+++ b/plat/osx386/libsys/build.lua
@ -19,7 +19,8 @@ acklibrary {
 		"./sigaction.s",
 		"./stat.s",
 		"./write.s",
-		"plat/linux/libsys/errno.s",
+		"plat/linux/libsys/_hol0.s",
+		"plat/linux386/libsys/trapno.s",
 		"plat/osx/libsys/brk.c",
 		"plat/osx/libsys/creat.c",
 		"plat/osx/libsys/isatty.c",
--- a/plat/osxppc/boot.s
+++ b/plat/osxppc/boot.s
@ -29,7 +29,7 @@ begtext:

 	lwz r3, 0(sp)            ! r3 = argc
 	addi r4, sp, 4           ! r4 = argv
-	rlwinm r5, r3, 32-2, 2, 31 ! shift left 2 bits
+	slwi r5, r3, 2           ! r5 = argc * 4 (size of argv[0..argc-1])
 	add r5, r5, r4
-	addi r5, r5, 8           ! r5 = env
+	addi r5, r5, 4           ! r5 = env, just past argv's NULL terminator

@ -49,8 +49,6 @@ begdata:

 .sect .bss
 begbss:
-.define hol0
-.comm hol0, 8                ! line number and filename (for debugging)

 .define _errno
 .comm _errno, 4              ! Posix errno storage
--- a/plat/osxppc/descr
+++ b/plat/osxppc/descr
@ -10,16 +10,17 @@ var l={w}
 var la={w}
 var f={w}
 var fa={w}
+# Size 8 has alignment 4 in Mac OS, 8 in Linux.
 var d=8
-var da={d}
+var da=4
 var x=8
-var xa={x}
+var xa=4
 var ARCH=powerpc
 var PLATFORM=osxppc
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
 var CPP_F=-D__unix
 var ALIGN=-a0:4 -a1:4 -a2:4096 -a3:4 -b0:0x129c
-var MACHOPT_F=-m3
+var MACHOPT_F=-m2
 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr

 # Override the setting in fe so that files compiled for osxppc can see
--- a/plat/osxppc/libsys/build.lua
+++ b/plat/osxppc/libsys/build.lua
@ -19,7 +19,7 @@ acklibrary {
 		"./sigaction.s",
 		"./stat.s",
 		"./write.s",
-		"plat/linuxppc/libsys/trap.s",
+		"plat/linux/libsys/_hol0.s",
 		"plat/osx/libsys/brk.c",
 		"plat/osx/libsys/creat.c",
 		"plat/osx/libsys/isatty.c",
--- a/plat/osxppc/libsys/set_errno.s
+++ b/plat/osxppc/libsys/set_errno.s
@ -1,7 +1,7 @@
 .sect .text
 .define .set_errno
 .set_errno:
-	li32 r10, _errno
-	stw r3, 0(r10)		! set errno
-	addi r3, r0, -1		! return -1
-	bclr 20, 0, 0
+	lis r4, ha16[_errno]
+	stw r3, lo16[_errno](r4)	! set errno
+	li r3, -1			! return -1
+	blr
--- a/plat/qemuppc/descr
+++ b/plat/qemuppc/descr
@ -19,11 +19,8 @@ var PLATFORM=qemuppc
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
 var CPP_F=-D__unix
 var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x01000000
-var C_LIB={PLATFORMDIR}/libc-ansi.a
-# bitfields reversed for compatibility with (g)cc.
-var CC_ALIGN=-Vr
-var OLD_C_LIB={C_LIB}
-var MACHOPT_F=
+var MACHOPT_F=-m2
+var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr

 # Override the setting in fe so that files compiled for qemuppc can see
 # the platform-specific headers.
--- a/plat/qemuppc/libsys/trap.s
+++ b/plat/qemuppc/libsys/trap.s
@ -1,65 +0,0 @@
-#
-! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/_syscall.s,v $
-! $State: Exp $
-! $Revision: 1.1 $
-
-! Declare segments (the order is important).
-
-.sect .text
-.sect .rom
-.sect .data
-.sect .bss
-
-.sect .text
-
-#define IFFALSE 4
-#define IFTRUE 12
-#define ALWAYS 20
-
-#define LT 0
-#define GT 1
-#define EQ 2
-#define OV 3
-
-EARRAY	=  0
-ERANGE	=  1
-ESET	=  2
-EIOVFL	=  3
-EFOVFL	=  4
-EFUNFL	=  5
-EIDIVZ	=  6
-EFDIVZ	=  7
-EIUND	=  8
-EFUND	=  9
-ECONV	= 10
-ESTACK  = 16
-EHEAP	= 17
-EILLINS = 18
-EODDZ	= 19
-ECASE	= 20
-EMEMFLT	= 21
-EBADPTR = 22
-EBADPC  = 23
-EBADLAE = 24
-EBADMON = 25
-EBADLIN = 26
-EBADGTO = 27
-EUNIMPL = 63		! unimplemented em-instruction called
-
-.define .trap_ecase
-.trap_ecase:
-	b .trp
-
-.define .trap_earray
-.trap_earray:
-	b .trp
-
-.define .trap_erange
-.trap_erange:
-	b .trap
-
-.define .trp
-.define .trap
-.trp:
-.trap:
-	b .trp					! spin forever
--- a/tests/plat/_dummy_e.c
+++ b/tests/plat/_dummy_e.c
@ -1,6 +1,6 @@
 #include "test.h"

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT(0 == 0);
--- a/tests/plat/bss_e.c
+++ b/tests/plat/bss_e.c
@ -0,0 +1,27 @@
+#include "test.h"
+
+/*
+ * EM puts these variables in BSS.  Their initial values must be zero.
+ * Some platforms, like Linux, clear the BSS before they run the
+ * program.  For other platforms, like pc86, we clear the BSS in
+ * boot.s before we call _m_a_i_n.
+ */
+char c;
+int array[9000];
+short s;
+
+/* Bypasses the CRT, so there's no stdio. */
+void _m_a_i_n(void)
+{
+	int bad, i;
+
+	ASSERT(c == 0);
+	bad = 0;
+	for (i = 0; i < sizeof(array) / sizeof(array[0]); i++) {
+		if(array[i])
+			bad++;
+	}
+	ASSERT(bad == 0);
+	ASSERT(s == 0);
+	finished();
+}
--- a/tests/plat/bugs/bug-62-notvar_var_e.c
+++ b/tests/plat/bugs/bug-62-notvar_var_e.c
@ -40,7 +40,7 @@ void c(int i, int tru, int fal) {
  ASSERT((i != i) == fal);
 }

-/* Bypasses the CRT. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void) {
  a();
  b();
--- a/tests/plat/build.lua
+++ b/tests/plat/build.lua
@ -9,12 +9,14 @@ definerule("plat_testsuite",
 		-- Remember this is executed from the caller's directory; local
 		-- target names will resolve there.
 		local testfiles = filenamesof(
+			-- Test sources are globbed, so new tests are picked up automatically.
 			"tests/plat/*.c",
 			"tests/plat/*.e",
 			"tests/plat/*.p",
 			"tests/plat/b/*.b",
-			"tests/plat/bugs/bug-22-inn_mod.mod",
-			"tests/plat/bugs/bug-62-notvar_var_e.c"
+			"tests/plat/bugs/*.c",
+			"tests/plat/bugs/*.mod",
+			"tests/plat/m2/*.mod"
 		)

 		acklibrary {
--- a/tests/plat/csa_e.c
+++ b/tests/plat/csa_e.c
@ -11,7 +11,7 @@ int csa(int i)
    }
 }

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT(csa(0) == 0);
@ -23,4 +23,4 @@ void _m_a_i_n(void)
    ASSERT(csa(6) == 0);

    finished();
-}
+}
--- a/tests/plat/csb_e.c
+++ b/tests/plat/csb_e.c
@ -11,7 +11,7 @@ int csa(int i)
    }
 }

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT(csa(0) == 0);
@ -23,4 +23,4 @@ void _m_a_i_n(void)
    ASSERT(csa(600) == 0);

    finished();
-}
+}
--- a/tests/plat/doublecmp_e.c
+++ b/tests/plat/doublecmp_e.c
@ -4,7 +4,7 @@
 double one = 1.0;
 double zero = 0.0;

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT(zero == zero);
@ -17,4 +17,4 @@ void _m_a_i_n(void)
    ASSERT(one  >= one);

    finished();
-}
+}
--- a/tests/plat/dup_e.e
+++ b/tests/plat/dup_e.e
@ -0,0 +1,139 @@
+#
+    mes 2, EM_WSIZE, EM_PSIZE
+
+/*
+ * Tests _dup_ and _dus_ by loading 20 bytes from _src_, then making
+ * and checking some duplicates.  The compilers might never _dup_ or
+ * _dus_ with large sizes, so the compilers might work even if this
+ * test fails.  You can cheat this test if _cms_ always pushes zero.
+ */
+
+    exa src
+    exa size
+src
+    con 3593880729I4, 782166578I4, 4150666996I4, 2453272937I4, 3470523049I4
+size
+    con 20I2
+
+    exp $check
+    exp $_m_a_i_n
+    pro $_m_a_i_n, 0
+
+    /* Push 3 copies of src on stack. */
+    lae src
+    loi 20        /* 1st copy */
+    dup 20        /* 2nd copy */
+    lae size
+    loi 2
+    loc 2
+    loc EM_WSIZE
+    cuu
+    dus EM_WSIZE  /* 3rd copy */
+
+    cal $check
+    cal $finished
+    end /* $_m_a_i_n */
+
+    pro $check, 4 * EM_PSIZE + EM_WSIZE
+#define p1    (-1 * EM_PSIZE)
+#define p2    (-2 * EM_PSIZE)
+#define p3    (-3 * EM_PSIZE)
+#define p4    (-4 * EM_PSIZE)
+#define i     (p4 - EM_WSIZE)
+
+    /* Set pointers to all 4 copies. */
+    lae src
+    lal p4
+    sti EM_PSIZE  /* p4 = src */
+    lal 0
+    lal p3
+    sti EM_PSIZE  /* p3 = 3rd copy */
+    lal 20
+    lal p2
+    sti EM_PSIZE  /* p2 = 2nd copy */
+    lal 40
+    lal p1
+    sti EM_PSIZE  /* p1 = 1st copy */
+
+    /* Loop 20 times to verify each byte. */
+    loc 0
+    stl i
+4
+    lal p4
+    loi EM_PSIZE
+    loi 1         /* byte from src */
+    lal p3
+    loi EM_PSIZE
+    loi 1         /* byte from 3rd copy */
+    cms EM_WSIZE
+    zeq *3
+    loc (3 * 256)
+    lol i
+    adi EM_WSIZE  /* 0x300 + i */
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+3
+    lal p4
+    loi EM_PSIZE
+    loi 1         /* byte from src */
+    lal p2
+    loi EM_PSIZE
+    loi 1         /* byte from 2nd copy */
+    cms EM_WSIZE
+    zeq *2
+    loc (2 * 256)
+    lol i
+    adi EM_WSIZE  /* 0x200 + i */
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+2
+    lal p4
+    loi EM_PSIZE
+    loi 1         /* byte from src */
+    lal p1
+    loi EM_PSIZE
+    loi 1         /* byte from 1st copy */
+    cms EM_WSIZE
+    zeq *1
+    loc (1 * 256)
+    lol i
+    adi EM_WSIZE  /* 0x100 + i */
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+1
+    lal p4
+    loi EM_PSIZE
+    adp 1
+    lal p4
+    sti EM_PSIZE  /* increment p4 */
+    lal p3
+    loi EM_PSIZE
+    adp 1
+    lal p3
+    sti EM_PSIZE  /* increment p3 */
+    lal p2
+    loi EM_PSIZE
+    adp 1
+    lal p2
+    sti EM_PSIZE  /* increment p2 */
+    lal p1
+    loi EM_PSIZE
+    adp 1
+    lal p1
+    sti EM_PSIZE  /* increment p1 */
+    inl i
+    lol i
+    loc 20
+    blt *4        /* loop 20 times */
+
+    ret 0
+    end /* $check */
--- a/tests/plat/exg_e.e
+++ b/tests/plat/exg_e.e
@ -0,0 +1,83 @@
+#
+    mes 2, EM_WSIZE, EM_PSIZE
+
+/*
+ * Tests _exg_ by loading 40 bytes from _src_, then exchanging 20 and
+ * 20 bytes, and checking the result.  The compilers might never _exg_
+ * large sizes, so the compilers might work even if this test fails.
+ * You can cheat this test if _cms_ always pushes zero.
+ */
+
+    exa src
+src
+    con 1539465570I4, 1344465418I4, 1317578918I4, 1163467696I4, 2645261331I4
+    con 3981585269I4, 1433968975I4, 4256886989I4, 4114909542I4, 1817334375I4
+
+    exp $check
+    exp $_m_a_i_n
+    pro $_m_a_i_n, 0
+
+    lae src
+    loi 40
+    exg 20
+    cal $check
+    cal $finished
+    end /* $_m_a_i_n */
+
+    pro $check, 2 * EM_PSIZE + EM_WSIZE
+#define p1    (-1 * EM_PSIZE)
+#define p2    (-2 * EM_PSIZE)
+#define i     (p2 - EM_WSIZE)
+
+    lae src
+    lal p2
+    sti EM_PSIZE  /* p2 = src */
+    lal 0
+    adp 20
+    lal p1
+    sti EM_PSIZE  /* p1 = exchanged copy + 20 */
+
+    /* Loop 40 times to verify each byte. */
+    loc 0
+    stl i
+1
+    lal p2
+    loi EM_PSIZE
+    loi 1         /* byte from src */
+    lal p1
+    loi EM_PSIZE
+    loi 1         /* byte from exchanged copy */
+    cms EM_WSIZE
+    zeq *2
+    lol i
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+2
+    lal p2
+    loi EM_PSIZE
+    adp 1
+    lal p2
+    sti EM_PSIZE  /* increment p2 */
+    lal p1
+    loi EM_PSIZE  /* p1 */
+    inl i
+    /* When i reaches 20, p1 would reach end of exchanged copy. */
+    lol i
+    loc 20
+    beq *3
+    adp 1         /* p1 + 1 */
+    bra *4
+3
+    adp -39       /* p1 - 39, beginning of exchanged copy */
+4
+    lal p1
+    sti EM_PSIZE  /* move p1 */
+    lol i
+    loc 40
+    blt *1
+
+    ret 0
+    end /* $check */
--- a/tests/plat/from_d_to_si_e.c
+++ b/tests/plat/from_d_to_si_e.c
@ -8,7 +8,7 @@ double minusone = -1.0;
 double big = (double)INT_MAX;
 double minusbig = (double)INT_MIN;

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((int)zero == 0);
@ -18,4 +18,4 @@ void _m_a_i_n(void)
    ASSERT((int)minusbig == INT_MIN);

    finished();
-}
+}
--- a/tests/plat/from_d_to_ui_e.c
+++ b/tests/plat/from_d_to_ui_e.c
@ -6,7 +6,7 @@ double one = 1.0;
 double zero = 0.0;
 double big = (double)UINT_MAX;

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((unsigned int)zero == 0);
@ -14,4 +14,4 @@ void _m_a_i_n(void)
    ASSERT((unsigned int)big == UINT_MAX);

    finished();
-}
+}
--- a/tests/plat/from_si_to_d_e.c
+++ b/tests/plat/from_si_to_d_e.c
@ -8,7 +8,7 @@ int minusone = -1;
 int big = INT_MAX;
 int minusbig = INT_MIN;

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((double)zero == 0.0);
@ -18,4 +18,4 @@ void _m_a_i_n(void)
    /* ASSERT((double)minusbig == (double)INT_MIN); FIXME: fails for now */

    finished();
-}
+}
--- a/tests/plat/from_ui_to_d_e.c
+++ b/tests/plat/from_ui_to_d_e.c
@ -6,7 +6,7 @@ unsigned int one_u = 1;
 unsigned int zero_u = 0;
 unsigned int big_u = UINT_MAX;

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((double)zero_u == 0.0);
@ -14,4 +14,4 @@ void _m_a_i_n(void)
    ASSERT((double)big_u == (double)UINT_MAX);

    finished();
-}
+}
--- a/tests/plat/inn_e.e
+++ b/tests/plat/inn_e.e
@ -14,6 +14,9 @@
    zeq *1

    loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
    cal $fail
-   ass EM_WSIZE
+   asp 4         /* drop fail's 4-byte argument; ass would pop it as a byte count */
 1
@ -31,6 +34,9 @@
    zne *2

    loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
    cal $fail
-   ass EM_WSIZE
+   asp 4         /* drop fail's 4-byte argument */
 2
@ -49,6 +55,9 @@
    zeq *3

    loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
    cal $fail
-   ass EM_WSIZE
+   asp 4         /* drop fail's 4-byte argument */
 3
@ -67,11 +76,12 @@
    zne *4

    loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
    cal $fail
-   ass EM_WSIZE
+   asp 4         /* drop fail's 4-byte argument */
 4

    cal $finished
-    ret 0
-    
    end
--- a/tests/plat/intadd_e.c
+++ b/tests/plat/intadd_e.c
@ -6,7 +6,7 @@ int one = 1;
 int zero = 0;
 int minusone = -1;

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((two + one)      == 3);
@ -28,4 +28,4 @@ void _m_a_i_n(void)
    ASSERT(((unsigned int)-1  + (unsigned int)two) == 1);

    finished();
-}
+}
--- a/tests/plat/intcmp_e.c
+++ b/tests/plat/intcmp_e.c
@ -4,7 +4,7 @@
 int one = 1;
 int zero = 0;

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT(zero == zero);
@ -62,4 +62,4 @@ void _m_a_i_n(void)
    ASSERT((unsigned int)1 >= (unsigned int)one);

    finished();
-}
+}
--- a/tests/plat/intdiv_e.c
+++ b/tests/plat/intdiv_e.c
@ -6,7 +6,7 @@ int two = 2;
 int one = 1;
 int zero = 0;

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((three / two) == 1);
@ -25,4 +25,4 @@ void _m_a_i_n(void)
    ASSERT((3 / -two) == -1);

    finished();
-}
+}
--- a/tests/plat/intrem_e.c
+++ b/tests/plat/intrem_e.c
@ -6,7 +6,7 @@ int two = 2;
 int one = 1;
 int zero = 0;

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((three % two) == 1);
@ -25,4 +25,4 @@ void _m_a_i_n(void)
    ASSERT((3 % -two) == 1);

    finished();
-}
+}
--- a/tests/plat/intshift_e.c
+++ b/tests/plat/intshift_e.c
@ -6,7 +6,7 @@ int one = 1;
 int zero = 0;
 int minusone = -1;

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((one     <<zero) == 1);
@ -50,4 +50,4 @@ void _m_a_i_n(void)
    ASSERT(((unsigned int)minusone>>(unsigned int)1)  == (UINT_MAX>>1));

    finished();
-}
+}
--- a/tests/plat/intsub_e.c
+++ b/tests/plat/intsub_e.c
@ -7,7 +7,7 @@ int one = 1;
 int zero = 0;
 int minusone = -1;

-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((two - one) == 1);
@ -29,4 +29,4 @@ void _m_a_i_n(void)
    ASSERT(((unsigned int)1   - (unsigned int)two) == UINT_MAX);

    finished();
-}
+}
--- a/tests/plat/lib/test.c
+++ b/tests/plat/lib/test.c
@ -5,7 +5,7 @@
 void finished(void)
 {
    static const char s[] = "@@FINISHED\n";
-    write(1, s, sizeof(s));
+    write(1, s, sizeof(s)-1);
    _exit(0);
 }

@ -16,7 +16,7 @@ void writehex(uint32_t code)

    do
    {
-        *--p = "0123456789abcdef"[code & 0xf];
+        *--p = "0123456789abcdef"[(unsigned int)code & 0xf];
        code >>= 4;
    }
    while (code > 0);
--- a/tests/plat/m2/ConvTest_mod.mod
+++ b/tests/plat/m2/ConvTest_mod.mod
@ -0,0 +1,36 @@
+MODULE ConvTest;
+FROM Conversions IMPORT
+  ConvertOctal, ConvertHex, ConvertCardinal, ConvertInteger;
+FROM Strings IMPORT CompareStr;
+FROM Test IMPORT fail, finished;
+
+(* Asserts a = b, or fails with code. *)
+PROCEDURE A(a, b: ARRAY OF CHAR; code: INTEGER);
+BEGIN
+  IF (CompareStr(a, b) # 0) OR (CompareStr(a, "wrong string") = 0) THEN
+    fail(code)
+  END
+END A;
+
+VAR
+  str: ARRAY [0..15] OF CHAR;
+BEGIN
+  ConvertOctal(  9, 6, str); A("    11", str, 1);
+  ConvertOctal( 59, 6, str); A("    73", str, 2);
+  ConvertOctal(278, 6, str); A("   426", str, 3);
+
+  ConvertHex(  9, 6, str); A("     9", str, 11H);
+  ConvertHex( 59, 6, str); A("    3B", str, 12H);
+  ConvertHex(278, 6, str); A("   116", str, 13H);
+
+  ConvertCardinal(  9, 6, str); A("     9", str, 21H);
+  ConvertCardinal( 59, 6, str); A("    59", str, 22H);
+  ConvertCardinal(278, 6, str); A("   278", str, 23H);
+
+  ConvertInteger(   9, 6, str); A("     9", str, 31H);
+  ConvertInteger(  59, 6, str); A("    59", str, 32H);
+  ConvertInteger( 278, 6, str); A("   278", str, 33H);
+  ConvertInteger(-424, 6, str); A("  -424", str, 34H);
+
+  finished;
+END ConvTest.
--- a/tests/plat/m2/NestProc_mod.mod
+++ b/tests/plat/m2/NestProc_mod.mod
@ -0,0 +1,132 @@
+(*
+ * Calls nested procedures.  The compiler emits the EM instructions
+ * _lxl_ and _lxa_ to access the variables in the statically enclosing
+ * procedures.
+ *
+ * You can cheat this test if a = b is TRUE for any a, b.
+ *)
+MODULE NestProc;
+FROM Test IMPORT fail, finished;
+
+(* Asserts cond, or fails with code. *)
+PROCEDURE A(cond: BOOLEAN; code: INTEGER);
+BEGIN
+  IF NOT cond THEN fail(code) END
+END A;
+
+TYPE
+  Set8 = SET OF [0..63];
+  (* Box has fields of size 8, 4, and 1. *)
+  Box = RECORD
+    huge: Set8;
+    big: LONGINT;
+    small: CHAR;
+    tiny: CHAR;
+  END;
+
+PROCEDURE First(a, b: INTEGER; in: Box): Box;
+  VAR c, d: INTEGER;
+      out: Box;
+
+  PROCEDURE Second(e: INTEGER);
+    VAR f: INTEGER;
+
+    PROCEDURE Third(g: INTEGER);
+      VAR h: INTEGER;
+
+      PROCEDURE CheckThird;
+      BEGIN
+        A(a = 1354, 31H);   (* lxa 3 *)
+        A(b = 3385, 32H);
+        A(c = 14349, 33H);  (* lxl 3 *)
+        A(d = 30989, 34H);
+        A(e = 28935, 35H);  (* lxa 2 *)
+        A(f = 13366, 36H);  (* lxl 2 *)
+        A(g = 7988, 37H);   (* lxa 1 *)
+        A(h = 11711, 38H);  (* lxl 1 *)
+      END CheckThird;
+
+      PROCEDURE Fourth(i: INTEGER);
+        VAR j: INTEGER;
+
+        PROCEDURE Fifth(k: INTEGER);
+          VAR l: INTEGER;
+
+          PROCEDURE Sixth(): INTEGER;
+          BEGIN
+            A(e = 2, 61H);      (* lxa 4 *)
+            A(f = 11703, 62H);  (* lxl 4 *)
+
+            b := 3385;   (* lxa 5 *)
+            d := 30989;  (* lxl 5 *)
+            e := 28935;  (* lxa 4 *)
+            f := 13366;  (* lxl 4 *)
+            CheckThird;
+
+            (* lxa 5 *)
+            A(in.huge = Set8{11, 12, 40, 40, 43, 56}, 63H);
+            A(in.big = 2130020019D, 64H);
+            A(in.small = 300C, 65H);
+            A(in.tiny = 175C, 66H);
+
+            (* lxl 5 *)
+            out.huge := Set8{8, 19, 36, 41, 47, 62};
+            out.big := 385360915D;
+            out.small := 366C;
+            out.tiny := 131C;
+
+            j := k;  (* lxl 2, lxa 1 *)
+            l := i;  (* lxl 1, lxa 2 *)
+            RETURN 5217;
+          END Sixth;
+
+          PROCEDURE TwiceSixth(): INTEGER;
+          BEGIN
+            (* lxa and lxl must follow the static chain from Sixth to
+             * Fifth, not dynamic chain from Sixth to TwiceSixth. *)
+            RETURN 2 * Sixth();
+          END TwiceSixth;
+
+        BEGIN (* Fifth *)
+          A(TwiceSixth() = 10434, 51H);
+          A(k = 11567, 52H);
+          A(l = 32557, 53H);
+        END Fifth;
+
+      BEGIN (* Fourth *)
+        Fifth(11567);  (* k *)
+        A(i = 32557, 41H);
+        A(j = 11567, 42H);
+      END Fourth;
+
+    BEGIN (* Third *)
+      h := 11711;
+      Fourth(32557);  (* i *)
+    END Third;
+
+  BEGIN (* Second *)
+    f := 11703;
+    Third(7988);  (* g *)
+  END Second;
+
+BEGIN (* First *)
+  c := 14349;
+  d := 17850;
+  Second(2);  (* e *)
+  RETURN out
+END First;
+
+VAR
+  x: Box;
+BEGIN
+  x.huge := Set8{11, 12, 40, 40, 43, 56};
+  x.big := 2130020019D;
+  x.small := 300C;
+  x.tiny := 175C;
+  x := First(1354, 19516, x);  (* a, b, in *)
+  A(x.huge = Set8{8, 19, 36, 41, 47, 62}, 71H);
+  A(x.big = 385360915D, 72H);
+  A(x.small = 366C, 73H);
+  A(x.tiny = 131C, 74H);
+  finished;
+END NestProc.
--- a/tests/plat/m2/OpenArray_mod.mod
+++ b/tests/plat/m2/OpenArray_mod.mod
@ -0,0 +1,59 @@
+(*
+ * Passes an open array to a procedure.  The back end must implement
+ * some EM instructions for accessing arrays.
+ *)
+MODULE OpenArray;
+FROM Test IMPORT fail, finished;
+
+(* Asserts condition or fails with code. *)
+PROCEDURE A(cond: BOOLEAN; code: INTEGER);
+BEGIN
+  IF NOT cond THEN fail(code) END
+END A;
+
+(* Called as Modify(ary1, 1) or Modify(ary2, 2). *)
+PROCEDURE Modify(VAR ary: ARRAY OF INTEGER; what: INTEGER);
+  VAR hi: INTEGER;
+BEGIN
+  hi := what * 100H;
+
+  (* Indices must be from 0 to HIGH(ary). *)
+  A((what = 1) = (HIGH(ary) = 3), hi + 1);
+  A((what = 2) = (HIGH(ary) = 9), hi + 2);
+
+  (* ary[2] must equal ary1[3] or ary2[3]. *)
+  A((what = 1) = (ary[2] = 13), hi + 3);
+  A((what = 2) = (ary[2] = 37), hi + 4);
+
+  (* Modify some values. *)
+  IF HIGH(ary) >= 3 THEN ary[3] := 20 END;
+  IF HIGH(ary) >= 6 THEN ary[6] := 40 END;
+  IF HIGH(ary) >= 9 THEN ary[9] := 12 END;
+END Modify;
+
+VAR
+  ary1: ARRAY [1..4] OF INTEGER;
+  ary2: ARRAY [1..10] OF INTEGER;
+BEGIN
+  (* Initialize the arrays. *)
+  ary1[1] :=  6; ary1[2] :=  9; ary1[3] := 13; ary1[4] := 49;
+
+  ary2[1] := 56; ary2[2] := 79; ary2[3] := 37; ary2[4] :=  0;
+  ary2[5] := 70; ary2[6] := 62; ary2[7] := 64; ary2[8] := 92;
+  ary2[9] := 29; ary2[10] := 90;
+
+  (* Pass them as open arrays. *)
+  Modify(ary1, 1);
+  Modify(ary2, 2);
+
+  (* Check that only ary1[4] and ary2[4], ary2[7], ary2[10] changed. *)
+  A(ary1[1] =  6, 301H); A(ary1[2] =  9, 302H); A(ary1[3] = 13, 303H);
+  A(ary1[4] = 20, 304H);
+
+  A(ary2[1] = 56, 401H); A(ary2[2] = 79, 402H); A(ary2[3] = 37, 403H);
+  A(ary2[4] = 20, 404H); A(ary2[5] = 70, 405H); A(ary2[6] = 62, 406H);
+  A(ary2[7] = 40, 407H); A(ary2[8] = 92, 408H); A(ary2[9] = 29, 409H);
+  A(ary2[10] = 12, 40AH);
+
+  finished;
+END OpenArray.
--- a/tests/plat/m2/SemaTest_mod.mod
+++ b/tests/plat/m2/SemaTest_mod.mod
@ -0,0 +1,157 @@
+(*
+ * Generates some integer sequences.  Each generator is a process that
+ * yields integers to the main process.  ACK switches processes by
+ * saving and restoring the stack.  It uses _lor_ and _str_ to save
+ * and restore the local base and frame pointer.
+ *)
+MODULE SemaTest;
+FROM Semaphores IMPORT Sema, NewSema, Down, Up, StartProcess;
+FROM Storage IMPORT ALLOCATE;
+FROM Test IMPORT fail, finished;
+
+TYPE
+  Generator = POINTER TO GeneratorRecord;
+  GeneratorRecord = RECORD
+    resume: Sema;       (* up when resuming generator *)
+    yield: Sema;        (* up when yielding value *)
+    value: INTEGER;
+  END;
+VAR
+  curgen: Generator;    (* current generator *)
+  startLock: Sema;      (* down when booting generator *)
+  startProc: PROC;
+  startSelf: Generator;
+
+PROCEDURE BootGenerator;
+  VAR pr: PROC; self: Generator;
+BEGIN
+  pr := startProc;
+  self := startSelf;
+  Up(startLock);
+  Down(self^.resume);   (* wait for first Resume *)
+  pr();
+END BootGenerator;
+
+PROCEDURE StartGenerator(gen: Generator; pr: PROC);
+BEGIN
+  gen^.resume := NewSema(0);
+  gen^.yield := NewSema(0);
+  Down(startLock);
+  startProc := pr;
+  startSelf := gen;
+  StartProcess(BootGenerator, 8192);
+END StartGenerator;
+
+PROCEDURE Resume(gen: Generator): INTEGER;
+  VAR self: Generator;
+BEGIN
+  self := curgen;
+  curgen := gen;
+  Up(gen^.resume);
+  Down(gen^.yield);     (* wait for Yield *)
+  curgen := self;
+  RETURN gen^.value
+END Resume;
+
+PROCEDURE Yield(i: INTEGER);
+  VAR self: Generator;
+BEGIN
+  self := curgen;
+  self^.value := i;
+  Up(self^.yield);      (* curgen becomes invalid *)
+  Down(self^.resume);   (* wait for Resume *)
+END Yield;
+
+PROCEDURE YieldHalfOf(i: INTEGER);
+BEGIN
+  Yield(i DIV 2);
+END YieldHalfOf;
+
+PROCEDURE Triangular;
+  (* Yields the triangular numbers, http://oeis.org/A000217 *)
+  VAR n: INTEGER;
+BEGIN
+  n := 0;
+  LOOP
+    YieldHalfOf(n * (n + 1));
+    INC(n);
+  END;
+END Triangular;
+
+PROCEDURE Pentagonal;
+  (* Yields the pentagonal numbers, http://oeis.org/A000326 *)
+  VAR n: INTEGER;
+BEGIN
+  n := 0;
+  LOOP
+    YieldHalfOf(n * (3 * n - 1));
+    INC(n);
+  END;
+END Pentagonal;
+
+PROCEDURE Odious;
+  (* Yields the odious numbers, http://oeis.org/A000069 *)
+  VAR b, i, n: INTEGER;
+BEGIN
+  n := 1;
+  LOOP
+    (* b := count bits in n *)
+    b := 0;
+    i := n;
+    WHILE i # 0 DO
+      INC(b, i MOD 2);
+      i := i DIV 2;
+    END;
+
+    IF (b MOD 2) = 1 THEN
+      Yield(n);
+    END;
+    INC(n);
+  END;
+END Odious;
+
+TYPE
+  Triple = ARRAY[1..3] OF INTEGER;
+PROCEDURE T(i1, i2, i3: INTEGER): Triple;
+  VAR t: Triple;
+BEGIN
+  t[1] := i1; t[2] := i2; t[3] := i3; RETURN t
+END T;
+
+CONST
+  two28 = 268435456D;   (* 0x1000_0000 *)
+VAR
+  a: ARRAY [0..9] OF Triple;
+  tri, pen, odi: Generator;
+  i, g1, g2, g3: INTEGER;
+BEGIN
+  startLock := NewSema(1);
+
+  ALLOCATE(tri, SIZE(GeneratorRecord));
+  ALLOCATE(pen, SIZE(GeneratorRecord));
+  ALLOCATE(odi, SIZE(GeneratorRecord));
+  StartGenerator(tri, Triangular);
+  StartGenerator(pen, Pentagonal);
+  StartGenerator(odi, Odious);
+
+  a[0] := T( 0,   0,  1);
+  a[1] := T( 1,   1,  2);
+  a[2] := T( 3,   5,  4);
+  a[3] := T( 6,  12,  7);
+  a[4] := T(10,  22,  8);
+  a[5] := T(15,  35, 11);
+  a[6] := T(21,  51, 13);
+  a[7] := T(28,  70, 14);
+  a[8] := T(36,  92, 16);
+  a[9] := T(45, 117, 19);
+
+  FOR i := 0 TO INTEGER(9) DO
+    g1 := Resume(tri);
+    g2 := Resume(pen);
+    g3 := Resume(odi);
+    IF g1 # a[i][1] THEN fail(1D * two28 + LONG(a[i][1])) END;
+    IF g2 # a[i][2] THEN fail(2D * two28 + LONG(a[i][2])) END;
+    IF g3 # a[i][3] THEN fail(3D * two28 + LONG(a[i][3])) END;
+  END;
+  finished;
+END SemaTest.
--- a/tests/plat/m2/Set100_mod.mod
+++ b/tests/plat/m2/Set100_mod.mod
@ -0,0 +1,61 @@
+(*
+ * Operates on sets of 100 integers.  The compiler emits, and the back
+ * end must implement, the EM instructions for large sets.
+ *)
+MODULE Set100;
+FROM Test IMPORT fail, finished;
+
+(* Asserts condition or fails with code. *)
+PROCEDURE A(cond: BOOLEAN; code: INTEGER);
+BEGIN
+  IF NOT cond THEN fail(code) END
+END A;
+
+TYPE
+  Num = [1..100];
+  NumSet = SET OF Num;
+VAR
+  (* VAR, not CONST, so compiler can't do constant operations. *)
+  primes, teens, lowevens, eighties, nineties: NumSet;
+CONST
+  (* These are the expected results of some set operations. *)
+  primeteen = NumSet{13, 17, 19};
+  compeighties = NumSet{80..82, 84..88};
+  teenxoreven = NumSet{2, 4, 6, 8, 10, 12, 13, 15, 17, 19, 20};
+  eightiesnineties = NumSet{80..99};
+
+(* Checks that some set is equal to the expected result.  Also checks
+ * that the set is not equal to the other sets. *)
+PROCEDURE Check(set: NumSet; what: INTEGER);
+  VAR hi: INTEGER;
+BEGIN
+  hi := what * 100H;
+
+  (* The compiler uses cms in EM to check set equality. *)
+  A((what = 1) = (set = primeteen), hi + 1);
+  A((what = 2) = (set = compeighties), hi + 2);
+  A((what = 3) = (set = teenxoreven), hi + 3);
+  A((what = 4) = (set = eightiesnineties), hi + 4);
+END Check;
+
+PROCEDURE Range(min: Num; max: Num): NumSet;
+BEGIN
+  (* The compiler calls LtoUset in lang/m2/libm2/LtoUset.e *)
+  RETURN NumSet{min..max}
+END Range;
+
+BEGIN
+  primes := NumSet{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43,
+                   47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97};
+  teens := NumSet{13, 14, 15, 16, 17, 18, 19};
+  lowevens := NumSet{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
+  eighties := Range(80, 89);
+  nineties := Range(90, 99);
+
+  Check(primes * teens, 1);
+  Check(eighties - primes, 2);
+  Check(teens / lowevens, 3);
+  Check(eighties + nineties, 4);
+
+  finished;
+END Set100.
--- a/tests/plat/m2/StringTest_mod.mod
+++ b/tests/plat/m2/StringTest_mod.mod
@ -0,0 +1,55 @@
+MODULE StringTest;
+FROM Strings IMPORT
+  Assign, Insert, Delete, Pos, Copy, Concat, Length, CompareStr;
+FROM Test IMPORT fail, finished;
+
+(* Asserts condition or fails with code. *)
+PROCEDURE A(cond: BOOLEAN; code: INTEGER);
+BEGIN
+  IF NOT cond THEN fail(code) END
+END A;
+
+VAR
+  small: ARRAY [0..3] OF CHAR;
+  big: ARRAY [0..99] OF CHAR;
+BEGIN
+  (* CompareStr *)
+  A(CompareStr("ablaze", "ablaze") = 0, 1);
+  A(CompareStr("ablaze", "abloom") < 0, 2);
+  A(CompareStr("abloom", "ablaze") > 0, 3);
+  A(CompareStr("abloom", "abloom") = 0, 4);
+
+  (* Assign, Insert, Delete *)
+  Assign("obsequiosity", small);
+  A(CompareStr("obsequiosity", small) > 0, 11H);
+  Assign("obsequiosity", big);
+  A(CompareStr("obsequiosity", big) = 0, 12H);
+  A(big[11] = 'y', 13H);
+  A(big[11] # 0C, 14H);
+  A(big[12] # 'y', 15H);
+  A(big[12] = 0C, 16H);
+  Insert(" omnihuman", big, 9);
+  A(CompareStr("obsequios omnihumanity", big) = 0, 17H);
+  Delete(big, 6, 15);
+  A(CompareStr("obsequy", big) = 0, 18H);
+
+  (* Pos, Copy *)
+  Assign("Now is the time for all good men to come...", big);
+  A(Pos("w", big) = 2, 21H);
+  A(Pos("t", big) = 7, 22H);
+  A(Pos("ti", big) = 11, 23H);
+  A(Pos("men", big) = 29, 24H);
+  A(Pos("women", big) > 42, 25H);
+  Copy(big, 29, 2, small);
+  A(CompareStr("me", small) = 0, 26H);
+
+  (* Concat, Length *)
+  Concat("pictorial", "ist", big);
+  A(CompareStr("pictorialist", big) = 0, 31H);
+  A(Length(big) = 12, 32H);
+  Concat("zit", "her", small);
+  A(CompareStr("zither", small) > 0, 33H);
+  A(Length(small) < 5, 34H);
+
+  finished;
+END StringTest.
--- a/tests/plat/rck_e.e
+++ b/tests/plat/rck_e.e
@ -0,0 +1,186 @@
+#
+    mes 2, EM_WSIZE, EM_PSIZE
+
+/*
+ * Uses _rck_ for range checks.  Catches the EM trap if a value is out
+ * of range, and continues with the next instruction after _rck_.
+ *
+ * Some back ends, like i80, ignore _rck_, so this test fails.
+ */
+
+testnr
+    con 1         ; test number
+caught
+    con 0         ; number of caught traps
+
+    inp $next
+    inp $catch
+    inp $never
+    exp $_m_a_i_n
+    pro $_m_a_i_n,0
+
+    lim           ; load ignore mask
+    loc 2
+    and EM_WSIZE  ; check bit 1 << ERANGE
+    zeq *1        ; fail if ignoring ERANGE
+.1
+    rom 1I4
+    lae .1
+    loi 4
+    cal $fail
+    asp 4
+1
+
+    cal $next     ; increment testnr, catch next trap
+    loc 10125
+.2
+    rom 4283, 13644
+    lae .2
+    rck EM_WSIZE  ; testnr 2 in range
+    asp EM_WSIZE
+
+    cal $next
+    loc 4282
+    lae .2
+    rck EM_WSIZE  ; testnr 3 out of range
+    asp EM_WSIZE
+
+    cal $next
+    loc 4283
+    lae .2
+    rck EM_WSIZE  ; testnr 4 in range
+    asp EM_WSIZE
+
+    cal $next
+    loc 13644
+    lae .2
+    rck EM_WSIZE  ; testnr 5 in range
+    asp EM_WSIZE
+
+    cal $next
+    loc 13655
+    lae .2
+    rck EM_WSIZE  ; testnr 6 out of range
+    asp EM_WSIZE
+
+    cal $next
+    loc -13015
+.7
+    rom -31344, -1898
+    lae .7
+    rck EM_WSIZE  ; testnr 7 in range
+    asp EM_WSIZE
+
+    cal $next
+    loc 8580
+.8
+    rom -26315, 4588
+    lae .8
+    rck EM_WSIZE  ; testnr 8 out of range
+    asp EM_WSIZE
+
+    ; The last test raised a trap, so now there is no trap handler.
+    lpi $never
+    sig           ; push old trap handler
+    loc 0
+    loc EM_WSIZE
+    loc EM_PSIZE
+    cuu           ; push NULL pointer
+    cmp
+    zeq *17       ; fail unless old handler is NULL
+.17
+    rom 17I4
+    lae .17
+    loi 4
+    cal $fail
+    asp 4
+17
+    ; Change the trap handler from $never to $catch.
+    lpi $catch
+    sig
+    lpi $never
+    cmp
+    zeq *18
+.18
+    rom 18I4
+    lae .18
+    loi 4
+    cal $fail
+    asp 4
+18
+    ; Begin ignoring range traps.
+    loc 2         ; 1 << ERANGE
+    sim
+    loc 18
+    ste testnr
+    loc 8580
+    lae .8
+    rck EM_WSIZE  ; testnr 18 out of range but ignored
+
+    ; Fail if we caught the wrong number of traps.
+    loe caught
+    loc 3
+    beq *20
+.20
+    rom 20I4
+    lae .20
+    loi 4
+    cal $fail
+    asp 4
+20
+    cal $finished
+    end
+
+    pro $next,0
+    ine testnr    ; next test
+    lpi $catch
+    sig           ; catch next EM trap (only one trap)
+    asp EM_PSIZE
+    ret 0
+    end
+
+    pro $catch,0
+    ine caught    ; count this trap
+
+    lol 0         ; load trap number
+    loc 1
+    beq *1        ; fail if trap != ERANGE
+.101
+    rom 257I4
+    lae .101
+    loi 4
+    cal $fail
+    ; Wrong type of trap.  _rtt_ might not work, so exit now.
+    cal $finished
+1
+    ; Fail if the wrong test raised this trap.
+    loe testnr
+    loc 3
+    beq *2
+    loe testnr
+    loc 6
+    beq *2
+    loe testnr
+    loc 8
+    beq *2
+    loc 256
+    loe testnr
+    adi EM_WSIZE  ; 0x100 + testnr
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+2
+    rtt           ; return from trap handler
+    end
+
+    pro $never,0
+.200
+    rom 200I4
+    lae .200
+    loi 4
+    cal $fail
+    asp 4
+    rtt
+    end
--- a/tests/plat/rotate_e.e
+++ b/tests/plat/rotate_e.e
@ -0,0 +1,223 @@
+#
+    mes 2, EM_WSIZE, EM_PSIZE
+
+/*
+ * Tests _rol_ (rotate left) and _ror_ (rotate right).  Several back
+ * ends provide _rol_ and _ror_, but as of year 2017, the compilers
+ * and optimizers had never emitted _rol_ or _ror_.
+ *
+ * By tradition, _rol_ and _ror_ can't rotate values shorter than the
+ * word size, or longer than 4 bytes.
+ *  - If word size is 2, then try rotating 2-byte and 4-byte values.
+ *  - If word size is 4, then try rotating 4-byte values.
+ *
+ * You can cheat this test if _cmu_ always pushes zero.
+ */
+
+#if EM_WSIZE == 2
+#define LEN2  4
+    exa table2
+    exa left2
+    exa right2
+table2         /* left, right */
+    con 12715U2  /*  0,  0 */
+    con 25430U2  /*  1, 15 */
+    con 43825U2  /*  8,  8 */
+    con 39125U2  /* 15,  1 */
+left2
+    con 0I2, 1I2, 8I2, 15I2
+right2
+    con 0I2, 15I2, 8I2, 1I2
+#endif
+
+#define LEN4  4
+    exa table4
+    exa left4
+    exa right4
+table4              /* left, right */
+    con  437223536U4  /*  0,  0 */
+    con  874447072U4  /*  1, 31 */
+    con 2154830351U4  /* 16, 16 */
+    con  218611768U4  /* 31,  1 */
+left4
+    con 0I2, 1I2, 16I2, 31I2
+right4
+    con 0I2, 31I2, 16I2, 1I2
+
+    exa val4
+    exa val4left7
+    exa val4right11
+val4
+    con 4283808839U4
+val4left7
+    con 2866684927U4
+val4right11
+    con 2298473143U4
+
+    exp $_m_a_i_n
+    pro $_m_a_i_n, EM_WSIZE  /* one word of locals, used for i */
+#define i -EM_WSIZE
+
+#if EM_WSIZE == 2
+    /*
+     * Loop for LEN2 items in table2.
+     */
+    loc 0
+    stl i
+1
+    lae table2
+    loi 2         /* value to rotate */
+    lae left2
+    lol i
+    loc 1
+    sli EM_WSIZE  /* i * 2 = byte offset of item i */
+    ads EM_WSIZE  /* &left2[i] */
+    loi 2         /* left distance */
+    rol 2         /* rotate left */
+    lae table2
+    lol i
+    loc 1
+    sli EM_WSIZE
+    ads EM_WSIZE  /* &table2[i] */
+    loi 2         /* expected result */
+    cmu 2         /* compare result with expected */
+    zeq *2        /* equal: skip the failure report */
+    loc __LINE__  /* failure code is this line number */
+    loc EM_WSIZE
+    loc 4
+    cuu           /* widen the code from word size to 4 bytes */
+    cal $fail
+    asp 4
+2
+    lae table2
+    loi 2         /* value to rotate */
+    lae right2
+    lol i
+    loc 1
+    sli EM_WSIZE
+    ads EM_WSIZE  /* &right2[i] */
+    loi 2         /* right distance */
+    ror 2         /* rotate right */
+    lae table2
+    lol i
+    loc 1
+    sli EM_WSIZE
+    ads EM_WSIZE  /* &table2[i] */
+    loi 2         /* expected result */
+    cmu 2         /* compare result with expected */
+    zeq *3        /* equal: skip the failure report */
+    loc __LINE__  /* failure code is this line number */
+    loc EM_WSIZE
+    loc 4
+    cuu           /* widen the code from word size to 4 bytes */
+    cal $fail
+    asp 4
+3
+    inl i         /* loop LEN2 times */
+    lol i
+    loc LEN2
+    blt *1
+#endif /* EM_WSIZE == 2 */
+
+    /*
+     * Loop for LEN4 items in table4.
+     */
+    loc 0
+    stl i
+4
+    lae table4
+    loi 4         /* value to rotate */
+    lae left4
+    lol i
+    loc 1
+    sli EM_WSIZE  /* i * 2: distances are 2-byte items */
+    ads EM_WSIZE  /* &left4[i] */
+    loi 2         /* left distance */
+    loc 2
+    loc EM_WSIZE
+    cii           /* convert distance from 2 bytes to word size */
+    rol 4         /* rotate left */
+    lae table4
+    lol i
+    loc 2
+    sli EM_WSIZE  /* i * 4: table4 has 4-byte items */
+    ads EM_WSIZE  /* &table4[i] */
+    loi 4         /* expected result */
+    cmu 4         /* compare result with expected */
+    zeq *5        /* equal: skip the failure report */
+    loc __LINE__  /* failure code is this line number */
+    loc EM_WSIZE
+    loc 4
+    cuu           /* widen the code from word size to 4 bytes */
+    cal $fail
+    asp 4
+5
+    lae table4
+    loi 4         /* value to rotate */
+    lae right4
+    lol i
+    loc 1
+    sli EM_WSIZE  /* i * 2: distances are 2-byte items */
+    ads EM_WSIZE  /* &right4[i] */
+    loi 2         /* right distance */
+    loc 2
+    loc EM_WSIZE
+    cii           /* convert distance from 2 bytes to word size */
+    ror 4         /* rotate right */
+    lae table4
+    lol i
+    loc 2
+    sli EM_WSIZE  /* i * 4: table4 has 4-byte items */
+    ads EM_WSIZE  /* &table4[i] */
+    loi 4         /* expected result */
+    cmu 4         /* compare result with expected */
+    zeq *6        /* equal: skip the failure report */
+    loc __LINE__  /* failure code is this line number */
+    loc EM_WSIZE
+    loc 4
+    cuu           /* widen the code from word size to 4 bytes */
+    cal $fail
+    asp 4
+6
+    inl i         /* loop LEN4 times */
+    lol i
+    loc LEN4
+    blt *4
+
+    /*
+     * Rotate 4-byte values by a constant distance, because this uses
+     * different rules in PowerPC ncg.
+     */
+    lae val4
+    loi 4
+    loc 7
+    rol 4         /* rotate left by 7 bits */
+    lae val4left7
+    loi 4         /* expected result */
+    cmu 4
+    zeq *7        /* equal: skip the failure report */
+    loc __LINE__  /* failure code is this line number */
+    loc EM_WSIZE
+    loc 4
+    cuu           /* widen the code from word size to 4 bytes */
+    cal $fail
+    asp 4
+7
+    lae val4
+    loi 4
+    loc 11
+    ror 4         /* rotate right by 11 bits */
+    lae val4right11
+    loi 4         /* expected result */
+    cmu 4
+    zeq *8        /* equal: skip the failure report */
+    loc __LINE__  /* failure code is this line number */
+    loc EM_WSIZE
+    loc 4
+    cuu           /* widen the code from word size to 4 bytes */
+    cal $fail
+    asp 4
+8
+
+    cal $finished
+    end
--- a/tests/plat/setjmp_c.c
+++ b/tests/plat/setjmp_c.c
@ -0,0 +1,58 @@
+#include <setjmp.h>
+#include "test.h"
+
+/*
+ * Sets i = 2 * i for each i in nums, until i == 0, but stops if
+ * 2 * i >= 1000.
+ *
+ * Uses setjmp() and longjmp() in libc.  For ACK's libc, the back end
+ * must provide EM's _gto_, and _gto_ must preserve the function
+ * return area.
+ */
+int nums1[]         = { 79, 245, 164, 403, 0};
+const int expect1[] = {158, 490, 328, 806, 0};
+int nums2[]         = {20, 221, 411, 643, 48, 272, 448, 0};
+const int expect2[] = {40, 442, 822, 643, 48, 272, 448, 0};
+int nums3[]         = {371, 265, 500, 124, 117, 0};
+const int expect3[] = {742, 530, 500, 124, 117, 0};
+int docount = 0;
+
+int twice(int i, jmp_buf esc) {  /* returns 2 * i, or escapes if i >= 500 */
+	if (i >= 500)
+		longjmp(esc, i);  /* setjmp() in cknums() then returns i */
+	return 2 * i;
+}
+
+void donums(int *nums, jmp_buf esc) {  /* doubles nums[] in place via twice() */
+	int *p;
+
+	docount++;  /* count calls; main() expects 3 */
+	for (p = nums; *p != 0; p++) {  /* stop at the 0 terminator */
+		*p = twice(*p, esc);
+	}
+}
+
+int cknums(int *nums, const int *expect) {  /* returns 0, or the value given to longjmp() */
+	jmp_buf env;
+	int ret;
+
+	ret = setjmp(env);
+	if (ret == 0)  /* direct return from setjmp(), not from longjmp() */
+		donums(nums, env);
+	for (;;) {  /* compare nums[] with expect[] through the 0 terminator */
+		ASSERT(*nums == *expect);
+		if (*expect == 0)
+			break;
+		nums++;
+		expect++;
+	}
+	return ret;
+}
+
+int main(void) {  /* checks each array and the value cknums() returns */
+	ASSERT(cknums(nums1, expect1) == 0);  /* no element >= 500 */
+	ASSERT(cknums(nums2, expect2) == 643);  /* escaped at 643 */
+	ASSERT(cknums(nums3, expect3) == 500);  /* escaped at 500 */
+	ASSERT(docount == 3);
+	finished();
+}
--- a/tests/plat/structcopy_e.c
+++ b/tests/plat/structcopy_e.c
@ -0,0 +1,113 @@
+#include "test.h"
+
+/* ACK's C compiler uses EM's loi, sti, blm, or an inline loop to copy
+ * these structs.  The compiler doesn't call memcpy() or other
+ * functions in libc, so this test passes without linking the CRT.
+ */
+
+struct c5 {       /* not a whole number of words */
+	char one[5];
+};
+
+struct ii {       /* two words */
+	int one;
+	int two;
+};
+
+struct iii {      /* three words */
+	int one;
+	int two;
+	int three;
+};
+
+int equal5(char *a, char *b) {  /* a, b must have 5 characters */
+	int i;
+
+	for (i = 0; i < 5; i++)
+		if (a[i] != b[i]) return 0;  /* first mismatch: not equal */
+	return 1;  /* all 5 characters matched */
+}
+
+struct c5 make_c5(char *str) {  /* str must have 5 characters */
+	struct c5 out;
+	int i;
+
+	for (i = 0; i < 5; i++)
+		out.one[i] = str[i];
+	return out;  /* struct returned by value */
+}
+
+struct ii make_ii(int i, int j) {  /* returns {i, j} by value */
+	struct ii out;
+
+	out.one = i;
+	out.two = j;
+	return out;
+}
+
+struct iii make_iii(struct ii in, int k) {  /* returns {in.one, in.two, k} by value */
+	struct iii out;
+
+	out.one = in.one;
+	out.two = in.two;
+	out.three = k;
+	return out;
+}
+
+struct c5 rotate_left_c5(struct c5 in) {  /* returns in.one rotated left by one char */
+	int i;
+	char c = in.one[0];  /* saved first char, which moves to the end */
+
+	/* Modifies our copy of _in_, not caller's copy. */
+	for (i = 0; i < 4; i++)
+		in.one[i] = in.one[i + 1];
+	in.one[4] = c;
+	return in;
+}
+
+struct iii rotate_left_iii(struct iii in) {  /* returns {in.two, in.three, in.one} */
+	int i = in.one;  /* saved first member, which moves to the end */
+
+	/* Modifies our copy of _in_, not caller's copy. */
+	in.one = in.two;
+	in.two = in.three;
+	in.three = i;
+	return in;
+}
+
+/* Bypasses the CRT, so there's no stdio. */
+void _m_a_i_n(void) {
+	struct c5 earth, heart, dup_heart, rol_heart;
+	struct ii pair, dup_pair;
+	struct iii triple, dup_triple, rol_triple;
+
+	earth = make_c5("earth");
+	heart = make_c5("heart");
+	dup_heart = heart;  /* struct assignment copies the whole struct */
+	rol_heart = rotate_left_c5(heart);  /* "heart" rotated left is "earth" */
+	ASSERT(equal5(earth.one, "earth"));
+	ASSERT(equal5(heart.one, "heart"));  /* caller's copy is unchanged */
+	ASSERT(equal5(dup_heart.one, "heart"));
+	ASSERT(equal5(rol_heart.one, "earth"));
+
+	pair = make_ii(29, 31);
+	dup_pair = pair;
+	triple = make_iii(pair, -9);  /* pair is passed by value */
+	dup_triple = triple;
+	rol_triple = rotate_left_iii(triple);
+	ASSERT(pair.one == 29);
+	ASSERT(pair.two == 31);
+	ASSERT(dup_pair.one == 29);
+	ASSERT(dup_pair.two == 31);
+	ASSERT(triple.one == 29);  /* caller's copy is unchanged */
+	ASSERT(triple.two == 31);
+	ASSERT(triple.three == -9);
+	ASSERT(dup_triple.one == 29);
+	ASSERT(dup_triple.two == 31);
+	ASSERT(dup_triple.three == -9);
+	ASSERT(rol_triple.one == 31);
+	ASSERT(rol_triple.two == -9);
+	ASSERT(rol_triple.three == 29);
+
+	finished();
+}
--- a/util/ego/build.lua
+++ b/util/ego/build.lua
@ -3,6 +3,7 @@ local function build_ego(name)
 		name = name,
 		srcs = { "./"..name.."/*.c" },
 		deps = {
+			"./"..name.."/*.h",
 			"util/ego/share+lib",
 			"modules/src/em_data+lib",
 			"h+emheaders",
--- a/util/ego/ca/ca.c
+++ b/util/ego/ca/ca.c
@ -72,6 +72,7 @@ proc_p* p_out;
 			{
 				/* register message without arguments */
 				oldline(l);
+				continue;
 			}
 			else
 			{
--- a/util/ego/cs/cs.c
+++ b/util/ego/cs/cs.c
@ -25,7 +25,7 @@

 int Scs; /* Number of optimizations found. */

-STATIC cs_clear()
+STATIC void cs_clear()
 {
 	clr_avails();
 	clr_entities();
@ -74,9 +74,7 @@ STATIC void cs_optimize(void *vp)
 	}
 }

-main(argc, argv)
-	int	argc;
-	char	*argv[];
+int main(int argc, char *argv[])
 {
 	Scs = 0;
 	go(argc, argv, no_action, cs_optimize, cs_machinit, no_action);
--- a/util/ego/cs/cs.h
+++ b/util/ego/cs/cs.h
@ -88,12 +88,13 @@ struct occur {
 #define UNAIR_OP	6
 #define BINAIR_OP	7
 #define TERNAIR_OP	8
-#define KILL_ENTITY	9
-#define SIDE_EFFECTS	10
-#define FIDDLE_STACK	11
-#define IGNORE		12
-#define HOPELESS	13
-#define BBLOCK_END	14
+#define REMAINDER	9
+#define KILL_ENTITY	10
+#define SIDE_EFFECTS	11
+#define FIDDLE_STACK	12
+#define IGNORE		13
+#define HOPELESS	14
+#define BBLOCK_END	15

 struct avail {
 	avail_p	av_before;	/* Ptr to earlier discovered expressions. */
--- a/util/ego/cs/cs_aux.c
+++ b/util/ego/cs/cs_aux.c
@ -11,8 +11,7 @@
 #include "cs.h"
 #include "cs_entity.h"

-offset array_elemsize(vn)
-	valnum vn;
+offset array_elemsize(valnum vn)
 {
 	/* Vn is the valuenumber of an entity that points to
 	 * an array-descriptor. The third element of this descriptor holds
@ -36,14 +35,12 @@ offset array_elemsize(vn)
 	return aoff(enp->en_ext->o_dblock->d_values, 2);
 }

-occur_p occ_elem(i)
-	Lindex i;
+occur_p occ_elem(Lindex i)
 {
 	return (occur_p) Lelem(i);
 }

-entity_p en_elem(i)
-	Lindex i;
+entity_p en_elem(Lindex i)
 {
 	return (entity_p) Lelem(i);
 }
@ -54,14 +51,14 @@ entity_p en_elem(i)

 STATIC valnum val_no;

-valnum newvalnum()
+valnum newvalnum(void)
 {
 	/* Return a completely new value number. */

 	return ++val_no;
 }

-start_valnum()
+void start_valnum(void)
 {
 	/* Restart value numbering. */

--- a/util/ego/cs/cs_aux.h
+++ b/util/ego/cs/cs_aux.h
@ -3,28 +3,28 @@
 * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
 * See the copyright notice in the ACK home directory, in the file "Copyright".
 */
-extern offset	array_elemsize();	/* (valnum vm)
+extern offset	array_elemsize(valnum vn);
+					/*
 					 * Returns the size of array-elements,
 					 * if vn is the valuenumber of the
 					 * address of an array-descriptor.
 					 */

-extern occur_p	occ_elem();		/* (Lindex i)
+extern occur_p	occ_elem(Lindex i);	/*
 					 * Returns a pointer to the occurrence
 					 * of which i is an index in a set.
 					 */

-extern entity_p	en_elem();		/* (Lindex i)
+extern entity_p	en_elem(Lindex i);	/*
 					 * Returns a pointer to the entity
 					 * of which i is an index in a set.
 					 */

-extern valnum	newvalnum();		/* ()
+extern valnum	newvalnum(void);	/*
 					 * Returns a completely new
 					 * value number.
 					 */

-extern		start_valnum();		/* ()
+extern void	start_valnum(void);	/*
 					 * Restart value numbering.
 					 */
-
--- a/util/ego/cs/cs_avail.c
+++ b/util/ego/cs/cs_avail.c
@ -22,8 +22,7 @@

 avail_p avails; /* The list of available expressions. */

-STATIC bool commutative(instr)
-	int instr;
+STATIC bool commutative(int instr)
 {
 	/* Is instr a commutative operator? */

@ -37,9 +36,7 @@ STATIC bool commutative(instr)
 	}
 }

-STATIC bool same_avail(kind, avp1, avp2)
-	byte kind;
-	avail_p avp1, avp2;
+STATIC bool same_avail(byte kind, avail_p avp1, avail_p avp2)
 {
 	/* Two expressions are the same if they have the same operator,
 	 * the same size, and their operand(s) have the same value. 
@ -57,6 +54,7 @@ STATIC bool same_avail(kind, avp1, avp2)
 		case UNAIR_OP:
 			return	avp1->av_operand == avp2->av_operand;
 		case BINAIR_OP:
+		case REMAINDER:
 			if (commutative(avp1->av_instr & BMASK))
 				return	avp1->av_oleft == avp2->av_oleft &&
 					avp1->av_oright == avp2->av_oright
@ -75,8 +73,7 @@ STATIC bool same_avail(kind, avp1, avp2)
 	/* NOTREACHED */
 }

-STATIC void check_local(avp)
-	avail_p avp;
+STATIC void check_local(avail_p avp)
 {
 	/* Check if the local in which the result of avp was stored,
 	 * still holds this result. Update if not.
@ -89,9 +86,7 @@ STATIC void check_local(avp)
 	}
 }

-STATIC entity_p result_local(size, l)
-	offset size;
-	line_p l;
+STATIC entity_p result_local(offset size, line_p l)
 {
 	/* If the result of an expression of size bytes is stored into a
 	 * local for which a registermessage was generated, return a pointer
@ -114,9 +109,7 @@ STATIC entity_p result_local(size, l)
 	return (entity_p) 0;
 }

-STATIC copy_avail(kind, src, dst)
-	int kind;
-	avail_p src, dst;
+STATIC void copy_avail(int kind, avail_p src, avail_p dst)
 {
 	/* Copy some attributes from src to dst. */

@ -132,6 +125,7 @@ STATIC copy_avail(kind, src, dst)
 			dst->av_operand = src->av_operand;
 			break;
 		case BINAIR_OP:
+		case REMAINDER:
 			dst->av_oleft = src->av_oleft;
 			dst->av_oright = src->av_oright;
 			break;
@ -143,10 +137,7 @@ STATIC copy_avail(kind, src, dst)
 	}
 }

-avail_p av_enter(avp, ocp, kind)
-	avail_p avp;
-	occur_p ocp;
-	int kind;
+avail_p av_enter(avail_p avp, occur_p ocp, int kind)
 {
 	/* Put the available expression avp in the list,
 	 * if it is not already there.
@ -171,7 +162,8 @@ avail_p av_enter(avp, ocp, kind)
 	/* Remember local, if any, that holds result. */
 	if (avp->av_instr != (byte) INSTR(last)) {
 		/* Only possible when instr is the implicit AAR in 
-		 * a LAR or SAR.
+		 * a LAR or SAR, or the implicit DVI in an RMI, or
+		 * DVU in RMU.
 		 */
 		ravp->av_saveloc = (entity_p) 0;
 	} else {
@ -186,7 +178,7 @@ avail_p av_enter(avp, ocp, kind)
 	return ravp;
 }

-clr_avails()
+void clr_avails(void)
 {
 	/* Throw away the information about the available expressions. */

--- a/util/ego/cs/cs_avail.h
+++ b/util/ego/cs/cs_avail.h
@ -5,7 +5,8 @@
 */
 extern avail_p	avails;		/* The set of available expressions. */

-extern avail_p	av_enter();	/* (avail_p avp, occur_p ocp, byte kind)
+extern avail_p	av_enter(avail_p avp, occur_p ocp, int kind);
+				/*
 				 * Puts the available expression in avp
 				 * in the list of available expressions,
 				 * if it is not already there. Add ocp to set of
@ -18,6 +19,7 @@ extern avail_p	av_enter();	/* (avail_p avp, occur_p ocp, byte kind)
 				 * Returns a pointer into the list.
 				 */

-extern		clr_avails();	/* Release all space occupied by the old list
+extern void	clr_avails(void);
+				/* Release all space occupied by the old list
 				 * of available expressions.
 				 */
--- a/Show more
+++ b/Show more