diff --git a/mach/i80/libem/cii.s b/mach/i80/libem/cii.s
index 7d091da5e..bf4e7efb8 100644
--- a/mach/i80/libem/cii.s
+++ b/mach/i80/libem/cii.s
@@ -65,19 +65,19 @@
 	jmp 3f		! done
 
 !if destination size < source size only:
-shrink:	mov l,c		! load source size in hl
+shrink:	mov l,b		! load destination size in hl
 	mvi h,0
 	dad sp
 	mov d,h
-	mov e,l		! de points just above source
-	mov l,b		! load destination size in hl
+	mov e,l		! de points just above lowest bytes of source
+	mov l,c		! load source size in hl
 	mvi h,0
 	dad sp		! hl points just above "destination"
 
 1:	dcx d		! move upwards
 	dcx h
-	mov a,m
-	stax d
+	ldax d
+	mov m,a
 	dcr b
 	jnz 1b
 	sphl
diff --git a/mach/i80/libem/rol4.s b/mach/i80/libem/rol4.s
index e5bb1a83d..41219ea0c 100644
--- a/mach/i80/libem/rol4.s
+++ b/mach/i80/libem/rol4.s
@@ -25,8 +25,8 @@
 	mov e,a
 
 	mov a,b
-	ral
-1:	mov a,l
+1:	ral
+	mov a,l
 	ral
 	mov l,a
 	mov a,h
diff --git a/mach/i80/libem/ror4.s b/mach/i80/libem/ror4.s
index e77d8a74c..490c75abc 100644
--- a/mach/i80/libem/ror4.s
+++ b/mach/i80/libem/ror4.s
@@ -25,8 +25,8 @@
 	mov e,a
 
 	mov a,l
-	rar
-1:	mov a,b
+1:	rar
+	mov a,b
 	rar
 	mov b,a
 	mov a,c
diff --git a/mach/i80/ncg/table b/mach/i80/ncg/table
index 575820c81..e6d7e02f6 100644
--- a/mach/i80/ncg/table
+++ b/mach/i80/ncg/table
@@ -385,8 +385,9 @@ gen dad de
 
 pat loi $1>=512
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".loi"}
+/* 'uses dereg={const2,$1}' fails to kill de. */
+gen lxi de,{const2,$1}
+    Call {label,".loi"}
 
 pat los $1==2
 with dereg
@@ -597,8 +598,8 @@ gen 1:
 
 pat sti
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".sti"}
+gen lxi de,{const2,$1}
+    Call {label,".sti"}
 
 pat sts $1==2
 with dereg
@@ -702,23 +703,24 @@ gen Call {label,".mli4"}
 
 pat dvi $1==2
 kills ALL
-uses areg={const1,129}
-gen Call {label,".dvi2"}		yields de
+/* 'uses areg={const1,129}' fails to kill a. */
+gen mvi a,{const1,129}
+    Call {label,".dvi2"}		yields de
 
 pat dvi $1==4
 kills ALL
-uses areg={const1,129}
-gen Call {label,".dvi4"}
+gen mvi a,{const1,129}
+    Call {label,".dvi4"}
 
 pat rmi $1==2
 kills ALL
-uses areg={const1,128}
-gen Call {label,".dvi2"}		yields de
+gen mvi a,{const1,128}
+    Call {label,".dvi2"}		yields de
 
 pat rmi $1==4
 kills ALL
-uses areg={const1,128}
-gen Call {label,".dvi4"}
+gen mvi a,{const1,128}
+    Call {label,".dvi4"}
 
 pat ngi $1==2
 with hl_or_de
@@ -738,7 +740,7 @@ pat loc sli ($1 == 8) && ($2 == 2)
 with hl_or_de
 gen move %1.2, %1.1
     mvi %1.2, {const1,0}        yields %1
-    
+
 pat sli $1==2
 kills ALL
 gen Call {label,".sli2"}		yields de
@@ -749,13 +751,13 @@ gen Call {label,".sli4"}
 
 pat sri $1==2
 kills ALL
-uses areg={const1,1}
-gen Call {label,".sri2"}		yields de
+gen mvi a,{const1,1}
+    Call {label,".sri2"}		yields de
 
 pat sri $1==4
 kills ALL
-uses areg={const1,1}
-gen Call {label,".sri4"}
+gen mvi a,{const1,1}
+    Call {label,".sri4"}
 
 /********************************************/
 /* Group 4: Unsigned arithmetic		    */
@@ -775,23 +777,23 @@ gen Call {label,".mli4"}
 
 pat dvu $1==2
 kills ALL
-uses areg={const1,1}
-gen Call {label,".dvi2"}		yields de
+gen mvi a,{const1,1}
+    Call {label,".dvi2"}		yields de
 
 pat dvu $1==4
 kills ALL
-uses areg={const1,1}
-gen Call {label,".dvi4"}
+gen mvi a,{const1,1}
+    Call {label,".dvi4"}
 
 pat rmu $1==2
 kills ALL
-uses areg={const1,0}
-gen Call {label,".dvi2"}		yields de
+gen mvi a,{const1,0}
+    Call {label,".dvi2"}		yields de
 
 pat rmu $1==4
 kills ALL
-uses areg={const1,0}
-gen Call {label,".dvi4"}
+gen mvi a,{const1,0}
+    Call {label,".dvi4"}
 
 pat slu						leaving sli $1
 
@@ -799,16 +801,16 @@ pat loc sru ($1 == 8) && ($2 == 2)
 with hl_or_de
 gen move %1.1, %1.2
     mvi %1.1, {const1,0}        yields %1
-    
+
 pat sru $1==2
 kills ALL
-uses areg={const1,0}
-gen Call {label,".sri2"}		yields de
+gen mvi a,{const1,0}
+    Call {label,".sri2"}		yields de
 
 pat sru $1==4
 kills ALL
-uses areg={const1,0}
-gen Call {label,".sri4"}
+gen mvi a,{const1,0}
+    Call {label,".sri4"}
 
 
 /********************************************/
@@ -1047,8 +1049,8 @@ with hlreg
 
 pat cii
 kills ALL
-uses areg={const1,1}
-gen Call {label,".cii"}
+gen mvi a,{const1,1}
+    Call {label,".cii"}
 
 pat loc loc ciu					leaving loc $1 loc $2 cuu
 pat loc loc cui					leaving loc $1 loc $2 cuu
@@ -1081,8 +1083,8 @@ with hl_or_de
 
 pat cuu
 kills ALL
-uses areg={const1,0}
-gen Call {label,".cii"}
+gen mvi a,{const1,0}
+    Call {label,".cii"}
 
 pat cfi
 kills ALL
@@ -1128,8 +1130,8 @@ gen mov a,%1.2
 
 pat and defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".and"}
+gen lxi de,{const2,$1}
+    Call {label,".and"}
 
 pat and !defined($1)
 with dereg
@@ -1156,8 +1158,8 @@ gen mov a,%1.2
 
 pat ior defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".ior"}
+gen lxi de,{const2,$1}
+    Call {label,".ior"}
 
 pat ior !defined($1)
 with dereg
@@ -1184,8 +1186,8 @@ gen mov a,%1.2
 
 pat xor defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".xor"}
+gen lxi de,{const2,$1}
+    Call {label,".xor"}
 
 pat xor !defined($1)
 with dereg
@@ -1204,8 +1206,8 @@ gen mov a,%1.2
 
 pat com defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".com"}
+gen lxi de,{const2,$1}
+    Call {label,".com"}
 
 pat com !defined($1)
 with dereg
@@ -1269,8 +1271,8 @@ gen Call {label,".inn2"}		yields de
 
 pat inn defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".inn"}			yields de
+gen lxi de,{const2,$1}
+    Call {label,".inn"}			yields de
 
 pat inn !defined($1)
 with dereg
@@ -1284,8 +1286,8 @@ gen Call {label,".set2"}		yields de
 
 pat set defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".set"}
+gen lxi de,{const2,$1}
+    Call {label,".set"}
 
 pat set !defined($1)
 with dereg
@@ -1402,8 +1404,8 @@ pat cmi $1==2					leaving sbi 2
 
 pat cmi $1==4
 kills ALL
-uses areg={const1,1}
-gen Call {label,".cmi4"}		yields de
+gen mvi a,{const1,1}
+    Call {label,".cmi4"}		yields de
 
 pat cmf $1==4
 kills ALL
@@ -1412,14 +1414,14 @@ gen Call {label,".cmf4"}
 pat cmf $1==8
 kills ALL
 gen Call {label,".cmf8"}
- 
+
 pat cmu $1==2
 with hl_or_de hl_or_de
 uses areg
 gen mov a,%2.1
     cmp %1.1
     jz {label,2f}
-    jc {label,1f}  
+    jc {label,1f}
     0:
     lxi %2,{const2,1}
     jmp {label,3f}
@@ -1436,15 +1438,15 @@ gen mov a,%2.1
 
 pat cmu $1==4
 kills ALL
-uses areg={const1,0}
-gen Call {label,".cmi4"}		yields de
+gen mvi a,{const1,0}
+    Call {label,".cmi4"}		yields de
 
 pat cms $1==2					leaving cmi 2
 
 pat cms defined($1)
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".cms"}			yields de
+gen lxi de,{const2,$1}
+    Call {label,".cms"}			yields de
 
 pat cms !defined($1)
 with dereg
@@ -1936,8 +1938,8 @@ gen dad sp
 
 pat blm
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".blm"}
+gen lxi de,{const2,$1}
+    Call {label,".blm"}
 
 pat bls
 with dereg
@@ -1962,8 +1964,8 @@ with src1or2 src1or2			yields %2 %1 %2 %1
 
 pat dup
 kills ALL
-uses dereg={const2,$1}
-gen Call {label,".dup"}
+gen lxi de,{const2,$1}
+    Call {label,".dup"}
 
 pat dus $1==2
 with dereg
@@ -1975,8 +1977,8 @@ with src1or2 src1or2			yields %1 %2
 
 pat exg defined($1)
 kills ALL
-uses dereg={const2,1}
-gen Call {label,".exg"}
+gen lxi de,{const2,$1}
+    Call {label,".exg"}
 
 pat fil
 uses hlreg={label,$1}
diff --git a/mach/i86/ncg/table b/mach/i86/ncg/table
index ffbd7101e..ce2ac7b87 100644
--- a/mach/i86/ncg/table
+++ b/mach/i86/ncg/table
@@ -2292,7 +2292,7 @@ with CXREG REG REG
       rcl %3,{ANYCON,1}
       adc %2,{ANYCON,0}
       loop {label, 2b}
-      1:
+      1:				yields %3 %2
 
 pat loc ror $1==1 && $2==2
 with REG
@@ -2311,7 +2311,7 @@ with CXREG REG REG
       rcl %3,{ANYCON,1}
       adc %2,{ANYCON,0}
       loop {label, 2b}
-      1:
+      1:				yields %3 %2
 
 /*******************************************************************
  *  Group 10 : Set Instructions                                    *
diff --git a/mach/powerpc/as/mach2.c b/mach/powerpc/as/mach2.c
index 4065334e6..e8e61ea0c 100644
--- a/mach/powerpc/as/mach2.c
+++ b/mach/powerpc/as/mach2.c
@@ -47,11 +47,15 @@
 %token <y_word> OP_FRT_FRB_C
 %token <y_word> OP_FRT_RA_D
 %token <y_word> OP_FRT_RA_RB
+%token <y_word> OP_L
 %token <y_word> OP_LEV
 %token <y_word> OP_LIA
 %token <y_word> OP_LIL
 %token <y_word> OP_LI32
+%token <y_word> OP_RA_RB
+%token <y_word> OP_RA_RB_TH
 %token <y_word> OP_RA_RS_C
+%token <y_word> OP_RA_RS_RA_C
 %token <y_word> OP_RA_RS_RB_C
 %token <y_word> OP_RA_RS_RB_MB5_ME5_C
 %token <y_word> OP_RA_RS_RB_MB6_C
@@ -61,14 +65,14 @@
 %token <y_word> OP_RA_RS_SH6_MB6_C
 %token <y_word> OP_RA_RS_UI
 %token <y_word> OP_RA_RS_UI_CC
+%token <y_word> OP_RS
 %token <y_word> OP_RS_FXM
 %token <y_word> OP_RS_RA
 %token <y_word> OP_RS_RA_D
 %token <y_word> OP_RS_RA_DS
 %token <y_word> OP_RS_RA_NB
 %token <y_word> OP_RS_RA_RB
-%token <y_word> OP_RS_RA_RB_C
-%token <y_word> OP_RS_RA_RA_C
+%token <y_word> OP_RS_RA_RB_CC
 %token <y_word> OP_RS_RB
 %token <y_word> OP_RS_SPR
 %token <y_word> OP_RS_SR
@@ -104,4 +108,5 @@
 
 %type <y_word> c
 %type <y_word> e16 negate16 u8 u7 u6 u5 u4 u2 u1
-%type <y_word> opt_bh cr_opt nb ds bda bdl lia lil spr_num
+%type <y_word> opt_bh cr_opt nb ds bda bdl lia lil
+%type <y_word> spr_num tbr_num opt_tbr
diff --git a/mach/powerpc/as/mach3.c b/mach/powerpc/as/mach3.c
index 91b088a6a..99507087d 100644
--- a/mach/powerpc/as/mach3.c
+++ b/mach/powerpc/as/mach3.c
@@ -103,6 +103,10 @@
 0,     OP_HA,                0,                                        "ha16",
 0,     OP_LO,                 0,                                       "lo16",
 
+/* The next page numbers are from PowerPC User Instruction Set
+ * Architecture, Book I, Version 2.01.
+ */
+
 /* Branch processor instructions (page 20) */
 
 0,     OP_LIL,                18<<26 | 0<<1 | 0<<0,                    "b",
@@ -128,7 +132,7 @@
 0,     OP_BT_BA_BB,           19<<26 | 417<<1,                         "crorc",
 0,     OP_BF_BFA,             19<<26 | 0<<1,                           "mcrf",
 
-/* extended mnemonics for bc, bcctr, bclr */
+/* extended mnemonics for bc, bcctr, bclr (page 144) */
 0,     OP_BH,       19<<26 | 20<<21 | 528<<1 | 0<<0,            "bctr",
 0,     OP_BH,       19<<26 | 20<<21 | 528<<1 | 1<<0,            "bctrl",
 0,     OP_BDL,      16<<26 | 16<<21 | 0<<1 | 0<<0,              "bdnz",
@@ -186,7 +190,7 @@
 0,     OP_BI_BH,    19<<26 | 12<<21 | 16<<1 | 0<<0,             "btlr",
 0,     OP_BI_BH,    19<<26 | 12<<21 | 16<<1 | 1<<0,             "btlrl",
 
-/* extended m with condition in BI */
+/* extended m with condition in BI (page 146) */
 0,     OP_BICR_BDL,  16<<26 | 12<<21 | 2<<16 | 0<<1 | 0<<0,     "beq",
 0,     OP_BICR_BDA,  16<<26 | 12<<21 | 2<<16 | 1<<1 | 0<<0,     "beqa",
 0,     OP_BICR_BH,   19<<26 | 12<<21 | 2<<16 | 528<<1 | 0<<0,   "beqctr",
@@ -284,7 +288,7 @@
 0,     OP_BICR_BH,   19<<26 | 12<<21 | 3<<16 | 16<<1 | 0<<0,    "bunlr",
 0,     OP_BICR_BH,   19<<26 | 12<<21 | 3<<16 | 16<<1 | 1<<0,    "bunlrl",
 
-/* extended m for cr logic */
+/* extended m for cr logic (page 147) */
 0,     OP_BT_BT_BT,  19<<26 | 289<<1,                           "crset",
 0,     OP_BT_BT_BT,  19<<26 | 193<<1,                           "crclr",
 0,     OP_BT_BA_BA,  19<<26 | 449<<1,                           "crmove",
@@ -377,12 +381,12 @@
 0,     OP_RT_RA_C,            31<<26 | 0<<10 | 104<<1,                 "neg",
 0,     OP_RT_RA_C,            31<<26 | 1<<10 | 104<<1,                 "nego",
 
-/* extended m for addition */
+/* extended m for addition (pages 153, 154) */
 0,     OP_RT_RA_D,            14<<26,                       "la",
 0,     OP_RT_SI,              14<<26 | 0<<16,               "li",
 0,     OP_RT_SI,              15<<26 | 0<<16,               "lis",
 
-/* extended m for subtraction */
+/* extended m for subtraction (pages 147, 148) */
 0,     OP_RT_RB_RA_C,         31<<26 | 0<<10 | 40<<1,       "sub",
 0,     OP_RT_RB_RA_C,         31<<26 | 1<<10 | 40<<1,       "subo",
 0,     OP_RT_RB_RA_C,         31<<26 | 0<<10 | 8<<1,        "subc",
@@ -418,7 +422,7 @@
 0,     OP_BF_L_RA_UI,         10<<26,                                  "cmpli",
 0,     OP_BF_L_RA_RB,         31<<26 | 32<<1,                          "cmpl",
 
-/* extended m for comparison */
+/* extended m for comparison (page 149) */
 0,     OP_BF_RA_SI,           11<<26 | 1<<21,               "cmpdi",
 0,     OP_BF_RA_RB,           31<<26 | 1<<21 | 0<<1,        "cmpd",
 0,     OP_BF_RA_UI,           10<<26 | 1<<21,               "cmpldi",
@@ -434,7 +438,7 @@
 0,     OP_TO_RA_RB,           31<<26 | 68<<1,                          "td",
 0,     OP_TO_RA_RB,           31<<26 | 4<<1,                           "tw",
 
-/* extended m for traps */
+/* extended m for traps (page 150) */
 0,     OP_TOX_RA_RB,          31<<26 | 4<<21 | 68<<1,       "tdeq",
 0,     OP_TOX_RA_SI,          2<<26 | 4<<21,                "tdeqi",
 0,     OP_TOX_RA_RB,          31<<26 | 12<<21 | 68<<1,      "tdge",
@@ -518,11 +522,10 @@
 0,     OP_RA_RS_C,            31<<26 | 58<<1,                          "cntlzd",
 0,     OP_RA_RS_C,            31<<26 | 26<<1,                          "cntlzw",
 
-/* extended m using logic */
-0,     OP_RS_RA_RA_C,         31<<26 | 444<<1,              "mr",
+/* extended m using logic (pages 153, 154) */
+0,     OP_RA_RS_RA_C,         31<<26 | 444<<1,              "mr",
 0,     OP,                    24<<26,                       "nop",
-0,     OP_RS_RA_RA_C,         31<<26 | 124<<1,              "not",
-0,     OP,                    26<<26,                       "xnop",
+0,     OP_RA_RS_RA_C,         31<<26 | 124<<1,              "not",
 
 /* page 69 */
 0,     OP_RA_RS_SH6_MB6_C,     30<<26 | 0<<2,               "rldicl",
@@ -535,7 +538,7 @@
 0,     OP_RA_RS_SH6_MB6_C,     30<<26 | 3<<2,               "rldimi",
 0,     OP_RA_RS_SH5_MB5_ME5_C, 20<<26,                      "rlwimi",
 
-/* extended m for doubleword rotation */
+/* extended m for doubleword rotation (page 151) */
 0,     OP_clrlsldi,           30<<26 | 2<<2,                "clrlsldi",
 0,     OP_clrldi,             30<<26 | 0<<2,                "clrldi",
 0,     OP_clrrdi,             30<<26 | 1<<2,                "clrrdi",
@@ -548,7 +551,7 @@
 0,     OP_sldi,               30<<26 | 1<<2,                "sldi",
 0,     OP_srdi,               30<<26 | 0<<2,                "srdi",
 
-/* extended m for word rotation */
+/* extended m for word rotation (page 152) */
 0,     OP_clrlslwi,           21<<26,                       "clrlslwi",
 0,     OP_clrlwi,             21<<26,                       "clrlwi",
 0,     OP_clrrwi,             21<<26,                       "clrrwi",
@@ -573,21 +576,25 @@
 0,     OP_RA_RS_RB_C,         31<<26 | 792<<1,              "sraw",
 
 /* page 78 */
-0,     OP_RS_SPR,             31<<26 | 467<<1,                         "mtspr",
-0,     OP_RT_SPR,             31<<26 | 339<<1,                         "mfspr",
-0,     OP_RS_FXM,             31<<26 | 0<<21 | 144<<1,                 "mtcrf",
-0,     OP_RT,                 31<<26 | 0<<21 | 19<<1,                  "mfcr",
+0,     OP_RS_SPR,             31<<26 | 467<<1,              "mtspr",
+0,     OP_RT_SPR,             31<<26 | 339<<1,              "mfspr",
+0,     OP_RS_FXM,             31<<26 | 0<<20 | 144<<1,      "mtcrf",
+0,     OP_RT,                 31<<26 | 0<<20 | 19<<1,       "mfcr",
 
-/* extended m for special purpose registers */
+/* extended m for special purpose registers (page 153) */
 0,     OP_RT,       31<<26 | 9<<16 | 0<<11 | 339<<1,        "mfctr",
 0,     OP_RT,       31<<26 | 8<<16 | 0<<11 | 339<<1,        "mflr",
 0,     OP_RT,       31<<26 | 1<<16 | 0<<11 | 339<<1,        "mfxer",
-0,     OP_RT,       31<<26 | 9<<16 | 0<<11 | 467<<1,        "mtctr",
-0,     OP_RT,       31<<26 | 8<<16 | 0<<11 | 467<<1,        "mtlr",
-0,     OP_RT,       31<<26 | 1<<16 | 0<<11 | 467<<1,        "mtxer",
+0,     OP_RS,       31<<26 | 9<<16 | 0<<11 | 467<<1,        "mtctr",
+0,     OP_RS,       31<<26 | 8<<16 | 0<<11 | 467<<1,        "mtlr",
+0,     OP_RS,       31<<26 | 1<<16 | 0<<11 | 467<<1,        "mtxer",
+
+/* extended m for condition register (page 154) */
+0,     OP_RS,       31<<26 | 0<<20 | 255<<12 | 144<<1,      "mtcr",
 
 /* Floating point instructions (page 83) */
 
+/* page 98 */
 0,     OP_FRT_RA_D,           48<<26,                                  "lfs",
 0,     OP_FRT_RA_RB,          31<<26 | 535<<1,                         "lfsx",
 0,     OP_FRT_RA_D,           49<<26,                                  "lfsu",
@@ -606,6 +613,7 @@
 0,     OP_FRS_RA_RB,          31<<26 | 759<<1,                         "stfdux",
 0,     OP_FRS_RA_RB,          31<<26 | 983<<1,                         "stfiwx",
 
+/* page 104 */
 0,     OP_FRT_FRB_C,          63<<26 | 72<<1,                          "fmr",
 0,     OP_FRT_FRB_C,          63<<26 | 40<<1,                          "fneg",
 0,     OP_FRT_FRB_C,          63<<26 | 264<<1,                         "fabs",
@@ -629,6 +637,7 @@
 0,     OP_FRT_FRA_FRC_FRB_C,  63<<26 | 30<<1,                          "fnmsub",
 0,     OP_FRT_FRA_FRC_FRB_C,  59<<26 | 30<<1,                          "fnmsubs",
 
+/* page 109 */
 0,     OP_FRT_FRB_C,          63<<26 | 12<<1,                          "frsp",
 0,     OP_FRT_FRB_C,          63<<26 | 814<<1,                         "fctid",
 0,     OP_FRT_FRB_C,          63<<26 | 815<<1,                         "fctidz",
@@ -652,4 +661,31 @@
 0,     OP_FRT_FRB_C,          63<<26 | 26<<1,                          "frsqrte",
 0,     OP_FRT_FRA_FRC_FRB_C,  63<<26 | 23<<1,                          "fsel",
 
-/* page 98 */
+/* Storage control instructions (Book II, page 15) */
+
+/* Book II, page 17 */
+0,     OP_RA_RB,              31<<26 | 982<<1,              "icbi",
+0,     OP_RA_RB_TH /* page 35 */,          31<<26 | 278<<1, "dcbt",
+0,     OP_RA_RB,              31<<26 | 246<<1,              "dcbtst",
+0,     OP_RA_RB,              31<<26 | 1014<<1,             "dcbz",
+0,     OP_RA_RB,              31<<26 | 54<<1,               "dcbst",
+0,     OP_RA_RB,              31<<26 | 86<<1,               "dcbf",
+0,     OP,                    19<<26 | 150<<1,              "isync",
+0,     OP_RT_RA_RB,           31<<26 | 20<<1,               "lwarx",
+0,     OP_RT_RA_RB,           31<<26 | 84<<1,               "ldarx",
+0,     OP_RS_RA_RB_CC,        31<<26 | 150<<1 | 1<<0,       "stwcx",
+0,     OP_RS_RA_RB_CC,        31<<26 | 214<<1 | 1<<0,       "stdcx", /* XO 214; 150 is stwcx. */
+0,     OP_L,                  31<<26 | 598<<1,              "sync",
+0,     OP,                    31<<26 | 1<<21 | 598<<1,      "lwsync",
+0,     OP,                    31<<26 | 2<<21 | 598<<1,      "ptesync",
+0,     OP,                    31<<26 | 854<<1,              "eieio",
+
+/* Time base (Book II, page 30) */
+
+0,     OP_RT_TBR,   31<<26 | 371<<1,                        "mftb",
+0,     OP_RT,       31<<26 | 8<<11 | 13<<16 | 371<<1,       "mftbu",
+
+/* External control (Book II, page 33) */
+
+0,     OP_RT_RA_RB,           31<<26 | 310<<1,              "eciwx",
+0,     OP_RS_RA_RB,           31<<26 | 438<<1,              "ecowx",
diff --git a/mach/powerpc/as/mach4.c b/mach/powerpc/as/mach4.c
index 8a0cca9de..b344ba8ce 100644
--- a/mach/powerpc/as/mach4.c
+++ b/mach/powerpc/as/mach4.c
@@ -42,7 +42,23 @@ operation
 	| OP_FRT_RA_D          FPR ',' e16 '(' GPR ')'    { emit_hl($1 | ($2<<21) | ($6<<16) | $4); }
 	| OP_FRT_RA_RB         FPR ',' GPR ',' GPR        { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); }
 	| OP_FRT_C             c FPR                      { emit4($1 | $2 | ($3<<21)); }
-	| OP_RA_RS_C           c GPR ',' GPR              { emit4($1 | $2 | ($5<<21) | ($3<<16)); }
+	| OP_L                              { emit4($1); }
+	| OP_L                 u2           { emit4($1 | ($2<<21)); }
+	| OP_LEV                            { emit4($1); }
+	| OP_LEV               u7           { emit4($1 | ($2<<5)); }
+	| OP_RA_RB             GPR ',' GPR
+	{ emit4($1 | ($2<<16) | ($4<<11)); }
+	| OP_RA_RB_TH          GPR ',' GPR opt_bh
+	{ emit4($1 | $5 | ($2<<16) | ($4<<11)); }
+	/*
+	 * For instructions with "mnemonic RS, RA, ..."
+	 * OP_RA_RS_... swaps RS and RA to (RA<<21) || (RS<<16)
+	 * OP_RS_RA_... keeps RS and RA as (RS<<21) || (RA<<16)
+	 */
+	| OP_RA_RS_C           c GPR ',' GPR
+	{ emit4($1 | $2 | ($5<<21) | ($3<<16)); }
+	| OP_RA_RS_RA_C        c GPR ',' GPR
+	{ emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); }
 	| OP_RA_RS_RB_C        c GPR ',' GPR ',' GPR
 	{ emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); }
 	| OP_RA_RS_RB_MB5_ME5_C c GPR ',' GPR ',' GPR ',' u5 ',' u5
@@ -75,20 +91,19 @@ operation
 	| OP_RT_RB_RA_C        c GPR ',' GPR ',' GPR      { emit4($1 | $2 | ($3<<21) | ($7<<16) | ($5<<11)); }
 	| OP_RT_SI             GPR ',' e16                { emit_hl($1 | ($2<<21) | $4); }
 	| OP_RT_SPR            GPR ',' spr_num            { emit4($1 | ($2<<21) | ($4<<11)); }
+	| OP_RT_TBR            GPR opt_tbr                { emit4($1 | ($2<<21) | ($3<<11)); }
+	| OP_RS                GPR                        { emit4($1 | ($2<<21)); }
 	| OP_RS_FXM            u7 ',' GPR                 { emit4($1 | ($4<<21) | ($2<<12)); }
 	| OP_RS_RA_D           GPR ',' e16 '(' GPR ')'    { emit_hl($1 | ($2<<21) | ($6<<16) | $4); }
 	| OP_RS_RA_DS          GPR ',' ds '(' GPR ')'     { emit_hl($1 | ($2<<21) | ($6<<16) | $4); }
 	| OP_RS_RA_NB          GPR ',' GPR ',' nb         { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); }
 	| OP_RS_RA_RB          GPR ',' GPR ',' GPR        { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); }
-	| OP_RS_RA_RB_C        c GPR ',' GPR ',' GPR      { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); }
-	| OP_RS_RA_RA_C        c GPR ',' GPR              { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); }
+	| OP_RS_RA_RB_CC       C GPR ',' GPR ',' GPR      { emit4($1 | ($3<<21) | ($5<<16) | ($7<<11)); }
 	| OP_RS_SPR            spr_num ',' GPR            { emit4($1 | ($4<<21) | ($2<<11)); }
 	| OP_TO_RA_RB          u5 ',' GPR ',' GPR         { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); }
 	| OP_TO_RA_SI          u5 ',' GPR ',' e16         { emit_hl($1 | ($2<<21) | ($4<<16) | $6); }
 	| OP_TOX_RA_RB         GPR ',' GPR                { emit4($1 | ($2<<16) | ($4<<11)); }
 	| OP_TOX_RA_SI         GPR ',' e16                { emit_hl($1 | ($2<<16) | $4); }
-	| OP_LEV                                          { emit4($1); }
-	| OP_LEV               u7                         { emit4($1 | ($2<<5)); }
 	| OP_LIA               lia                        { emit4($1 | $2); }
 	| OP_LIL               lil                        { emit4($1 | $2); }
 	| OP_LI32              li32                       /* emitted in subrule */
@@ -298,7 +313,7 @@ u2
 	}
 	;
 
-/* Optional comma, branch hint. */
+/* Optional comma, branch hint (or touch hint). */
 opt_bh
 	: /* nothing */         { $$ = 0; }
 	| ',' u2                { $$ = ($2<<11); }
@@ -409,13 +424,28 @@ lia
 	}
 	;
 
+/*
+ * Instructions "mfspr", "mtspr", and "mftb" encode the 10-bit special
+ * purpose register (spr) or time base register (tbr) by swapping the
+ * low 5 bits with the high 5 bits.  The value from an SPR token has
+ * already been swapped.
+ */
+
 spr_num
-	: SPR { $$ = $1; }
-	| absexp
+	: SPR     { $$ = $1; }
+	| tbr_num { $$ = $1; }
+	;
+
+opt_tbr
+	: /* nothing */         { $$ = 8 | (12<<5); }
+	| ',' tbr_num           { $$ = $2; }
+	;
+
+tbr_num
+	: absexp
 	{
 		if (($1 < 0) || ($1 > 0x3ff))
-			serror("spr number out of range");
-		/* mfspr, mtspr swap the low and high 5 bits */
+			serror("10-bit unsigned value out of range");
 		$$ = ($1 >> 5) | (($1 & 0x1f) << 5);
 	}
 	;
diff --git a/mach/powerpc/libem/aar4.s b/mach/powerpc/libem/aar4.s
index fc8620d02..08390b081 100644
--- a/mach/powerpc/libem/aar4.s
+++ b/mach/powerpc/libem/aar4.s
@@ -8,21 +8,17 @@
 
 .define .aar4
 .aar4:
-	lis r0, hi16[.trap_earray]
-	ori r0, r0, lo16[.trap_earray]
-	mtspr ctr, r0            ! load CTR with trap address
-
 	lwz r4, 0(sp)            ! r4 = address of descriptor
 	lwz r5, 4(sp)            ! r5 = index
 	lwz r6, 8(sp)            ! r6 = address of array
 
 	lwz r0, 0(r4)
 	subf. r5, r0, r5         ! subtract lower bound from index
-	bltctr                   ! check lower bound
+	blt .trap_earray         ! check lower bound
 
 	lwz r0, 4(r4)
 	cmplw r5, r0
-	bgtctr                   ! check upper bound
+	bgt .trap_earray         ! check upper bound
 
 	lwz r3, 8(r4)            ! r3 = size of element
 	mullw r5, r5, r3         ! scale index by size
@@ -30,3 +26,7 @@
 	stw r6, 8(sp)            ! push address of element
 	addi sp, sp, 8
 	blr
+
+.trap_earray:
+	li r3, 0                 ! EARRAY = 0 in h/em_abs.h
+	b .trp
diff --git a/mach/powerpc/libem/bls4.s b/mach/powerpc/libem/bls4.s
new file mode 100644
index 000000000..a36faca68
--- /dev/null
+++ b/mach/powerpc/libem/bls4.s
@@ -0,0 +1,19 @@
+.sect .text
+
+! Does a block move of words between non-overlapping buffers.
+!  Stack: ( src dst len -- )
+
+.define .bls4
+.bls4:
+	lwz	r3, 0(sp)	! r3 = len in bytes (top of stack)
+	lwz	r4, 4(sp)	! r4 = dst
+	lwz	r5, 8(sp)	! r5 = src
+	addi	sp, sp, 12	! pop all three arguments
+	srwi	r3, r3, 2	! r3 = len / 4 = number of words
+	mtspr	ctr, r3		! ctr = word count; NOTE(review): assumes nonzero, confirm callers
+	addi	r5, r5, -4	! bias src, because lwzu adds 4 before loading
+	addi	r4, r4, -4	! bias dst, because stwu adds 4 before storing
+1:	lwzu	r3, 4(r5)	! r3 = next source word, src advances
+	stwu	r3, 4(r4)	! store the word, dst advances
+	bdnz	1b		! decrement ctr, loop while it is nonzero
+	blr
diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua
index 16a03147e..5ed9b52e8 100644
--- a/mach/powerpc/libem/build.lua
+++ b/mach/powerpc/libem/build.lua
@@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do
 	acklibrary {
 		name = "lib_"..plat,
 		srcs = {
-			"./*.s", -- rm ret.s
+			"./*.s", -- dus4.s
 		},
 		vars = { plat = plat },
 		deps = {
@@ -15,4 +15,3 @@ for _, plat in ipairs(vars.plats) do
 		}
 	}
 end
-
diff --git a/mach/powerpc/libem/cfu8.s b/mach/powerpc/libem/cfu8.s
index 915f84dd2..710d2a65c 100644
--- a/mach/powerpc/libem/cfu8.s
+++ b/mach/powerpc/libem/cfu8.s
@@ -1,3 +1,5 @@
+.sect .text; .sect .rom; .sect .data; .sect .bss
+
 .sect .text
 
 ! Converts a 64-bit double into a 32-bit unsigned integer.
@@ -6,32 +8,42 @@
 
 .define .cfu8
 .cfu8:
-	lis r3, ha16[.fd_00000000]
-	lfd f0, lo16[.fd_00000000](r3) ! f0 = 0.0
-
-	lfd f1, 0(sp)            ! value to be converted
-
-	lis r3, ha16[.fd_FFFFFFFF]
-	lfd f3, lo16[.fd_FFFFFFFF](r3) ! f3 = 0xFFFFFFFF
-
-	lis r3, ha16[.fd_80000000]
-	lfd f4, lo16[.fd_80000000](r3) ! f4 = 0x80000000
-
-	fsel f2, f1, f1, f0
-	fsub f5, f3, f1
-	fsel f2, f5, f2, f3
-	fsub f5, f2, f4
-	fcmpu cr0, f2, f4
-	fsel f2, f5, f5, f2
-	fctiwz f2, f2
-	
-	stfd f2, 0(sp)
-	addi sp, sp, 4
-
-	bltlr
-
-	lwz r3, 0(sp)
-	xoris r3, r3, 0x8000
-	stw r3, 0(sp)
-
+	lfd f1, 0(sp)                   ! f1 = value to convert
+	lis r3, ha16[.fs_80000000]
+	lfs f2, lo16[.fs_80000000](r3)  ! f2 = 2**31
+	fsub   f1, f1, f2
+	fctiwz f1, f1         ! convert value - 2**31
+	stfd   f1, 0(sp)
+	lwz   r3, 4(sp)
+	xoris r3, r3, 0x8000  ! add 2**31
+	stw   r3, 4(sp)
+	addi  sp, sp, 4
 	blr
+
+.sect .rom
+.fs_80000000:
+	!float 2.147483648e+9 sz 4
+	.data1 0117,00,00,00
+
+! Freescale and IBM provide an example using fsel to select value or
+! value - 2**31 for fctiwz.  The following code adapts Freescale's
+! _Programming Environments Manual for 32-Bit Implementations of the
+! PowerPC Architecture_, section C.3.2, pdf page 557.
+!
+! Given f2 = value clamped from 0 to 2**32 - 1, f4 = 2**31, then
+!	fsub	f5, f2, f4
+!	fcmpu	cr2, f2, f4
+!	fsel	f2, f5, f5, f2
+!	fctiwz	f2, f2
+!	stfdu	f2, 0(sp)
+!	lwz	r3, 4(sp)
+!	blt	cr2, 1f
+!	xoris	r3, r3, 0x8000
+! 1: yields r3 = the converted value.
+!
+! Debian's clang 3.5.0-10 and gcc 4.9.2-10 don't clamp the value
+! before conversion.  They avoid fsel and use the conditional branch
+! to pick between 2 fctiwz instructions.
+!
+! PowerPC 601 lacks fsel (but kernel might trap and emulate fsel).
+! PowerPC 603, 604, G3, G4, G5 have fsel.
diff --git a/mach/powerpc/libem/csa.s b/mach/powerpc/libem/csa.s
index 3898241c4..86d792554 100644
--- a/mach/powerpc/libem/csa.s
+++ b/mach/powerpc/libem/csa.s
@@ -13,22 +13,21 @@
 	lwz r4, 4(sp)
 	addi sp, sp, 8
 
-	lwz r5, 0(r3)            ! load default
-	mtspr ctr, r5
-	
-	lwz r5, 4(r3)            ! fetch lower bound
-	subf. r4, r5, r4         ! adjust value
-	bltctr                   ! jump to default if out of range
+	lwz r5, 0(r3)            ! r5 = default target
 
-	lwz r5, 8(r3)            ! fetch range
-	cmplw r4, r5
-	bgtctr                   ! jump to default if out of range
+	lwz r6, 4(r3)            ! fetch lower bound
+	subf. r4, r6, r4         ! adjust value
+	blt 1f                   ! jump to default if out of range
+
+	lwz r6, 8(r3)            ! fetch range
+	cmplw r4, r6
+	bgt 1f                   ! jump to default if out of range
 
 	addi r3, r3, 12          ! skip header
 	slwi r4, r4, 2           ! scale value (<<2)
-	lwzx r5, r3, r4          ! load target
-	mtspr ctr, r5
+	lwzx r5, r3, r4          ! r5 = new target
 
-	or. r5, r5, r5           ! test it
+1:	mtspr ctr, r5
+	mr. r5, r5               ! test it
 	bnectr                   ! jump to target if non-zero
 	b .trap_ecase            ! otherwise trap
diff --git a/mach/powerpc/libem/csb.s b/mach/powerpc/libem/csb.s
index 571bfc210..92c6d096d 100644
--- a/mach/powerpc/libem/csb.s
+++ b/mach/powerpc/libem/csb.s
@@ -13,23 +13,20 @@
 	lwz r4, 4(sp)
 	addi sp, sp, 8
 
-	lwz r5, 0(r3)            ! load default
-	mtspr ctr, r5
+	lwz r5, 0(r3)            ! r5 = default target
 
 	lwz r6, 4(r3)            ! fetch count
-
-1:
-	or. r6, r6, r6           ! test count
-	beqctr                   ! exit if zero
-	addi r6, r6, -1          ! otherwise decrement
-
-	lwzu r7, 8(r3)           ! fetch target index, increment pointer
+	mr. r6, r6               ! skip loop if count is zero
+	beq 3f                   !   (needed by Modula-2 "CASE i OF END")
+	mtspr ctr, r6
+1:	lwzu r7, 8(r3)           ! fetch target index, increment pointer
 	cmpw r4, r7              ! compare with value
-	bne 1b                   ! if not equal, go again
+	beq 2f
+	bdnz 1b                  ! if not equal, go again
+	b 3f
 
-	lwz r7, 4(r3)            ! fetch target address
-	mtspr ctr, r7
-
-	or. r7, r7, r7           ! test it
+2:	lwz r5, 4(r3)            ! r5 = new target
+3:	mtspr ctr, r5
+	mr. r5, r5               ! test target
 	bnectr                   ! jump to target if non-zero
 	b .trap_ecase            ! otherwise trap
diff --git a/mach/powerpc/libem/dus4.s b/mach/powerpc/libem/dus4.s
new file mode 100644
index 000000000..9c751947a
--- /dev/null
+++ b/mach/powerpc/libem/dus4.s
@@ -0,0 +1,16 @@
+.sect .text
+
+! Duplicates some words on top of stack.
+!  Stack: ( a size -- a a )
+
+.define .dus4
+.dus4:
+	lwz	r3, 0(sp)	! r3 = size in bytes
+	addi	sp, sp, 4	! pop size; sp now points at a
+	srwi	r4, r3, 2	! r4 = size / 4
+	mtspr	ctr, r4		! ctr = number of words to duplicate
+	add	r5, sp, r3	! r5 = address just past the end of a
+1:	lwzu	r4, -4(r5)	! read a from its highest word downward
+	stwu	r4, -4(sp)	! push that word, growing the stack down
+	bdnz	1b		! decrement ctr, loop while it is nonzero
+	blr
diff --git a/mach/powerpc/libem/exg.s b/mach/powerpc/libem/exg.s
new file mode 100644
index 000000000..eb631b697
--- /dev/null
+++ b/mach/powerpc/libem/exg.s
@@ -0,0 +1,22 @@
+.sect .text
+
+! Exchange top two values on stack.
+!   Stack: ( a b size -- b a )
+
+.define .exg
+.exg:
+	lwz	r3, 0(sp)		! r3 = size
+	srwi	r7, r3, 2		! r7 = size / 4 (r7 is reused as scratch in the loop)
+	mtspr	ctr, r7			! ctr = size / 4
+	mr	r4, sp			! r4 = pointer before value b
+	add	r5, r4, r3		! r5 = pointer before value a
+
+	! Loop to swap each pair of words.
+1:	lwzu	r6, 4(r4)		! r6 = next word of b, r4 advances
+	lwzu	r7, 4(r5)		! r7 = next word of a, r5 advances
+	stw	r6, 0(r5)		! b's word goes into a's slot
+	stw	r7, 0(r4)		! a's word goes into b's slot
+	bdnz	1b			! loop ctr times
+
+	addi	sp, sp, 4		! drop size from stack
+	blr
diff --git a/mach/powerpc/libem/fd_80000000.s b/mach/powerpc/libem/fd_80000000.s
deleted file mode 100644
index 5c153bba8..000000000
--- a/mach/powerpc/libem/fd_80000000.s
+++ /dev/null
@@ -1,10 +0,0 @@
-.sect .text; .sect .rom; .sect .data; .sect .bss
-
-.sect .rom
-
-! Contains a handy double-precision 0x80000000.
-
-.define .fd_80000000
-.fd_80000000:
-	!float 2.147483648e+9 sz 8
-	.data1 0101,0340,00,00,00,00,00,00
diff --git a/mach/powerpc/libem/fd_FFFFFFFF.s b/mach/powerpc/libem/fd_FFFFFFFF.s
deleted file mode 100644
index 88cf04bd9..000000000
--- a/mach/powerpc/libem/fd_FFFFFFFF.s
+++ /dev/null
@@ -1,10 +0,0 @@
-.sect .text; .sect .rom; .sect .data; .sect .bss
-
-.sect .rom
-
-! Contains a handy double-precision 0xFFFFFFFF.
-
-.define .fd_FFFFFFFF
-.fd_FFFFFFFF:
-	!float 4.294967295e+9 sz 8
-	.data1 0101,0357,0377,0377,0377,0340,00,00
diff --git a/mach/powerpc/libem/fef4.s b/mach/powerpc/libem/fef4.s
new file mode 100644
index 000000000..a338ed0a9
--- /dev/null
+++ b/mach/powerpc/libem/fef4.s
@@ -0,0 +1,48 @@
+.sect .text
+
+! Split a single-precision float into fraction and exponent, like
+! frexpf(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp
+!
+! Stack: ( single -- fraction exponent )
+
+.define .fef4
+.fef4:
+	lwz r3, 0(sp)			! r3 = word of float bits
+
+	! IEEE single = sign * 1.fraction * 2**(exponent - 127)
+	!   sign  exponent  fraction
+	!   0     1..8      9..31
+	!
+	! IEEE exponent = 126 in [0.5, 1) or (-1, -0.5].
+
+	extrwi. r6, r3, 8, 1		! r6 = IEEE exponent
+	beq 3f				! jump if zero or denormalized
+	cmpwi r6, 255			! tested by beq below; addi leaves cr0 alone
+	addi r5, r6, -126		! r5 = our exponent
+	beq 2f				! jump if infinity or NaN
+	! fall through if normalized
+
+	! Put fraction in [0.5, 1) or (-1, -0.5].
+1:	li r6, 126
+	insrwi r3, r6, 8, 1		! IEEE exponent = 126
+	! fall through
+
+2:	stw r3, 0(sp)			! push fraction
+	stwu r5, -4(sp)			! push exponent
+	blr
+
+	! Got denormalized number or zero, probably zero.
+	! If zero, then exponent must also be zero.
+3:	extrwi. r6, r3, 23, 9		! r6 = fraction
+	bne 4f				! jump if not zero
+	li r5, 0			! exponent = 0
+	b 2b
+
+	! Got denormalized number = 0.fraction * 2**-126
+4:	cntlzw r5, r6			! r5 = leading zero bits of r6 (9 or more)
+	addi r5, r5, -8			! r5 = shift that moves leading 1 to bit 8
+	slw r6, r6, r5			! shift left to make 1.fraction
+	insrwi r3, r6, 23, 9		! set new fraction
+	li r6, -126 + 1
+	subf r5, r5, r6			! r5 = our exponent
+	b 1b
diff --git a/mach/powerpc/libem/fef8.s b/mach/powerpc/libem/fef8.s
index 26a962d8b..aff5ea3b6 100644
--- a/mach/powerpc/libem/fef8.s
+++ b/mach/powerpc/libem/fef8.s
@@ -3,7 +3,7 @@
 .sect .text
 
 ! Split a double-precision float into fraction and exponent, like
-! frexp(3) in C.
+! frexp(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp
 !
 ! Stack: ( double -- fraction exponent )
 
@@ -12,42 +12,41 @@
 	lwz r3, 0(sp)			! r3 = high word (bits 0..31)
 	lwz r4, 4(sp)			! r4 = low word (bits 32..63)
 
-	! IEEE double-precision format:
+	! IEEE double = sign * 1.fraction * 2**(exponent - 1023)
 	!   sign  exponent  fraction
 	!   0     1..11     12..63
 	!
-	! To get fraction in [0.5, 1) or (-1, -0.5], we subtract 1022
-	! from the IEEE exponent.
+	! IEEE exponent = 1022 in [0.5, 1) or (-1, -0.5].
 
 	extrwi. r6, r3, 11, 1		! r6 = IEEE exponent
-	addi r5, r6, -1022		! r5 = our exponent
-	beq 2f				! jump if zero or denormalized
+	beq 3f				! jump if zero or denormalized
 	cmpwi r6, 2047
-	beq 1f				! jump if infinity or NaN
+	addi r5, r6, -1022		! r5 = our exponent
+	beq 2f				! jump if infinity or NaN
 	! fall through if normalized
 
-	! Put fraction in [0.5, 1) or (-1, -0.5] by setting its
-	! IEEE exponent to 1022.
-	rlwinm r3, r3, 0, 12, 0		! clear old exponent
-	oris r3, r3, 1022 << 4		! set new exponent
+	! Put fraction in [0.5, 1) or (-1, -0.5].
+1:	li r6, 1022
+	insrwi r3, r6, 11, 1		! IEEE exponent = 1022
 	! fall through
 
-1:	stw r3, 0(sp)
+2:	stw r3, 0(sp)
 	stw r4, 4(sp)			! push fraction
 	stwu r5, -4(sp)			! push exponent
 	blr
 
-2:	! Got denormalized number or zero, probably zero.
-	extrwi r6, r3, 22, 12
+	! Got denormalized number or zero, probably zero.
+	! If zero, then exponent must also be zero.
+3:	extrwi r6, r3, 20, 12
 	or. r6, r6, r4			! r6 = high|low fraction
-	bne 3f				! jump if not zero
+	bne 4f				! jump if not zero
 	li r5, 0			! exponent = 0
-	b 1b
+	b 2b
 
-3:	! Got denormalized number, not zero.
-	lfd f0, 0(sp)
-	lis r6, ha16[_2_64]
-	lfd f1, lo16[_2_64](r6)
+	! Got denormalized number = 0.fraction * 2**-1022
+4:	lfd f0, 0(sp)
+	lis r6, ha16[.fs_2_64]
+	lfs f1, lo16[.fs_2_64](r6)
 	fmul f0, f0, f1			! multiply it by 2**64
 	stfd f0, 0(sp)
 	lwz r3, 0(sp)
@@ -57,7 +56,6 @@
 	b 1b
 
 .sect .rom
-_2_64:
-	! (double) 2**64
-	.data4 0x43f00000
-	.data4 0x00000000
+.fs_2_64:
+	!float 1.84467440737095516e+19 sz 4
+	.data1 0137,0200,00,00
diff --git a/mach/powerpc/libem/fif4.s b/mach/powerpc/libem/fif4.s
new file mode 100644
index 000000000..fc29b178c
--- /dev/null
+++ b/mach/powerpc/libem/fif4.s
@@ -0,0 +1,64 @@
+.sect .text
+
+! Multiplies two single-precision floats, then splits the product into
+! fraction and integer, both as floats, like modff(3) in C,
+! http://en.cppreference.com/w/c/numeric/math/modf
+!
+! Stack: ( a b -- fraction integer )
+
+.define .fif4
+.fif4:
+	lfs f1, 4(sp)			! f1 = a
+	lfs f2, 0(sp)			! f2 = b
+	fmuls f1, f1, f2		! f1 = a * b
+	stfs f1, 0(sp)			! store product so its bits can be read
+	lwz r3, 0(sp)			! r3 = word of float bits
+
+	! IEEE single = sign * 1.fraction * 2**(exponent - 127)
+	!   sign  exponent  fraction
+	!   0     1..8      9..31
+	!
+	! Subtract 127 from the IEEE exponent.  If the result is from
+	! 0 to 23, then the IEEE fraction has that many integer bits.
+
+	extrwi r5, r3, 8, 1		! r5 = IEEE exponent
+	addic. r5, r5, -127		! r5 = nr of integer bits
+	blt 3f				! branch if no integer
+	cmpwi r5, 24
+	bge 4f				! branch if no fraction
+	! fall through if integer with fraction
+
+	! f1 has r5 = 0 to 23 integer bits in the IEEE fraction.
+	! There are 23 - r5 fraction bits.
+	li r6, 23
+	subf r6, r5, r6			! r6 = 23 - r5 = nr of fraction bits
+	srw r3, r3, r6
+	slw r3, r3, r6			! clear fraction in word
+	! fall through
+
+1:	stw r3, 0(sp)
+	lfs f2, 0(sp)			! f2 = integer, from the truncated word
+	fsubs f1, f1, f2		! fraction = value - integer
+2:	stfs f1, 4(sp)			! push fraction
+	stfs f2, 0(sp)			! push integer
+	blr
+
+	! f1 is a fraction without integer (or zero).
+	! Then integer is zero with same sign.
+3:	extlwi r3, r3, 1, 0		! extract sign bit
+	stfs f1, 4(sp)			! push fraction
+	stw r3, 0(sp)			! push integer = zero with sign
+	blr
+
+	! f1 is an integer without fraction (or infinity or NaN).
+	! Unless NaN, then fraction is zero with same sign.
+4:	fcmpu cr0, f1, f1		! unordered only if f1 is NaN
+	bun cr0, 5f
+	extlwi r3, r3, 1, 0		! extract sign bit
+	stw r3, 4(sp)			! push fraction = zero with sign
+	stfs f1, 0(sp)			! push integer
+	blr
+
+	! f1 is NaN, so both fraction and integer are NaN.
+5:	fmr f2, f1
+	b 2b
diff --git a/mach/powerpc/libem/fif8.s b/mach/powerpc/libem/fif8.s
index bce4f8d24..f93a39ac2 100644
--- a/mach/powerpc/libem/fif8.s
+++ b/mach/powerpc/libem/fif8.s
@@ -1,7 +1,8 @@
 .sect .text
 
 ! Multiplies two double-precision floats, then splits the product into
-! fraction and integer, like modf(3) in C.  On entry:
+! fraction and integer, both as floats, like modf(3) in C,
+! http://en.cppreference.com/w/c/numeric/math/modf
 !
 ! Stack: ( a b -- fraction integer )
 
@@ -14,20 +15,18 @@
 	lwz r3, 0(sp)			! r3 = high word
 	lwz r4, 4(sp)			! r4 = low word
 
-	! IEEE double-precision format:
+	! IEEE double = sign * 1.fraction * 2**(exponent - 1023)
 	!   sign  exponent  fraction
 	!   0     1..11     12..63
 	!
 	! Subtract 1023 from the IEEE exponent.  If the result is from
 	! 0 to 51, then the IEEE fraction has that many integer bits.
-	! (IEEE has an implicit 1 before its fraction.  If the IEEE
-	! fraction has 0 integer bits, we still have an integer.)
 
 	extrwi r5, r3, 11, 1		! r5 = IEEE exponent
 	addic. r5, r5, -1023		! r5 = nr of integer bits
-	blt 4f				! branch if no integer
+	blt 3f				! branch if no integer
 	cmpwi r5, 52
-	bge 5f				! branch if no fraction
+	bge 4f				! branch if no fraction
 	cmpwi r5, 21
 	bge 6f				! branch if large integer
 	! fall through if small integer
@@ -44,22 +43,38 @@
 1:	stw r3, 0(sp)
 	stw r4, 4(sp)
 	lfd f2, 0(sp)			! integer = high word, low word
-2:	fsub f1, f1, f2			! fraction = value - integer
-3:	stfd f1, 8(sp)			! push fraction
+	fsub f1, f1, f2			! fraction = value - integer
+2:	stfd f1, 8(sp)			! push fraction
 	stfd f2, 0(sp)			! push integer
 	blr
 
-4:	! f1 is a fraction without integer.
-	fsub f2, f1, f1			! integer = zero
-	b 3b
+	! f1 is a fraction without integer (or zero).
+	! Then integer is zero with same sign.
+3:	extlwi r3, r3, 1, 0		! extract sign bit
+	li r4, 0
+	stfd f1, 8(sp)			! push fraction
+	stw r4, 4(sp)
+	stw r3, 0(sp)			! push integer = zero with sign
+	blr
 
-5:	! f1 is an integer without fraction (or infinity or NaN).
-	fmr f2, f1			! integer = f1
+	! f1 is an integer without fraction (or infinity or NaN).
+	! Unless NaN, then fraction is zero with same sign.
+4:	fcmpu cr0, f1, f1		! unordered only if f1 is NaN
+	bun cr0, 5f
+	extlwi r3, r3, 1, 0		! extract sign bit
+	li r4, 0
+	stw r4, 12(sp)
+	stw r3, 8(sp)			! push fraction = zero with sign
+	stfd f1, 0(sp)			! push integer
+	blr
+
+	! f1 is NaN, so both fraction and integer are NaN.
+5:	fmr f2, f1
 	b 2b
 
-6:	! f1 has r5 = 21 to 51 to integer bits.
+	! f1 has r5 = 21 to 51 integer bits.
 	! Low word has 52 - r5 fraction bits.
-	li r6, 52
+6:	li r6, 52
 	subf r6, r5, r6
 	srw r4, r4, r6
 	slw r4, r4, r6			! clear fraction in low word
diff --git a/mach/powerpc/libem/inn.s b/mach/powerpc/libem/inn.s
index 8925e776e..32275c117 100644
--- a/mach/powerpc/libem/inn.s
+++ b/mach/powerpc/libem/inn.s
@@ -5,6 +5,9 @@
 /* Tests a bit in a bitset on the stack.
  *
  * Stack: ( bitset bitnum setsize -- bool )
+ *
+ * Some back ends push false if bitnum is too large.  We don't because
+ * the compilers tend to pass a small enough bitnum.
  */
 
 .define .inn
diff --git a/mach/powerpc/libem/rck.s b/mach/powerpc/libem/rck.s
index 9008be610..1d07d5711 100644
--- a/mach/powerpc/libem/rck.s
+++ b/mach/powerpc/libem/rck.s
@@ -2,6 +2,9 @@
 
 ! Bounds check. Traps if the value is out of range.
 !  Stack: ( value descriptor -- value )
+!
+! This ".rck" only works with 4-byte integers.  The name is ".rck" and
+! not ".rck4" because many back ends only do rck with the word size.
 
 .define .rck
 .rck:
@@ -18,3 +21,7 @@
     bgt .trap_erange
 
     blr
+
+.trap_erange:
+    li r3, 1       ! ERANGE = 1 in h/em_abs.h
+    b .trp
diff --git a/mach/powerpc/libem/set.s b/mach/powerpc/libem/set.s
index 3c4a9e579..8faf84a09 100644
--- a/mach/powerpc/libem/set.s
+++ b/mach/powerpc/libem/set.s
@@ -2,6 +2,9 @@
 
 ! Create singleton set.
 !  Stack: ( bitnumber size -- set )
+!
+! Some back ends trap ESET if bitnumber is out of range.  We don't
+! because the compilers tend to pass a valid bitnumber.
 
 .define .set
 .set:
diff --git a/mach/powerpc/libem/trp.s b/mach/powerpc/libem/trp.s
new file mode 100644
index 000000000..b07afb929
--- /dev/null
+++ b/mach/powerpc/libem/trp.s
@@ -0,0 +1,56 @@
+.sect .text
+
+.define .trap_ecase
+.trap_ecase:
+	li	r3, 20			! ECASE = 20 in h/em_abs.h
+	! FALLTHROUGH to .trp
+
+! Raises an EM trap.
+! Expects r3 = trap number.
+
+.define .trp
+.trp:
+	cmplwi	r3, 15			! traps > 15 can't be ignored
+	bgt	1f
+
+	lis	r4, ha16[.ignmask]
+	lwz	r4, lo16[.ignmask](r4)	! load ignore mask
+	srw	r4, r4, r3		! move mask bit for this trap to low bit
+	andi.	r4, r4, 1		! test that bit
+	bnelr				! return if ignoring trap
+
+1:	lis	r4, ha16[.trppc]
+	lwz	r5, lo16[.trppc](r4)	! r5 = user trap routine
+	mr.	r5, r5
+	beq	2f			! if no user trap routine, bail out
+
+	mtspr	ctr, r5			! ctr = routine to call
+	mfspr	r6, lr			! r6 = our own return address
+	li	r0, 0
+	stwu	r3, -8(sp)		! push trap number
+	stw	r0, lo16[.trppc](r4)	! reset trap routine
+	stw	r6, 4(sp)		! save old lr
+	bctrl				! call trap routine
+
+	lwz	r0, 4(sp)		! reload old lr
+	mtspr	lr, r0
+	addi	sp, sp, 8		! retract over stack usage
+	blr
+
+2:	! No trap handler.  Write error message, exit.
+	li	r3, 2
+	stwu	r3, -12(sp)		! push 3 argument words; 0(sp) = 2
+	lis	r4, ha16[message]
+	addi	r4, r4, lo16[message]
+	li	r5, 6
+	stw	r4, 4(sp)		! 4(sp) = address of message
+	stw	r5, 8(sp)		! 8(sp) = length 6
+	bl	_write			! write(2, message, 6)
+
+	li	r3, 1
+	stw	r3, 0(sp)		! reuse 0(sp) for the exit status
+	bl	__exit			! _exit(1)
+
+.sect .rom
+message:
+	.ascii "TRAP!\n"
diff --git a/mach/powerpc/mcg/table b/mach/powerpc/mcg/table
index b72990c36..ca44ce869 100644
--- a/mach/powerpc/mcg/table
+++ b/mach/powerpc/mcg/table
@@ -237,10 +237,13 @@ PATTERNS
     SETSP.I(in:(int)reg)
         emit "mr sp, %in"
         cost 4;
-    
+
     out:(int)reg = ANY.I
         cost 1;
 
+    out:(long)reg = ANY.L
+        cost 1;
+
     out:(int)reg = COPYF.I(in:(float)reg)
         emit "stfsu %in, -4(sp)"
         emit "lwz %out, 0(sp)"
@@ -306,10 +309,21 @@ PATTERNS
 		emit "lwz %out, %addr"
 		cost 4;
 
+#if 0
+    /* FIXME: Doesn't work because %out.0 and %addr might share a
+     * register, so it corrupts %addr before it loads %out.1. */
     out:(long)reg = LOAD.L(addr:address)
         emit "lwz %out.0, 4+%addr"
         emit "lwz %out.1, 0+%addr"
         cost 8;
+#else
+    /* Works, but costs an extra instruction. */
+    out:(long)reg = LOAD.L(addr:address)
+        emit "la %out.1, %addr"
+        emit "lwz %out.0, 4(%out.1)"
+        emit "lwz %out.1, 0(%out.1)"
+        cost 12;
+#endif
 
 	out:(int)ushort0 = LOADH.I(addr:address)
 		emit "lhz %out, %addr"
@@ -566,6 +580,13 @@ PATTERNS
         emit "! COMPARESI.I(cr, 0)"
         cost 4;
 
+    cr:(cr)cr = COMPAREUL.I(left:(long)reg, right:(long)reg)
+        emit "cmpl %cr, 0, %left.1, %right.1"
+        emit "bne 1f"
+        emit "cmpl %cr, 0, %left.0, %right.0"
+        emit "1:"
+        cost 12;
+
 
 
 /* Booleans */
diff --git a/mach/powerpc/ncg/mach.c b/mach/powerpc/ncg/mach.c
index b67903b0a..1a1d98d6c 100644
--- a/mach/powerpc/ncg/mach.c
+++ b/mach/powerpc/ncg/mach.c
@@ -10,8 +10,13 @@
 
 #include <limits.h>
 #include <stdint.h>
+#include <stb.h>
 
+static int writing_stabs = 0;
+
+#ifdef REGVARS
 static long framesize;
+#endif
 
 void
 con_part(int sz, word w)
@@ -51,32 +56,42 @@ con_mult(word sz)
 #define FL_MSB_AT_LOW_ADDRESS	1
 #include <con_float>
 
-static void
-emit_prolog(void)
-{
-	fprintf(codefile, "mfspr r0, lr\n");
-	fprintf(codefile, "addi sp, sp, %ld\n", -framesize - 8);
-	fprintf(codefile, "stw fp, %ld(sp)\n", framesize);
-	fprintf(codefile, "stw r0, %ld(sp)\n", framesize + 4);
-	fprintf(codefile, "addi fp, sp, %ld\n", framesize);
-}
-
 void
 prolog(full nlocals)
 {
-	framesize = nlocals;
+	/*
+	 * For N_LSYM and N_PSYM stabs, we want gdb to use fp, not sp.
+	 * The trick is to use "stwu sp, _(sp)" then "addi fp, sp, 0"
+	 * before we save lr with "stw r0, _(sp)".
+	 *
+	 * Tried with Apple's gdb-696.  Refer to
+	 *  - gdb-696/src/gdb/rs6000-tdep.c, skip_prologue(), line 1101
+	 *  - gdb-696/src/gdb/macosx/ppc-macosx-frameinfo.c,
+	 *    ppc_parse_instructions(), line 717
+	 * https://opensource.apple.com/release/developer-tools-25.html
+	 */
+	fprintf(codefile, "mfspr r0, lr\n");
+	if (writing_stabs) {
+		fprintf(codefile, "stwu sp, -8(sp)\n");  /* for gdb */
+		fprintf(codefile, "stw fp, 0(sp)\n");
+	} else
+		fprintf(codefile, "stwu fp, -8(sp)\n");
+	fprintf(codefile, "addi fp, sp, 0\n");           /* for gdb */
+	fprintf(codefile, "stw r0, 4(sp)\n");
 
 #ifdef REGVARS
-	/* f_regsave() will call emit_prolog() */
+	framesize = nlocals;
+	/* regsave() increases framesize; f_regsave() adjusts sp. */
 #else
-	emit_prolog();
+	if (nlocals)
+		fprintf(codefile, "addi sp, sp, %ld\n", -nlocals);
 #endif
 }
 
 void
 mes(word type)
 {
-	int argt ;
+	int argt, a1, a2 ;
 
 	switch ( (int)type ) {
 	case ms_ext :
@@ -91,6 +106,41 @@ mes(word type)
 				break ;
 			}
 		}
+	case ms_stb:
+		argt = getarg(str_ptyp | cst_ptyp);
+		if (argt == sp_cstx)
+			fputs(".symb \"\", ", codefile);
+		else {
+			fprintf(codefile, ".symb \"%s\", ", str);
+			argt = getarg(cst_ptyp);
+		}
+		a1 = argval;
+		argt = getarg(cst_ptyp);
+		a2 = argval;
+		argt = getarg(cst_ptyp|nof_ptyp|sof_ptyp|ilb_ptyp|pro_ptyp);
+		if (a1 == N_PSYM) {
+			/* Change offset from AB into offset from
+			   the frame pointer.
+			*/
+			argval += 8;
+		}
+		fprintf(codefile, "%s, 0x%x, %d\n", strarg(argt), a1, a2);
+		argt = getarg(end_ptyp);
+		break;
+	case ms_std:
+		writing_stabs = 1;  /* set by first "mes 13,...,100,0" */
+		argt = getarg(str_ptyp | cst_ptyp);
+		if (argt == sp_cstx)
+			str[0] = '\0';
+		else {
+			argt = getarg(cst_ptyp);
+		}
+		swtxt();
+		fprintf(codefile, ".symd \"%s\", 0x%x,", str, (int) argval);
+		argt = getarg(cst_ptyp);
+		fprintf(codefile, "%d\n", (int) argval);
+		argt = getarg(end_ptyp);
+		break;
 	default :
 		while ( getarg(any_ptyp) != sp_cend ) ;
 		break ;
@@ -196,7 +246,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf)
 	for (reg = 31; reg >= 0; reg--) {
 		if (savedf[reg] != LONG_MIN) {
 			offset -= 8;
-			fprintf(codefile, "%s f%d, %ld(fp)\n",
+			fprintf(codefile, "%s f%d,%ld(fp)\n",
 				opf, reg, offset);
 		}
 	}
@@ -213,7 +263,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf)
 		while (reg > 0 && savedi[reg - 1] != LONG_MIN)
 			reg--;
 		offset -= (32 - reg) * 4;
-		fprintf(codefile, "%s r%d, %ld(fp)\n", opm, reg, offset);
+		fprintf(codefile, "%s r%d,%ld(fp)\n", opm, reg, offset);
 	} else
 		reg = 32;
 
@@ -221,7 +271,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf)
 	for (reg--; reg >= 0; reg--) {
 		if (savedi[reg] != LONG_MIN) {
 			offset -= 4;
-			fprintf(codefile, "%s r%d, %ld(fp)\n",
+			fprintf(codefile, "%s r%d,%ld(fp)\n",
 				ops, reg, offset);
 		}
 	}
@@ -232,7 +282,8 @@ f_regsave(void)
 {
 	int reg;
 
-	emit_prolog();
+	if (framesize)
+		fprintf(codefile, "addi sp, sp, %ld\n", -framesize);
 	saveloadregs("stw", "stmw", "stfd");
 
 	/*
diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table
index 62e8f62af..82cada71a 100644
--- a/mach/powerpc/ncg/table
+++ b/mach/powerpc/ncg/table
@@ -1,24 +1,52 @@
+/*
+ * PowerPC table for ncg
+ *
+ * David Given created this table.
+ * George Koehler made many changes in years 2016 to 2018.
+ *
+ * This back end provides 4-byte integers, 4-byte floats, and 8-byte
+ * floats.  It should provide enough of EM for the ACK's compilers.
+ *  - It doesn't provide "mon" (monitor call) nor "lor 2", "str 2"
+ *    (heap pointer).  Programs should call procedures in libsys to
+ *    make system calls or allocate heap memory.
+ *  - It generates only a few EM traps:
+ *     - EARRAY from aar, lar, sar
+ *     - ERANGE from rck
+ *     - ECASE from csa, csb
+ *  - It uses floating-point registers to move 8-byte values that
+ *    aren't floats.  This might cause extra FPU context switches in
+ *    programs that don't use floating point.
+ *
+ * The EM stack is less than optimal for PowerPC, and incompatible
+ * with the calling conventions of other compilers (like gcc).
+ *  - EM and ncg use the stack to pass parameters to procedures.  For
+ *    PowerPC, this is probably slower than passing them in registers.
+ *  - This back end misaligns some 8-byte floats, because EM's stack
+ *    has only 4-byte alignment.  (This kind of misalignment also
+ *    happened in IBM's AIX and Apple's Mac OS, where data structures
+ *    had 8-byte floats with only 4-byte alignment.)
+ */
+
 EM_WSIZE = 4
 EM_PSIZE = 4
 EM_BSIZE = 8    /* two words saved in call frame */
 
-INT8 = 1        /* Size of values */
-INT16 = 2
-INT32 = 4
-INT64 = 8
-
 FP_OFFSET = 0   /* Offset of saved FP relative to our FP */
 PC_OFFSET = 4   /* Offset of saved PC relative to our FP */
-SL_OFFSET = 8   /* Offset of static link */
 
 #define COMMENT(n) /* comment {LABEL, n} */
 
-
-#define nicesize(x) ((x)==INT8 || (x)==INT16 || (x)==INT32 || (x)==INT64)
+#define nicesize(x) ((x)==1 || (x)==2 || (x)==4 || (x)==8)
 
 #define smalls(n) sfit(n, 16)
 #define smallu(n) ufit(n, 16)
 
+/* Finds FRAME_V tokens that overlap myoff, mysize. */
+#define fover(myoff, mysize) (%off+%size>(myoff) && %off<((myoff)+(mysize)))
+
+/* Checks if we can use {LXFRAME, x}. */
+#define nicelx(x) ((x)>=1 && (x)<=0x8000)
+
 #define lo(n) ((n) & 0xFFFF)
 #define hi(n) (((n)>>16) & 0xFFFF)
 
@@ -32,6 +60,7 @@ SL_OFFSET = 8   /* Offset of static link */
 PROPERTIES
 
 	GPR             /* general-purpose register */
+	SPFP            /* sp or fp */
 	REG             /* allocatable GPR */
 	REG3            /* coercion to r3 */
 
@@ -46,17 +75,25 @@ PROPERTIES
 REGISTERS
 
 	/*
+	 * We use r1 as stack pointer and r2 as frame pointer.
+	 * Our assembler has aliases sp -> r1 and fp -> r2.
+	 *
+	 * We preserve r13 to r31 and f14 to f31 across function
+	 * calls to mimic other compilers (like gcc).  See
+	 *  - http://refspecs.linuxbase.org/elf/elfspec_ppc.pdf
+	 *  - https://github.com/ryanarn/powerabi -> chap3-elf32abi.sgml
+	 *  - Apple's "32-bit PowerPC Function Calling Conventions"
+	 *
 	 * When ncg allocates regvars, it seems to start with the last
 	 * register in the first class.  To encourage ncg to allocate
 	 * them from r31 down, we list them in one class as
 	 *   r13, r14, ..., r31: GPR, REG regvar(reg_any).
 	 */
 
-	r0, sp, fp  : GPR.
-	r3          : GPR, REG, REG3.
-
-	r4, r5, r6, r7, r8, r9, r10, r11, r12
-	  : GPR, REG.
+	r0, r12                           : GPR.
+	sp, fp                            : GPR, SPFP.
+	r3                                : GPR, REG, REG3.
+	r4, r5, r6, r7, r8, r9, r10, r11  : GPR, REG.
 
 	r13, r14, r15, r16, r17, r18, r19, r20, r21, r22, r23, r24,
 	r25, r26, r27, r28, r29, r30, r31
@@ -65,7 +102,7 @@ REGISTERS
 	f0          : FPR.
 
 	f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13
-          : FPR, FREG.
+	  : FPR, FREG.
 
 	f14, f15, f16, f17, f18, f19, f20, f21, f22, f23, f24, f25,
 	f26, f27, f28, f29, f30, f31
@@ -86,8 +123,11 @@ REGISTERS
 	  : FSREG regvar(reg_float).
 
 	lr, ctr     : SPR.
-	cr0         : CR.
+	cr0         : CR.   /* We use cr0, ignore cr1 to cr7. */
 
+	/* The stacking rules can't allocate registers.  We use these
+	 * scratch registers to stack tokens.
+	 */
 #define RSCRATCH r0
 #define FSCRATCH f0
 
@@ -96,7 +136,7 @@ TOKENS
 
 /* Primitives */
 
-	CONST              = { INT val; }             4    val.
+	C /* constant */   = { INT val; }             4    val.
 	LABEL              = { ADDR adr; }            4    adr.
 	LABEL_HI           = { ADDR adr; }            4    "hi16[" adr "]".
 	LABEL_HA           = { ADDR adr; }            4    "ha16[" adr "]".
@@ -106,29 +146,39 @@ TOKENS
 
 /* Allows us to use regvar() to refer to registers */
 
-	GPR_EXPR           = { GPR reg; }             4    reg.
-	FPR_EXPR           = { FPR reg; }             8    reg.
+	REG_EXPR           = { REG reg; }             4    reg.
+	FREG_EXPR          = { FREG reg; }            8    reg.
 	FSREG_EXPR         = { FSREG reg; }           4    reg.
 
 /* Constants on the stack */
 
-	CONST_N8000        = { INT val; }             4.
-	CONST_N7FFF_N0001  = { INT val; }             4.
-	CONST_0000_7FFF    = { INT val; }             4.
-	CONST_8000         = { INT val; }             4.
-	CONST_8001_FFFF    = { INT val; }             4.
-	CONST_HZ           = { INT val; }             4.
-	CONST_HL           = { INT val; }             4.
+	CONST_N8000        = { INT val; }             4    val.
+	CONST_N7FFF_N0001  = { INT val; }             4    val.
+	CONST_0000_7FFF    = { INT val; }             4    val.
+	CONST_8000         = { INT val; }             4    val.
+	CONST_8001_FFFF    = { INT val; }             4    val.
+	CONST_HI_ZR        = { INT val; }             4    val.
+	CONST_HI_LO        = { INT val; }             4    val.
 
 /* Expression partial results */
 
+	SEX_B       = { GPR reg; }             4.   /* sign extension */
+	SEX_H       = { GPR reg; }             4.
+
 	SUM_RIS     = { GPR reg; INT offhi; }  4.   /* reg + (offhi << 16) */
 	SUM_RC      = { GPR reg; INT off; }    4.   /* reg + off */
 	SUM_RL      = { GPR reg; ADDR adr; }   4.   /* reg + lo16[adr] */
 	SUM_RR      = { GPR reg1; GPR reg2; }  4.   /* reg1 + reg2 */
 
-	SEX_B              = { GPR reg; }             4.
-	SEX_H              = { GPR reg; }             4.
+	SUB_CR      = { INT val; GPR reg; }    4.   /* val - reg */
+	SUB_RR      = { GPR reg1; GPR reg2; }  4.   /* reg1 - reg2 */
+	NEG_R       = { GPR reg; }             4.   /* -reg */
+	MUL_RC      = { GPR reg; INT val; }    4.   /* reg * val */
+	MUL_RR      = { GPR reg1; GPR reg2; }  4.   /* reg1 * reg2 */
+	DIV_RR      = { GPR reg1; GPR reg2; }  4.   /* reg1 / reg2 signed */
+	DIV_RR_U    = { GPR reg1; GPR reg2; }  4.   /* reg1 / reg2 unsigned */
+
+/* Indirect loads and stores */
 
 	IND_RC_B    = { GPR reg; INT off; }    4    off "(" reg ")".
 	IND_RL_B    = { GPR reg; ADDR adr; }   4    "lo16[" adr "](" reg ")".
@@ -146,15 +196,40 @@ TOKENS
 	IND_RL_D    = { GPR reg; ADDR adr; }   8    "lo16[" adr "](" reg ")".
 	IND_RR_D    = { GPR reg1; GPR reg2; }  8.
 
-	NOT_R              = { GPR reg; }             4.
+/* Local variables in frame */
 
-	AND_RR             = { GPR reg1; GPR reg2; }  4.
-	OR_RR              = { GPR reg1; GPR reg2; }  4.
-	OR_RIS             = { GPR reg; INT valhi; }  4.
-	OR_RC              = { GPR reg; INT val; }    4.
-	XOR_RR             = { GPR reg1; GPR reg2; }  4.
-	XOR_RIS            = { GPR reg; INT valhi; }  4.
-	XOR_RC             = { GPR reg; INT val; }    4.
+	FRAME_B     = { INT level; GPR reg; INT off; INT size; }
+	                                       4    off "(" reg ")".
+	FRAME_H     = { INT level; GPR reg; INT off; INT size; }
+	                                       4    off "(" reg ")".
+	FRAME_H_S   = { INT level; GPR reg; INT off; INT size; }
+	                                       4    off "(" reg ")".
+	FRAME_W     = { INT level; GPR reg; INT off; INT size; }
+	                                       4    off "(" reg ")".
+	FRAME_D     = { INT level; GPR reg; INT off; INT size; }
+	                                       8    off "(" reg ")".
+
+	LXFRAME     = { INT level; }           4.
+
+/* Bitwise logic */
+
+	NOT_R       = { GPR reg; }             4.   /* ~reg */
+	AND_RIS     = { GPR reg; INT valhi; }  4.
+	AND_RC      = { GPR reg; INT val; }    4.
+	AND_RR      = { GPR reg1; GPR reg2; }  4.
+	ANDC_RR     = { GPR reg1; GPR reg2; }  4.   /* reg1 & ~reg2 */
+	OR_RIS      = { GPR reg; INT valhi; }  4.
+	OR_RC       = { GPR reg; INT val; }    4.
+	OR_RR       = { GPR reg1; GPR reg2; }  4.
+	ORC_RR      = { GPR reg1; GPR reg2; }  4.   /* reg1 | ~reg2 */
+	XOR_RIS     = { GPR reg; INT valhi; }  4.
+	XOR_RC      = { GPR reg; INT val; }    4.
+	XOR_RR      = { GPR reg1; GPR reg2; }  4.
+	NAND_RR     = { GPR reg1; GPR reg2; }  4.   /* ~(reg1 & reg2) */
+	NOR_RR      = { GPR reg1; GPR reg2; }  4.   /* ~(reg1 | reg2) */
+	EQV_RR      = { GPR reg1; GPR reg2; }  4.   /* ~(reg1 ^ reg2) */
+
+/* Comparisons */
 
 	COND_RC            = { GPR reg; INT val; }    4.
 	COND_RR            = { GPR reg1; GPR reg2; }  4.
@@ -181,28 +256,43 @@ SETS
 	UCONST2         = CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF.
 	/* any constant on stack */
 	CONST_STACK     = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF +
-	                  CONST_8000 + CONST_8001_FFFF + CONST_HZ + CONST_HL.
+	                  CONST_8000 + CONST_8001_FFFF +
+			  CONST_HI_ZR + CONST_HI_LO.
 
-	SUM_ALL            = SUM_RC + SUM_RL + SUM_RR.
+	CONST           = C + CONST_STACK.
 
-	SEX_ALL            = SEX_B + SEX_H.
+	SET_RC_B        = IND_RC_B + IND_RL_B + FRAME_B.
+	SET_RC_H        = IND_RC_H + IND_RL_H + FRAME_H.
+	SET_RC_H_S      = IND_RC_H_S + IND_RL_H_S + FRAME_H_S.
+	SET_RC_W        = IND_RC_W + IND_RL_W + FRAME_W.
+	SET_RC_D        = IND_RC_D + IND_RL_D + FRAME_D.
 
-	LOGICAL_ALL        = NOT_R + AND_RR + OR_RR + OR_RC + XOR_RR +
-	                     XOR_RC.
+	IND_ALL_B       = IND_RC_B + IND_RL_B + IND_RR_B.
+	IND_ALL_H       = IND_RC_H + IND_RL_H + IND_RR_H +
+	                  IND_RC_H_S + IND_RL_H_S + IND_RR_H_S.
+	IND_ALL_W       = IND_RC_W + IND_RL_W + IND_RR_W.
+	IND_ALL_D       = IND_RC_D + IND_RL_D + IND_RR_D.
+	IND_V           = IND_ALL_B + IND_ALL_H + IND_ALL_W + IND_ALL_D.
 
-	IND_ALL_B          = IND_RC_B + IND_RL_B + IND_RR_B.
-	IND_ALL_H          = IND_RC_H + IND_RL_H + IND_RR_H +
-	                     IND_RC_H_S + IND_RL_H_S + IND_RR_H_S.
-	IND_ALL_W          = IND_RC_W + IND_RL_W + IND_RR_W.
-	IND_ALL_D          = IND_RC_D + IND_RL_D + IND_RR_D.
-	IND_ALL_BHW        = IND_ALL_B + IND_ALL_H + IND_ALL_W.
+	FRAME_V         = FRAME_B + FRAME_H + FRAME_H_S + FRAME_W + FRAME_D.
 
 	/* anything killed by sti (store indirect) */
-	MEMORY             = IND_ALL_BHW + IND_ALL_D.
+	MEMORY          = IND_V + FRAME_V.
 
-	/* any stack token that we can easily move to GPR */
-	ANY_BHW            = REG + CONST_STACK + SEX_ALL +
-	                     SUM_ALL + IND_ALL_BHW + LOGICAL_ALL.
+	/* any integer from stack that we can easily move to GPR */
+	INT_W   = SPFP + REG + CONST_STACK + SEX_B + SEX_H +
+	          SUM_RIS + SUM_RC + SUM_RL + SUM_RR +
+	          SUB_CR + SUB_RR + NEG_R +
+	          MUL_RC + MUL_RR + DIV_RR + DIV_RR_U +
+	          IND_ALL_B + IND_ALL_H + IND_ALL_W +
+	          FRAME_B + FRAME_H + FRAME_H_S + FRAME_W +
+	          NOT_R + AND_RIS + AND_RC + AND_RR + ANDC_RR +
+	          OR_RIS + OR_RC + OR_RR + ORC_RR +
+	          XOR_RIS + XOR_RC + XOR_RR + NAND_RR + NOR_RR + EQV_RR +
+	          XEQ + XNE + XGT + XGE + XLT + XLE.
+
+	FLOAT_D = FREG + IND_ALL_D + FRAME_D.
+	FLOAT_W = FSREG + IND_ALL_W + FRAME_W.
 
 
 INSTRUCTIONS
@@ -220,7 +310,7 @@ INSTRUCTIONS
   cost(4, 1) /* space, time */
 
   add             GPR:wo, GPR:ro, GPR:ro.
-  addX "add."     GPR:wo, GPR:ro, GPR:ro.
+  addX "add."     GPR:wo:cc, GPR:ro, GPR:ro.
   addi            GPR:wo, GPR:ro, CONST+LABEL_LO:ro.
     li            GPR:wo, CONST:ro.
   addis           GPR:wo, GPR:ro, CONST+LABEL_HI+LABEL_HA:ro.
@@ -246,13 +336,13 @@ INSTRUCTIONS
   bclr            CONST:ro, CONST:ro, CONST:ro.
     blr.
   bl              LABEL:ro.
-  cmp             CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc.
+  cmp             CR:wo, CONST:ro, GPR:ro, GPR:ro kills :cc.
     cmpw          GPR:ro, GPR:ro kills :cc.
-  cmpi            CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc.
+  cmpi            CR:wo, CONST:ro, GPR:ro, CONST:ro kills :cc.
     cmpwi         GPR:ro, CONST:ro kills :cc.
-  cmpl            CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc.
+  cmpl            CR:wo, CONST:ro, GPR:ro, GPR:ro kills :cc.
     cmplw         GPR:ro, GPR:ro kills :cc.
-  cmpli           CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc.
+  cmpli           CR:wo, CONST:ro, GPR:ro, CONST:ro kills :cc.
     cmplwi        GPR:ro, CONST:ro kills :cc.
   divw            GPR:wo, GPR:ro, GPR:ro cost(4, 23).
   divwu           GPR:wo, GPR:ro, GPR:ro cost(4, 23).
@@ -263,7 +353,7 @@ INSTRUCTIONS
   fadds           FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
   fcmpo           CR:wo, FREG:ro, FREG:ro cost(4, 5).
   fcmpo           CR:wo, FSREG:ro, FSREG:ro cost(4, 5).
-  fctiwz          FREG:wo, FREG:ro.
+  fctiwz          FREG:wo, FREG:ro cost(4, 5).
   fdiv            FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 35).
   fdivs           FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 21).
   fmr             FPR:wo, FPR:ro cost(4, 5).
@@ -272,60 +362,67 @@ INSTRUCTIONS
   fmuls           FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
   fneg            FREG+DLOCAL:wo, FREG:ro cost(4, 5).
   fneg            FSREG+LOCAL:wo, FSREG:ro cost(4, 5).
-  frsp            FSREG:wo, FREG:ro cost(4, 5).
+  frsp            FSREG+LOCAL:wo, FREG:ro cost(4, 5).
   fsub            FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
   fsubs           FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
-  lbz             GPR:wo, IND_RC_B+IND_RL_B:ro cost(4, 3).
+  lbz             GPR:wo, SET_RC_B:ro cost(4, 3).
   lbzx            GPR:wo, GPR:ro, GPR:ro cost(4, 3).
-  lfd             FPR+DLOCAL:wo, IND_RC_D+IND_RL_D:ro cost(4, 5).
-  lfdu            FPR:wo, IND_RC_D:ro cost(4, 5).
+  lfd             FPR+DLOCAL:wo, SET_RC_D:ro cost(4, 5).
+  lfdu            FPR:wo, IND_RC_D:rw cost(4, 5).
   lfdx            FPR:wo, GPR:ro, GPR:ro cost(4, 5).
-  lfs             FSREG+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 4).
+  lfs             FSREG+LOCAL:wo, SET_RC_W:ro cost(4, 4).
   lfsu            FSREG:wo, IND_RC_W:rw cost(4, 4).
   lfsx            FSREG:wo, GPR:ro, GPR:ro cost(4, 4).
-  lha             GPR:wo, IND_RC_H_S+IND_RL_H_S:ro cost(4, 3).
+  lha             GPR:wo, SET_RC_H_S:ro cost(4, 3).
   lhax            GPR:wo, GPR:ro, GPR:ro cost(4, 3).
-  lhz             GPR:wo, IND_RC_H+IND_RL_H:ro cost(4, 3).
+  lhz             GPR:wo, SET_RC_H:ro cost(4, 3).
   lhzx            GPR:wo, GPR:ro, GPR:ro cost(4, 3).
+  lwz             GPR+LOCAL:wo, SET_RC_W:ro cost(4, 3).
   lwzu            GPR:wo, IND_RC_W:rw cost(4, 3).
   lwzx            GPR:wo, GPR:ro, GPR:ro cost(4, 3).
-  lwz             GPR+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 3).
+  mfcr            GPR:wo cost(4,2).
+  mfspr           GPR:wo, SPR:ro cost(4, 3).
+  mtspr           SPR:wo, GPR:ro cost(4, 2).
+  mulli           GPR:wo, GPR:ro, CONST:ro cost(4, 3).
+  mullw           GPR:wo, GPR:ro, GPR:ro cost(4, 4).
   nand            GPR:wo, GPR:ro, GPR:ro.
   neg             GPR:wo, GPR:ro.
   nor             GPR:wo, GPR:ro, GPR:ro.
-  mfcr            GPR:wo cost(4,2).
-  mullw           GPR:wo, GPR:ro, GPR:ro cost(4, 4).
-  mfspr           GPR:wo, SPR:ro cost(4, 3).
-  mtspr           SPR:wo, GPR:ro cost(4, 2).
   or              GPR:wo, GPR:ro, GPR:ro.
     mr            GPR:wo, GPR:ro.
   orX "or."       GPR:wo:cc, GPR:ro, GPR:ro.
-    orX_readonly "or." GPR:ro:cc, GPR:ro, GPR:ro.
+    mrX_readonly "mr." GPR:ro:cc, GPR:ro.
   orc             GPR:wo, GPR:ro, GPR:ro.
   ori             GPR:wo, GPR:ro, CONST+LABEL_LO:ro.
   oris            GPR:wo, GPR:ro, CONST:ro.
   rlwinm          GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro.
     extlwi        GPR:wo, GPR:ro, CONST:ro, CONST:ro.
     extrwi        GPR:wo, GPR:ro, CONST:ro, CONST:ro.
-    srwi          GPR:wo, GPR:ro, CONST:ro.
-  slw             GPR:wo, GPR:ro, GPR:ro.
-  subf            GPR:wo, GPR:ro, GPR:ro.
-  sraw            GPR:wo, GPR:ro, GPR:ro cost(4, 2).
-  srawi           GPR:wo, GPR:ro, CONST:ro cost(4, 2).
-  srw             GPR:wo, GPR:ro, GPR:ro.
-  stb             GPR:ro, IND_RC_B+IND_RL_B:rw cost(4, 3).
+    rotlwi        GPR+LOCAL:wo, GPR:ro, CONST:ro.
+    rotrwi        GPR+LOCAL:wo, GPR:ro, CONST:ro.
+    slwi          GPR+LOCAL:wo, GPR:ro, CONST:ro.
+    srwi          GPR+LOCAL:wo, GPR:ro, CONST:ro.
+  rlwnm           GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro.
+    rotlw         GPR+LOCAL:wo, GPR:ro, GPR:ro.
+  slw             GPR+LOCAL:wo, GPR:ro, GPR:ro.
+  sraw            GPR+LOCAL:wo, GPR:ro, GPR:ro /* kills xer */ cost(4, 2).
+  srawi           GPR+LOCAL:wo, GPR:ro, CONST:ro /* kills xer */ cost(4, 2).
+  srw             GPR+LOCAL:wo, GPR:ro, GPR:ro.
+  stb             GPR:ro, SET_RC_B:rw cost(4, 3).
   stbx            GPR:ro, GPR:ro, GPR:ro cost(4, 3).
-  stfd            FPR:ro, IND_RC_D+IND_RL_D:rw cost(4, 4).
+  stfd            FPR:ro, SET_RC_D:rw cost(4, 4).
   stfdu           FPR:ro, IND_RC_D:rw cost(4, 4).
   stfdx           FPR:ro, GPR:ro, GPR:ro cost(4, 4).
-  stfs            FSREG:ro, IND_RC_W+IND_RL_W:rw cost(4, 3).
+  stfs            FSREG:ro, SET_RC_W:rw cost(4, 3).
   stfsu           FSREG:ro, IND_RC_W:rw cost(4, 3).
   stfsx           FSREG:ro, GPR:ro, GPR:ro cost(4, 3).
-  sth             GPR:ro, IND_RC_H+IND_RL_H:rw cost(4, 3).
+  sth             GPR:ro, SET_RC_H:rw cost(4, 3).
   sthx            GPR:ro, GPR:ro, GPR:ro cost(4, 3).
-  stw             GPR:ro, IND_RC_W+IND_RL_W:rw cost(4, 3).
+  stw             GPR:ro, SET_RC_W:rw cost(4, 3).
   stwx            GPR:ro, GPR:ro, GPR:ro cost(4, 3).
   stwu            GPR:ro, IND_RC_W:rw cost(4, 3).
+  subf            GPR:wo, GPR:ro, GPR:ro.
+  subfic          GPR:wo, GPR:ro, CONST:ro /* kills xer */.
   xor             GPR:wo, GPR:ro, GPR:ro.
   xori            GPR:wo, GPR:ro, CONST:ro.
   xoris           GPR:wo, GPR:ro, CONST:ro.
@@ -347,22 +444,22 @@ MOVES
 
 /* Constants */
 
-	from CONST + CONST_STACK smalls(%val) to GPR
+	from CONST smalls(%val) to GPR
 		gen
 			COMMENT("move CONST->GPR smalls")
-			li %2, {CONST, %1.val}
+			li %2, %1
 
-	from CONST + CONST_STACK lo(%val)==0 to GPR
+	from CONST lo(%val)==0 to GPR
 		gen
 			COMMENT("move CONST->GPR shifted")
-			lis %2, {CONST, hi(%1.val)}
+			lis %2, {C, hi(%1.val)}
 
-	from CONST + CONST_STACK to GPR
+	from CONST to GPR
 		gen
 			COMMENT("move CONST->GPR")
-			lis %2, {CONST, hi(%1.val)}
-			ori %2, %2, {CONST, lo(%1.val)}
-			/* Can't use addi %2, %2, {CONST, los(%1.val)}
+			lis %2, {C, hi(%1.val)}
+			ori %2, %2, {C, lo(%1.val)}
+			/* Can't use addi %2, %2, {C, los(%1.val)}
 			 * because %2 might be R0. */
 
 	from LABEL to GPR
@@ -385,10 +482,10 @@ MOVES
 /* Register + something */
 
 	from SUM_RIS to GPR
-		gen addis %2, %1.reg, {CONST, %1.offhi}
+		gen addis %2, %1.reg, {C, %1.offhi}
 
 	from SUM_RC to GPR
-		gen addi %2, %1.reg, {CONST, %1.off}
+		gen addi %2, %1.reg, {C, %1.off}
 
 	from SUM_RL to GPR
 		gen addi %2, %1.reg, {LABEL_LO, %1.adr}
@@ -396,9 +493,34 @@ MOVES
 	from SUM_RR to GPR
 		gen add %2, %1.reg1, %1.reg2
 
+/* Other arithmetic */
+
+	from SUB_CR to GPR
+		/* val - reg -> subtract reg from val */
+		gen subfic %2, %1.reg, {C, %1.val}
+
+	from SUB_RR to GPR
+		/* reg1 - reg2 -> subtract reg2 from reg1 */
+		gen subf %2, %1.reg2, %1.reg1
+
+	from NEG_R to GPR
+		gen neg %2, %1.reg
+
+	from MUL_RC to GPR
+		gen mulli %2, %1.reg, {C, %1.val}
+
+	from MUL_RR to GPR
+		gen mullw %2, %1.reg1, %1.reg2
+
+	from DIV_RR to GPR
+		gen divw %2, %1.reg1, %1.reg2
+
+	from DIV_RR_U to GPR
+		gen divwu %2, %1.reg1, %1.reg2
+
 /* Read byte */
 
-	from IND_RC_B+IND_RL_B to GPR
+	from SET_RC_B to GPR
 		gen lbz %2, %1
 
 	from IND_RR_B to GPR
@@ -406,7 +528,7 @@ MOVES
 
 /* Write byte */
 
-	from GPR to IND_RC_B+IND_RL_B
+	from GPR to SET_RC_B
 		gen stb %1, %2
 
 	from GPR to IND_RR_B
@@ -414,13 +536,13 @@ MOVES
 
 /* Read halfword (short) */
 
-	from IND_RC_H+IND_RL_H to GPR
+	from SET_RC_H to GPR
 		gen lhz %2, %1
 
 	from IND_RR_H to GPR
 		gen lhzx %2, %1.reg1, %1.reg2
 
-	from IND_RC_H_S+IND_RL_H_S to GPR
+	from SET_RC_H_S to GPR
 		gen lha %2, %1
 
 	from IND_RR_H_S to GPR
@@ -428,7 +550,7 @@ MOVES
 
 /* Write halfword */
 
-	from GPR to IND_RC_H+IND_RL_H
+	from GPR to SET_RC_H
 		gen sth %1, %2
 
 	from GPR to IND_RR_H
@@ -436,13 +558,13 @@ MOVES
 
 /* Read word */
 
-	from IND_RC_W+IND_RL_W to GPR
+	from SET_RC_W to GPR
 		gen lwz %2, %1
 
 	from IND_RR_W to GPR
 		gen lwzx %2, %1.reg1, %1.reg2
 
-	from IND_RC_W+IND_RL_W to FSREG
+	from SET_RC_W to FSREG
 		gen lfs %2, %1
 
 	from IND_RR_W to FSREG
@@ -450,13 +572,13 @@ MOVES
 
 /* Write word */
 
-	from GPR to IND_RC_W+IND_RL_W
+	from GPR to SET_RC_W
 		gen stw %1, %2
 
 	from GPR to IND_RR_W
 		gen stwx %1, %2.reg1, %2.reg2
 
-	from FSREG to IND_RC_W+IND_RL_W
+	from FSREG to SET_RC_W
 		gen stfs %1, %2
 
 	from FSREG to IND_RR_W
@@ -464,7 +586,7 @@ MOVES
 
 /* Read double */
 
-	from IND_RC_D+IND_RL_D to FPR
+	from SET_RC_D to FPR
 		gen lfd %2, %1
 
 	from IND_RR_D to FPR
@@ -472,37 +594,87 @@ MOVES
 
 /* Write double */
 
-	from FPR to IND_RC_D+IND_RL_D
+	from FPR to SET_RC_D
 		gen stfd %1, %2
 
 	from FPR to IND_RR_D
 		gen stfdx %1, %2.reg1, %2.reg2
 
+/* LXFRAME is a lexical frame from the static chain.  We define a move
+   so "uses REG={LXFRAME, $1}" may find a register with the same
+   frame, and not repeat the move.  This move can't search for a REG
+   with {LXFRAME, $1-1}, but must always start from fp.  The static
+   chain, if it exists, is the argument at fp + EM_BSIZE. */
+
+	from LXFRAME %level==1 to REG
+		gen	lwz %2, {IND_RC_W, fp, EM_BSIZE}
+	from LXFRAME %level==2 to REG
+		gen	lwz %2, {IND_RC_W, fp, EM_BSIZE}
+			/* PowerPC can't add r0 + EM_BSIZE,
+			 * so %2 must not be r0. */
+			lwz %2, {IND_RC_W, %2, EM_BSIZE}
+	from LXFRAME %level==3 to REG
+		gen	lwz %2, {IND_RC_W, fp, EM_BSIZE}
+			lwz %2, {IND_RC_W, %2, EM_BSIZE}
+			lwz %2, {IND_RC_W, %2, EM_BSIZE}
+	from LXFRAME %level==4 to REG
+		gen	lwz %2, {IND_RC_W, fp, EM_BSIZE}
+			lwz %2, {IND_RC_W, %2, EM_BSIZE}
+			lwz %2, {IND_RC_W, %2, EM_BSIZE}
+			lwz %2, {IND_RC_W, %2, EM_BSIZE}
+	from LXFRAME to REG  /* assuming %level in 2 to 0x8000 */
+		gen	li %2, {C, %1.level-1}
+			mtspr ctr, %2
+			lwz %2, {IND_RC_W, fp, EM_BSIZE}
+		1:	lwz %2, {IND_RC_W, %2, EM_BSIZE}
+			bdnz {LABEL, "1b"}
+
 /* Logicals */
 
 	from NOT_R to GPR
 		gen nor %2, %1.reg, %1.reg
 
+	from AND_RIS to GPR
+		gen andisX %2, %1.reg, {C, %1.valhi}
+
+	from AND_RC to GPR
+		gen andiX %2, %1.reg, {C, %1.val}
+
 	from AND_RR to GPR
 		gen and %2, %1.reg1, %1.reg2
 
+	from ANDC_RR to GPR
+		gen andc %2, %1.reg1, %1.reg2
+
+	from OR_RIS to GPR
+		gen oris %2, %1.reg, {C, %1.valhi}
+
+	from OR_RC to GPR
+		gen ori %2, %1.reg, {C, %1.val}
+
 	from OR_RR to GPR
 		gen or %2, %1.reg1, %1.reg2
 
-	from OR_RIS to GPR
-		gen oris %2, %1.reg, {CONST, %1.valhi}
+	from ORC_RR to GPR
+		gen orc %2, %1.reg1, %1.reg2
 
-	from OR_RC to GPR
-		gen ori %2, %1.reg, {CONST, %1.val}
+	from XOR_RIS to GPR
+		gen xoris %2, %1.reg, {C, %1.valhi}
+
+	from XOR_RC to GPR
+		gen xori %2, %1.reg, {C, %1.val}
 
 	from XOR_RR to GPR
 		gen xor %2, %1.reg1, %1.reg2
 
-	from XOR_RIS to GPR
-		gen xoris %2, %1.reg, {CONST, %1.valhi}
+	from NAND_RR to GPR
+		gen nand %2, %1.reg1, %1.reg2
 
-	from XOR_RC to GPR
-		gen xori %2, %1.reg, {CONST, %1.val}
+	from NOR_RR to GPR
+		gen nor %2, %1.reg1, %1.reg2
+
+	from EQV_RR to GPR
+		gen eqv %2, %1.reg1, %1.reg2
 
 /* Conditions */
 
@@ -510,7 +682,7 @@ MOVES
 
 	from COND_RC to GPR
 		gen
-			cmpwi %1.reg, {CONST, %1.val}
+			cmpwi %1.reg, {C, %1.val}
 			mfcr %2
 
 	from COND_RR to GPR
@@ -520,7 +692,7 @@ MOVES
 
 	from CONDL_RC to GPR
 		gen
-			cmplwi %1.reg, {CONST, %1.val}
+			cmplwi %1.reg, {C, %1.val}
 			mfcr %2
 
 	from CONDL_RR to GPR
@@ -544,75 +716,73 @@ MOVES
 
 	from XEQ to GPR
 		gen
-			extrwi %2, %1.reg, {CONST, 1}, {CONST, 2}
+			extrwi %2, %1.reg, {C, 1}, {C, 2}
 
 	from XNE to GPR
 		gen
-			extrwi %2, %1.reg, {CONST, 1}, {CONST, 2}
-			xori %2, %2, {CONST, 1}
+			extrwi %2, %1.reg, {C, 1}, {C, 2}
+			xori %2, %2, {C, 1}
 
 	from XGT to GPR
 		gen
-			extrwi %2, %1.reg, {CONST, 1}, {CONST, 1}
+			extrwi %2, %1.reg, {C, 1}, {C, 1}
 
 	from XGE to GPR
 		gen
-			extrwi %2, %1.reg, {CONST, 1}, {CONST, 0}
-			xori %2, %2, {CONST, 1}
+			extrwi %2, %1.reg, {C, 1}, {C, 0}
+			xori %2, %2, {C, 1}
 
 	from XLT to GPR
 		gen
-			extrwi %2, %1.reg, {CONST, 1}, {CONST, 0}
+			extrwi %2, %1.reg, {C, 1}, {C, 0}
 
 	from XLE to GPR
 		gen
-			extrwi %2, %1.reg, {CONST, 1}, {CONST, 1}
-			xori %2, %2, {CONST, 1}
+			extrwi %2, %1.reg, {C, 1}, {C, 1}
+			xori %2, %2, {C, 1}
 
-/* GPR_EXPR exists solely to allow us to use regvar() (which can only
+/* REG_EXPR exists solely to allow us to use regvar() (which can only
    be used in an expression) as a register constant.  We can then use
-   our moves to GPR to set register variables.  We define no moves to
-   LOCAL, so we avoid confusion between GPR and FSREG in LOCAL. */
+   our moves to GPR or REG to set register variables.  This is easier
+   than defining moves to LOCAL, and avoids confusion between GPR and
+   FSREG in LOCAL. */
 
-	from ANY_BHW to GPR_EXPR
+	from INT_W + LXFRAME to REG_EXPR
 		gen move %1, %2.reg
 
-	from FPR+IND_ALL_D to FPR_EXPR
+	from FLOAT_D to FREG_EXPR
 		gen move %1, %2.reg
 
-	from FSREG+IND_ALL_W to FSREG_EXPR
+	from FLOAT_W to FSREG_EXPR
 		gen move %1, %2.reg
 
 
 TESTS
 
-	/* Given orX %1, %1, %1, ncgg says, "Instruction destroys %1,
-	 * not allowed here".  We use orX_readonly to trick ncgg.
-	 *
-	 * Using "or." and not "mr." because mach/powerpc/top/table
-	 * was optimizing "or." and not "mr.".
+	/* Given "mrX %1, %1", ncgg would say, "Instruction destroys
+	 * %1, not allowed here".  We use mrX_readonly to trick ncgg.
 	 */
 	to test GPR
 		gen
-			orX_readonly %1, %1, %1
+			mrX_readonly %1, %1
 
 
 STACKINGRULES
 
-	from REG to STACK
+	from SPFP+REG to STACK
 		gen
-			COMMENT("stack REG")
+			COMMENT("stack SPFP+REG")
 			stwu %1, {IND_RC_W, sp, 0-4}
 
-	from ANY_BHW-REG to STACK
+	from INT_W-SPFP-REG to STACK
 		gen
-			COMMENT("stack ANY_BHW-REG")
+			COMMENT("stack INT_W-SPFP-REG")
 			move %1, RSCRATCH
 			stwu RSCRATCH, {IND_RC_W, sp, 0-4}
 
-	from IND_ALL_D to STACK
+	from FLOAT_D-FREG to STACK
 		gen
-			COMMENT("stack IND_ALL_D")
+			COMMENT("stack FLOAT_D-FREG")
 			move %1, FSCRATCH
 			stfdu FSCRATCH, {IND_RC_D, sp, 0-8}
 
@@ -637,15 +807,18 @@ STACKINGRULES
 		gen bug {LABEL, "STACKING DLOCAL"}
 
 
-
 COERCIONS
 
+	/* The unstacking coercions emit many "addi sp, sp, X"
+	 * instructions; the target optimizer (top) will merge them.
+	 */
+
 	from STACK
 		uses REG
 		gen
 			COMMENT("coerce STACK->REG")
 			lwz %a, {IND_RC_W, sp, 0}
-			addi sp, sp, {CONST, 4}
+			addi sp, sp, {C, 4}
 		yields %a
 
 	from STACK
@@ -653,7 +826,7 @@ COERCIONS
 		gen
 			COMMENT("coerce STACK->FREG")
 			lfd %a, {IND_RC_D, sp, 0}
-			addi sp, sp, {CONST, 8}
+			addi sp, sp, {C, 8}
 		yields %a
 
 	from STACK
@@ -661,45 +834,56 @@ COERCIONS
 		gen
 			COMMENT("coerce STACK->FSREG")
 			lfs %a, {IND_RC_W, sp, 0}
-			addi sp, sp, {CONST, 4}
+			addi sp, sp, {C, 4}
 		yields %a
 
-	from ANY_BHW
-		uses REG
-		gen
-			COMMENT("coerce ANY_BHW->REG")
-			move %1, %a
-		yields %a
-
-	/*
-	 * There is no coercion from IND_ALL_D to REG REG, because
-	 * coercions can't allocate registers for intermediate values.
+	/* "uses REG=%1" may find and reuse a register containing the
+	 * same token.  For contrast, "uses REG gen move %1, %a" would
+	 * pick a different register before doing the move.
 	 *
-	 * A coercion to split IND_RC_D into two IND_RC_W, without
-	 * allocating an intermediate register, would yield
-	 *   {IND_RC_W, %1.val, %1.off+4}
-	 * but %1.off+4 might overflow a signed 16-bit integer.
+	 * "reusing %1" helps when coercing an INT_W token like
+	 * {SUM_RC, r3, 0-4} to REG3, by not stacking the token.
 	 */
 
-	from FREG+IND_ALL_D
-		uses FREG
-		gen
-			COMMENT("coerce FREG+IND_ALL_D->FREG")
-			move %1, %a
+	from INT_W
+		uses reusing %1, REG=%1
 		yields %a
 
-	from FSREG+IND_ALL_W
-		uses FSREG
-		gen
-			COMMENT("coerce FSREG+IND_ALL_W->FREG")
-			move %1, %a
+	from FLOAT_D
+		uses reusing %1, FREG=%1
 		yields %a
 
+	from FLOAT_W
+		uses reusing %1, FSREG=%1
+		yields %a
+
+	/* Splitting coercions can't allocate registers.
+	 * PowerPC can't add r0 + constant.  Use r12.
+	 */
+
+	from IND_RC_D %off<=0x7FFA
+		yields
+			{IND_RC_W, %1.reg, %1.off+4}
+			{IND_RC_W, %1.reg, %1.off}
+
+	from IND_RC_D
+		/* Don't move to %1.reg; it might be a regvar. */
+		gen move {SUM_RC, %1.reg, %1.off}, r12
+		yields {IND_RC_W, r12, 4} {IND_RC_W, r12, 0}
+
+	from IND_RR_D
+		gen move {SUM_RR, %1.reg1, %1.reg2}, r12
+		yields {IND_RC_W, r12, 4} {IND_RC_W, r12, 0}
+
+	from FRAME_D %off<=0x7FFA
+		yields
+			{FRAME_W, %1.level, %1.reg, %1.off+4, 4}
+			{FRAME_W, %1.level, %1.reg, %1.off, 4}
 
 
 PATTERNS
 
-/* Intrinsics */
+/* Constants */
 
 	pat loc $1==(0-0x8000)             /* Load constant */
 		yields {CONST_N8000, $1}
@@ -712,76 +896,109 @@ PATTERNS
 	pat loc $1>=0x8001 && $1<=0xFFFF
 		yields {CONST_8001_FFFF, $1}
 	pat loc lo($1)==0
-		yields {CONST_HZ, $1}
+		yields {CONST_HI_ZR, $1}
 	pat loc
-		yields {CONST_HL, $1}
+		yields {CONST_HI_LO, $1}
 
-	pat dup $1==INT32                  /* Duplicate word on top of stack */
-		with REG
-			yields %1 %1
-		with FSREG
+
+/* Stack shuffles */
+
+	/* The peephole optimizer does:  loc $1 ass 4 -> asp $1
+	 * To optimize multiplication, it uses:  dup 8 asp 4
+	 */
+
+	pat asp $1==4                      /* Adjust stack by constant */
+		with exact INT_W+FLOAT_W
+			/* drop %1 */
+		with STACK
+			gen addi sp, sp, {C, 4}
+	pat asp smalls($1)
+		with STACK
+			gen addi sp, sp, {C, $1}
+	pat asp lo($1)==0                  /* Low half zero: one addis */
+		with STACK
+			gen addis sp, sp, {C, hi($1)}
+	pat asp
+		with STACK
+			gen
+				addis sp, sp, {C, his($1)}
+				addi sp, sp, {C, los($1)}
+
+	pat ass $1==4                      /* Adjust stack by variable */
+		with REG STACK
+			gen add sp, sp, %1
+
+	/* To duplicate a token, we coerce the token into a register,
+	 * then duplicate the register.  This decreases code size.
+	 */
+
+	pat dup $1==4                      /* Duplicate word on top of stack */
+		with REG+FSREG
 			yields %1 %1
 
-	pat dup $1==INT64                  /* Duplicate double-word on top of stack */
-		with REG REG
+	pat dup $1==8                      /* Duplicate double-word */
+		with REG+FSREG REG+FSREG
 			yields %2 %1 %2 %1
 		with FREG
 			yields %1 %1
 
-	pat exg $1==INT32                  /* Exchange top two words on stack */
-		with REG REG
+	pat dup                            /* Duplicate other size */
+		leaving
+			loc $1
+			dus 4
+
+	pat dus $1==4                      /* Duplicate variable size */
+		with REG STACK
+			/* ( a size%1 -- a a ) */
+			uses REG, REG
+			gen
+				srwi %a, %1, {C, 2}
+				mtspr ctr, %a
+				add %b, sp, %1
+			1:	lwzu %a, {IND_RC_W, %b, 0-4}
+				stwu %a, {IND_RC_W, sp, 0-4}
+				bdnz {LABEL, "1b"}
+
+	pat exg $1==4                      /* Exchange top two words */
+		with INT_W+FLOAT_W INT_W+FLOAT_W
 			yields %1 %2
 
-	pat stl lol $1==$2                 /* Store then load local */
+	pat exg defined($1)                /* Exchange other size */
 		leaving
-			dup 4
-			stl $1
+			loc $1
+			cal ".exg"
 
-	pat sdl ldl $1==$2                 /* Store then load double local */
+	pat exg !defined($1)
 		leaving
-			dup 8
-			sdl $1
-
-	pat lal sti lal loi $1==$3 && $2==$4 /* Store then load local, of a different size */
-		leaving
-			dup INT32
-			lal $1
-			sti $2
-
-	pat ste loe $1==$2                 /* Store then load external */
-		leaving
-			dup 4
-			ste $1
+			cal ".exg"
 
 
 /* Type conversions */
 
-	pat loc loc ciu                    /* signed X -> unsigned X */
+	pat loc loc ciu                    /* signed -> unsigned */
 		leaving
 			loc $1
 			loc $2
 			cuu
 
-	pat loc loc cuu $1==$2             /* unsigned X -> unsigned X */
+	pat loc loc cui                    /* unsigned -> signed */
+		leaving
+			loc $1
+			loc $2
+			cuu
+
+	pat loc loc cuu $1<=4 && $2<=4     /* unsigned -> unsigned */
 		/* nop */
 
-	pat loc loc cii $1==$2             /* signed X -> signed X */
-		/* nop */
+	pat loc loc cii $1<=4 && $2<=$1
+		/* signed -> signed of smaller or same size,
+		 * no sign extension */
 
-	pat loc loc cui $1==$2             /* unsigned X -> signed X */
-		/* nop */
-
-	pat loc loc cui $1==INT8 && $2==INT32 /* unsigned char -> signed int */
-		/* nop */
-
-	pat loc loc cui $1==INT16 && $2==INT32 /* unsigned short -> signed int */
-		/* nop */
-
-	pat loc loc cii $1==INT8 && $2==INT32 /* signed char -> signed int */
+	pat loc loc cii $1==1 && $2<=4     /* sign-extend char */
 		with REG
 			yields {SEX_B, %1}
 
-	pat loc loc cii $1==2 && $2==4     /* signed char -> signed short */
+	pat loc loc cii $1==2 && $2<=4     /* sign-extend short */
 		with REG
 			yields {SEX_H, %1}
 
@@ -795,85 +1012,117 @@ PATTERNS
 		uses REG={SUM_RIS, fp, his($1)}
 		yields {SUM_RC, %a, los($1)}
 
+	pat lal loi smalls($1) && $2==1    /* Load byte from local */
+		yields {FRAME_B, 0, fp, $1, 1}
+
+	/* Load half-word from local and sign-extend */
+	pat lal loi loc loc cii smalls($1) && $2==2 && $3==2 && $4==4
+		yields {FRAME_H_S, 0, fp, $1, 2}
+
+	pat lal loi smalls($1) && $2==2    /* Load half-word from local */
+		yields {FRAME_H, 0, fp, $1, 2}
+
 	/* Load word from local */
 	pat lol inreg($1)==reg_any || inreg($1)==reg_float
 		yields {LOCAL, $1}
+	pat lol smalls($1)
+		yields {FRAME_W, 0, fp, $1, 4}
 	pat lol
 		leaving
 			lal $1
 			loi 4
 
-	/* Load double-word from local */
-	pat ldl inreg($1)==reg_float
+	pat ldl inreg($1)==reg_float       /* Load double-word from local */
 		yields {DLOCAL, $1}
+	pat ldl smalls($1) && smalls($1+4)
+		/* smalls($1+4) bounds %off for the FRAME_D %off<=0x7FFA coercion */
+		yields {FRAME_D, 0, fp, $1, 8}
 	pat ldl
 		leaving
 			lal $1
 			loi 8
 
-	/* Store word to local */
-	pat stl inreg($1)==reg_any
-		with exact ANY_BHW
+	pat lal sti smalls($1) && $2==1    /* Store byte to local */
+		with REG
+			kills IND_V, FRAME_V %level==0 && fover($1, 1)
+			gen move %1, {FRAME_B, 0, fp, $1, 1}
+
+	pat lal sti smalls($1) && $2==2    /* Store half-word to local */
+		with REG
+			kills IND_V, FRAME_V %level==0 && fover($1, 2)
+			gen move %1, {FRAME_H, 0, fp, $1, 2}
+
+	pat stl inreg($1)==reg_any         /* Store word to local */
+		with exact INT_W
 			/* ncg fails to infer that regvar($1) is dead! */
 			kills regvar($1)
-			gen move %1, {GPR_EXPR, regvar($1)}
+			gen move %1, {REG_EXPR, regvar($1)}
 		with STACK
 			gen
 				lwz {LOCAL, $1}, {IND_RC_W, sp, 0}
-				addi sp, sp, {CONST, 4}
+				addi sp, sp, {C, 4}
 	pat stl inreg($1)==reg_float
-		with exact FSREG+IND_ALL_W
+		with exact FLOAT_W
 			kills regvar_w($1, reg_float)
 			gen move %1, {FSREG_EXPR, regvar_w($1, reg_float)}
 		with STACK
 			gen
 				lfs {LOCAL, $1}, {IND_RC_W, sp, 0}
-				addi sp, sp, {CONST, 4}
+				addi sp, sp, {C, 4}
+	pat stl smalls($1)
+		with REG+FSREG
+			kills IND_V, FRAME_V %level==0 && fover($1, 4)
+			gen move %1, {FRAME_W, 0, fp, $1, 4}
 	pat stl
 		leaving
 			lal $1
 			sti 4
 
-	/* Store double-word to local */
-	pat sdl inreg($1)==reg_float
-		with exact FREG+IND_ALL_D
+	pat sdl inreg($1)==reg_float       /* Store double-word to local */
+		with exact FLOAT_D
 			kills regvar_d($1, reg_float)
-			gen move %1, {FPR_EXPR, regvar_d($1, reg_float)}
+			gen move %1, {FREG_EXPR, regvar_d($1, reg_float)}
 		with STACK
 			gen
 				lfd {DLOCAL, $1}, {IND_RC_D, sp, 0}
-				addi sp, sp, {CONST, 8}
+				addi sp, sp, {C, 8}
+	pat sdl smalls($1) && smalls($1+4) /* Store double-word to frame */
+		with REG REG
+			kills IND_V, FRAME_V %level==0 && fover($1, 8)
+			gen
+				move %1, {FRAME_W, 0, fp, $1, 4}
+				move %2, {FRAME_W, 0, fp, $1+4, 4}
+		with FREG
+			kills IND_V, FRAME_V %level==0 && fover($1, 8)
+			gen move %1, {FRAME_D, 0, fp, $1, 8}
 	pat sdl
 		leaving
 			lal $1
 			sti 8
 
-	/* Load indirect from local */
-	pat lil inreg($1)==reg_any
-		yields {IND_RC_W, regvar($1), 0}
-	pat lil
+	pat lil                            /* Load indirect from local */
 		leaving
 			lol $1
 			loi 4
 
-	pat sil                            /* Save to indirected local */
+	pat sil                            /* Store indirect to local */
 		leaving
 			lol $1
 			sti 4
 
-	pat zrl                             /* Zero local */
+	pat zrl                            /* Zero local */
 		leaving
 			loc 0
 			stl $1
 
-	pat inl                             /* Increment local */
+	pat inl                            /* Increment local */
 		leaving
 			lol $1
 			loc 1
 			adi 4
 			stl $1
 
-	pat del                             /* Decrement local */
+	pat del                            /* Decrement local */
 		leaving
 			lol $1
 			loc 1
@@ -881,9 +1130,86 @@ PATTERNS
 			stl $1
 
 
+/* Local variables of procedures on static chain */
+
+	/* lxa (lexical argument base) -> lxl (lexical local base) */
+	pat lxa adp nicelx($1)
+		leaving lxl $1 adp $2+EM_BSIZE
+	pat lxa lof nicelx($1)
+		leaving lxl $1 lof $2+EM_BSIZE
+	pat lxa ldf nicelx($1)
+		leaving lxl $1 ldf $2+EM_BSIZE
+	pat lxa stf nicelx($1)
+		leaving lxl $1 stf $2+EM_BSIZE
+	pat lxa sdf nicelx($1)             /* Double-word store stays sdf */
+		leaving lxl $1 sdf $2+EM_BSIZE
+	pat lxa nicelx($1)
+		leaving lxl $1 adp EM_BSIZE
+
+	/* Load locals in statically enclosing procedures */
+	pat lxl adp loi nicelx($1) && smalls($2) && $3==1
+		uses REG={LXFRAME, $1}
+		yields {FRAME_B, $1, %a, $2, 1}
+	pat lxl adp loi loc loc cii nicelx($1) && smalls($2) &&
+	                            $3==2 && $4==2 && $5==4
+		uses REG={LXFRAME, $1}
+		yields {FRAME_H_S, $1, %a, $2, 2}
+	pat lxl adp loi nicelx($1) && smalls($2) && $3==2
+		uses REG={LXFRAME, $1}
+		yields {FRAME_H, $1, %a, $2, 2}
+	pat lxl lof nicelx($1) && smalls($2)
+		uses REG={LXFRAME, $1}
+		yields {FRAME_W, $1, %a, $2, 4}
+	pat lxl ldf nicelx($1) && smalls($2) && smalls($2+4)
+		uses REG={LXFRAME, $1}
+		/* smalls($2+4) bounds %off for the FRAME_D %off<=0x7FFA coercion */
+		yields {FRAME_D, $1, %a, $2, 8}
+
+	/* Store locals in statically enclosing procedures */
+	pat lxl adp sti nicelx($1) && smalls($2) && $3==1
+		with REG
+			kills IND_V, FRAME_V %level==$1 && fover($2, 1)
+			uses REG={LXFRAME, $1}
+			gen move %1, {FRAME_B, $1, %a, $2, 1}
+	pat lxl adp sti nicelx($1) && smalls($2) && $3==2
+		with REG
+			kills IND_V, FRAME_V %level==$1 && fover($2, 2)
+			uses REG={LXFRAME, $1}
+			gen move %1, {FRAME_H, $1, %a, $2, 2}
+	pat lxl stf nicelx($1) && smalls($2)
+		with REG+FSREG
+			kills IND_V, FRAME_V %level==$1 && fover($2, 4)
+			uses REG={LXFRAME, $1}
+			gen move %1, {FRAME_W, $1, %a, $2, 4}
+	pat lxl sdf nicelx($1) && smalls($2) && smalls($2+4)
+		with REG REG
+			kills IND_V, FRAME_V %level==$1 && fover($2, 8)
+			uses REG={LXFRAME, $1}
+			gen
+				move %1, {FRAME_W, $1, %a, $2, 4}
+				move %2, {FRAME_W, $1, %a, $2+4, 4}
+		with FREG
+			kills IND_V, FRAME_V %level==$1 && fover($2, 8)
+			uses REG={LXFRAME, $1}
+			gen move %1, {FRAME_D, $1, %a, $2, 8}
+
+	pat lxl nicelx($1)                 /* Local base on static chain */
+		uses REG={LXFRAME, $1}
+		yields %a  /* Can't yield LXFRAME. */
+	pat lxl stl nicelx($1) && inreg($2)==reg_any
+		kills regvar($2)
+		gen move {LXFRAME, $1}, {REG_EXPR, regvar($2)}
+
+	pat lxl $1==0                      /* Our local base */
+		yields fp
+
+	pat lxa $1==0                      /* Our argument base */
+		yields {SUM_RC, fp, EM_BSIZE}
+
+
 /* Global variables */
 
-	pat lpi                            /* Load address of external function */
+	pat lpi                            /* Load address of function */
 		leaving
 			lae $1
 
@@ -894,35 +1220,35 @@ PATTERNS
 	pat loe                            /* Load word external */
 		leaving
 			lae $1
-			loi INT32
+			loi 4
 
 	pat ste                            /* Store word external */
 		leaving
 			lae $1
-			sti INT32
+			sti 4
 
 	pat lde                            /* Load double-word external */
 		leaving
 			lae $1
-			loi INT64
+			loi 8
 
 	pat sde                            /* Store double-word external */
 		leaving
 			lae $1
-			sti INT64
+			sti 8
 
-	pat zre                             /* Zero external */
+	pat zre                            /* Zero external */
 		leaving
 			loc 0
 			ste $1
 
-	pat ine                             /* Increment external */
+	pat ine                            /* Increment external */
 		leaving
 			loe $1
 			inc
 			ste $1
 
-	pat dee                             /* Decrement external */
+	pat dee                            /* Decrement external */
 		leaving
 			loe $1
 			dec
@@ -934,27 +1260,27 @@ PATTERNS
 	pat lof                            /* Load word offsetted */
 		leaving
 			adp $1
-			loi INT32
+			loi 4
 
 	pat ldf                            /* Load double-word offsetted */
 		leaving
 			adp $1
-			loi INT64
+			loi 8
 
 	pat stf                            /* Store word offsetted */
 		leaving
 			adp $1
-			sti INT32
+			sti 4
 
 	pat sdf                            /* Store double-word offsetted */
 		leaving
 			adp $1
-			sti INT64
+			sti 8
 
 
 /* Loads and stores */
 
-	pat loi $1==INT8                   /* Load byte indirect */
+	pat loi $1==1                      /* Load byte indirect */
 		with REG
 			yields {IND_RC_B, %1, 0}
 		with exact SUM_RC
@@ -964,8 +1290,8 @@ PATTERNS
 		with exact SUM_RR
 			yields {IND_RR_B, %1.reg1, %1.reg2}
 
-	pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32
-	/* Load half-word indirect and sign extend */
+	/* Load half-word indirect and sign-extend */
+	pat loi loc loc cii $1==2 && $2==2 && $3==4
 		with REG
 			yields {IND_RC_H_S, %1, 0}
 		with exact SUM_RC
@@ -975,7 +1301,7 @@ PATTERNS
 		with exact SUM_RR
 			yields {IND_RR_H_S, %1.reg1, %1.reg2}
 
-	pat loi $1==INT16                  /* Load half-word indirect */
+	pat loi $1==2                      /* Load half-word indirect */
 		with REG
 			yields {IND_RC_H, %1, 0}
 		with exact SUM_RC
@@ -985,7 +1311,7 @@ PATTERNS
 		with exact SUM_RR
 			yields {IND_RR_H, %1.reg1, %1.reg2}
 
-	pat loi $1==INT32                  /* Load word indirect */
+	pat loi $1==4                      /* Load word indirect */
 		with REG
 			yields {IND_RC_W, %1, 0}
 		with exact SUM_RC
@@ -995,7 +1321,7 @@ PATTERNS
 		with exact SUM_RR
 			yields {IND_RR_W, %1.reg1, %1.reg2}
 
-	pat loi $1==INT64                  /* Load double-word indirect */
+	pat loi $1==8                      /* Load double-word indirect */
 		with REG
 			yields {IND_RC_D, %1, 0}
 		with exact SUM_RC
@@ -1013,10 +1339,9 @@ PATTERNS
 	pat los $1==4                      /* Load arbitrary size */
 		with REG3 STACK
 			kills ALL
-			gen
-				bl {LABEL, ".los4"}
+			gen bl {LABEL, ".los4"}
 
-	pat sti $1==INT8                   /* Store byte indirect */
+	pat sti $1==1                      /* Store byte indirect */
 		with REG REG
 			kills MEMORY
 			gen move %2, {IND_RC_B, %1, 0}
@@ -1030,7 +1355,7 @@ PATTERNS
 			kills MEMORY
 			gen move %2, {IND_RR_B, %1.reg1, %1.reg2}
 
-	pat sti $1==INT16                  /* Store half-word indirect */
+	pat sti $1==2                      /* Store half-word indirect */
 		with REG REG
 			kills MEMORY
 			gen move %2, {IND_RC_H, %1, 0}
@@ -1044,7 +1369,7 @@ PATTERNS
 			kills MEMORY
 			gen move %2, {IND_RR_H, %1.reg1, %1.reg2}
 
-	pat sti $1==INT32                  /* Store word indirect */
+	pat sti $1==4                      /* Store word indirect */
 		with REG REG+FSREG
 			kills MEMORY
 			gen move %2, {IND_RC_W, %1, 0}
@@ -1058,7 +1383,7 @@ PATTERNS
 			kills MEMORY
 			gen move %2, {IND_RR_W, %1.reg1, %1.reg2}
 
-	pat sti $1==INT64                  /* Store double-word indirect */
+	pat sti $1==8                      /* Store double-word indirect */
 		with REG FREG
 			kills MEMORY
 			gen move %2, {IND_RC_D, %1, 0}
@@ -1076,26 +1401,6 @@ PATTERNS
 			gen
 				move %2, {IND_RC_W, %1, 0}
 				move %3, {IND_RC_W, %1, 4}
-		/*
-		 * Next 2 patterns exist because there is no coercion
-		 * from IND_ALL_D to REG REG.
-		 */
-		with REG IND_RC_D
-			kills MEMORY
-			uses REG={SUM_RC, %2.reg, %2.off}, REG, REG
-			gen
-				move {IND_RC_W, %a, 0}, %b
-				move {IND_RC_W, %a, 4}, %c
-				move %b, {IND_RC_W, %1, 0}
-				move %c, {IND_RC_W, %1, 4}
-		with REG IND_RR_D
-			kills MEMORY
-			uses REG={SUM_RR, %2.reg1, %2.reg2}, REG, REG
-			gen
-				move {IND_RC_W, %a, 0}, %b
-				move {IND_RC_W, %a, 4}, %c
-				move %b, {IND_RC_W, %1, 0}
-				move %c, {IND_RC_W, %1, 4}
 
 	pat sti                            /* Store arbitrary size */
 		leaving
@@ -1105,8 +1410,7 @@ PATTERNS
 	pat sts $1==4                      /* Store arbitrary size */
 		with REG3 STACK
 			kills ALL
-			gen
-				bl {LABEL, ".sts4"}
+			gen bl {LABEL, ".sts4"}
 
 
 /* Arithmetic wrappers */
@@ -1151,6 +1455,10 @@ PATTERNS
 
 /* Word arithmetic */
 
+	/* Like most back ends, this one doesn't trap EIOVFL, so it
+	 * ignores overflow in signed integers.
+	 */
+
 	pat adi $1==4                      /* Add word (second + top) */
 		with REG REG
 			yields {SUM_RR, %1, %2}
@@ -1158,113 +1466,87 @@ PATTERNS
 			yields {SUM_RC, %2, %1.val}
 		with REG CONST2
 			yields {SUM_RC, %1, %2.val}
-		with CONST_HZ REG
-			uses reusing %2, REG={SUM_RIS, %2, his(%1.val)}
-			yields %a
-		with REG CONST_HZ
-			uses reusing %1, REG={SUM_RIS, %1, his(%2.val)}
-			yields %a
-		with CONST_STACK-CONST2-CONST_HZ REG
+		with CONST_HI_ZR REG
+			yields {SUM_RIS, %2, his(%1.val)}
+		with REG CONST_HI_ZR
+			yields {SUM_RIS, %1, his(%2.val)}
+		with CONST_STACK-CONST2-CONST_HI_ZR REG
 			uses reusing %2, REG={SUM_RIS, %2, his(%1.val)}
 			yields {SUM_RC, %a, los(%1.val)}
-		with REG CONST_STACK-CONST2-CONST_HZ
+		with REG CONST_STACK-CONST2-CONST_HI_ZR
 			uses reusing %1, REG={SUM_RIS, %1, his(%2.val)}
 			yields {SUM_RC, %a, los(%2.val)}
 
 	pat sbi $1==4                      /* Subtract word (second - top) */
 		with REG REG
-			uses reusing %2, REG
-			gen
-				subf %a, %1, %2
-			yields %a
+			uses reusing %1, reusing %2, REG
+			yields {SUB_RR, %2, %1}
 		with CONST2_WHEN_NEG REG
 			yields {SUM_RC, %2, 0-%1.val}
-		with CONST_HZ REG
-			uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)}
-			yields %a
-		with CONST_STACK-CONST2_WHEN_NEG-CONST_HZ REG
+		with REG CONST2
+			yields {SUB_CR, %2.val, %1}
+		with CONST_HI_ZR REG
+			yields {SUM_RIS, %2, his(0-%1.val)}
+		with CONST_STACK-CONST2_WHEN_NEG-CONST_HI_ZR REG
 			uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)}
 			yields {SUM_RC, %a, los(0-%1.val)}
 
 	pat ngi $1==4                      /* Negate word */
 		with REG
-			uses reusing %1, REG
-			gen
-				neg %a, %1
-			yields %a
+			yields {NEG_R, %1}
 
 	pat mli $1==4                      /* Multiply word (second * top) */
+		with CONST2 REG
+			yields {MUL_RC, %2, %1.val}
+		with REG CONST2
+			yields {MUL_RC, %1, %2.val}
 		with REG REG
-			uses reusing %2, REG
-			gen
-				mullw %a, %2, %1
-			yields %a
+			yields {MUL_RR, %2, %1}
 
 	pat dvi $1==4                      /* Divide word (second / top) */
 		with REG REG
-			uses reusing %2, REG
-			gen
-				divw %a, %2, %1
-			yields %a
+			yields {DIV_RR, %2, %1}
 
-	pat dvu $1==4                      /* Divide unsigned word (second / top) */
+	pat dvu $1==4             /* Divide unsigned word (second / top) */
 		with REG REG
-			uses reusing %2, REG
-			gen
-				divwu %a, %2, %1
-			yields %a
+			yields {DIV_RR_U, %2, %1}
+
+	/* To calculate a remainder:  a % b = a - (a / b * b) */
 
 	pat rmi $1==4                      /* Remainder word (second % top) */
 		with REG REG
-			uses REG
-			gen
-				divw %a, %2, %1
-				mullw %a, %a, %1
-				subf %a, %a, %2
-			yields %a
+			uses REG={DIV_RR, %2, %1}, REG
+			gen move {MUL_RR, %a, %1}, %b
+			yields {SUB_RR, %2, %b}
 
-	pat rmu $1==4                      /* Remainder unsigned word (second % top) */
+	pat rmu $1==4             /* Remainder unsigned word (second % top) */
 		with REG REG
-			uses REG
-			gen
-				divwu %a, %2, %1
-				mullw %a, %a, %1
-				subf %a, %a, %2
-			yields %a
+			uses REG={DIV_RR_U, %2, %1}, REG
+			gen move {MUL_RR, %a, %1}, %b
+			yields {SUB_RR, %2, %b}
+
+
+/* Bitwise logic */
+
+	/* This back end doesn't know how to combine shifts and
+	 * bitwise ops to emit rlwinm, rlwnm, or rlwimi instructions.
+	 */
 
 	pat and $1==4                      /* AND word */
 		with REG NOT_R
-			uses reusing %1, REG
-			gen
-				andc %a, %1, %2.reg
-			yields %a
+			yields {ANDC_RR, %1, %2.reg}
 		with NOT_R REG
-			uses reusing %1, REG
-			gen
-				andc %a, %2, %1.reg
-			yields %a
+			yields {ANDC_RR, %2, %1.reg}
 		with REG REG
 			yields {AND_RR, %1, %2}
 		with REG UCONST2
-			uses reusing %1, REG
-			gen
-				andiX %a, %1, {CONST, %2.val}
-			yields %a
+			yields {AND_RC, %1, %2.val}
 		with UCONST2 REG
-			uses reusing %2, REG
-			gen
-				andiX %a, %2, {CONST, %1.val}
-			yields %a
-		with REG CONST_HZ
-			uses reusing %1, REG
-			gen
-				andisX %a, %1, {CONST, hi(%2.val)}
-			yields %a
-		with CONST_HZ REG
-			uses reusing %2, REG
-			gen
-				andisX %a, %2, {CONST, hi(%1.val)}
-			yields %a
+			yields {AND_RC, %2, %1.val}
+		with REG CONST_HI_ZR
+			yields {AND_RIS, %1, hi(%2.val)}
+		with CONST_HI_ZR REG
+			yields {AND_RIS, %2, hi(%1.val)}
 
 	pat and defined($1)                /* AND set */
 		leaving
@@ -1277,31 +1559,23 @@ PATTERNS
 
 	pat ior $1==4                      /* OR word */
 		with REG NOT_R
-			uses reusing %1, REG
-			gen
-				orc %a, %1, %2.reg
-			yields %a
+			yields {ORC_RR, %1, %2.reg}
 		with NOT_R REG
-			uses reusing %2, REG
-			gen
-				orc %a, %2, %1.reg
-			yields %a
+			yields {ORC_RR, %2, %1.reg}
 		with REG REG
 			yields {OR_RR, %1, %2}
 		with REG UCONST2
 			yields {OR_RC, %1, %2.val}
 		with UCONST2 REG
 			yields {OR_RC, %2, %1.val}
-		with REG CONST_HZ
-			uses reusing %1, REG={OR_RIS, %1, hi(%2.val)}
-			yields %a
-		with CONST_HZ REG
-			uses reusing %2, REG={OR_RIS, %2, hi(%1.val)}
-			yields %a
-		with REG CONST_STACK-UCONST2-CONST_HZ
+		with REG CONST_HI_ZR
+			yields {OR_RIS, %1, hi(%2.val)}
+		with CONST_HI_ZR REG
+			yields {OR_RIS, %2, hi(%1.val)}
+		with REG CONST_STACK-UCONST2-CONST_HI_ZR
 			uses reusing %1, REG={OR_RIS, %1, hi(%2.val)}
-			yields {OR_RC, %1, lo(%2.val)}
+			yields {OR_RC, %a, lo(%2.val)}	/* %a has high half */
-		with CONST_STACK-UCONST2-CONST_HZ REG
+		with CONST_STACK-UCONST2-CONST_HI_ZR REG
 			uses reusing %2, REG={OR_RIS, %2, hi(%1.val)}
-			yields {OR_RC, %2, lo(%1.val)}
+			yields {OR_RC, %a, lo(%1.val)}	/* %a has high half */
 
@@ -1322,16 +1596,14 @@ PATTERNS
 			yields {XOR_RC, %1, %2.val}
 		with UCONST2 REG
 			yields {XOR_RC, %2, %1.val}
-		with REG CONST_HZ
-			uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)}
-			yields %a
-		with CONST_HZ REG
-			uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)}
-			yields %a
-		with REG CONST_STACK-UCONST2-CONST_HZ
+		with REG CONST_HI_ZR
+			yields {XOR_RIS, %1, hi(%2.val)}
+		with CONST_HI_ZR REG
+			yields {XOR_RIS, %2, hi(%1.val)}
+		with REG CONST_STACK-UCONST2-CONST_HI_ZR
 			uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)}
-			yields {XOR_RC, %1, lo(%2.val)}
+			yields {XOR_RC, %a, lo(%2.val)}	/* %a has high half */
-		with CONST_STACK-UCONST2-CONST_HZ REG
+		with CONST_STACK-UCONST2-CONST_HI_ZR REG
 			uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)}
-			yields {XOR_RC, %2, lo(%1.val)}
+			yields {XOR_RC, %a, lo(%1.val)}	/* %a has high half */
 
@@ -1344,22 +1616,13 @@ PATTERNS
 		leaving
 			cal ".xor"
 
-	pat com $1==INT32                  /* NOT word */
-		with AND_RR
-			uses REG
-			gen
-				nand %a, %1.reg1, %1.reg2
-			yields %a
-		with OR_RR
-			uses REG
-			gen
-				nor %a, %1.reg1, %1.reg2
-			yields %a
-		with XOR_RR
-			uses REG
-			gen
-				eqv %a, %1.reg1, %1.reg2
-			yields %a
+	pat com $1==4                      /* NOT word */
+		with exact AND_RR
+			yields {NAND_RR, %1.reg1, %1.reg2}
+		with exact OR_RR
+			yields {NOR_RR, %1.reg1, %1.reg2}
+		with exact XOR_RR
+			yields {EQV_RR, %1.reg1, %1.reg2}
 		with REG
 			yields {NOT_R, %1}
 
@@ -1376,53 +1639,104 @@ PATTERNS
 		leaving
 			loc 0
 
-	pat zer defined($1)	   	           /* Create empty set */
+	pat zer defined($1)                /* Create empty set */
 		leaving
 			loc $1
 			cal ".zer"
 
+
+/* Shifts and rotations */
+
 	pat sli $1==4                      /* Shift left (second << top) */
 		with CONST_STACK REG
 			uses reusing %2, REG
-			gen
-				rlwinm %a, %2, {CONST, (%1.val & 0x1F)}, {CONST, 0}, {CONST, 31-(%1.val & 0x1F)}
+			gen slwi %a, %2, {C, %1.val & 0x1F}
 			yields %a
 		with REG REG
-			uses reusing %2, REG
-			gen
-				slw %a, %2, %1
+			uses reusing %1, reusing %2, REG
+			gen slw %a, %2, %1
 			yields %a
+	pat sli stl $1==4 && inreg($2)==reg_any
+		with CONST_STACK REG
+			gen slwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
+		with REG REG
+			gen slw {LOCAL, $2}, %2, %1
 
-	pat sri $1==4                      /* Shift right signed (second >> top) */
+	pat sri $1==4               /* Shift right signed (second >> top) */
 		with CONST_STACK REG
 			uses reusing %2, REG
-			gen
-				srawi %a, %2, {CONST, %1.val & 0x1F}
+			gen srawi %a, %2, {C, %1.val & 0x1F}
 			yields %a
 		with REG REG
-			uses reusing %2, REG
-			gen
-				sraw %a, %2, %1
+			uses reusing %1, reusing %2, REG
+			gen sraw %a, %2, %1
 			yields %a
+	pat sri stl $1==4 && inreg($2)==reg_any
+		with CONST_STACK REG
+			gen srawi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
+		with REG REG
+			gen sraw {LOCAL, $2}, %2, %1
 
-	pat sru $1==4                      /* Shift right unsigned (second >> top) */
+	pat sru $1==4               /* Shift right unsigned (second >> top) */
 		with CONST_STACK REG
 			uses reusing %2, REG
-			gen
-				rlwinm %a, %2, {CONST, 32-(%1.val & 0x1F)}, {CONST, (%1.val & 0x1F)}, {CONST, 31}
+			gen srwi %a, %2, {C, %1.val & 0x1F}
 			yields %a
 		with REG REG
-			uses reusing %2, REG
-			gen
-				srw %a, %2, %1
+			uses reusing %1, reusing %2, REG
+			gen srw %a, %2, %1
 			yields %a
+	pat sru stl $1==4 && inreg($2)==reg_any
+		with CONST_STACK REG
+			gen srwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
+		with REG REG
+			gen srw {LOCAL, $2}, %2, %1
+
+	pat rol $1==4                      /* Rotate left word */
+		with CONST_STACK REG
+			uses reusing %2, REG
+			gen rotlwi %a, %2, {C, %1.val & 0x1F}
+			yields %a
+		with REG REG
+			uses reusing %1, reusing %2, REG
+			gen rotlw %a, %2, %1
+			yields %a
+	pat rol stl $1==4 && inreg($2)==reg_any
+		with CONST_STACK REG
+			gen rotlwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
+		with REG REG
+			gen rotlw {LOCAL, $2}, %2, %1
+
+	/*
+	 * ror 4 -> ngi 4, rol 4
+	 *   because to rotate right by n bits is to rotate left by
+	 *   (32 - n), which is to rotate left by -n.  PowerPC rotlw
+	 *   handles -n as (-n & 0x1F).
+	 */
+
+	pat ror $1==4                      /* Rotate right word */
+		with CONST_STACK REG
+			uses reusing %2, REG
+			gen rotrwi %a, %2, {C, %1.val & 0x1F}
+			yields %a
+		with /* anything */
+			leaving
+				ngi 4
+				rol 4
+	pat ror stl $1==4 && inreg($2)==reg_any
+		with CONST_STACK REG
+			gen rotrwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
+		with /* anything */
+			leaving
+				ngi 4
+				rol 4
+				stl $2
 
 
 /* Arrays */
 
 	pat aar $1==4                      /* Address of array element */
-		leaving
-			cal ".aar4"
+		leaving cal ".aar4"
 
 	pat lar $1==4                      /* Load from array */
 		with STACK
@@ -1483,8 +1797,7 @@ PATTERNS
 			gen
 				test %1
 				mfcr %a
-				move {XEQ, %a}, %a
-			yields %a
+			yields {XEQ, %a}
 
 	pat tne                            /* top = (top != 0) */
 		with REG
@@ -1492,8 +1805,7 @@ PATTERNS
 			gen
 				test %1
 				mfcr %a
-				move {XNE, %a}, %a
-			yields %a
+			yields {XNE, %a}
 
 	pat tlt                            /* top = (top < 0) */
 		with REG
@@ -1501,8 +1813,7 @@ PATTERNS
 			gen
 				test %1
 				mfcr %a
-				move {XLT, %a}, %a
-			yields %a
+			yields {XLT, %a}
 
 	pat tle                            /* top = (top <= 0) */
 		with REG
@@ -1510,8 +1821,7 @@ PATTERNS
 			gen
 				test %1
 				mfcr %a
-				move {XLE, %a}, %a
-			yields %a
+			yields {XLE, %a}
 
 	pat tgt                            /* top = (top > 0) */
 		with REG
@@ -1519,8 +1829,7 @@ PATTERNS
 			gen
 				test %1
 				mfcr %a
-				move {XGT, %a}, %a
-			yields %a
+			yields {XGT, %a}
 
 	pat tge                            /* top = (top >= 0) */
 		with REG
@@ -1528,176 +1837,139 @@ PATTERNS
 			gen
 				test %1
 				mfcr %a
-				move {XGE, %a}, %a
-			yields %a
+			yields {XGE, %a}
 
 	pat cmi teq $1==4                  /* Signed second == top */
 		with REG CONST2
 			uses reusing %1, REG={COND_RC, %1, %2.val}
-			gen move {XEQ, %a}, %a
-			yields %a
+			yields {XEQ, %a}
 		with CONST2 REG
-			uses reusing %1, REG={COND_RC, %2, %1.val}
-			gen move {XEQ, %a}, %a
-			yields %a
+			uses reusing %2, REG={COND_RC, %2, %1.val}
+			yields {XEQ, %a}
 		with REG REG
-			uses reusing %1, REG={COND_RR, %2, %1}
-			gen move {XEQ, %a}, %a
-			yields %a
+			uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
+			yields {XEQ, %a}
 
 	pat cmi tne $1==4                  /* Signed second != top */
 		with REG CONST2
 			uses reusing %1, REG={COND_RC, %1, %2.val}
-			gen move {XNE, %a}, %a
-			yields %a
+			yields {XNE, %a}
 		with CONST2 REG
-			uses reusing %1, REG={COND_RC, %2, %1.val}
-			gen move {XNE, %a}, %a
-			yields %a
+			uses reusing %2, REG={COND_RC, %2, %1.val}
+			yields {XNE, %a}
 		with REG REG
-			uses reusing %1, REG={COND_RR, %2, %1}
-			gen move {XNE, %a}, %a
-			yields %a
+			uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
+			yields {XNE, %a}
 
 	pat cmi tgt $1==4                  /* Signed second > top */
 		with REG CONST2
 			uses reusing %1, REG={COND_RC, %1, %2.val}
-			gen move {XLT, %a}, %a
-			yields %a
+			yields {XLT, %a}
 		with CONST2 REG
-			uses reusing %1, REG={COND_RC, %2, %1.val}
-			gen move {XGT, %a}, %a
-			yields %a
+			uses reusing %2, REG={COND_RC, %2, %1.val}
+			yields {XGT, %a}
 		with REG REG
-			uses reusing %1, REG={COND_RR, %2, %1}
-			gen move {XGT, %a}, %a
-			yields %a
+			uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
+			yields {XGT, %a}
 
 	pat cmi tge $1==4                  /* Signed second >= top */
 		with REG CONST2
 			uses reusing %1, REG={COND_RC, %1, %2.val}
-			gen move {XLE, %a}, %a
-			yields %a
+			yields {XLE, %a}
 		with CONST2 REG
-			uses reusing %1, REG={COND_RC, %2, %1.val}
-			gen move {XGE, %a}, %a
-			yields %a
+			uses reusing %2, REG={COND_RC, %2, %1.val}
+			yields {XGE, %a}
 		with REG REG
-			uses reusing %1, REG={COND_RR, %2, %1}
-			gen move {XGE, %a}, %a
-			yields %a
+			uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
+			yields {XGE, %a}
 
 	pat cmi tlt $1==4                  /* Signed second < top */
 		with REG CONST2
 			uses reusing %1, REG={COND_RC, %1, %2.val}
-			gen move {XGT, %a}, %a
-			yields %a
+			yields {XGT, %a}
 		with CONST2 REG
-			uses reusing %1, REG={COND_RC, %2, %1.val}
-			gen move {XLT, %a}, %a
-			yields %a
+			uses reusing %2, REG={COND_RC, %2, %1.val}
+			yields {XLT, %a}
 		with REG REG
-			uses reusing %1, REG={COND_RR, %2, %1}
-			gen move {XLT, %a}, %a
-			yields %a
+			uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
+			yields {XLT, %a}
 
 	pat cmi tle $1==4                  /* Signed second <= top */
 		with REG CONST2
 			uses reusing %1, REG={COND_RC, %1, %2.val}
-			gen move {XGE, %a}, %a
-			yields %a
+			yields {XGE, %a}
 		with CONST2 REG
-			uses reusing %1, REG={COND_RC, %2, %1.val}
-			gen move {XLE, %a}, %a
-			yields %a
+			uses reusing %2, REG={COND_RC, %2, %1.val}
+			yields {XLE, %a}
 		with REG REG
-			uses reusing %1, REG={COND_RR, %2, %1}
-			gen move {XLE, %a}, %a
-			yields %a
+			uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
+			yields {XLE, %a}
 
 	pat cmu teq $1==4                  /* Unsigned second == top */
 		with REG UCONST2
 			uses reusing %1, REG={CONDL_RC, %1, %2.val}
-			gen move {XEQ, %a}, %a
-			yields %a
+			yields {XEQ, %a}
 		with UCONST2 REG
-			uses reusing %1, REG={CONDL_RC, %2, %1.val}
-			gen move {XEQ, %a}, %a
-			yields %a
+			uses reusing %2, REG={CONDL_RC, %2, %1.val}
+			yields {XEQ, %a}
 		with REG REG
-			uses reusing %1, REG={CONDL_RR, %2, %1}
-			gen move {XEQ, %a}, %a
-			yields %a
+			uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
+			yields {XEQ, %a}
 
 	pat cmu tne $1==4                  /* Unsigned second != top */
 		with REG UCONST2
 			uses reusing %1, REG={CONDL_RC, %1, %2.val}
-			gen move {XNE, %a}, %a
-			yields %a
+			yields {XNE, %a}
 		with UCONST2 REG
-			uses reusing %1, REG={CONDL_RC, %2, %1.val}
-			gen move {XNE, %a}, %a
-			yields %a
+			uses reusing %2, REG={CONDL_RC, %2, %1.val}
+			yields {XNE, %a}
 		with REG REG
-			uses reusing %1, REG={CONDL_RR, %2, %1}
-			gen move {XNE, %a}, %a
-			yields %a
+			uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
+			yields {XNE, %a}
 
 	pat cmu tgt $1==4                  /* Unsigned second > top */
 		with REG UCONST2
 			uses reusing %1, REG={CONDL_RC, %1, %2.val}
-			gen move {XLT, %a}, %a
-			yields %a
+			yields {XLT, %a}
 		with UCONST2 REG
-			uses reusing %1, REG={CONDL_RC, %2, %1.val}
-			gen move {XGT, %a}, %a
-			yields %a
+			uses reusing %2, REG={CONDL_RC, %2, %1.val}
+			yields {XGT, %a}
 		with REG REG
-			uses reusing %1, REG={CONDL_RR, %2, %1}
-			gen move {XGT, %a}, %a
-			yields %a
+			uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
+			yields {XGT, %a}
 
 	pat cmu tge $1==4                  /* Unsigned second >= top */
 		with REG UCONST2
 			uses reusing %1, REG={CONDL_RC, %1, %2.val}
-			gen move {XLE, %a}, %a
-			yields %a
+			yields {XLE, %a}
 		with UCONST2 REG
-			uses reusing %1, REG={CONDL_RC, %2, %1.val}
-			gen move {XGE, %a}, %a
-			yields %a
+			uses reusing %2, REG={CONDL_RC, %2, %1.val}
+			yields {XGE, %a}
 		with REG REG
-			uses reusing %1, REG={CONDL_RR, %2, %1}
-			gen move {XGE, %a}, %a
-			yields %a
+			uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
+			yields {XGE, %a}
 
 	pat cmu tlt $1==4                  /* Unsigned second < top */
 		with REG UCONST2
 			uses reusing %1, REG={CONDL_RC, %1, %2.val}
-			gen move {XGT, %a}, %a
-			yields %a
+			yields {XGT, %a}
 		with UCONST2 REG
-			uses reusing %1, REG={CONDL_RC, %2, %1.val}
-			gen move {XLT, %a}, %a
-			yields %a
+			uses reusing %2, REG={CONDL_RC, %2, %1.val}
+			yields {XLT, %a}
 		with REG REG
-			uses reusing %1, REG={CONDL_RR, %2, %1}
-			gen move {XLT, %a}, %a
-			yields %a
+			uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
+			yields {XLT, %a}
 
 	pat cmu tle $1==4                  /* Unsigned second <= top */
 		with REG UCONST2
 			uses reusing %1, REG={CONDL_RC, %1, %2.val}
-			gen move {XGE, %a}, %a
-			yields %a
+			yields {XGE, %a}
 		with UCONST2 REG
-			uses reusing %1, REG={CONDL_RC, %2, %1.val}
-			gen move {XLE, %a}, %a
-			yields %a
+			uses reusing %2, REG={CONDL_RC, %2, %1.val}
+			yields {XLE, %a}
 		with REG REG
-			uses reusing %1, REG={CONDL_RR, %2, %1}
-			gen move {XLE, %a}, %a
-			yields %a
+			uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
+			yields {XLE, %a}
 
 
 /* Simple branches */
@@ -1724,11 +1996,11 @@ PATTERNS
 	proc bxx example beq
 		with REG CONST2 STACK
 			gen
-				cmpwi %1, {CONST, %2.val}
+				cmpwi %1, %2
 				bxx[2] {LABEL, $1}
 		with CONST2 REG STACK
 			gen
-				cmpwi %2, {CONST, %1.val}
+				cmpwi %2, %1
 				bxx[1] {LABEL, $1}
 		with REG REG STACK
 			gen
@@ -1746,11 +2018,11 @@ PATTERNS
 	proc cmu4zxx example cmu zeq
 		with REG CONST2 STACK
 			gen
-				cmplwi %1, {CONST, %2.val}
+				cmplwi %1, %2
 				bxx[2] {LABEL, $2}
 		with CONST2 REG STACK
 			gen
-				cmplwi %2, {CONST, %1.val}
+				cmplwi %2, %1
 				bxx[1] {LABEL, $2}
 		with REG REG STACK
 			gen
@@ -1776,41 +2048,41 @@ PATTERNS
 	 * puts gt in the sign bit, to reverse the comparison.
 	 */
 
-	pat cmi $1==INT32                  /* Signed tristate compare */
+	pat cmi $1==4                      /* Signed tristate compare */
 		with REG CONST2
 			uses reusing %1, REG={COND_RC, %1, %2.val}
-			gen rlwinm %a, %a, {CONST, 1}, {CONST, 31}, {CONST, 0}
+			gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0}
 			yields %a
 		with CONST2 REG
 			uses reusing %2, REG={COND_RC, %2, %1.val}
-			gen extlwi %a, %a, {CONST, 2}, {CONST, 0}
+			gen extlwi %a, %a, {C, 2}, {C, 0}
 			yields %a
 		with REG REG
-			uses reusing %1, REG={COND_RR, %2, %1}
-			gen extlwi %a, %a, {CONST, 2}, {CONST, 0}
+			uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
+			gen extlwi %a, %a, {C, 2}, {C, 0}
 			yields %a
 
-	pat cmu $1==INT32                  /* Unsigned tristate compare */
+	pat cmu $1==4                      /* Unsigned tristate compare */
 		with REG UCONST2
 			uses reusing %1, REG={CONDL_RC, %1, %2.val}
-			gen rlwinm %a, %a, {CONST, 1}, {CONST, 31}, {CONST, 0}
+			gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0}
 			yields %a
 		with UCONST2 REG
 			uses reusing %2, REG={CONDL_RC, %2, %1.val}
-			gen extlwi %a, %a, {CONST, 2}, {CONST, 0}
+			gen extlwi %a, %a, {C, 2}, {C, 0}
 			yields %a
 		with REG REG
-			uses reusing %1, REG={CONDL_RR, %2, %1}
-			gen extlwi %a, %a, {CONST, 2}, {CONST, 0}
+			uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
+			gen extlwi %a, %a, {C, 2}, {C, 0}
 			yields %a
 
 	pat cmp                            /* Compare pointers */
 		leaving
-			cmu INT32
+			cmu 4
 
-	pat cms $1==INT32                  /* Compare blocks (word sized) */
+	pat cms $1==4                      /* Compare blocks (word sized) */
 		leaving
-			cmi INT32
+			cmi 4
 
 	pat cms defined($1)
 		leaving
@@ -1824,34 +2096,32 @@ PATTERNS
 
 /* Other branching and labelling */
 
+	/* During an unconditional jump, if the top element on the
+	 * stack has 4 bytes, then we hold it in register r3.
+	 */
 	pat lab topeltsize($1)==4 && !fallthrough($1)
 		kills ALL
-		gen
-			labeldef $1
-			yields r3
+		gen labeldef $1
+		yields r3
 
 	pat lab topeltsize($1)==4 && fallthrough($1)
 		with REG3 STACK
-		kills ALL
-		gen
-			labeldef $1
-		yields r3
+			kills ALL
+			gen labeldef $1
+			yields r3
 
-	pat lab topeltsize($1)!=4
+	pat lab topeltsize($1)!=4          /* Label without r3 */
 		with STACK
-		kills ALL
-		gen
-			labeldef $1
+			kills ALL
+			gen labeldef $1
 
-	pat bra topeltsize($1)==4          /* Unconditional jump with TOS GPRister */
+	pat bra topeltsize($1)==4          /* Branch with r3 */
 		with REG3 STACK
-		gen
-			b {LABEL, $1}
+			gen b {LABEL, $1}
 
-	pat bra topeltsize($1)!=4          /* Unconditional jump without TOS GPRister */
+	pat bra topeltsize($1)!=4          /* Branch without r3 */
 		with STACK
-		gen
-			b {LABEL, $1}
+			gen b {LABEL, $1}
 
 
 /* Miscellaneous */
@@ -1859,8 +2129,7 @@ PATTERNS
 	pat cal                            /* Call procedure */
 		with STACK
 			kills ALL
-			gen
-				bl {LABEL, $1}
+			gen bl {LABEL, $1}
 
 	pat cai                            /* Call procedure indirect */
 		with REG STACK
@@ -1869,10 +2138,10 @@ PATTERNS
 				mtspr ctr, %1
 				bctrl.
 
-	pat lfr $1==INT32                  /* Load function result, word */
+	pat lfr $1==4                      /* Load function result, word */
 		yields r3
 
-	pat lfr $1==INT64                  /* Load function result, double-word */
+	pat lfr $1==8               /* Load function result, double-word */
 		yields r4 r3
 
 	pat ret $1==0                      /* Return from procedure */
@@ -1884,18 +2153,25 @@ PATTERNS
 			mtspr lr, r0
 			lwz r0, {IND_RC_W, fp, 0}
 			/* Free our stack frame. */
-			addi sp, fp, {CONST, 8}
+			addi sp, fp, {C, 8}
 			mr fp, r0
 			blr.
 
+	/* If "ret" coerces STACK to REG3, then the target optimizer
+	 * (top) will delete the extra "addi sp, sp, 4".
+	 */
+
 	pat ret $1==4                      /* Return from procedure, word */
 		with REG3
 			leaving ret 0
 
 	pat ret $1==8                      /* Return from proc, double-word */
-		with REG3 REG
+		with REG3 INT_W
 			gen move %2, r4
 			leaving ret 0
+		with REG3 STACK
+			gen lwz r4, {IND_RC_W, sp, 0}
+			leaving ret 0
 
 	/*
 	 * These rules for blm/bls are wrong if length is zero.
@@ -1908,14 +2184,15 @@ PATTERNS
 			bls
 
 	pat bls                            /* Block move variable length */
-		with REG REG REG
+		with REG SPFP+REG SPFP+REG
+			/* allows sp as %2, %3 */
 			/* ( src%3 dst%2 len%1 -- ) */
 			uses reusing %1, REG, REG, REG
 			gen
-				srwi %a, %1, {CONST, 2}
+				srwi %a, %1, {C, 2}
 				mtspr ctr, %a
-				addi %b, %3, {CONST, 0-4}
-				addi %c, %2, {CONST, 0-4}
+				addi %b, %3, {C, 0-4}
+				addi %c, %2, {C, 0-4}
 			1:	lwzu %a, {IND_RC_W, %b, 4}
 				stwu %a, {IND_RC_W, %c, 4}
 				bdnz {LABEL, "1b"}
@@ -1923,14 +2200,12 @@ PATTERNS
 	pat csa                            /* Array-lookup switch */
 		with STACK
 			kills ALL
-			gen
-				b {LABEL, ".csa"}
+			gen b {LABEL, ".csa"}
 
 	pat csb                            /* Table-lookup switch */
 		with STACK
 			kills ALL
-			gen
-				b {LABEL, ".csb"}
+			gen b {LABEL, ".csb"}
 
 
 /* EM specials */
@@ -1946,151 +2221,116 @@ PATTERNS
 			ste "hol0"
 
 	pat lni                            /* Increment line number */
-		leaving
-			ine "hol0"
+		leaving ine "hol0"
 
 	pat lim                            /* Load EM trap ignore mask */
-		leaving
-			lde ".ignmask"
+		leaving loe ".ignmask"
 
 	pat sim                            /* Store EM trap ignore mask */
+		leaving ste ".ignmask"
+
+	pat sig                            /* Set trap handler, yield old */
 		leaving
-			ste ".ignmask"
+			loe ".trppc"
+			exg 4
+			ste ".trppc"
 
 	pat trp                            /* Raise EM trap */
 		with REG3
 			kills ALL
-			gen
-				bl {LABEL, ".trap"}
-
-	pat sig                            /* Set trap handler */
-		leaving
-			ste ".trppc"
+			gen bl {LABEL, ".trp"}
 
 	pat rtt                            /* Return from trap */
-		leaving
-			ret 0
+		leaving ret 0
 
-	/*
-	 * Lexical local base: lxl 0 yields our fp, lxl n yields the
-	 * fp of the nth statically enclosing procedure.
+	pat rck $1==4                      /* Range check */
+		leaving cal ".rck"
+
+	/* Our caller's local base, "lxl 0 dch", appears in
+	 * lang/cem/libcc.ansi/setjmp/setjmp.e, lang/m2/libm2/par_misc.e
 	 */
-	pat lxl $1==0
-		leaving
-			lor 0
-	pat lxl $1==1
-		yields {IND_RC_W, fp, SL_OFFSET}
-	pat lxl $1==2
-		uses REG={IND_RC_W, fp, SL_OFFSET}
-		yields {IND_RC_W, %a, SL_OFFSET}
-	pat lxl $1==3
-		uses REG={IND_RC_W, fp, SL_OFFSET}, reusing %a, REG
-		gen move {IND_RC_W, %a, SL_OFFSET}, %b
-		yields {IND_RC_W, %b, SL_OFFSET}
-	pat lxl $1>=4 && $1<=0x8000
-		uses REG={IND_RC_W, fp, SL_OFFSET},
-		     REG={CONST_0000_7FFF, $1-1}
-		gen
-			mtspr ctr, %b
-		1:	lwz %a, {IND_RC_W, %a, SL_OFFSET}
-			bdnz {LABEL, "1b"}
-		yields %a
+	pat lxl dch $1==0
+		yields {IND_RC_W, fp, FP_OFFSET}
 
 	pat dch               /* Dynamic chain: LB -> caller's LB */
 		with REG
 			yields {IND_RC_W, %1, FP_OFFSET}
 
 	pat lpb                            /* LB -> argument base */
-		leaving
-			adp EM_BSIZE
-
-	pat lxa                            /* Lexical argument base */
-		leaving
-			lxl $1
-			lpb
+		leaving adp EM_BSIZE
 
+	/* "gto" must preserve the function result for "lfr", so
+	 * longjmp() can pass the return value to setjmp().
+	 *  - See lang/cem/libcc.ansi/setjmp/setjmp.e
+	 *
+	 * Must preserve r3 and r4, so no "uses REG".
+	 * PowerPC can't add r0 + constant.  Use r12.
+	 */
 	pat gto                            /* longjmp */
 		with STACK
-			uses REG
 			gen
-				move {LABEL, $1}, %a
-				move {IND_RC_W, %a, 8}, fp
-				move {IND_RC_W, %a, 4}, sp
-				move {IND_RC_W, %a, 0}, %a
-				mtspr ctr, %a
+				move {LABEL, $1}, r12
+				move {IND_RC_W, r12, 8}, fp
+				move {IND_RC_W, r12, 4}, sp
+				move {IND_RC_W, r12, 0}, r12
+				mtspr ctr, r12
 				bctr.
 
 	pat lor $1==0                      /* Load local base */
-		uses REG
-		gen
-			move fp, %a
-		yields %a
+		leaving lxl 0
 
 	pat lor $1==1                      /* Load stack pointer */
-		uses REG
-		gen
-			move sp, %a
-		yields %a
+		with STACK
+			yields sp
+
+	/* Next few patterns for "lor 1" appear in
+	 * lang/m2/libm2/par_misc.e
+	 */
+	pat lor adp $1==1 && smalls($2)    /* sp + constant */
+		with STACK
+			yields {SUM_RC, sp, $2}
+
+	/* Subtract stack pointer by doing %1 - (sp - 4)
+	 * because sp - 4 would point to %1.
+	 */
+	pat lor sbs loc adu $1==1 && $2==4 && $4==4
+		with REG STACK
+			uses reusing %1, REG
+			gen subf %a, sp, %1
+			yields %a
+			leaving loc $3+4 adu 4
+	pat lor sbs $1==1 && $2==4
+		with REG STACK
+			uses reusing %1, REG
+			gen subf %a, sp, %1
+			yields {SUM_RC, %a, 4}
 
 	pat str $1==0                      /* Store local base */
-		with REG
-			gen
-				move %1, fp
-
-	pat str $1==1                      /* Store stack pointer */
-		with REG
-			gen
-				move %1, sp
-
-	pat loc ass $1==4 && $2==4         /* Drop 4 bytes from stack */
-		with exact REG
-			/* nop */
+		with INT_W
+			gen move %1, fp
 		with STACK
 			gen
-				addi sp, sp, {CONST, 4}
+				lwz fp, {IND_RC_W, sp, 0}
+				addi sp, sp, {C, 4}
 
-	pat ass $1==4                      /* Adjust stack by variable amount */
-		with CONST2 STACK
-			gen
-				move {SUM_RC, sp, %1.val}, sp
-		with CONST_HZ STACK
-			gen
-				move {SUM_RC, sp, his(%1.val)}, sp
-		with CONST_STACK-CONST2-CONST_HZ STACK
-			gen
-				move {SUM_RC, sp, his(%1.val)}, sp
-				move {SUM_RC, sp, los(%1.val)}, sp
-		with REG STACK
-			gen
-				move {SUM_RR, sp, %1}, sp
-
-	pat asp                            /* Adjust stack by constant amount */
-		leaving
-			loc $1
-			ass 4
-
-	pat lae rck $2==4                  /* Range check */
-		with REG
+	pat str $1==1                      /* Store stack pointer */
+		with INT_W
 			kills ALL
-			gen
-				cmpwi %1, {CONST, rom($1, 1)}
-				blt {LABEL, ".trap_erange"}
-				cmpwi %1, {CONST, rom($1, 2)}
-				bgt {LABEL, ".trap_erange"}
-			yields %1
+			gen move %1, sp
+		with STACK
+			kills ALL
+			gen lwz sp, {IND_RC_W, sp, 0}
 
 
 /* Single-precision floating-point */
 
-	pat zrf $1==INT32                  /* Push zero */
-		leaving
-			loe ".fs_00000000"
+	pat zrf $1==4                      /* Push zero */
+		leaving loe ".fs_00000000"
 
 	pat adf $1==4                      /* Add single */
 		with FSREG FSREG
-			uses reusing %1, FSREG
-			gen
-				fadds %a, %2, %1
+			uses reusing %1, reusing %2, FSREG
+			gen fadds %a, %2, %1
 			yields %a
 	pat adf stl $1==4 && inreg($2)==reg_float
 		with FSREG FSREG
@@ -2098,9 +2338,8 @@ PATTERNS
 
 	pat sbf $1==4                      /* Subtract single */
 		with FSREG FSREG
-			uses reusing %1, FSREG
-			gen
-				fsubs %a, %2, %1
+			uses reusing %1, reusing %2, FSREG
+			gen fsubs %a, %2, %1
 			yields %a
 	pat sbf stl $1==4 && inreg($2)==reg_float
 		with FSREG FSREG
@@ -2108,79 +2347,79 @@ PATTERNS
 
 	pat mlf $1==4                      /* Multiply single */
 		with FSREG FSREG
-			uses reusing %1, FSREG
-			gen
-				fmuls %a, %2, %1
+			uses reusing %1, reusing %2, FSREG
+			gen fmuls %a, %2, %1
 			yields %a
 	pat mlf stl $1==4 && inreg($2)==reg_float
 		with FSREG FSREG
 			gen fmuls {LOCAL, $2}, %2, %1
 
-	pat dvf $1==INT32                  /* Divide single */
+	pat dvf $1==4                      /* Divide single */
 		with FSREG FSREG
-			uses reusing %1, FSREG
-			gen
-				fdivs %a, %2, %1
+			uses reusing %1, reusing %2, FSREG
+			gen fdivs %a, %2, %1
 			yields %a
 	pat dvf stl $1==4 && inreg($2)==reg_float
 		with FSREG FSREG
 			gen fdivs {LOCAL, $2}, %2, %1
 
-	pat ngf $1==INT32                  /* Negate single */
+	pat ngf $1==4                      /* Negate single */
 		with FSREG
 			uses reusing %1, FSREG
-			gen
-				fneg %a, %1
+			gen fneg %a, %1
 			yields %a
 	pat ngf stl $1==4 && inreg($2)==reg_float
 		with FSREG
 			gen fneg {LOCAL, $2}, %1
 
-	pat cmf $1==INT32                  /* Compare single */
+	/* When a or b is NaN, then a < b, a <= b, a > b, a >= b
+	 * should all be false.  We can't make them false, because
+	 *  - EM's _cmf_ is only for ordered comparisons.
+	 *  - The peephole optimizer assumes (a < b) == !(a >= b).
+	 *
+	 * We do make a == b false and a != b true, by checking the
+	 * eq (equal) bit or un (unordered) bit in cr0.
+	 */
+
+	pat cmf $1==4                      /* Compare single */
 		with FSREG FSREG
 			uses REG={COND_FS, %2, %1}
-			gen extlwi %a, %a, {CONST, 2}, {CONST, 0}
+			/* Extract lt, gt, un; put lt in sign bit. */
+			gen andisX %a, %a, {C, 0xd000}
 			yields %a
 
 	pat cmf teq $1==4                  /* Single second == top */
 		with FSREG FSREG
 			uses REG={COND_FS, %2, %1}
-			gen move {XEQ, %a}, %a
-			yields %a
+			yields {XEQ, %a}
 
 	pat cmf tne $1==4                  /* Single second == top */
 		with FSREG FSREG
 			uses REG={COND_FS, %2, %1}
-			gen move {XNE, %a}, %a
-			yields %a
+			yields {XNE, %a}
 
 	pat cmf tgt $1==4                  /* Single second > top */
 		with FSREG FSREG
 			uses REG={COND_FS, %2, %1}
-			gen move {XGT, %a}, %a
-			yields %a
+			yields {XGT, %a}
 
 	pat cmf tge $1==4                  /* Single second >= top */
 		with FSREG FSREG
 			uses REG={COND_FS, %2, %1}
-			gen move {XGE, %a}, %a
-			yields %a
+			yields {XGE, %a}
 
 	pat cmf tlt $1==4                  /* Single second < top */
 		with FSREG FSREG
 			uses REG={COND_FS, %2, %1}
-			gen move {XLT, %a}, %a
-			yields %a
+			yields {XLT, %a}
 
 	pat cmf tle $1==4                  /* Single second <= top */
 		with FSREG FSREG
 			uses REG={COND_FS, %2, %1}
-			gen move {XLE, %a}, %a
-			yields %a
+			yields {XLE, %a}
 
 	proc cmf4zxx example cmf zeq
 		with FSREG FSREG STACK
-			uses REG
 			gen
 				fcmpo cr0, %2, %1
 				bxx* {LABEL, $2}
@@ -2193,12 +2432,11 @@ PATTERNS
 	pat cmf zlt $1==4    call cmf4zxx("blt")
 	pat cmf zle $1==4    call cmf4zxx("ble")
 
-	pat loc loc cff $1==INT32 && $2==INT64 /* Convert single to double */
+	pat loc loc cff $1==4 && $2==8     /* Convert single to double */
 		with FSREG
 			yields %1.1
 
-	/* Convert single to signed int */
-	pat loc loc cfi $1==4 && $2==4
+	pat loc loc cfi $1==4 && $2==4     /* Single to signed int */
 		leaving
 			loc 4
 			loc 8
@@ -2207,8 +2445,7 @@ PATTERNS
 			loc 4
 			cfi
 
-	/* Convert single to unsigned int */
-	pat loc loc cfu $1==4 && $2==4
+	pat loc loc cfu $1==4 && $2==4     /* Single to unsigned int */
 		leaving
 			loc 4
 			loc 8
@@ -2217,8 +2454,7 @@ PATTERNS
 			loc 4
 			cfu
 
-	/* Convert signed int to single */
-	pat loc loc cif $1==4 && $2==4
+	pat loc loc cif $1==4 && $2==4     /* Signed int to single */
 		leaving
 			loc 4
 			loc 8
@@ -2227,8 +2463,7 @@ PATTERNS
 			loc 4
 			cff
 
-	/* Convert unsigned int to single */
-	pat loc loc cuf $1==4 && $2==4
+	pat loc loc cuf $1==4 && $2==4     /* Unsigned int to single */
 		leaving
 			loc 4
 			loc 8
@@ -2237,18 +2472,23 @@ PATTERNS
 			loc 4
 			cff
 
+	pat fef $1==4                      /* Split fraction, exponent */
+		leaving cal ".fef4"
+
+	/* Multiply two singles, then split fraction, integer */
+	pat fif $1==4
+		leaving cal ".fif4"
+
 
 /* Double-precision floating-point */
 
-	pat zrf $1==INT64                  /* Push zero */
-		leaving
-			lde ".fd_00000000"
+	pat zrf $1==8                      /* Push zero */
+		leaving lde ".fd_00000000"
 
 	pat adf $1==8                      /* Add double */
 		with FREG FREG
-			uses reusing %1, FREG
-			gen
-				fadd %a, %2, %1
+			uses reusing %1, reusing %2, FREG
+			gen fadd %a, %2, %1
 			yields %a
 	pat adf sdl $1==8 && inreg($2)==reg_float
 		with FREG FREG
@@ -2256,9 +2496,8 @@ PATTERNS
 
 	pat sbf $1==8                      /* Subtract double */
 		with FREG FREG
-			uses reusing %1, FREG
-			gen
-				fsub %a, %2, %1
+			uses reusing %1, reusing %2, FREG
+			gen fsub %a, %2, %1
 			yields %a
 	pat sbf sdl $1==8 && inreg($2)==reg_float
 		with FREG FREG
@@ -2266,9 +2505,8 @@ PATTERNS
 
 	pat mlf $1==8                      /* Multiply double */
 		with FREG FREG
-			uses reusing %1, FREG
-			gen
-				fmul %a, %2, %1
+			uses reusing %1, reusing %2, FREG
+			gen fmul %a, %2, %1
 			yields %a
 	pat mlf sdl $1==8 && inreg($2)==reg_float
 		with FREG FREG
@@ -2276,9 +2514,8 @@ PATTERNS
 
 	pat dvf $1==8                      /* Divide double */
 		with FREG FREG
-			uses reusing %1, FREG
-			gen
-				fdiv %a, %2, %1
+			uses reusing %1, reusing %2, FREG
+			gen fdiv %a, %2, %1
 			yields %a
 	pat dvf sdl $1==8 && inreg($2)==reg_float
 		with FREG FREG
@@ -2287,58 +2524,53 @@ PATTERNS
 	pat ngf $1==8                      /* Negate double */
 		with FREG
 			uses reusing %1, FREG
-			gen
-				fneg %a, %1
+			gen fneg %a, %1
 			yields %a
 	pat ngf sdl $1==8 && inreg($2)==reg_float
 		with FREG
 			gen fneg {DLOCAL, $2}, %1
 
-	pat cmf $1==INT64                  /* Compare double */
+	/* To compare NaN, see comment above pat cmf $1==4 */
+
+	pat cmf $1==8                      /* Compare double */
 		with FREG FREG
 			uses REG={COND_FD, %2, %1}
-			gen extlwi %a, %a, {CONST, 2}, {CONST, 0}
+			/* Extract lt, gt, un; put lt in sign bit. */
+			gen andisX %a, %a, {C, 0xd000}
 			yields %a
 
 	pat cmf teq $1==8                  /* Double second == top */
 		with FREG FREG
 			uses REG={COND_FD, %2, %1}
-			gen move {XEQ, %a}, %a
-			yields %a
+			yields {XEQ, %a}
 
-	pat cmf tne $1==8                  /* Single second == top */
+	pat cmf tne $1==8                  /* Double second != top */
 		with FREG FREG
 			uses REG={COND_FD, %2, %1}
-			gen move {XNE, %a}, %a
-			yields %a
+			yields {XNE, %a}
 
 	pat cmf tgt $1==8                  /* Double second > top */
 		with FREG FREG
 			uses REG={COND_FD, %2, %1}
-			gen move {XGT, %a}, %a
-			yields %a
+			yields {XGT, %a}
 
 	pat cmf tge $1==8                  /* Double second >= top */
 		with FREG FREG
 			uses REG={COND_FD, %2, %1}
-			gen move {XGE, %a}, %a
-			yields %a
+			yields {XGE, %a}
 
 	pat cmf tlt $1==8                  /* Double second < top */
 		with FREG FREG
 			uses REG={COND_FD, %2, %1}
-			gen move {XLT, %a}, %a
-			yields %a
+			yields {XLT, %a}
 
 	pat cmf tle $1==8                  /* Double second <= top */
 		with FREG FREG
 			uses REG={COND_FD, %2, %1}
-			gen move {XLE, %a}, %a
-			yields %a
+			yields {XLE, %a}
 
 	proc cmf8zxx example cmf zeq
 		with FREG FREG STACK
-			uses REG
 			gen
 				fcmpo cr0, %2, %1
 				bxx* {LABEL, $2}
@@ -2351,42 +2583,37 @@ PATTERNS
 	pat cmf zlt $1==8    call cmf8zxx("blt")
 	pat cmf zle $1==8    call cmf8zxx("ble")
 
-	pat loc loc cff $1==INT64 && $2==INT32 /* Convert double to single */
+	/* Convert double to single */
+	/*   reg_float pattern must be first, or it goes unused! */
+	pat loc loc cff stl $1==8 && $2==4 && inreg($4)==reg_float
+		with FREG
+			gen frsp {LOCAL, $4}, %1
+	pat loc loc cff $1==8 && $2==4
 		with FREG
 			uses reusing %1, FSREG
-			gen
-				frsp %a, %1
+			gen frsp %a, %1
 			yields %a
 
-	/* Convert double to signed int */
-	pat loc loc cfi $1==8 && $2==4
+	pat loc loc cfi $1==8 && $2==4     /* Double to signed int */
 		with FREG STACK
 			uses reusing %1, FREG
 			gen
 				fctiwz %a, %1
 				stfdu %a, {IND_RC_D, sp, 0-8}
-				addi sp, sp, {CONST, 4}
+				addi sp, sp, {C, 4}
 
-	/* Convert double to unsigned int */
-	pat loc loc cfu $1==8 && $2==4
-		leaving
-			cal ".cfu8"
+	pat loc loc cfu $1==8 && $2==4     /* Double to unsigned int */
+		leaving cal ".cfu8"
 
-	/* Convert signed int to double */
-	pat loc loc cif $1==4 && $2==8
-		leaving
-			cal ".cif8"
+	pat loc loc cif $1==4 && $2==8     /* Signed int to double */
+		leaving cal ".cif8"
 
-	/* Convert unsigned int to double */
-	pat loc loc cuf $1==4 && $2==8
-		leaving
-			cal ".cuf8"
+	pat loc loc cuf $1==4 && $2==8     /* Unsigned int to double */
+		leaving cal ".cuf8"
 
 	pat fef $1==8                      /* Split fraction, exponent */
-		leaving
-			cal ".fef8"
+		leaving cal ".fef8"
 
 	/* Multiply two doubles, then split fraction, integer */
 	pat fif $1==8
-		leaving
-			cal ".fif8"
+		leaving cal ".fif8"
diff --git a/mach/powerpc/top/table b/mach/powerpc/top/table
index fdec03b2e..196cae128 100644
--- a/mach/powerpc/top/table
+++ b/mach/powerpc/top/table
@@ -1,12 +1,14 @@
 
-/* PowerPC desciptor table for ACK target optimizer */
+/* PowerPC table for ACK target optimizer */
 
-MAXOP 3;
+MAXOP 5;
 LABEL_STARTER '.';
 
 %%;
 
+L1, L2, L3, L4, L5  { not_using_sp(VAL) };
 RNZ                 { strcmp(VAL, "r0") };  /* not r0 */
+UP                  { positive(VAL) };
 X, Y, Z             { TRUE };
 
 %%;
@@ -16,10 +18,74 @@ X, Y, Z             { TRUE };
 addi  RNZ, RNZ, 0            -> ;
 addis RNZ, RNZ, 0            -> ;
 
+addi RNZ, RNZ, X : addi RNZ, RNZ, Y { plus(X, Y, Z) }
+                             -> addi RNZ, RNZ, Z ;
+
+/* Lower "addi sp, sp, UP" by lifting other instructions, looking for
+ * chances to merge or delete _addi_ instructions, and assuming that
+ * the code generator uses "sp" not "r1".
+ */
+addi sp, sp, UP : ANY L1                 { lift(ANY) }
+                             -> ANY L1                 : addi sp, sp, UP ;
+addi sp, sp, UP : ANY L1, L2             { lift(ANY) }
+                             -> ANY L1, L2             : addi sp, sp, UP ;
+addi sp, sp, UP : ANY L1, L2, L3         { lift(ANY) }
+                             -> ANY L1, L2, L3         : addi sp, sp, UP ;
+addi sp, sp, UP : ANY L1, L2, L3, L4     { lift(ANY) }
+                             -> ANY L1, L2, L3, L4     : addi sp, sp, UP ;
+addi sp, sp, UP : ANY L1, L2, L3, L4, L5 { lift(ANY) }
+                             -> ANY L1, L2, L3, L4, L5 : addi sp, sp, UP ;
+addi sp, sp, UP : lmw Y, L1 { Y[0]=='r' && atoi(Y+1)>1 }
+                             -> lmw Y, L1 : addi sp, sp, UP ;
+
+/* Merge _addi_ when popping from the stack. */
+addi sp, sp, X : lwz L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
+                             -> lwz L1, Z(sp) : addi sp, sp, X ;
+addi sp, sp, X : lfs L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
+                             -> lfs L1, Z(sp) : addi sp, sp, X ;
+addi sp, sp, X : lfd L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
+                             -> lfd L1, Z(sp) : addi sp, sp, X ;
+
+/* Lower or delete _addi_ when pushing to the stack. */
+addi sp, sp, X : stwu  L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
+                             -> stw  L1, Z(sp) : addi sp, sp, Z ;
+addi sp, sp, X : stfsu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
+                             -> stfs L1, Z(sp) : addi sp, sp, Z ;
+addi sp, sp, X : stfdu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
+                             -> stfd L1, Z(sp) : addi sp, sp, Z ;
+addi sp, sp, 4 : stfdu L1, -8(sp) -> stfdu L1, -4(sp) ;
+
+/* Delete _addi_ when setting the stack pointer. */
+addi sp, sp, X : addi sp, L1, Y   -> addi sp, L1, Y ;
+addi sp, sp, X : lwz sp, L1       -> lwz sp, L1 ;
+
+or X, Y, Y                   -> mr X, Y ;
+or. X, Y, Y                  -> mr. X, Y ;
+
 mr X, X                      -> ;
 fmr X, X                     -> ;
 
-or X, Y, Z : or. X, X, X     -> or. X, Y, Z ;
+add X, Y, Z   : mr. X, X     -> add. X, Y, Z ;
+and X, Y, Z   : mr. X, X     -> and. X, Y, Z ;
+andc X, Y, Z  : mr. X, X     -> andc. X, Y, Z ;
+divw X, Y, Z  : mr. X, X     -> divw. X, Y, Z ;
+divwu X, Y, Z : mr. X, X     -> divwu. X, Y, Z ;
+extsb X, Y, Z : mr. X, X     -> extsb. X, Y, Z ;
+extsh X, Y, Z : mr. X, X     -> extsh. X, Y, Z ;
+eqv X, Y, Z   : mr. X, X     -> eqv. X, Y, Z ;
+mullw X, Y, Z : mr. X, X     -> mullw. X, Y, Z ;
+nand X, Y, Z  : mr. X, X     -> nand. X, Y, Z ;
+nor X, Y, Z   : mr. X, X     -> nor. X, Y, Z ;
+or X, Y, Z    : mr. X, X     -> or. X, Y, Z ;
+orc X, Y, Z   : mr. X, X     -> orc. X, Y, Z ;
+slw X, Y, Z   : mr. X, X     -> slw. X, Y, Z ;
+slwi X, Y, Z  : mr. X, X     -> slwi. X, Y, Z ;
+subf X, Y, Z  : mr. X, X     -> subf. X, Y, Z ;
+sraw X, Y, Z  : mr. X, X     -> sraw. X, Y, Z ;
+srawi X, Y, Z : mr. X, X     -> srawi. X, Y, Z ;
+srw X, Y, Z   : mr. X, X     -> srw. X, Y, Z ;
+srwi X, Y, Z  : mr. X, X     -> srwi. X, Y, Z ;
+xor X, Y, Z   : mr. X, X     -> xor. X, Y, Z ;
 
 b X : labdef X               -> labdef X ;
 
@@ -27,3 +93,98 @@ b X : labdef X               -> labdef X ;
 /* LT=0, GT=1, EQ=2, OV=3 */
 
 %%;
+
+/* Is it a word character? 0-9A-Za-z_ */
+static int isword(char c) {
+	return
+	    (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') ||
+	    (c >= 'a' && c <= 'z') || (c == '_');
+}
+
+/* Does operand _s_ not use the stack pointer?  Returns 0 if "sp" or "r1" occurs as a whole word in _s_, else 1. */
+int not_using_sp(const char *s) {
+	int boundary;
+
+	boundary = 1;	/* the start of the string counts as a word boundary */
+	while (*s) {
+		if (boundary &&
+		    ((s[0]=='s' && s[1]=='p') || (s[0]=='r' && s[1]=='1')) &&
+		    !isword(s[2]))
+			return 0;
+		boundary = !isword(*s);	/* a word may start only after a non-word character */
+		s++;
+	}
+	return 1;
+}
+
+/* Does _s_ parse entirely (via strtol, base 10) as an integer greater than zero? */
+int positive(const char *s) {
+	long n;
+	char *end;
+
+	n = strtol(s, &end, 10);
+	return *s != '\0' && *end == '\0' && n > 0;	/* non-empty, no trailing junk, n > 0 */
+}
+
+
+/* Instructions to lift(), from ../ncg/table minus branch instructions.
+ * MUST stay sorted in strcmp() order, because lift() uses bsearch().
+ */
+const char *liftables[] = {
+	"add", "add.", "addi",
+	"and", "andc", "andi.", "andis.",
+	"cmp", "cmpi", "cmpl", "cmpli",
+	"cmplw", "cmplwi", "cmpw", "cmpwi",
+	"divw", "divwu", "eqv", "extlwi", "extrwi", "extsb", "extsh",
+	"fadd", "fadds", "fcmpo", "fctiwz", "fdiv", "fdivs",
+	"fmr", "fmul", "fmuls", "fneg", "frsp", "fsub", "fsubs",
+	"lbz", "lbzx",
+	"lfd", "lfdu", "lfdx", "lfs", "lfsu", "lfsx",
+	"lha", "lhax", "lhz", "lhzx",
+	"li", "lis", "lwz", "lwzu", "lwzx",
+	"mfcr", "mfspr", "mr", "mr.", "mtspr", "mullw",
+	"nand", "neg", "nor", "or", "or.", "ori", "oris",
+	"rlwinm", "rlwnm", "rotlwi", "rotrwi",
+	"slw", "slwi", "sraw", "srawi", "srw", "srwi",
+	"stb", "stbx",
+	"stfd", "stfdu", "stfdx", "stfs", "stfsu", "stfsx",
+	"sth", "sthx", "stw", "stwu", "stwx",
+	"subf", "xor", "xori", "xoris",
+};
+
+static int liftcmp(const void *a, const void *b) {
+	return strcmp(*(const char **)a, *(const char **)b);
+}
+
+/* May we lift instruction _s_ above "addi SP, SP, X"?  Returns 1 if _s_ is found in liftables[], else 0. */
+int lift(const char *s) {
+	return bsearch(&s, liftables,
+	    sizeof(liftables) / sizeof(liftables[0]),
+	    sizeof(liftables[0]), liftcmp) != 0;	/* explicit test: never convert the pointer to int */
+}
+
+
+/* Does it fit a signed 16-bit integer? */
+static int fits16(long l) {
+	return l >= -32768 && l <= 32767;
+}
+
+/* Tries sum = a + b with signed 16-bit integers.  Returns 1 and writes the decimal sum to _sum_, else returns 0. */
+int plus(const char *a, const char *b, const char *sum)
+{
+	long la, lb, lsum;
+	char *end;
+	/* Each operand must be a complete base-10 number that fits 16 bits. */
+	la = strtol(a, &end, 10);
+	if (*a == '\0' || *end != '\0' || !fits16(la))
+		return 0;
+	lb = strtol(b, &end, 10);
+	if (*b == '\0' || *end != '\0' || !fits16(lb))
+		return 0;
+	/* The sum must fit 16 bits too; _sum_ is left untouched on failure. */
+	lsum = la + lb;
+	if (!fits16(lsum))
+		return 0;
+	snprintf((char *)sum, 7, "%ld", lsum);	/* _sum_ is an output buffer; "-32768" + NUL needs 7 bytes */
+	return 1;
+}
diff --git a/mach/proto/mcg/main.c b/mach/proto/mcg/main.c
index cf8a4435f..aa0fa4816 100644
--- a/mach/proto/mcg/main.c
+++ b/mach/proto/mcg/main.c
@@ -42,13 +42,14 @@ int main(int argc, char* const argv[])
     const char* inputfilename = NULL;
     const char* outputfilename = NULL;
     FILE* output;
+    int i;
 
     program_name = argv[0];
 
     opterr = 1;
     for (;;)
     {
-        int c = getopt(argc, argv, "-d:D:C:o:");
+        int c = getopt(argc, argv, "d:D:C:o:");
         if (c == -1)
             break;
 
@@ -79,20 +80,22 @@ int main(int argc, char* const argv[])
                     fatal("already specified an output file");
                 outputfilename = optarg;
                 break;
-
-            case 1:
-                if (inputfilename)
-                    fatal("unexpected argument '%s'", optarg);
-                inputfilename = optarg;
         }
     }
 
+    for (i = optind; i < argc; i++)
+    {
+        if (inputfilename)
+            fatal("unexpected argument '%s'", argv[i]);
+        inputfilename = argv[i];
+    }
+
     symbol_init();
 
-	if (!EM_open((char*) inputfilename))
-		fatal("couldn't open input '%s': %s",
+    if (!EM_open((char*) inputfilename))
+        fatal("couldn't open input '%s': %s",
             inputfilename ? inputfilename : "<stdin>", EM_error);
-	
+
     if (outputfilename)
     {
         outputfile = fopen(outputfilename, "w");
diff --git a/mach/proto/mcg/treebuilder.c b/mach/proto/mcg/treebuilder.c
index eed770170..ac811fc14 100644
--- a/mach/proto/mcg/treebuilder.c
+++ b/mach/proto/mcg/treebuilder.c
@@ -274,7 +274,7 @@ static struct ir* store(int size, struct ir* address, int offset, struct ir* val
     else
         opcode = IR_STORE;
 
-    if (offset > 0)
+    if (offset != 0)
         address = new_ir2(
             IR_ADD, EM_pointersize,
             address, new_wordir(offset)
@@ -304,7 +304,7 @@ static struct ir* load(int size, struct ir* address, int offset)
     else
         opcode = IR_LOAD;
 
-    if (offset > 0)
+    if (offset != 0)
         address = new_ir2(
             IR_ADD, EM_pointersize,
             address, new_wordir(offset)
@@ -416,6 +416,31 @@ static void helper_function(const char* name)
     );
 }
 
+static void helper_function_with_arg(const char* name, struct ir* arg)
+{
+    /* Abuses IR_SETRET to set a register to pass one argument to a
+     * helper function.
+     *
+     * FIXME:  As of January 2018, mach/powerpc/libem takes an
+     * argument in register r3 only for ".los4", ".sts4", ".trp".
+     * This is an accident.  Should the argument be on the stack, or
+     * should other helpers use a register? */
+
+    materialise_stack();
+    appendir(
+        new_ir1(
+            IR_SETRET, arg->size,
+            arg
+        )
+    );
+    appendir(
+        new_ir1(
+            IR_CALL, 0,
+            new_labelir(name)
+        )
+    );
+}
+
 static void insn_simple(int opcode)
 {
     switch (opcode)
@@ -437,6 +462,7 @@ static void insn_simple(int opcode)
         case op_cii: simple_convert(IR_FROMSI); break;
         case op_ciu: simple_convert(IR_FROMSI); break;
         case op_cui: simple_convert(IR_FROMUI); break;
+        case op_cuu: simple_convert(IR_FROMUI); break;
         case op_cfu: simple_convert(IR_FROMUF); break;
         case op_cfi: simple_convert(IR_FROMSF); break;
         case op_cif: simple_convert(IR_FROMSI); break;
@@ -496,10 +522,12 @@ static void insn_simple(int opcode)
 
         case op_lim:
         {
+            /* Traps use only 16 bits of .ignmask, but we keep an
+             * entire word, even if a word has more than 2 bytes. */
             push(
-                new_ir1(
-                    (EM_wordsize == 2) ? IR_LOAD : IR_LOADH, EM_wordsize,
-                    new_labelir(".ignmask")
+                load(
+                    EM_wordsize,
+                    new_labelir(".ignmask"), 0
                 )
             );
             break;
@@ -507,26 +535,34 @@ static void insn_simple(int opcode)
 
         case op_sim:
         {
-            sequence_point();
             appendir(
-                new_ir2(
-                    (EM_wordsize == 2) ? IR_STORE : IR_STOREH, EM_wordsize,
-                    new_labelir(".ignmask"),
+                store(
+                    EM_wordsize,
+                    new_labelir(".ignmask"), 0,
                     pop(EM_wordsize)
                 )
             );
             break;
         }
 
-        case op_trp: helper_function(".trp"); break;
+        case op_trp:
+            helper_function_with_arg(".trp", pop(EM_wordsize));
+            break;
 
         case op_sig:
         {
+            struct ir* label = new_labelir(".trppc");
             struct ir* value = pop(EM_pointersize);
+            push(
+                load(
+                    EM_pointersize,
+                    label, 0
+                )
+            );
             appendir(
                 store(
                     EM_pointersize,
-                    new_labelir(".trppc"), 0,
+                    label, 0,
                     value
                 )
             );
@@ -539,12 +575,13 @@ static void insn_simple(int opcode)
             break;
         }
 
-        /* FIXME: These instructions are really complex and barely used
-         * (Modula-2 and Pascal set support, I believe). Leave them until
-         * later. */
-        case op_set: helper_function(".unimplemented_set"); break;
-        case op_ior: helper_function(".unimplemented_ior"); break;
-
+        case op_and: helper_function(".and"); break;
+        case op_ior: helper_function(".ior"); break;
+        case op_xor: helper_function(".xor"); break;
+        case op_com: helper_function(".com"); break;
+        case op_cms: helper_function(".cms"); break;
+        case op_set: helper_function(".set"); break;
+        case op_inn: helper_function(".inn"); break;
 
         case op_dch:
             push(
@@ -670,6 +707,31 @@ static void simple_alu2(int opcode, int size, int irop, const char* fallback)
     }
 }
 
+static void rotate(int opcode, int size, int irop, int irop_reverse)
+{
+    if (size > (2*EM_wordsize))
+        fatal("treebuilder: can't do opcode %s with size %d", em_mnem[opcode - sp_fmnem], size);
+    else
+    {
+        struct ir* right = pop(size);
+        struct ir* left = pop(size);
+        struct ir* bits = new_wordir(8 * size);
+
+        /* a rol b -> (a << b) | (a >> (bits - b)), where bits = 8 * size */
+        push(
+            new_ir2(
+                IR_OR, size,
+                new_ir2(irop, size, left, right),
+                new_ir2(
+                    irop_reverse, size,
+                    left,
+                    new_ir2(IR_SUB, size, bits, right)
+                )
+            )
+        );
+    }
+}
+
 static struct ir* extract_block_refs(struct basicblock* bb)
 {
     struct ir* outir = NULL;
@@ -720,26 +782,28 @@ static struct ir* ptradd(struct ir* address, int offset)
         );
 }
 
-static void blockmove(struct ir* dest, struct ir* src, struct ir* size)
+static struct ir* walk_static_chain(int level)
 {
-    /* memmove stack: ( size src dest -- ) */
-    push(size);
-    push(src);
-    push(dest);
+    struct ir* ir;
 
-    materialise_stack();
-    appendir(
-        new_ir1(
-            IR_CALL, 0,
-            new_labelir("memmove")
-        )
-    );
-    appendir(
-        new_ir1(
-            IR_STACKADJUST, EM_pointersize,
-            new_wordir(EM_pointersize*2 + EM_wordsize)
-        )
+    /* The static chain, when it exists, is the first argument of each
+     * procedure.  The chain begins with the current frame at level 0,
+     * and continues until we reach the outermost procedure. */
+    ir = new_ir0(
+        IR_GETFP, EM_pointersize
     );
+    while (level--)
+    {
+        /* Walk to the next frame pointer. */
+        ir = load(
+            EM_pointersize,
+            new_ir1(
+                IR_FPTOAB, EM_pointersize,
+                ir
+            ), 0
+        );
+    }
+    return ir;
 }
 
 static void insn_ivalue(int opcode, arith value)
@@ -765,8 +829,10 @@ static void insn_ivalue(int opcode, arith value)
 
         case op_and: simple_alu2(opcode, value, IR_AND, ".and"); break;
         case op_ior: simple_alu2(opcode, value, IR_OR, ".ior"); break;
-        case op_xor: simple_alu2(opcode, value, IR_EOR, NULL); break;
+        case op_xor: simple_alu2(opcode, value, IR_EOR, ".xor"); break;
         case op_com: simple_alu1(opcode, value, IR_NOT, ".com"); break;
+        case op_rol: rotate(opcode, value, IR_LSL, IR_LSR); break;
+        case op_ror: rotate(opcode, value, IR_LSR, IR_LSL); break;
 
         case op_adf: simple_alu2(opcode, value, IR_ADDF, NULL); break;
         case op_sbf: simple_alu2(opcode, value, IR_SUBF, NULL); break;
@@ -774,12 +840,23 @@ static void insn_ivalue(int opcode, arith value)
         case op_dvf: simple_alu2(opcode, value, IR_DIVF, NULL); break;
         case op_ngf: simple_alu1(opcode, value, IR_NEGF, NULL); break;
 
-        case op_cmu: /* fall through */
-        case op_cms: push(tristate_compare(value, IR_COMPAREUI)); break;
+        case op_cms:
+            if (value > (2*EM_wordsize))
+            {
+                push(new_wordir(value));
+                helper_function(".cms");
+                break;
+            }
+            /* fall through */
+        case op_cmu: push(tristate_compare(value, IR_COMPAREUI)); break;
         case op_cmi: push(tristate_compare(value, IR_COMPARESI)); break;
         case op_cmf: push(tristate_compare(value, IR_COMPAREF)); break;
 
-        case op_rck: helper_function(".rck"); break;
+        case op_rck:
+            if (value != EM_wordsize)
+                fatal("'rck %d' not supported", value);
+            helper_function(".rck");
+            break;
         case op_set: push(new_wordir(value)); helper_function(".set"); break;
         case op_inn: push(new_wordir(value)); helper_function(".inn"); break;
 
@@ -930,26 +1007,24 @@ static void insn_ivalue(int opcode, arith value)
 
             if (value > (EM_wordsize*2))
             {
-                /* We're going to need to do multiple stores; fix the address
+                /* We're going to need to do multiple loads; fix the address
                  * so it'll go into a register and we can do maths on it. */
                 appendir(ptr);
             }
 
+            /* Stack grows down.  Load backwards. */
             while (value > 0)
             {
                 int s = EM_wordsize*2;
                 if (value < s)
                     s = value;
-
+                value -= s;
                 push(
                     load(
                         s,
-                        ptr, offset
+                        ptr, value
                     )
                 );
-
-                value -= s;
-                offset += s;
             }
 
             assert(value == 0);
@@ -1099,7 +1174,12 @@ static void insn_ivalue(int opcode, arith value)
         case op_dup:
         {
             sequence_point();
-            if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize))
+            if (value > (2*EM_wordsize))
+            {
+                push(new_wordir(value));
+                helper_function(".dus4");
+            }
+            else if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize))
             {
                 struct ir* v1 = pop(EM_wordsize);
                 struct ir* v2 = pop(EM_wordsize);
@@ -1117,12 +1197,30 @@ static void insn_ivalue(int opcode, arith value)
             break;
         }
 
+        case op_dus:
+        {
+            if (value != EM_wordsize)
+                fatal("'dus %d' not supported", value);
+            helper_function(".dus4");
+            break;
+        }
+
         case op_exg:
         {
-            struct ir* v1 = pop(value);
-            struct ir* v2 = pop(value);
-            push(v1);
-            push(v2);
+            if (value > (2*EM_wordsize))
+            {
+                push(
+                    new_wordir(value)
+                );
+                helper_function(".exg");
+            }
+            else
+            {
+                struct ir* v1 = pop(value);
+                struct ir* v2 = pop(value);
+                push(v1);
+                push(v2);
+            }
             break;
         }
 
@@ -1285,53 +1383,19 @@ static void insn_ivalue(int opcode, arith value)
         }
 
         case op_lxl:
-        {
-            struct ir* ir;
-
-            /* Walk the static chain. */
-
-            ir = new_ir0(
-                IR_GETFP, EM_pointersize
+            push(
+                walk_static_chain(value)
             );
-
-            while (value--)
-            {
-                ir = new_ir1(
-                    IR_CHAINFP, EM_pointersize,
-                    ir
-                );
-            }
-
-            push(ir);
             break;
-        }
 
         case op_lxa:
-        {
-            struct ir* ir;
-
-            /* Walk the static chain. */
-
-            ir = new_ir0(
-                IR_GETFP, EM_pointersize
-            );
-
-            while (value--)
-            {
-                ir = new_ir1(
-                    IR_CHAINFP, EM_pointersize,
-                    ir
-                );
-            }
-
             push(
                 new_ir1(
                     IR_FPTOAB, EM_pointersize,
-                    ir
+                    walk_static_chain(value)
                 )
             );
             break;
-        }
 
         case op_fef:
         {
@@ -1394,6 +1458,7 @@ static void insn_ivalue(int opcode, arith value)
                     break;
 
                 case 1:
+                    materialise_stack();
                     push(
                         appendir(
                             new_ir0(
@@ -1403,10 +1468,6 @@ static void insn_ivalue(int opcode, arith value)
                     );
                     break;
 
-                case 2:
-                    helper_function(".unimplemented_lor_2");
-                    break;
-
                 default:
                     fatal("'lor %d' not supported", value);
             }
@@ -1436,10 +1497,6 @@ static void insn_ivalue(int opcode, arith value)
                     );
                     break;
 
-                case 2:
-                    helper_function(".unimplemented_str_2");
-                    break;
-
                 default:
                     fatal("'str %d' not supported", value);
             }
@@ -1448,100 +1505,27 @@ static void insn_ivalue(int opcode, arith value)
         }
 
         case op_blm:
-        {
-            /* Input stack: ( src dest -- ) */
-            struct ir* dest = pop(EM_pointersize);
-            struct ir* src = pop(EM_pointersize);
-            blockmove(dest, src, new_wordir(value));
+            push(new_wordir(value));
+            helper_function(".bls4");
             break;
-        }
 
         case op_bls:
-        {
-            /* Input stack: ( src dest size -- ) */
-            struct ir* dest = pop(EM_pointersize);
-            struct ir* src = pop(EM_pointersize);
-            struct ir* size = pop(EM_wordsize);
-            blockmove(dest, src, size);
+            if (value != EM_wordsize)
+                fatal("'bls %d' not supported", value);
+            helper_function(".bls4");
             break;
-        }
 
         case op_los:
-        {
-            /* Copy an arbitrary amount to the stack. */
-            struct ir* bytes = pop(EM_wordsize);
-            struct ir* address = pop(EM_pointersize);
-
-            materialise_stack();
-            appendir(
-                new_ir1(
-                    IR_STACKADJUST, EM_pointersize,
-                    new_ir1(
-                        IR_NEG, EM_wordsize,
-                        bytes
-                    )
-                )
-            );
-
-            push(
-                new_ir0(
-                    IR_GETSP, EM_pointersize
-                )
-            );
-            push(address);
-            push(bytes);
-            materialise_stack();
-            appendir(
-                new_ir1(
-                    IR_CALL, 0,
-                    new_labelir("memcpy")
-                )
-            );
-            appendir(
-                new_ir1(
-                    IR_STACKADJUST, EM_pointersize,
-                    new_wordir(EM_pointersize*2 + EM_wordsize)
-                )
-            );
+            if (value != EM_wordsize)
+                fatal("'los %d' not supported", value);
+            helper_function_with_arg(".los4", pop(EM_wordsize));
             break;
-        }
 
         case op_sts:
-        {
-            /* Copy an arbitrary amount from the stack. */
-            struct ir* bytes = pop(EM_wordsize);
-            struct ir* dest = pop(EM_pointersize);
-            struct ir* src;
-
-            materialise_stack();
-            src = appendir(
-                    new_ir0(
-                        IR_GETSP, EM_pointersize
-                    )
-                );
-
-            push(dest);
-            push(src);
-            push(bytes);
-            materialise_stack();
-            appendir(
-                new_ir1(
-                    IR_CALL, 0,
-                    new_labelir("memcpy")
-                )
-            );
-            appendir(
-                new_ir1(
-                    IR_STACKADJUST, EM_pointersize,
-                    new_ir2(
-                        IR_ADD, EM_wordsize,
-                        new_wordir(EM_pointersize*2 + EM_wordsize),
-                        bytes
-                    )
-                )
-            );
+            if (value != EM_wordsize)
+                fatal("'sts %d' not supported", value);
+            helper_function_with_arg(".sts4", pop(EM_wordsize));
             break;
-        }
 
         case op_lin:
         {
@@ -1677,17 +1661,17 @@ static void insn_lvalue(int opcode, const char* label, arith offset)
 
         case op_gto:
         {
-            struct ir* descriptor = pop(EM_pointersize);
+            struct ir* descriptor = address_of_external(label, offset);
 
             appendir(
                 new_ir1(
-                    IR_SETSP, EM_pointersize,
+                    IR_SETFP, EM_pointersize,
                     load(EM_pointersize, descriptor, EM_pointersize*2)
                 )
             );
             appendir(
                 new_ir1(
-                    IR_SETFP, EM_pointersize,
+                    IR_SETSP, EM_pointersize,
                     load(EM_pointersize, descriptor, EM_pointersize*1)
                 )
             );
diff --git a/mach/proto/ncg/subr.c b/mach/proto/ncg/subr.c
index 0feb54f30..0dc045973 100644
--- a/mach/proto/ncg/subr.c
+++ b/mach/proto/ncg/subr.c
@@ -518,7 +518,7 @@ int split(token_p tp, int *ip, int ply, int toplevel) {
 	int tpl;
 
 	for (cp=c2coercs;cp->c2_texpno>=0; cp++) {
-		if (!match(tp,&machsets[cp->c2_texpno],0))
+		if (!match(tp,&machsets[cp->c2_texpno],cp->c2_expr))
 			continue;
 		ok=1;
 		for (i=0; ok && i<cp->c2_nsplit;i++) {
diff --git a/man/powerpc_as.6 b/man/powerpc_as.6
index 8198d6bce..f6bb90818 100644
--- a/man/powerpc_as.6
+++ b/man/powerpc_as.6
@@ -1,33 +1,136 @@
-.TH POWERPC_AS 1
+.TH POWERPC_AS 6 2018-03-07
 .ad
 .SH NAME
 powerpc_as \- assembler for PowerPC
-
 .SH SYNOPSIS
 as [options] argument ...
-
 .SH DESCRIPTION
 This assembler is made with the general framework
 described in \fIuni_ass\fP(6).
-
+.PP
+It can assemble the instructions from Book I and Book II of PowerPC
+version 2.01.
+This includes the branch, integer, and floating point instructions
+from Book I; and the cache, synchronization, and time base
+instructions from Book II.
+.PP
+There is no support for other instructions, such as supervisor-mode
+instructions or vector instructions.
+There is some support for 64-bit integer instructions, but the
+assembler only has 32-bit symbols.
 .SH SYNTAX
-Most 32-bit integer and floating point instructions are supported, but not many
-short form instructions. Instructions which take 16-bit operands can additionally
-use the following special functions:
-
-.IP hi16[value], ha16[value]
-Returns the high half of the value of the expression; if the value is not absolute,
-also generates the appropriate fixup. Use of either of these \fImust\fR be followed,
-in the next instruction, by the corresponding use of \fBlo16[]\fR. Use \fBhi16[]\fR
-if the low half is going to interpret its payload as an unsigned value, and
-\fBha16[]\fR if it will be interpreted as a signed value (so that the high half can
-be adjusted to match).
-
-.IP lo16[]
-Returns the low half of the value of the expression. No fixup is generated. Use of
-\fBlo16[]\fR must come in the instruction immediately after a use of \fBhi16[]\fR or
-\fBha16[]\fR.
-
+.SS general purpose registers
+There are 32 GPRs from \fBr0\fP to \fBr31\fP.
+In this assembler, \fBsp\fP is an alias for \fBr1\fP, and \fBfp\fP is
+an alias for \fBr2\fP, because \fIack\fP uses r1 as the stack pointer
+and r2 as the frame pointer.
+Other compilers don't use r2 as the frame pointer.
+.PP
+GPR syntax requires a register name, not a number.
+For example, \(oqaddi\ r5,\ r4,\ 1\(cq works, but
+\(oqaddi\ 5,\ 4,\ 1\(cq is a syntax error.
+.PP
+Certain instructions ignore the contents of \fBr0\fP and use zero.
+This happens when using r0 as the second operand of \fIaddi\fP or
+\fIaddis\fP, or when addressing \(oqexpr(r0)\(cq or
+\(oqr0,\ gpr\(cq.
+The syntax is still the name r0, not the number 0.
+.SS floating point registers
+There are 32 FPRs from \fBf0\fP to \fBf31\fP.
+Each FPR has 64 bits and can hold a single-precision or
+double-precision number.
+FPR syntax requires a register name, not a number.
+.SS special purpose registers
+The three named SPRs are \fBctr\fP (count register), \fBlr\fP (link
+register), and \fBxer\fP (exception register).
+\(oqmfspr\(cq and \(oqmtspr\(cq allow these names or a number.
+.SS condition register
+There is a 32-bit condition register, where bit 0 is most significant,
+and bit 31 is least significant.
+This gets split into 8 registers of 4 bits each, from \fBcr0\fP (with
+bits 0 to 3) to \fBcr7\fP (with bits 28 to 31).
+Some instructions use the names cr0 to cr7, others use a bit numbered
+0 to 31, and others use all 32 bits.
+.SS addressing modes
+\(oqexpr(gpr)\(cq addresses \fIexpr\fP + the contents of \fIgpr\fP,
+except that \(oqexpr(r0)\(cq addresses \fIexpr\fP\ +\ 0.
+A few instructions, like \(oqstwu\(cq, also update \fIgpr\fP by
+setting it to the address.
+.PP
+\(oqgprA,\ gprB\(cq in certain instructions addresses the contents of
+\fIgprA\fP + the contents of \fIgprB\fP, except that \(oqr0,\ gprB\(cq
+addresses 0\ +\ the contents of \fIgprB\fP.
+.SS 16-bit operands
+Some instructions have a 16-bit operand.
+This can be a bare \fIexpr\fP (whose value must fit in 16 bits,
+signed or unsigned), or it can be one of these special functions:
+.IP "hi16[expr], ha16[expr]"
+Returns the high half of the 32-bit value of the expression.
+If the low half is negative (from 0x8000 to 0xffff),
+then \fBha16[]\fP adjusts the high half by adding 1.
+Use \fBhi16[]\fP if the instruction with \fBlo16[]\fP is going to
+interpret its operand as an unsigned value, or \fBha16[]\fP if it will
+interpret it as signed.
+.IP
+If \fIexpr\fP is not absolute, then the assembler must generate a
+fixup for the linker.
+The fixup only works if the instruction is
+\(oqaddis gpr, r0, hx16[expr]\(cq or \(oqlis gpr, hx16[expr]\(cq.
+.IP lo16[expr]
+Returns the low half of the 32-bit value of the expression.
+.SS short forms
+Some instructions have short forms using extended mnemonics (or
+simplified mnemonics) like \fIli\fP, \fIsrwi\fP, and many others.
+.IP "li r6, 789"
+is short for: addi r6, r0, 789
+.IP "srwi r3, r4, 2"
+is short for: rlwinm r3, r4, 30, 2, 31
+.PP
+This assembler doesn't support extended mnemonics with branch
+prediction, such as \fIblt+\fP or \fIbne-\fP.
+It always parses \(oq+\(cq and \(oq-\(cq as operators,
+never as part of a mnemonic.
+.SH EXAMPLES
+There are two ways to load r3 with _symbol\ =\ 0x1234abcd.
+One way is
+.PP
+.nf
+   lis  r3, hi16[_symbol]
+   ori  r3, r3, lo16[_symbol]  ! r3 = 0x12340000 | 0x0000abcd
+.fi
+.PP
+The other way is
+.PP
+.nf
+   lis  r3, ha16[_symbol]
+   addi r3, r3, lo16[_symbol]  ! r3 = 0x12350000 + 0xffffabcd
+.fi
+.PP
+The next code adds 1 to a global variable.
+.PP
+.nf
+   lis  r3, ha16[_var]
+   lwz  r4, lo16[_var](r3)
+   addi r4, r4, 1
+   stw  r4, lo16[_var](r3)
+.fi
 .SH "SEE ALSO"
 uni_ass(6),
 ack(1)
+.PP
+Freescale Semiconductor, \fIProgramming Environments Manual for 32-Bit
+Implementations of the PowerPC Architecture\fP, Rev. 3, September 2005.
+.PP
+IBM, \fIPowerPC User Instruction Set Architecture, Book I\fP, Version
+2.01, September 2003.
+.PP
+IBM, \fIPowerPC Virtual Environment Architecture, Book II\fP, Version
+2.01, December 2003.
+.SH CAVEATS
+Beware that not every processor can run every instruction.
+The 32-bit processors can't run 64-bit instructions like \fIlwa\fP,
+\fIstd\fP, and \fIfctid\fP.
+The PowerPC 601 can't run \fIstfiwx\fP, \fIfres\fP, \fIfrsqrte\fP, or
+\fIfsel\fP.
+Many models, like the PowerPC G4, can't run \fIfsqrt\fP or
+\fIfsqrts\fP.
diff --git a/modules/src/em_code/insert.c b/modules/src/em_code/insert.c
index 36950c3ea..00c628dcb 100644
--- a/modules/src/em_code/insert.c
+++ b/modules/src/em_code/insert.c
@@ -99,20 +99,19 @@ C_out_parts(pp)
 		}
 		else {
 			/* copy the chunk to output */
-#ifdef INCORE
-			register char *s = C_BASE + pp->pp_begin;
-			char *se = C_BASE + pp->pp_end;
-
-			while (s < se) {
-				put(*s++);
-			}
-#else
 			register long b = pp->pp_begin;
 
 			while (b < pp->pp_end) {
+#ifdef INCORE
+				/* C_BASE is not constant, put() may
+				   move C_BASE, so each iteration of
+				   this loop must read C_BASE again.
+				*/
+				put(C_BASE[b++]);
+#else
 				put(getbyte(b++));
-			}
 #endif
+			}
 		}
 		prev = pp;
 		pp = pp->pp_next;
diff --git a/modules/src/object/wr_ranlib.c b/modules/src/object/wr_ranlib.c
index 91274d71c..b515ffb3b 100644
--- a/modules/src/object/wr_ranlib.c
+++ b/modules/src/object/wr_ranlib.c
@@ -10,16 +10,27 @@ wr_ranlib(fd, ran, cnt1)
 	struct ranlib	*ran;
 	long	cnt1;
 {
-	{
-		register long cnt = cnt1;
-		register struct ranlib *r = ran;
-		register char *c = (char *) r;
+	struct ranlib *r;
+	long cnt, val;
+	char *c;
 
-		while (cnt--) {
-			put4(r->ran_off,c); c += 4;
-			put4(r->ran_pos,c); c += 4;
-			r++;
-		}
+	/*
+	 * We overwrite the structs in r with the bytes in c, so we
+	 * don't need to allocate another buffer.
+	 *
+	 * put4(r->ran_off, c) can fail if r->ran_off and c overlap in
+	 * memory, if this is a big-endian machine.  It tries to swap
+	 * the bytes from big to little endian, but overwrites some
+	 * bytes before reading them.  To prevent this, we must copy
+	 * each value before we overwrite it.
+	 */
+	r = ran;
+	c = (char *)r;
+	cnt = cnt1;
+	while (cnt--) {
+		val = r->ran_off; put4(val, c); c += 4;
+		val = r->ran_pos; put4(val, c); c += 4;
+		r++;
 	}
 	wr_bytes(fd, (char *) ran, cnt1 * SZ_RAN);
 }
diff --git a/modules/src/print/doprnt.c b/modules/src/print/doprnt.c
index a77b7d2c1..1d888e570 100644
--- a/modules/src/print/doprnt.c
+++ b/modules/src/print/doprnt.c
@@ -16,7 +16,7 @@
 	%d = int
 $ */
 void
-doprnt(File *fp, char *fmt, va_list argp)
+doprnt(File *fp, const char *fmt, va_list argp)
 {
 	char buf[SSIZE];
 
diff --git a/modules/src/print/format.c b/modules/src/print/format.c
index 2ad920bc8..e03717918 100644
--- a/modules/src/print/format.c
+++ b/modules/src/print/format.c
@@ -35,7 +35,7 @@ integral(int c)
 	%d = int
 $ */
 int
-_format(char *buf, char *fmt, va_list argp)
+_format(char *buf, const char *fmt, va_list argp)
 {
 	register char *pf = fmt;
 	register char *pb = buf;
diff --git a/modules/src/print/fprint.c b/modules/src/print/fprint.c
index c401858a9..6b5b8a389 100644
--- a/modules/src/print/fprint.c
+++ b/modules/src/print/fprint.c
@@ -17,7 +17,7 @@
 $ */
 /*VARARGS*/
 void
-fprint(File *fp, char *fmt, ...)
+fprint(File *fp, const char *fmt, ...)
 {
 	va_list args;
 	char buf[SSIZE];
diff --git a/modules/src/print/print.c b/modules/src/print/print.c
index cd9346e98..2e1256a54 100644
--- a/modules/src/print/print.c
+++ b/modules/src/print/print.c
@@ -17,7 +17,7 @@
 $ */
 /*VARARGS*/
 void
-print(char *fmt, ...)
+print(const char *fmt, ...)
 {
 	va_list args;
 	char buf[SSIZE];
diff --git a/modules/src/print/print.h b/modules/src/print/print.h
index 56372376a..974e4bf1b 100644
--- a/modules/src/print/print.h
+++ b/modules/src/print/print.h
@@ -9,10 +9,10 @@
 
 #include <stdarg.h>
 
-void print(char *fmt, ...);
-void fprint(File *f, char *fmt, ...);
-void doprnt(File *f, char *fmt, va_list ap);
-int _format(char *buf, char *fmt, va_list ap);
-char *sprint(char *buf, char *fmt, ...);
+void print(const char *fmt, ...);
+void fprint(File *f, const char *fmt, ...);
+void doprnt(File *f, const char *fmt, va_list ap);
+int _format(char *buf, const char *fmt, va_list ap);
+char *sprint(char *buf, const char *fmt, ...);
 
 #endif /* __PRINT_INCLUDED__ */
diff --git a/modules/src/print/sprint.c b/modules/src/print/sprint.c
index d88b47e69..7c9dbf9b0 100644
--- a/modules/src/print/sprint.c
+++ b/modules/src/print/sprint.c
@@ -17,7 +17,7 @@
 $ */
 /*VARARGS*/
 char *
-sprint(char *buf, char *fmt, ...)
+sprint(char *buf, const char *fmt, ...)
 {
 	va_list args;
 
diff --git a/plat/linux/libsys/errno.s b/plat/linux/libsys/errno.s
deleted file mode 100644
index 550fd6d7c..000000000
--- a/plat/linux/libsys/errno.s
+++ /dev/null
@@ -1,28 +0,0 @@
-#
-! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/errno.s,v $
-! $State: Exp $
-! $Revision: 1.1 $
-
-! Declare segments (the order is important).
-
-.sect .text
-.sect .rom
-.sect .data
-.sect .bss
-
-#define D(e) .define e; e
-
-.sect .data
-
-! Define various ACK error numbers. Note that these are *not* ANSI C
-! errnos, and are used for different purposes.
-
-D(ERANGE)         = 1
-D(ESET)           = 2
-D(EIDIVZ)         = 6
-D(EHEAP)          = 17
-D(EILLINS)        = 18
-D(EODDZ)          = 19
-D(ECASE)          = 20
-D(EBADMON)        = 25
-
diff --git a/plat/linux/libsys/syscalls.h b/plat/linux/libsys/syscalls.h
index 19d5543c6..8bddcc0ee 100644
--- a/plat/linux/libsys/syscalls.h
+++ b/plat/linux/libsys/syscalls.h
@@ -174,6 +174,12 @@
 #define __NR_mremap 163
 #define __NR_setresuid 164
 #define __NR_getresuid 165
+
+/*
+ * i386, m68020, powerpc use different numbers after 165.
+ * This file only has the numbers for i386.
+ */
+#if defined(__i386)
 #define __NR_vm86 166
 #define __NR_query_module 167
 #define __NR_poll 168
@@ -324,5 +330,6 @@
 
 #define concat(x, y) x##y
 #define MAPPED_SYSCALL(p, n) .define concat(p,n); concat(p,n): xor eax, eax; movb al, concat(__NR_,n); jmp __mapped_syscall
+#endif /* __i386 */
 
 #endif
diff --git a/plat/linux386/libsys/build.lua b/plat/linux386/libsys/build.lua
index a4d2d7447..7de7b4061 100644
--- a/plat/linux386/libsys/build.lua
+++ b/plat/linux386/libsys/build.lua
@@ -6,6 +6,7 @@ acklibrary {
         "plat/linux/libsys/*.s",
     },
 	deps = {
+		"plat/linux/libsys/*.h",
 		"lang/cem/libcc.ansi/headers+headers",
 		"plat/linux386/include+headers",
 	},
diff --git a/plat/linux386/libsys/trapno.s b/plat/linux386/libsys/trapno.s
new file mode 100644
index 000000000..4996de338
--- /dev/null
+++ b/plat/linux386/libsys/trapno.s
@@ -0,0 +1,13 @@
+#define D(e) .define e; e
+
+! Define various EM trap numbers needed by mach/i386/libem.
+! Note that these are *not* ANSI C errnos.
+
+D(ERANGE)         = 1
+D(ESET)           = 2
+D(EIDIVZ)         = 6
+D(EHEAP)          = 17
+D(EILLINS)        = 18
+D(EODDZ)          = 19
+D(ECASE)          = 20
+D(EBADMON)        = 25
diff --git a/plat/linux68k/libsys/build.lua b/plat/linux68k/libsys/build.lua
index ded71cdd1..c17436517 100644
--- a/plat/linux68k/libsys/build.lua
+++ b/plat/linux68k/libsys/build.lua
@@ -6,6 +6,7 @@ acklibrary {
         "plat/linux/libsys/*.s",
     },
 	deps = {
+		"plat/linux/libsys/*.h",
 		"lang/cem/libcc.ansi/headers+headers",
 		"plat/linux68k/include+headers",
 	},
diff --git a/plat/linuxppc/boot.s b/plat/linuxppc/boot.s
index 2da5dd556..33b2abd61 100644
--- a/plat/linuxppc/boot.s
+++ b/plat/linuxppc/boot.s
@@ -32,7 +32,7 @@ begtext:
 
 	lwz r3, 0(sp)            ! r3 = argc
 	addi r4, sp, 4           ! r4 = argv
-	rlwinm r5, r3, 32-2, 2, 31 ! shift left 2 bits
+	slwi r5, r3, 2           ! r5 = argc * 4 (was a right shift)
 	add r5, r5, r4 
-	addi r5, r5, 8           ! r5 = env
+	addi r5, r5, 4           ! skip argv's NULL; r5 = env
 	
diff --git a/plat/linuxppc/descr b/plat/linuxppc/descr
index 1bbb9fbd9..7f6f8fc02 100644
--- a/plat/linuxppc/descr
+++ b/plat/linuxppc/descr
@@ -19,7 +19,7 @@ var PLATFORM=linuxppc
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
 var CPP_F=-D__unix
 var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x10000054
-var MACHOPT_F=-m3
+var MACHOPT_F=-m2
 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr
 
 # Override the setting in fe so that files compiled for linuxppc can see
diff --git a/plat/linuxppc/libsys/_syscall.s b/plat/linuxppc/libsys/_syscall.s
index c7e818830..f60423bea 100644
--- a/plat/linuxppc/libsys/_syscall.s
+++ b/plat/linuxppc/libsys/_syscall.s
@@ -12,17 +12,8 @@
 
 .sect .text
 
-EINVAL = 22
+#define EINVAL 22
 
-#define IFFALSE 4
-#define IFTRUE 12
-#define ALWAYS 20
-
-#define LT 0
-#define GT 1
-#define EQ 2
-#define OV 3
-	
 ! Perform a Linux system call.
 
 .define __syscall
@@ -32,21 +23,21 @@ __syscall:
 	lwz r4, 8(sp)
 	lwz r5, 12(sp)
 	sc 0
-	bclr IFFALSE, OV, 0
-	
+	bnslr
+
 	! On error, r3 contains the errno.	
 	! It just so happens that errnos 1-34 are the same in Linux as in ACK.
-	cmpi cr0, 0, r3, 1
-	bc IFTRUE, LT, 2f
-	cmpi cr0, 0, r3, 34
-	bc IFTRUE, GT, 2f
-	
+	cmpwi r3, 1
+	blt 2f
+	cmpwi r3, 34
+	bgt 2f
+
 3:
-	li32 r4, _errno
-	stw r3, 0(r4)
-	addi r3, r0, -1
-	bclr ALWAYS, 0, 0
-	
+	lis r4, ha16[_errno]
+	stw r3, lo16[_errno](r4)
+	li r3, -1
+	blr
+
 2:
-	addi r3, r0, EINVAL
+	li r3, EINVAL
 	b 3b
diff --git a/plat/linuxppc/libsys/build.lua b/plat/linuxppc/libsys/build.lua
index f7b16b378..f58df16ea 100644
--- a/plat/linuxppc/libsys/build.lua
+++ b/plat/linuxppc/libsys/build.lua
@@ -4,12 +4,10 @@ acklibrary {
 		"./_syscall.s",
 		"./sigaction.s",
 		"./signal.c",
-		"./trap.s",
 		"plat/linux/libsys/_exit.c",
 		"plat/linux/libsys/_hol0.s",
 		"plat/linux/libsys/close.c",
 		"plat/linux/libsys/creat.c",
-		"plat/linux/libsys/errno.s",
 		"plat/linux/libsys/execve.c",
 		"plat/linux/libsys/getpid.c",
 		"plat/linux/libsys/gettimeofday.c",
@@ -26,6 +24,7 @@ acklibrary {
 		"plat/linux/libsys/write.c",
 	},
 	deps = {
+		"plat/linux/libsys/*.h",
 		"lang/cem/libcc.ansi/headers+headers",
 		"plat/linuxppc/include+headers",
 	},
diff --git a/plat/linuxppc/libsys/sigaction.s b/plat/linuxppc/libsys/sigaction.s
index 0509c8e72..1b1cea24a 100644
--- a/plat/linuxppc/libsys/sigaction.s
+++ b/plat/linuxppc/libsys/sigaction.s
@@ -1,156 +1,194 @@
 #define __NR_sigaction		67
-#define SIG_BLOCK		0
+#define __NR_sigprocmask	126
 #define SIG_SETMASK		2
-#define MAXSIG			32
 
-/* offsets into our stack frame */
-#define mynew	16	/* new sigaction */
-#define mynset	32	/* new signal set */
-#define myoset	36	/* old signal set */
-#define mysave	40
-#define mysize	56
+/* offsets into struct sigaction */
+#define sa_handler	0	/* in union with sa_sigaction */
+#define sa_mask		4
+#define sa_flags	8
+#define sa_restorer	12
+
+/* offsets from stack pointer */
+#define mynewact	16	/* struct sigaction */
+#define myoldact	32
+#define newmask		64	/* signal set */
+#define oldmask		68
+#define oldhandler	72
+#define myret		76
+#define savelr		80
+#define signum		84	/* first argument */
+#define newact		88
+#define oldact		92
 
 .sect .text; .sect .rodata; .sect .data; .sect .bss
 
 /*
  * Linux calls signal handlers with arguments in registers, but the
  * ACK expects arguments on the stack.  This sigaction() uses a
- * "bridge" to move the arguments.
+ * "bridge" to move the arguments, but
+ *
+ *  - If the caller passes a bad pointer, this sigaction() causes
+ *    SIGBUS or SIGSEGV instead of setting errno = EFAULT.
+ *
+ *  - This sigaction() only works with signals 1 to 31, not with
+ *    real-time signals 32 to 64.
+ *
+ *  - This sigaction() is not safe for multiple threads.
+ *
+ * int sigaction(int signum, const struct sigaction *newact,
+ *		 struct sigaction *oldact);
  */
 .sect .text
 .define _sigaction
 _sigaction:
 	mflr	r0
-	subi	r1, r1, mysize
-	stw	r31, mysave+8(r1)
-	stw	r30, mysave+4(r1)
-	stw	r29, mysave(r1)
-	stw	r0, mysave+12(r1)
-	li	r3, 0
-	stw	r3, mynset(r1)	   	! mynset = 0
-	lwz	r29, mysize(r1)		! r29 = signal number
-	lwz	r30, mysize+4(r1)	! r30 = new action
-	lwz	r31, mysize+8(r1)	! r31 = old action
+	li	r3, __NR_sigprocmask
+	stwu	r3, -signum(sp)		/* keep 0(sp) = __NR_sigprocmask */
+	stw	r0, savelr(sp)
+
+	/* Copy newact to stack (before blocking SIGBUS, SIGSEGV). */
+	lwz	r3, newact(sp)
+	mr.	r3, r3
+	beq	1f			/* skip if newact == NULL */
+	lwz	r4, sa_handler(r3)
+	lwz	r5, sa_mask(r3)
+	lwz	r6, sa_flags(r3)
+	lwz	r7, sa_restorer(r3)
+	stw	r4, mynewact+sa_handler(sp)
+	stw	r5, mynewact+sa_mask(sp)
+	stw	r6, mynewact+sa_flags(sp)
+	stw	r7, mynewact+sa_restorer(sp)
+
 	/*
-	 * If the new action is non-NULL, the signal number is in
-	 * range 1 to MAXSIG, and the new handler is not SIG_DFL 0
-	 * or SIG_IGN 1, then we interpose our bridge.
+	 * Block all signals to prevent a race.  After we set sharray,
+	 * we must call the kernel's sigaction before the next signal
+	 * handler runs.  This prevents two problems:
+	 *
+	 *  - The bridge might call the new handler while the kernel
+	 *    uses the mask and flags of the old handler.
+	 *
+	 *  - The signal handler might call sigaction() and destroy
+	 *    sharray.  We must block all signals because any signal
+	 *    handler might call sigaction() for our signal.
 	 */
-	cmpwi	cr0, r30, 0
-	subi	r7, r29, 1		! r7 = index in handlers
-	cmplwi	cr7, r7, MAXSIG		! unsigned comparison
-	beq	cr0, kernel
-	bge	cr7, kernel
-	lwz	r3, 0(r30)		! r3 = new handler
-	clrrwi.	r3, r3, 1
-	beq	cr0, kernel
-	/*
-	 * Block the signal while we build the bridge.  Prevents a
-	 * race if a signal arrives after we change the bridge but
-	 * before we change the action in the kernel.
-	 */
-	li	r4, 1
-	slw	r4, r4, r7
-	stw	r4, mynset(r1)		! mynmask = 1 << (signal - 1)
-	li	r3, SIG_BLOCK
-	la	r4, mynset(r1)
-	la	r5, myoset(r1)
-	stw	r3, 0(r1)
-	stw	r4, 4(r1)
-	stw	r5, 8(r1)
-	bl	_sigprocmask
-	/*
-	 * Point our bridge to the new signal handler.  Then copy the
-	 * new sigaction but point it to our bridge.
-	 */
-	lis	r6, hi16[handlers]
-	ori	r6, r6, lo16[handlers]
-	subi	r7, r29, 1
-	slwi	r7, r7, 2
-	lwz	r3, 0(r30)		! r3 = new handler
-	stwx	r3, r6, r7		! put it in array of handlers
-	lis	r3, hi16[bridge]
-	ori	r3, r3, lo16[bridge]
-	lwz	r4, 4(r30)
-	lwz	r5, 8(r30)
-	lwz	r6, 12(r30)
-	stw	r3, mynew(r1)		! sa_handler or sa_sigaction
-	stw	r4, mynew+4(r1)		! sa_mask
-	stw	r5, mynew+8(r1)		! sa_flags
-	stw	r6, mynew+12(r1)	! sa_restorer
-	la	r30, mynew(r1)
-kernel:
-	li	r3, __NR_sigaction
-	stw	r3, 0(r1)
-	stw	r29, 4(r1)
-	stw	r30, 8(r1)
-	stw	r31, 12(r1)
+1:	li	r4, SIG_SETMASK
+	li	r5, -1			/* mask signals 1 to 32 */
+	stw	r5, newmask(sp)
+	la	r5, newmask(sp)
+	la	r6, oldmask(sp)
+	stw	r4, 4(sp)		/* kept 0(sp) = __NR_sigprocmask */
+	stw	r5, 8(sp)
+	stw	r6, 12(sp)
 	bl	__syscall
+
 	/*
-	 * If we blocked the signal, then restore the old signal mask.
+	 * If the signal number is in range 1 to 31, and the new
+	 * handler is not SIG_DFL 0 or SIG_IGN 1, then we interpose
+	 * our bridge.
 	 */
-	lwz	r3, mynset(r1)
-	cmpwi	cr0, r3, 0
-	beq	cr0, fixold
-	li	r3, SIG_SETMASK
-	la	r4, myoset(r1)
-	li	r5, 0
-	stw	r3, 0(r1)
-	stw	r4, 4(r1)
-	stw	r5, 8(r1)
-	bl	_sigprocmask
-	/*
-	 * If the old sigaction is non-NULL and points to our bridge,
-	 * then point it to the signal handler.
-	 */
-fixold:
-	cmpwi	cr0, r31, 0
-	beq	cr0, leave
-	lis	r3, hi16[bridge]
-	ori	r3, r3, lo16[bridge]
-	lwz	r4, 0(r31)
-	cmpw	cr0, r3, r4
-	bne	cr0, leave
-	lis	r6, hi16[handlers]
-	ori	r6, r6, lo16[handlers]
-	subi	r7, r29, 1
-	slwi	r7, r7, 2
-	lwzx	r3, r6, r7	! get it from array of handlers
-	stw	r3, 0(r31)	! put it in old sigaction
-leave:
-	lwz	r0, mysave+12(r1)
-	lwz	r29, mysave(r1)
-	lwz	r30, mysave+4(r1)
-	lwz	r31, mysave+8(r1)
-	addi	r1, r1, mysize
+	lwz	r4, signum(sp)		/* keep r4 = signum */
+	addi	r5, r4, -1
+	cmplwi	r5, 30
+	bgt	2f			/* skip if out of range */
+
+	slwi	r5, r5, 2		/* r5 = sharray index */
+	lis	r6, ha16[sharray]
+	la	r6, lo16[sharray](r6)	/* r6 = sharray */
+	lwzx	r0, r6, r5
+	stw	r0, oldhandler(sp)	/* remember old handler */
+	lwz	r0, newact(sp)
+	mr.	r0, r0
+	beq	2f			/* skip if newact == NULL */
+
+	lwz	r3, mynewact+sa_handler(sp)
+	cmplwi	r3, 2			/* r3 = new handler */
+	blt	2f			/* skip if SIG_DFL or SIG_IGN */
+
+	stwx	r3, r6, r5		/* put new handler in sharray */
+	lis	r3, ha16[sigbridge]
+	la	r3, lo16[sigbridge](r3)
+	stw	r3, mynewact+sa_handler(sp)
+
+	/* Call the kernel's sigaction. */
+	/* sigaction(signum, &mynewact or NULL, &myoldact or NULL) */
+2:	li	r3, __NR_sigaction
+	lwz	r0, newact(sp)
+	mr.	r0, r0
+	beq	3f
+	la	r5, mynewact(sp)
+	b	4f
+3:	li	r5, 0
+4:	lwz	r0, oldact(sp)
+	mr.	r0, r0
+	beq	5f
+	la	r6, myoldact(sp)
+	b	6f
+5:	li	r6, 0
+6:	stw	r3, 0(sp)
+	stw	r4, 4(sp)		/* kept r4 = signum */
+	stw	r5, 8(sp)
+	stw	r6, 12(sp)
+	bl	__syscall
+	stw	r3, myret(sp)
+
+	/* Unblock signals by restoring old signal mask. */
+	li	r3, __NR_sigprocmask
+	li	r4, SIG_SETMASK
+	la	r5, oldmask(sp)
+	li	r6, 0
+	stw	r3, 0(sp)
+	stw	r4, 4(sp)
+	stw	r5, 8(sp)
+	stw	r6, 12(sp)
+	bl	__syscall
+
+	/* Copy oldact from stack (after unblocking BUS, SEGV). */
+	lwz	r3, oldact(sp)
+	mr.	r3, r3
+	beq	8f			/* skip if oldact == NULL */
+	lwz	r4, myoldact+sa_handler(sp)
+	lis	r5, ha16[sigbridge]
+	la	r5, lo16[sigbridge](r5)
+	cmpw	r4, r5
+	bne	7f
+	lwz	r4, oldhandler(sp)
+7:	lwz	r5, myoldact+sa_mask(sp)
+	lwz	r6, myoldact+sa_flags(sp)
+	lwz	r7, myoldact+sa_restorer(sp)
+	stw	r4, sa_handler(r3)
+	stw	r5, sa_mask(r3)
+	stw	r6, sa_flags(r3)
+	stw	r7, sa_restorer(r3)
+
+8:	lwz	r0, savelr(sp)
+	lwz	r3, myret(sp)
+	addi	sp, sp, signum
 	mtlr	r0
-	blr			! return from sigaction
+	blr
 
 /*
- * Linux calls bridge(signum) or bridge(signum, info, context) with
- * arguments in registers r3, r4, r5.
+ * Linux calls sigbridge(signum) or sigbridge(signum, info, context)
+ * with arguments in registers r3, r4, r5.
  */
-bridge:
+sigbridge:
 	mflr	r0
-	subi	r1, r1, 16
+	stwu	r3, -16(sp)	/* signal number */
+	stw	r4, 4(sp)	/* info */
+	stw	r5, 8(sp)	/* context */
 	stw	r0, 12(r1)
-	stw	r3, 0(r1)	! signal number
-	stw	r4, 4(r1)	! info
-	stw	r5, 8(r1)	! context
 
-	lis	r6, hi16[handlers]
-	ori	r6, r6, lo16[handlers]
-	subi	r7, r3, 1
-	slwi	r7, r7, 2
+	lis	r6, ha16[sharray - 4]	/* ha16: la adds a signed lo16 */
+	la	r6, lo16[sharray - 4](r6)
+	slwi	r7, r3, 2
 	lwzx	r6, r6, r7
 	mtctr	r6
-	bctrl			! call our signal handler
+	bctrl			/* call our signal handler */
 
-	lwz	r0, 12(r1)
+	lwz	r0, 12(sp)
 	addi	r1, r1, 16
 	mtlr	r0
-	blr			! return from bridge
+	blr			/* sigreturn(2) */
 
 .sect .bss
-handlers:
-	.space 4 * MAXSIG	! array of signal handlers
+sharray:
+	.space 4 * 31		/* handlers for signals 1 to 31 */
diff --git a/plat/linuxppc/libsys/trap.s b/plat/linuxppc/libsys/trap.s
deleted file mode 100644
index 93c5189a4..000000000
--- a/plat/linuxppc/libsys/trap.s
+++ /dev/null
@@ -1,112 +0,0 @@
-#
-! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/_syscall.s,v $
-! $State: Exp $
-! $Revision: 1.1 $
-
-! Declare segments (the order is important).
-
-.sect .text
-.sect .rom
-.sect .data
-.sect .bss
-
-.sect .text
-
-#define IFFALSE 4
-#define IFTRUE 12
-#define ALWAYS 20
-
-#define LT 0
-#define GT 1
-#define EQ 2
-#define OV 3
-
-EARRAY	=  0
-ERANGE	=  1
-ESET	=  2
-EIOVFL	=  3
-EFOVFL	=  4
-EFUNFL	=  5
-EIDIVZ	=  6
-EFDIVZ	=  7
-EIUND	=  8
-EFUND	=  9
-ECONV	= 10
-ESTACK  = 16
-EHEAP	= 17
-EILLINS = 18
-EODDZ	= 19
-ECASE	= 20
-EMEMFLT	= 21
-EBADPTR = 22
-EBADPC  = 23
-EBADLAE = 24
-EBADMON = 25
-EBADLIN = 26
-EBADGTO = 27
-EUNIMPL = 63		! unimplemented em-instruction called
-
-! EM trap handling.
-
-.define .trap_ecase
-.trap_ecase:
-	addi r3, r0, ECASE
-	b .trap
-
-.define .trap_earray
-.trap_earray:
-	addi r3, r0, EARRAY
-	b .trap
-
-.define .trap_erange
-.trap_erange:
-	addi r3, r0, ERANGE
-	b .trap
-
-.define .trp
-.define .trap
-.trp:
-.trap:
-	cmpi cr0, 0, r3, 15      ! traps >15 can't be ignored
-	bc IFTRUE, LT, 1f
-
-	addi r4, r0, 1
-	rlwnm r4, r4, r3, 0, 31  ! calculate trap bit
-	li32 r5, .ignmask
-	lwz r5, 0(r5)            ! load ignore mask
-	and. r4, r4, r5          ! compare
-	bclr IFFALSE, EQ, 0      ! return if non-zero
-
-1:
-	li32 r4, .trppc
-	lwz r5, 0(r4)            ! load user trap routine
-	or. r5, r5, r5           ! test
-	bc IFTRUE, EQ, fatal     ! if no user trap routine, bail out
-
-	addi r0, r0, 0
-	stw r0, 0(r4)            ! reset trap routine
-
-	mfspr r0, lr
-	stwu r0, -4(sp)          ! save old lr
-
-	stwu r3, -4(sp)
-	mtspr ctr, r5
-	bcctrl ALWAYS, 0, 0      ! call trap routine
-
-	lwz r0, 4(sp)            ! load old lr again
-	addi sp, sp, 8           ! retract over stack usage
-	bclr ALWAYS, 0, 0        ! return
-
-fatal:
-	addi r3, r0, 1
-	li32 r4, message
-	addi r5, r0, 6
-	addi r0, r0, 4           ! write()
-	sc 0
-
-	addi r0, r0, 1           ! exit()
-	sc 0
-
-.sect .rom
-message:
-	.ascii "TRAP!\n"
diff --git a/plat/osx386/boot.s b/plat/osx386/boot.s
index 932a716e9..c10045dd6 100644
--- a/plat/osx386/boot.s
+++ b/plat/osx386/boot.s
@@ -58,8 +58,6 @@ begdata:
 
 .sect .bss
 begbss:
-.define hol0
-.comm hol0, 8                ! line number and filename (for debugging)
 
 .define _errno
 .comm _errno, 4              ! Posix errno storage
diff --git a/plat/osx386/libsys/build.lua b/plat/osx386/libsys/build.lua
index 23e491f7a..3c2e96c3a 100644
--- a/plat/osx386/libsys/build.lua
+++ b/plat/osx386/libsys/build.lua
@@ -19,7 +19,8 @@ acklibrary {
 		"./sigaction.s",
 		"./stat.s",
 		"./write.s",
-		"plat/linux/libsys/errno.s",
+		"plat/linux/libsys/_hol0.s",
+		"plat/linux386/libsys/trapno.s",
 		"plat/osx/libsys/brk.c",
 		"plat/osx/libsys/creat.c",
 		"plat/osx/libsys/isatty.c",
diff --git a/plat/osxppc/boot.s b/plat/osxppc/boot.s
index e96198eb4..8b1b7ab75 100644
--- a/plat/osxppc/boot.s
+++ b/plat/osxppc/boot.s
@@ -29,7 +29,7 @@ begtext:
 
 	lwz r3, 0(sp)            ! r3 = argc
 	addi r4, sp, 4           ! r4 = argv
-	rlwinm r5, r3, 32-2, 2, 31 ! shift left 2 bits
+	slwi r5, r3, 2           ! r5 = argc * 4 (was a right shift)
 	add r5, r5, r4
-	addi r5, r5, 8           ! r5 = env
+	addi r5, r5, 4           ! skip argv's NULL; r5 = env
 
@@ -49,8 +49,6 @@ begdata:
 
 .sect .bss
 begbss:
-.define hol0
-.comm hol0, 8                ! line number and filename (for debugging)
 
 .define _errno
 .comm _errno, 4              ! Posix errno storage
diff --git a/plat/osxppc/descr b/plat/osxppc/descr
index 5f416c44c..77fc45260 100644
--- a/plat/osxppc/descr
+++ b/plat/osxppc/descr
@@ -10,16 +10,17 @@ var l={w}
 var la={w}
 var f={w}
 var fa={w}
+# Size 8 has alignment 4 in Mac OS, 8 in Linux.
 var d=8
-var da={d}
+var da=4
 var x=8
-var xa={x}
+var xa=4
 var ARCH=powerpc
 var PLATFORM=osxppc
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
 var CPP_F=-D__unix
 var ALIGN=-a0:4 -a1:4 -a2:4096 -a3:4 -b0:0x129c
-var MACHOPT_F=-m3
+var MACHOPT_F=-m2
 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr
 
 # Override the setting in fe so that files compiled for osxppc can see
diff --git a/plat/osxppc/libsys/build.lua b/plat/osxppc/libsys/build.lua
index 072730b7a..cff10f29b 100644
--- a/plat/osxppc/libsys/build.lua
+++ b/plat/osxppc/libsys/build.lua
@@ -19,7 +19,7 @@ acklibrary {
 		"./sigaction.s",
 		"./stat.s",
 		"./write.s",
-		"plat/linuxppc/libsys/trap.s",
+		"plat/linux/libsys/_hol0.s",
 		"plat/osx/libsys/brk.c",
 		"plat/osx/libsys/creat.c",
 		"plat/osx/libsys/isatty.c",
diff --git a/plat/osxppc/libsys/set_errno.s b/plat/osxppc/libsys/set_errno.s
index e406865a6..beb124a7c 100644
--- a/plat/osxppc/libsys/set_errno.s
+++ b/plat/osxppc/libsys/set_errno.s
@@ -1,7 +1,7 @@
 .sect .text
 .define .set_errno
 .set_errno:
-	li32 r10, _errno
-	stw r3, 0(r10)		! set errno
-	addi r3, r0, -1		! return -1
-	bclr 20, 0, 0
+	lis r4, ha16[_errno]
+	stw r3, lo16[_errno](r4)	! set errno
+	li r3, -1			! return -1
+	blr
diff --git a/plat/qemuppc/descr b/plat/qemuppc/descr
index f5191b249..9d1a80427 100644
--- a/plat/qemuppc/descr
+++ b/plat/qemuppc/descr
@@ -19,11 +19,8 @@ var PLATFORM=qemuppc
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
 var CPP_F=-D__unix
 var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x01000000
-var C_LIB={PLATFORMDIR}/libc-ansi.a
-# bitfields reversed for compatibility with (g)cc.
-var CC_ALIGN=-Vr
-var OLD_C_LIB={C_LIB}
-var MACHOPT_F=
+var MACHOPT_F=-m2
+var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr
 
 # Override the setting in fe so that files compiled for qemuppc can see
 # the platform-specific headers.
diff --git a/plat/qemuppc/libsys/trap.s b/plat/qemuppc/libsys/trap.s
deleted file mode 100644
index e00c4d561..000000000
--- a/plat/qemuppc/libsys/trap.s
+++ /dev/null
@@ -1,65 +0,0 @@
-#
-! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/_syscall.s,v $
-! $State: Exp $
-! $Revision: 1.1 $
-
-! Declare segments (the order is important).
-
-.sect .text
-.sect .rom
-.sect .data
-.sect .bss
-
-.sect .text
-
-#define IFFALSE 4
-#define IFTRUE 12
-#define ALWAYS 20
-
-#define LT 0
-#define GT 1
-#define EQ 2
-#define OV 3
-
-EARRAY	=  0
-ERANGE	=  1
-ESET	=  2
-EIOVFL	=  3
-EFOVFL	=  4
-EFUNFL	=  5
-EIDIVZ	=  6
-EFDIVZ	=  7
-EIUND	=  8
-EFUND	=  9
-ECONV	= 10
-ESTACK  = 16
-EHEAP	= 17
-EILLINS = 18
-EODDZ	= 19
-ECASE	= 20
-EMEMFLT	= 21
-EBADPTR = 22
-EBADPC  = 23
-EBADLAE = 24
-EBADMON = 25
-EBADLIN = 26
-EBADGTO = 27
-EUNIMPL = 63		! unimplemented em-instruction called
-
-.define .trap_ecase
-.trap_ecase:
-	b .trp
-
-.define .trap_earray
-.trap_earray:
-	b .trp
-
-.define .trap_erange
-.trap_erange:
-	b .trap
-
-.define .trp
-.define .trap
-.trp:
-.trap:
-	b .trp					! spin forever
diff --git a/tests/plat/_dummy_e.c b/tests/plat/_dummy_e.c
index 48104b5aa..39262eaaa 100644
--- a/tests/plat/_dummy_e.c
+++ b/tests/plat/_dummy_e.c
@@ -1,6 +1,6 @@
 #include "test.h"
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT(0 == 0);
diff --git a/tests/plat/bss_e.c b/tests/plat/bss_e.c
new file mode 100644
index 000000000..547e7f7f1
--- /dev/null
+++ b/tests/plat/bss_e.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+/*
+ * EM puts these variables in BSS.  Their initial values must be zero.
+ * Some platforms, like Linux, clear the BSS before they run the
+ * program.  For other platforms, like pc86, we clear the BSS in
+ * boot.s before we call _m_a_i_n.
+ */
+char c;
+int array[9000];
+short s;
+
+/* Bypasses the CRT, so there's no stdio. */
+void _m_a_i_n(void)
+{
+	int bad, i;
+
+	ASSERT(c == 0);
+	bad = 0;
+	for (i = 0; i < sizeof(array) / sizeof(array[0]); i++) {
+		if(array[i])
+			bad++;
+	}
+	ASSERT(bad == 0);
+	ASSERT(s == 0);
+	finished();
+}
diff --git a/tests/plat/bugs/bug-62-notvar_var_e.c b/tests/plat/bugs/bug-62-notvar_var_e.c
index d3813bb91..cde84eed1 100644
--- a/tests/plat/bugs/bug-62-notvar_var_e.c
+++ b/tests/plat/bugs/bug-62-notvar_var_e.c
@@ -40,7 +40,7 @@ void c(int i, int tru, int fal) {
   ASSERT((i != i) == fal);
 }
 
-/* Bypasses the CRT. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void) {
   a();
   b();
diff --git a/tests/plat/build.lua b/tests/plat/build.lua
index 0d3091559..26676b0b1 100644
--- a/tests/plat/build.lua
+++ b/tests/plat/build.lua
@@ -9,12 +9,14 @@ definerule("plat_testsuite",
 		-- Remember this is executed from the caller's directory; local
 		-- target names will resolve there.
 		local testfiles = filenamesof(
+			-- added structcopy_e.c
 			"tests/plat/*.c",
 			"tests/plat/*.e",
 			"tests/plat/*.p",
 			"tests/plat/b/*.b",
-			"tests/plat/bugs/bug-22-inn_mod.mod",
-			"tests/plat/bugs/bug-62-notvar_var_e.c"
+			"tests/plat/bugs/*.c",
+			"tests/plat/bugs/*.mod",
+			"tests/plat/m2/*.mod"
 		)
 
 		acklibrary {
diff --git a/tests/plat/csa_e.c b/tests/plat/csa_e.c
index 355b75ee7..470fbebc5 100644
--- a/tests/plat/csa_e.c
+++ b/tests/plat/csa_e.c
@@ -11,7 +11,7 @@ int csa(int i)
     }
 }
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT(csa(0) == 0);
@@ -23,4 +23,4 @@ void _m_a_i_n(void)
     ASSERT(csa(6) == 0);
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/csb_e.c b/tests/plat/csb_e.c
index c86d31fa6..38ce05402 100644
--- a/tests/plat/csb_e.c
+++ b/tests/plat/csb_e.c
@@ -11,7 +11,7 @@ int csa(int i)
     }
 }
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT(csa(0) == 0);
@@ -23,4 +23,4 @@ void _m_a_i_n(void)
     ASSERT(csa(600) == 0);
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/doublecmp_e.c b/tests/plat/doublecmp_e.c
index f6c1582dc..b6fe5bbad 100644
--- a/tests/plat/doublecmp_e.c
+++ b/tests/plat/doublecmp_e.c
@@ -4,7 +4,7 @@
 double one = 1.0;
 double zero = 0.0;
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT(zero == zero);
@@ -17,4 +17,4 @@ void _m_a_i_n(void)
     ASSERT(one  >= one);
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/dup_e.e b/tests/plat/dup_e.e
new file mode 100644
index 000000000..649589d84
--- /dev/null
+++ b/tests/plat/dup_e.e
@@ -0,0 +1,139 @@
+#
+    mes 2, EM_WSIZE, EM_PSIZE
+
+/*
+ * Tests _dup_ and _dus_ by loading 20 bytes from _src_, then making
+ * and checking some duplicates.  The compilers might never _dup_ or
+ * _dus_ with large sizes, so the compilers might work even if this
+ * test fails.  You can cheat this test if _cms_ always pushes zero.
+ */
+
+    exa src
+    exa size
+src
+    con 3593880729I4, 782166578I4, 4150666996I4, 2453272937I4, 3470523049I4
+size
+    con 20I2
+
+    exp $check
+    exp $_m_a_i_n
+    pro $_m_a_i_n, 0
+
+    /* Push 3 copies of src on stack. */
+    lae src
+    loi 20        /* 1st copy */
+    dup 20        /* 2nd copy */
+    lae size
+    loi 2
+    loc 2
+    loc EM_WSIZE
+    cuu
+    dus EM_WSIZE  /* 3rd copy */
+
+    cal $check
+    cal $finished
+    end /* $_m_a_i_n */
+
+    pro $check, 4 * EM_PSIZE + EM_WSIZE
+#define p1    (-1 * EM_PSIZE)
+#define p2    (-2 * EM_PSIZE)
+#define p3    (-3 * EM_PSIZE)
+#define p4    (-4 * EM_PSIZE)
+#define i     (p4 - EM_WSIZE)
+
+    /* Set pointers to all 4 copies. */
+    lae src
+    lal p4
+    sti EM_PSIZE  /* p4 = src */
+    lal 0
+    lal p3
+    sti EM_PSIZE  /* p3 = 3rd copy */
+    lal 20
+    lal p2
+    sti EM_PSIZE  /* p2 = 2nd copy */
+    lal 40
+    lal p1
+    sti EM_PSIZE  /* p1 = 1st copy */
+
+    /* Loop 20 times to verify each byte. */
+    loc 0
+    stl i
+4
+    lal p4
+    loi EM_PSIZE
+    loi 1         /* byte from src */
+    lal p3
+    loi EM_PSIZE
+    loi 1         /* byte from 3rd copy */
+    cms EM_WSIZE
+    zeq *3
+    loc (3 * 256)
+    lol i
+    adi EM_WSIZE  /* 0x300 + i */
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+3
+    lal p4
+    loi EM_PSIZE
+    loi 1         /* byte from src */
+    lal p2
+    loi EM_PSIZE
+    loi 1         /* byte from 2nd copy */
+    cms EM_WSIZE
+    zeq *2
+    loc (2 * 256)
+    lol i
+    adi EM_WSIZE  /* 0x200 + i */
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+2
+    lal p4
+    loi EM_PSIZE
+    loi 1         /* byte from src */
+    lal p1
+    loi EM_PSIZE
+    loi 1         /* byte from 1st copy */
+    cms EM_WSIZE
+    zeq *1
+    loc (1 * 256)
+    lol i
+    adi EM_WSIZE  /* 0x100 + i */
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+1
+    lal p4
+    loi EM_PSIZE
+    adp 1
+    lal p4
+    sti EM_PSIZE  /* increment p4 */
+    lal p3
+    loi EM_PSIZE
+    adp 1
+    lal p3
+    sti EM_PSIZE  /* increment p3 */
+    lal p2
+    loi EM_PSIZE
+    adp 1
+    lal p2
+    sti EM_PSIZE  /* increment p2 */
+    lal p1
+    loi EM_PSIZE
+    adp 1
+    lal p1
+    sti EM_PSIZE  /* increment p1 */
+    inl i
+    lol i
+    loc 20
+    blt *4        /* loop 20 times */
+
+    ret 0
+    end /* $check */
diff --git a/tests/plat/exg_e.e b/tests/plat/exg_e.e
new file mode 100644
index 000000000..455256483
--- /dev/null
+++ b/tests/plat/exg_e.e
@@ -0,0 +1,83 @@
+#
+    mes 2, EM_WSIZE, EM_PSIZE
+
+/*
+ * Tests _exg_ by loading 40 bytes from _src_, then exchanging 20 and
+ * 20 bytes, and checking the result.  The compilers might never _exg_
+ * large sizes, so the compilers might work even if this test fails.
+ * You can cheat this test if _cms_ always pushes zero.
+ */
+
+    exa src
+src
+    con 1539465570I4, 1344465418I4, 1317578918I4, 1163467696I4, 2645261331I4
+    con 3981585269I4, 1433968975I4, 4256886989I4, 4114909542I4, 1817334375I4
+
+    exp $check
+    exp $_m_a_i_n
+    pro $_m_a_i_n, 0
+
+    lae src
+    loi 40
+    exg 20
+    cal $check
+    cal $finished
+    end /* $_m_a_i_n */
+
+    pro $check, 2 * EM_PSIZE + EM_WSIZE
+#define p1    (-1 * EM_PSIZE)
+#define p2    (-2 * EM_PSIZE)
+#define i     (p2 - EM_WSIZE)
+
+    lae src
+    lal p2
+    sti EM_PSIZE  /* p2 = src */
+    lal 0
+    adp 20
+    lal p1
+    sti EM_PSIZE  /* p1 = exchanged copy + 20 */
+
+    /* Loop 40 times to verify each byte. */
+    loc 0
+    stl i
+1
+    lal p2
+    loi EM_PSIZE
+    loi 1         /* byte from src */
+    lal p1
+    loi EM_PSIZE
+    loi 1         /* byte from exchanged copy */
+    cms EM_WSIZE
+    zeq *2
+    lol i
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+2
+    lal p2
+    loi EM_PSIZE
+    adp 1
+    lal p2
+    sti EM_PSIZE  /* increment p2 */
+    lal p1
+    loi EM_PSIZE  /* p1 */
+    inl i
+    /* When i reaches 20, p1 would reach end of exchanged copy. */
+    lol i
+    loc 20
+    beq *3
+    adp 1         /* p1 + 1 */
+    bra *4
+3
+    adp -39       /* p1 - 39, beginning of exchanged copy */
+4
+    lal p1
+    sti EM_PSIZE  /* move p1 */
+    lol i
+    loc 40
+    blt *1
+
+    ret 0
+    end /* $check */
\ No newline at end of file
diff --git a/tests/plat/from_d_to_si_e.c b/tests/plat/from_d_to_si_e.c
index 7f51e6c5b..bc06c755c 100644
--- a/tests/plat/from_d_to_si_e.c
+++ b/tests/plat/from_d_to_si_e.c
@@ -8,7 +8,7 @@ double minusone = -1.0;
 double big = (double)INT_MAX;
 double minusbig = (double)INT_MIN;
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT((int)zero == 0);
@@ -18,4 +18,4 @@ void _m_a_i_n(void)
     ASSERT((int)minusbig == INT_MIN);
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/from_d_to_ui_e.c b/tests/plat/from_d_to_ui_e.c
index 811780b87..7d18ca9e5 100644
--- a/tests/plat/from_d_to_ui_e.c
+++ b/tests/plat/from_d_to_ui_e.c
@@ -6,7 +6,7 @@ double one = 1.0;
 double zero = 0.0;
 double big = (double)UINT_MAX;
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT((unsigned int)zero == 0);
@@ -14,4 +14,4 @@ void _m_a_i_n(void)
     ASSERT((unsigned int)big == UINT_MAX);
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/from_si_to_d_e.c b/tests/plat/from_si_to_d_e.c
index b6c7a25ba..172361dfa 100644
--- a/tests/plat/from_si_to_d_e.c
+++ b/tests/plat/from_si_to_d_e.c
@@ -8,7 +8,7 @@ int minusone = -1;
 int big = INT_MAX;
 int minusbig = INT_MIN;
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT((double)zero == 0.0);
@@ -18,4 +18,4 @@ void _m_a_i_n(void)
     /* ASSERT((double)minusbig == (double)INT_MIN); FIXME: fails for now */
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/from_ui_to_d_e.c b/tests/plat/from_ui_to_d_e.c
index b8e017c99..383d9afad 100644
--- a/tests/plat/from_ui_to_d_e.c
+++ b/tests/plat/from_ui_to_d_e.c
@@ -6,7 +6,7 @@ unsigned int one_u = 1;
 unsigned int zero_u = 0;
 unsigned int big_u = UINT_MAX;
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT((double)zero_u == 0.0);
@@ -14,4 +14,4 @@ void _m_a_i_n(void)
     ASSERT((double)big_u == (double)UINT_MAX);
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/inn_e.e b/tests/plat/inn_e.e
index a5aee02f5..543623b3f 100644
--- a/tests/plat/inn_e.e
+++ b/tests/plat/inn_e.e
@@ -14,6 +14,9 @@
     zeq *1
 
     loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
     cal $fail
     ass EM_WSIZE
 1
@@ -31,6 +34,9 @@
     zne *2
 
     loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
     cal $fail
     ass EM_WSIZE
 2
@@ -49,6 +55,9 @@
     zeq *3
 
     loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
     cal $fail
     ass EM_WSIZE
 3
@@ -67,11 +76,12 @@
     zne *4
 
     loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
     cal $fail
     ass EM_WSIZE
 4
 
     cal $finished
-    ret 0
-    
     end
diff --git a/tests/plat/intadd_e.c b/tests/plat/intadd_e.c
index 8e4868a62..94549814c 100644
--- a/tests/plat/intadd_e.c
+++ b/tests/plat/intadd_e.c
@@ -6,7 +6,7 @@ int one = 1;
 int zero = 0;
 int minusone = -1;
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT((two + one)      == 3);
@@ -28,4 +28,4 @@ void _m_a_i_n(void)
     ASSERT(((unsigned int)-1  + (unsigned int)two) == 1);
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/intcmp_e.c b/tests/plat/intcmp_e.c
index dd7f1da75..72cfc06b1 100644
--- a/tests/plat/intcmp_e.c
+++ b/tests/plat/intcmp_e.c
@@ -4,7 +4,7 @@
 int one = 1;
 int zero = 0;
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT(zero == zero);
@@ -62,4 +62,4 @@ void _m_a_i_n(void)
     ASSERT((unsigned int)1 >= (unsigned int)one);
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/intdiv_e.c b/tests/plat/intdiv_e.c
index c90964ced..cab76cdad 100644
--- a/tests/plat/intdiv_e.c
+++ b/tests/plat/intdiv_e.c
@@ -6,7 +6,7 @@ int two = 2;
 int one = 1;
 int zero = 0;
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT((three / two) == 1);
@@ -25,4 +25,4 @@ void _m_a_i_n(void)
     ASSERT((3 / -two) == -1);
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/intrem_e.c b/tests/plat/intrem_e.c
index 40f68d654..424152106 100644
--- a/tests/plat/intrem_e.c
+++ b/tests/plat/intrem_e.c
@@ -6,7 +6,7 @@ int two = 2;
 int one = 1;
 int zero = 0;
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT((three % two) == 1);
@@ -25,4 +25,4 @@ void _m_a_i_n(void)
     ASSERT((3 % -two) == 1);
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/intshift_e.c b/tests/plat/intshift_e.c
index 3cc6d52f9..08ef05ca1 100644
--- a/tests/plat/intshift_e.c
+++ b/tests/plat/intshift_e.c
@@ -6,7 +6,7 @@ int one = 1;
 int zero = 0;
 int minusone = -1;
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT((one     <<zero) == 1);
@@ -50,4 +50,4 @@ void _m_a_i_n(void)
     ASSERT(((unsigned int)minusone>>(unsigned int)1)  == (UINT_MAX>>1));
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/intsub_e.c b/tests/plat/intsub_e.c
index d8f67d3a3..b0cf08ae6 100644
--- a/tests/plat/intsub_e.c
+++ b/tests/plat/intsub_e.c
@@ -7,7 +7,7 @@ int one = 1;
 int zero = 0;
 int minusone = -1;
 
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
     ASSERT((two - one) == 1);
@@ -29,4 +29,4 @@ void _m_a_i_n(void)
     ASSERT(((unsigned int)1   - (unsigned int)two) == UINT_MAX);
 
     finished();
-}
\ No newline at end of file
+}
diff --git a/tests/plat/lib/test.c b/tests/plat/lib/test.c
index 426f9944a..6df3ee7d5 100644
--- a/tests/plat/lib/test.c
+++ b/tests/plat/lib/test.c
@@ -5,7 +5,7 @@
 void finished(void)
 {
     static const char s[] = "@@FINISHED\n";
-    write(1, s, sizeof(s));
+    write(1, s, sizeof(s)-1);
     _exit(0);
 }
 
@@ -16,7 +16,7 @@ void writehex(uint32_t code)
 
     do
     {
-        *--p = "0123456789abcdef"[code & 0xf];
+        *--p = "0123456789abcdef"[(unsigned int)code & 0xf];
         code >>= 4;
     }
     while (code > 0);
diff --git a/tests/plat/m2/ConvTest_mod.mod b/tests/plat/m2/ConvTest_mod.mod
new file mode 100644
index 000000000..9fa828af0
--- /dev/null
+++ b/tests/plat/m2/ConvTest_mod.mod
@@ -0,0 +1,36 @@
+MODULE ConvTest;
+FROM Conversions IMPORT
+  ConvertOctal, ConvertHex, ConvertCardinal, ConvertInteger;
+FROM Strings IMPORT CompareStr;
+FROM Test IMPORT fail, finished;
+
+(* Asserts a = b, or fails with code. *)
+PROCEDURE A(a, b: ARRAY OF CHAR; code: INTEGER);
+BEGIN
+  IF (CompareStr(a, b) # 0) OR (CompareStr(a, "wrong string") = 0) THEN
+    fail(code)
+  END
+END A;
+
+VAR
+  str: ARRAY [0..15] OF CHAR;
+BEGIN
+  ConvertOctal(  9, 6, str); A("    11", str, 1);
+  ConvertOctal( 59, 6, str); A("    73", str, 2);
+  ConvertOctal(278, 6, str); A("   426", str, 3);
+
+  ConvertHex(  9, 6, str); A("     9", str, 11H);
+  ConvertHex( 59, 6, str); A("    3B", str, 12H);
+  ConvertHex(278, 6, str); A("   116", str, 13H);
+
+  ConvertCardinal(  9, 6, str); A("     9", str, 21H);
+  ConvertCardinal( 59, 6, str); A("    59", str, 22H);
+  ConvertCardinal(278, 6, str); A("   278", str, 23H);
+
+  ConvertInteger(   9, 6, str); A("     9", str, 31H);
+  ConvertInteger(  59, 6, str); A("    59", str, 32H);
+  ConvertInteger( 278, 6, str); A("   278", str, 33H);
+  ConvertInteger(-424, 6, str); A("  -424", str, 34H);
+
+  finished;
+END ConvTest.
diff --git a/tests/plat/m2/NestProc_mod.mod b/tests/plat/m2/NestProc_mod.mod
new file mode 100644
index 000000000..d46731f55
--- /dev/null
+++ b/tests/plat/m2/NestProc_mod.mod
@@ -0,0 +1,132 @@
+(*
+ * Calls nested procedures.  The compiler emits the EM instructions
+ * _lxl_ and _lxa_ to access the variables in the statically enclosing
+ * procedures.
+ *
+ * You can cheat this test if a = b is TRUE for any a, b.
+ *)
+MODULE NestProc;
+FROM Test IMPORT fail, finished;
+
+(* Asserts cond, or fails with code. *)
+PROCEDURE A(cond: BOOLEAN; code: INTEGER);
+BEGIN
+  IF NOT cond THEN fail(code) END
+END A;
+
+TYPE
+  Set8 = SET OF [0..63];
+  (* Box has fields of size 8, 4, and 1. *)
+  Box = RECORD
+    huge: Set8;
+    big: LONGINT;
+    small: CHAR;
+    tiny: CHAR;
+  END;
+
+PROCEDURE First(a, b: INTEGER; in: Box): Box;
+  VAR c, d: INTEGER;
+      out: Box;
+
+  PROCEDURE Second(e: INTEGER);
+    VAR f: INTEGER;
+
+    PROCEDURE Third(g: INTEGER);
+      VAR h: INTEGER;
+
+      PROCEDURE CheckThird;
+      BEGIN
+        A(a = 1354, 31H);   (* lxa 3 *)
+        A(b = 3385, 32H);
+        A(c = 14349, 33H);  (* lxl 3 *)
+        A(d = 30989, 34H);
+        A(e = 28935, 35H);  (* lxa 2 *)
+        A(f = 13366, 36H);  (* lxl 2 *)
+        A(g = 7988, 37H);   (* lxa 1 *)
+        A(h = 11711, 38H);  (* lxl 1 *)
+      END CheckThird;
+
+      PROCEDURE Fourth(i: INTEGER);
+        VAR j: INTEGER;
+
+        PROCEDURE Fifth(k: INTEGER);
+          VAR l: INTEGER;
+
+          PROCEDURE Sixth(): INTEGER;
+          BEGIN
+            A(e = 2, 61H);      (* lxa 4 *)
+            A(f = 11703, 62H);  (* lxl 4 *)
+
+            b := 3385;   (* lxa 5 *)
+            d := 30989;  (* lxl 5 *)
+            e := 28935;  (* lxa 4 *)
+            f := 13366;  (* lxl 4 *)
+            CheckThird;
+
+            (* lxa 5 *)
+            A(in.huge = Set8{11, 12, 40, 40, 43, 56}, 63H);
+            A(in.big = 2130020019D, 64H);
+            A(in.small = 300C, 65H);
+            A(in.tiny = 175C, 66H);
+
+            (* lxl 5 *)
+            out.huge := Set8{8, 19, 36, 41, 47, 62};
+            out.big := 385360915D;
+            out.small := 366C;
+            out.tiny := 131C;
+
+            j := k;  (* lxl 2, lxa 1 *)
+            l := i;  (* lxl 1, lxa 2 *)
+            RETURN 5217;
+          END Sixth;
+
+          PROCEDURE TwiceSixth(): INTEGER;
+          BEGIN
+            (* lxa and lxl must follow the static chain from Sixth to
+             * Fifth, not dynamic chain from Sixth to TwiceSixth. *)
+            RETURN 2 * Sixth();
+          END TwiceSixth;
+
+        BEGIN (* Fifth *)
+          A(TwiceSixth() = 10434, 51H);  (* 2 * 5217 from Sixth *)
+          A(k = 11567, 52H);             (* argument passed by Fourth *)
+          A(l = 32557, 53H);             (* Sixth did l := i *)
+        END Fifth;
+
+      BEGIN (* Fourth *)
+        Fifth(11567);  (* k *)
+        A(i = 32557, 41H);
+        A(j = 11567, 42H);
+      END Fourth;
+
+    BEGIN (* Third *)
+      h := 11711;
+      Fourth(32557);  (* i *)
+    END Third;
+
+  BEGIN (* Second *)
+    f := 11703;
+    Third(7988);  (* g *)
+  END Second;
+
+BEGIN (* First *)
+  c := 14349;
+  d := 17850;
+  Second(2);  (* e *)
+  RETURN out
+END First;
+
+VAR
+  x: Box;
+BEGIN
+  x.huge := Set8{11, 12, 40, 40, 43, 56};
+  x.big := 2130020019D;
+  x.small := 300C;
+  x.tiny := 175C;
+  x := First(1354, 19516, x);  (* a, b, in *)
+  A(x.huge = Set8{8, 19, 36, 41, 47, 62}, 71H);
+  A(x.big = 385360915D, 72H);
+  A(x.small = 366C, 73H);
+  A(x.tiny = 131C, 74H);
+  finished;
+END NestProc.
diff --git a/tests/plat/m2/OpenArray_mod.mod b/tests/plat/m2/OpenArray_mod.mod
new file mode 100644
index 000000000..1aa219a55
--- /dev/null
+++ b/tests/plat/m2/OpenArray_mod.mod
@@ -0,0 +1,59 @@
+(*
+ * Passes an open array to a procedure.  The back end must implement
+ * some EM instructions for accessing arrays.
+ *)
+MODULE OpenArray;
+FROM Test IMPORT fail, finished;
+
+(* Asserts condition or fails with code. *)
+PROCEDURE A(cond: BOOLEAN; code: INTEGER);
+BEGIN
+  IF NOT cond THEN fail(code) END
+END A;
+
+(* Called as Modify(ary1, 1) or Modify(ary2, 2). *)
+PROCEDURE Modify(VAR ary: ARRAY OF INTEGER; what: INTEGER);
+  VAR hi: INTEGER;
+BEGIN
+  hi := what * 100H;
+
+  (* Indices must be from 0 to HIGH(ary). *)
+  A((what = 1) = (HIGH(ary) = 3), hi + 1);
+  A((what = 2) = (HIGH(ary) = 9), hi + 2);
+
+  (* ary[2] must equal ary1[3] or ary2[3]. *)
+  A((what = 1) = (ary[2] = 13), hi + 3);
+  A((what = 2) = (ary[2] = 37), hi + 4);
+
+  (* Modify some values. *)
+  IF HIGH(ary) >= 3 THEN ary[3] := 20 END;
+  IF HIGH(ary) >= 6 THEN ary[6] := 40 END;
+  IF HIGH(ary) >= 9 THEN ary[9] := 12 END;
+END Modify;
+
+VAR
+  ary1: ARRAY [1..4] OF INTEGER;
+  ary2: ARRAY [1..10] OF INTEGER;
+BEGIN
+  (* Initialize the arrays. *)
+  ary1[1] :=  6; ary1[2] :=  9; ary1[3] := 13; ary1[4] := 49;
+
+  ary2[1] := 56; ary2[2] := 79; ary2[3] := 37; ary2[4] :=  0;
+  ary2[5] := 70; ary2[6] := 62; ary2[7] := 64; ary2[8] := 92;
+  ary2[9] := 29; ary2[10] := 90;
+
+  (* Pass them as open arrays; Modify indexes them from 0 to HIGH(ary). *)
+  Modify(ary1, 1);
+  Modify(ary2, 2);
+
+  (* Check that ary1[4], ary2[4, 7, 10] have been modified. *)
+  A(ary1[1] =  6, 301H); A(ary1[2] =  9, 302H); A(ary1[3] = 13, 303H);
+  A(ary1[4] = 20, 304H);
+
+  A(ary2[1] = 56, 401H); A(ary2[2] = 79, 402H); A(ary2[3] = 37, 403H);
+  A(ary2[4] = 20, 404H); A(ary2[5] = 70, 405H); A(ary2[6] = 62, 406H);
+  A(ary2[7] = 40, 407H); A(ary2[8] = 92, 408H); A(ary2[9] = 29, 409H);
+  A(ary2[10] = 12, 40AH);
+
+  finished;
+END OpenArray.
diff --git a/tests/plat/m2/SemaTest_mod.mod b/tests/plat/m2/SemaTest_mod.mod
new file mode 100644
index 000000000..9ae395662
--- /dev/null
+++ b/tests/plat/m2/SemaTest_mod.mod
@@ -0,0 +1,157 @@
+(*
+ * Generates some integer sequences.  Each generator is a process that
+ * yields integers to the main process.  ACK switches processes by
+ * saving and restoring the stack.  It uses _lor_ and _str_ to save
+ * and restore the local base and frame pointer.
+ *)
+MODULE SemaTest;
+FROM Semaphores IMPORT Sema, NewSema, Down, Up, StartProcess;
+FROM Storage IMPORT ALLOCATE;
+FROM Test IMPORT fail, finished;
+
+TYPE
+  Generator = POINTER TO GeneratorRecord;
+  GeneratorRecord = RECORD
+    resume: Sema;       (* up when resuming generator *)
+    yield: Sema;        (* up when yielding value *)
+    value: INTEGER;
+  END;
+VAR
+  curgen: Generator;    (* current generator *)
+  startLock: Sema;      (* down when booting generator *)
+  startProc: PROC;
+  startSelf: Generator;
+
+PROCEDURE BootGenerator;
+  VAR pr: PROC; self: Generator;
+BEGIN
+  pr := startProc;
+  self := startSelf;
+  Up(startLock);
+  Down(self^.resume);   (* wait for first Resume *)
+  pr();
+END BootGenerator;
+
+PROCEDURE StartGenerator(gen: Generator; pr: PROC);
+BEGIN
+  gen^.resume := NewSema(0);
+  gen^.yield := NewSema(0);
+  Down(startLock);
+  startProc := pr;
+  startSelf := gen;
+  StartProcess(BootGenerator, 8192);
+END StartGenerator;
+
+PROCEDURE Resume(gen: Generator): INTEGER;
+  VAR self: Generator;
+BEGIN
+  self := curgen;
+  curgen := gen;
+  Up(gen^.resume);
+  Down(gen^.yield);     (* wait for Yield *)
+  curgen := self;
+  RETURN gen^.value
+END Resume;
+
+PROCEDURE Yield(i: INTEGER);
+  VAR self: Generator;
+BEGIN
+  self := curgen;
+  self^.value := i;
+  Up(self^.yield);      (* curgen becomes invalid *)
+  Down(self^.resume);   (* wait for Resume *)
+END Yield;
+
+PROCEDURE YieldHalfOf(i: INTEGER);
+BEGIN
+  Yield(i DIV 2);
+END YieldHalfOf;
+
+PROCEDURE Triangular;
+  (* Yields the triangular numbers, http://oeis.org/A000217 *)
+  VAR n: INTEGER;
+BEGIN
+  n := 0;
+  LOOP
+    YieldHalfOf(n * (n + 1));
+    INC(n);
+  END;
+END Triangular;
+
+PROCEDURE Pentagonal;
+  (* Yields the pentagonal numbers, http://oeis.org/A000326 *)
+  VAR n: INTEGER;
+BEGIN
+  n := 0;
+  LOOP
+    YieldHalfOf(n * (3 * n - 1));
+    INC(n);
+  END;
+END Pentagonal;
+
+PROCEDURE Odious;
+  (* Yields the odious numbers, http://oeis.org/A000069 *)
+  VAR b, i, n: INTEGER;
+BEGIN
+  n := 1;
+  LOOP
+    (* b := count bits in n *)
+    b := 0;
+    i := n;
+    WHILE i # 0 DO
+      INC(b, i MOD 2);
+      i := i DIV 2;
+    END;
+
+    IF (b MOD 2) = 1 THEN
+      Yield(n);
+    END;
+    INC(n);
+  END;
+END Odious;
+
+TYPE
+  Triple = ARRAY[1..3] OF INTEGER;
+PROCEDURE T(i1, i2, i3: INTEGER): Triple;
+  VAR t: Triple;
+BEGIN
+  t[1] := i1; t[2] := i2; t[3] := i3; RETURN t
+END T;
+
+CONST
+  two28 = 268435456D;   (* 0x1000_0000 *)
+VAR
+  a: ARRAY [0..9] OF Triple;
+  tri, pen, odi: Generator;
+  i, g1, g2, g3: INTEGER;
+BEGIN
+  startLock := NewSema(1);
+
+  ALLOCATE(tri, SIZE(GeneratorRecord));
+  ALLOCATE(pen, SIZE(GeneratorRecord));
+  ALLOCATE(odi, SIZE(GeneratorRecord));
+  StartGenerator(tri, Triangular);
+  StartGenerator(pen, Pentagonal);
+  StartGenerator(odi, Odious);
+
+  a[0] := T( 0,   0,  1);
+  a[1] := T( 1,   1,  2);
+  a[2] := T( 3,   5,  4);
+  a[3] := T( 6,  12,  7);
+  a[4] := T(10,  22,  8);
+  a[5] := T(15,  35, 11);
+  a[6] := T(21,  51, 13);
+  a[7] := T(28,  70, 14);
+  a[8] := T(36,  92, 16);
+  a[9] := T(45, 117, 19);
+
+  FOR i := 0 TO INTEGER(9) DO
+    g1 := Resume(tri);
+    g2 := Resume(pen);
+    g3 := Resume(odi);
+    IF g1 # a[i][1] THEN fail(1D * two28 + LONG(a[i][1])) END;
+    IF g2 # a[i][2] THEN fail(2D * two28 + LONG(a[i][2])) END;
+    IF g3 # a[i][3] THEN fail(3D * two28 + LONG(a[i][3])) END;
+  END;
+  finished;
+END SemaTest.
diff --git a/tests/plat/m2/Set100_mod.mod b/tests/plat/m2/Set100_mod.mod
new file mode 100644
index 000000000..3b200d318
--- /dev/null
+++ b/tests/plat/m2/Set100_mod.mod
@@ -0,0 +1,61 @@
+(*
+ * Operates on sets of 100 integers.  The compiler emits, and the back
+ * end must implement, the EM instructions for large sets.
+ *)
+MODULE Set100;
+FROM Test IMPORT fail, finished;
+
+(* Asserts condition or fails with code. *)
+PROCEDURE A(cond: BOOLEAN; code: INTEGER);
+BEGIN
+  IF NOT cond THEN fail(code) END
+END A;
+
+TYPE
+  Num = [1..100];
+  NumSet = SET OF Num;
+VAR
+  (* VAR, not CONST, so compiler can't do constant operations. *)
+  primes, teens, lowevens, eighties, nineties: NumSet;
+CONST
+  (* These are the expected results of some set operations. *)
+  primeteen = NumSet{13, 17, 19};
+  compeighties = NumSet{80..82, 84..88};
+  teenxoreven = NumSet{2, 4, 6, 8, 10, 12, 13, 15, 17, 19, 20};
+  eightiesnineties = NumSet{80..99};
+
+(* Checks that some set is equal to the expected result.  Also checks
+ * that the set is not equal to the other sets. *)
+PROCEDURE Check(set: NumSet; what: INTEGER);
+  VAR hi: INTEGER;
+BEGIN
+  hi := what * 100H;
+
+  (* The compiler uses cms in EM to check set equality. *)
+  A((what = 1) = (set = primeteen), hi + 1);
+  A((what = 2) = (set = compeighties), hi + 2);
+  A((what = 3) = (set = teenxoreven), hi + 3);
+  A((what = 4) = (set = eightiesnineties), hi + 4);
+END Check;
+
+PROCEDURE Range(min: Num; max: Num): NumSet;
+BEGIN
+  (* The compiler calls LtoUset in lang/m2/libm2/LtoUset.e *)
+  RETURN NumSet{min..max}
+END Range;
+
+BEGIN
+  primes := NumSet{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43,
+                   47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97};
+  teens := NumSet{13, 14, 15, 16, 17, 18, 19};
+  lowevens := NumSet{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
+  eighties := Range(80, 89);
+  nineties := Range(90, 99);
+
+  Check(primes * teens, 1);
+  Check(eighties - primes, 2);
+  Check(teens / lowevens, 3);
+  Check(eighties + nineties, 4);
+
+  finished;
+END Set100.
diff --git a/tests/plat/m2/StringTest_mod.mod b/tests/plat/m2/StringTest_mod.mod
new file mode 100644
index 000000000..41552aa7a
--- /dev/null
+++ b/tests/plat/m2/StringTest_mod.mod
@@ -0,0 +1,55 @@
+MODULE StringTest;
+FROM Strings IMPORT
+  Assign, Insert, Delete, Pos, Copy, Concat, Length, CompareStr;
+FROM Test IMPORT fail, finished;
+
+(* Asserts condition or fails with code. *)
+PROCEDURE A(cond: BOOLEAN; code: INTEGER);
+BEGIN
+  IF NOT cond THEN fail(code) END
+END A;
+
+VAR
+  small: ARRAY [0..3] OF CHAR;
+  big: ARRAY [0..99] OF CHAR;
+BEGIN
+  (* CompareStr *)
+  A(CompareStr("ablaze", "ablaze") = 0, 1);
+  A(CompareStr("ablaze", "abloom") < 0, 2);
+  A(CompareStr("abloom", "ablaze") > 0, 3);
+  A(CompareStr("abloom", "abloom") = 0, 4);
+
+  (* Assign, Insert, Delete *)
+  Assign("obsequiosity", small);
+  A(CompareStr("obsequiosity", small) > 0, 11H);
+  Assign("obsequiosity", big);
+  A(CompareStr("obsequiosity", big) = 0, 12H);
+  A(big[11] = 'y', 13H);
+  A(big[11] # 0C, 14H);
+  A(big[12] # 'y', 15H);
+  A(big[12] = 0C, 16H);
+  Insert(" omnihuman", big, 9);
+  A(CompareStr("obsequios omnihumanity", big) = 0, 17H);
+  Delete(big, 6, 15);
+  A(CompareStr("obsequy", big) = 0, 18H);
+
+  (* Pos, Copy *)
+  Assign("Now is the time for all good men to come...", big);
+  A(Pos("w", big) = 2, 21H);
+  A(Pos("t", big) = 7, 22H);
+  A(Pos("ti", big) = 11, 23H);
+  A(Pos("men", big) = 29, 24H);
+  A(Pos("women", big) > 42, 25H);
+  Copy(big, 29, 2, small);
+  A(CompareStr("me", small) = 0, 26H);
+
+  (* Concat, Length *)
+  Concat("pictorial", "ist", big);
+  A(CompareStr("pictorialist", big) = 0, 31H);
+  A(Length(big) = 12, 32H);
+  Concat("zit", "her", small);
+  A(CompareStr("zither", small) > 0, 33H);
+  A(Length(small) < 5, 34H);
+
+  finished;
+END StringTest.
diff --git a/tests/plat/rck_e.e b/tests/plat/rck_e.e
new file mode 100644
index 000000000..cd5c581df
--- /dev/null
+++ b/tests/plat/rck_e.e
@@ -0,0 +1,186 @@
+#
+    mes 2, EM_WSIZE, EM_PSIZE
+
+/*
+ * Uses _rck_ for range checks.  Catches the EM trap if a value is out
+ * of range, and continues with the next instruction after _rck_.
+ *
+ * Some back ends, like i80, ignore _rck_, so this test fails there.
+ */
+
+testnr
+    con 1         ; test number
+caught
+    con 0         ; number of caught traps
+
+    inp $next
+    inp $catch
+    inp $never
+    exp $_m_a_i_n
+    pro $_m_a_i_n,0
+
+    lim           ; load ignore mask
+    loc 2
+    and EM_WSIZE  ; check bit 1 << ERANGE
+    zeq *1        ; fail if ignoring ERANGE
+.1
+    rom 1I4
+    lae .1
+    loi 4
+    cal $fail
+    asp 4
+1
+
+    cal $next     ; increment testnr, catch next trap
+    loc 10125
+.2
+    rom 4283, 13644
+    lae .2
+    rck EM_WSIZE  ; testnr 2 in range
+    asp EM_WSIZE
+
+    cal $next
+    loc 4282
+    lae .2
+    rck EM_WSIZE  ; testnr 3 out of range
+    asp EM_WSIZE
+
+    cal $next
+    loc 4283
+    lae .2
+    rck EM_WSIZE  ; testnr 4 in range
+    asp EM_WSIZE
+
+    cal $next
+    loc 13644
+    lae .2
+    rck EM_WSIZE  ; testnr 5 in range
+    asp EM_WSIZE
+
+    cal $next
+    loc 13655
+    lae .2
+    rck EM_WSIZE  ; testnr 6 out of range
+    asp EM_WSIZE
+
+    cal $next
+    loc -13015
+.7
+    rom -31344, -1898
+    lae .7
+    rck EM_WSIZE  ; testnr 7 in range
+    asp EM_WSIZE
+
+    cal $next
+    loc 8580
+.8
+    rom -26315, 4588
+    lae .8
+    rck EM_WSIZE  ; testnr 8 out of range
+    asp EM_WSIZE
+
+    ; The last test raised a trap, so now there is no trap handler.
+    lpi $never
+    sig           ; push old trap handler
+    loc 0
+    loc EM_WSIZE
+    loc EM_PSIZE
+    cuu           ; push NULL pointer
+    cmp
+    zeq *17       ; fail unless old handler is NULL
+.17
+    rom 17I4
+    lae .17
+    loi 4
+    cal $fail
+    asp 4
+17
+    ; Change the trap handler from $never to $catch.
+    lpi $catch
+    sig
+    lpi $never
+    cmp
+    zeq *18
+.18
+    rom 18I4
+    lae .18
+    loi 4
+    cal $fail
+    asp 4
+18
+    ; Begin ignoring range traps.
+    loc 2         ; 1 << ERANGE
+    sim
+    loc 18
+    ste testnr
+    loc 8580
+    lae .8
+    rck EM_WSIZE  ; testnr 18 out of range but ignored
+
+    ; Fail if we caught the wrong number of traps.
+    loe caught
+    loc 3
+    beq *20
+.20
+    rom 20I4
+    lae .20
+    loi 4
+    cal $fail
+    asp 4
+20
+    cal $finished
+    end
+
+    pro $next,0
+    ine testnr    ; next test
+    lpi $catch
+    sig           ; catch next EM trap (only one trap)
+    asp EM_PSIZE
+    ret 0
+    end
+
+    pro $catch,0
+    ine caught    ; count this trap
+
+    lol 0         ; load trap number
+    loc 1
+    beq *1        ; fail if trap != ERANGE
+.101
+    rom 257I4
+    lae .101
+    loi 4
+    cal $fail
+    ; Wrong type of trap.  _rtt_ might not work, so exit now.
+    cal $finished
+1
+    ; Fail if the wrong test raised this trap.
+    loe testnr
+    loc 3
+    beq *2
+    loe testnr
+    loc 6
+    beq *2
+    loe testnr
+    loc 8
+    beq *2
+    loc 256
+    loe testnr
+    adi EM_WSIZE  ; 0x100 + testnr
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+2
+    rtt           ; return from trap handler
+    end
+
+    pro $never,0
+.200
+    rom 200I4
+    lae .200
+    loi 4
+    cal $fail
+    asp 4
+    rtt
+    end
diff --git a/tests/plat/rotate_e.e b/tests/plat/rotate_e.e
new file mode 100644
index 000000000..0698c58a0
--- /dev/null
+++ b/tests/plat/rotate_e.e
@@ -0,0 +1,223 @@
+#
+    mes 2, EM_WSIZE, EM_PSIZE
+
+/*
+ * Tests _rol_ (rotate left) and _ror_ (rotate right).  Several back
+ * ends provide _rol_ and _ror_, but as of year 2017, the compilers
+ * and optimizers had never emitted _rol_ or _ror_.
+ *
+ * By tradition, _rol_ and _ror_ can't rotate values shorter than the
+ * word size, or longer than 4 bytes.
+ *  - If word size is 2, then try rotating 2-byte and 4-byte values.
+ *  - If word size is 4, then try rotating 4-byte values.
+ *
+ * You can cheat this test if _cmu_ always pushes zero.
+ */
+
+#if EM_WSIZE == 2
+#define LEN2  4
+    exa table2
+    exa left2
+    exa right2
+table2         /* left, right */
+    con 12715U2  /*  0,  0 */
+    con 25430U2  /*  1, 15 */
+    con 43825U2  /*  8,  8 */
+    con 39125U2  /* 15,  1 */
+left2
+    con 0I2, 1I2, 8I2, 15I2
+right2
+    con 0I2, 15I2, 8I2, 1I2
+#endif
+
+#define LEN4  4
+    exa table4
+    exa left4
+    exa right4
+table4              /* left, right */
+    con  437223536U4  /*  0,  0 */
+    con  874447072U4  /*  1, 31 */
+    con 2154830351U4  /* 16, 16 */
+    con  218611768U4  /* 31,  1 */
+left4
+    con 0I2, 1I2, 16I2, 31I2
+right4
+    con 0I2, 31I2, 16I2, 1I2
+
+    exa val4
+    exa val4left7
+    exa val4right11
+val4
+    con 4283808839U4
+val4left7
+    con 2866684927U4
+val4right11
+    con 2298473143U4
+
+    exp $_m_a_i_n
+    pro $_m_a_i_n, EM_WSIZE
+#define i -EM_WSIZE
+
+#if EM_WSIZE == 2
+    /*
+     * Loop for LEN2 items in table2.
+     */
+    loc 0
+    stl i
+1
+    lae table2
+    loi 2         /* value to rotate */
+    lae left2
+    lol i
+    loc 1
+    sli EM_WSIZE
+    ads EM_WSIZE
+    loi 2         /* left distance */
+    rol 2         /* rotate left */
+    lae table2
+    lol i
+    loc 1
+    sli EM_WSIZE
+    ads EM_WSIZE
+    loi 2         /* expected result */
+    cmu 2
+    zeq *2
+    loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+2
+    lae table2
+    loi 2         /* value to rotate */
+    lae right2
+    lol i
+    loc 1
+    sli EM_WSIZE
+    ads EM_WSIZE
+    loi 2         /* right distance */
+    ror 2         /* rotate right */
+    lae table2
+    lol i
+    loc 1
+    sli EM_WSIZE
+    ads EM_WSIZE
+    loi 2         /* expected result */
+    cmu 2
+    zeq *3
+    loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+3
+    inl i         /* loop LEN2 times */
+    lol i
+    loc LEN2
+    blt *1
+#endif /* EM_WSIZE == 2 */
+
+    /*
+     * Loop for LEN4 items in table4.
+     */
+    loc 0
+    stl i
+4
+    lae table4
+    loi 4         /* value to rotate */
+    lae left4
+    lol i
+    loc 1
+    sli EM_WSIZE
+    ads EM_WSIZE
+    loi 2         /* left distance */
+    loc 2
+    loc EM_WSIZE
+    cii
+    rol 4         /* rotate left */
+    lae table4
+    lol i
+    loc 2
+    sli EM_WSIZE
+    ads EM_WSIZE
+    loi 4         /* expected result */
+    cmu 4
+    zeq *5
+    loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+5
+    lae table4
+    loi 4         /* value to rotate */
+    lae right4
+    lol i
+    loc 1
+    sli EM_WSIZE
+    ads EM_WSIZE
+    loi 2         /* right distance */
+    loc 2
+    loc EM_WSIZE
+    cii
+    ror 4         /* rotate right */
+    lae table4
+    lol i
+    loc 2
+    sli EM_WSIZE
+    ads EM_WSIZE
+    loi 4         /* expected result */
+    cmu 4
+    zeq *6
+    loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+6
+    inl i         /* loop LEN4 times */
+    lol i
+    loc LEN4
+    blt *4
+
+    /*
+     * Rotate 4-byte values by a constant distance, because this uses
+     * different rules in PowerPC ncg.
+     */
+    lae val4
+    loi 4
+    loc 7
+    rol 4         /* rotate left by 7 bits */
+    lae val4left7
+    loi 4
+    cmu 4
+    zeq *7
+    loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+7
+    lae val4
+    loi 4
+    loc 11
+    ror 4         /* rotate right by 11 bits */
+    lae val4right11
+    loi 4
+    cmu 4
+    zeq *8
+    loc __LINE__
+    loc EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+8
+
+    cal $finished
+    end
diff --git a/tests/plat/setjmp_c.c b/tests/plat/setjmp_c.c
new file mode 100644
index 000000000..2a514a03f
--- /dev/null
+++ b/tests/plat/setjmp_c.c
@@ -0,0 +1,58 @@
+#include <setjmp.h>
+#include "test.h"
+
+/*
+ * Sets i = 2 * i for each i in nums, until i == 0, but stops if
+ * 2 * i >= 1000.
+ *
+ * Uses setjmp() and longjmp() in libc.  For ACK's libc, the back end
+ * must provide EM's _gto_, and _gto_ must preserve the function
+ * return area.
+ */
+int nums1[]         = { 79, 245, 164, 403, 0};
+const int expect1[] = {158, 490, 328, 806, 0};
+int nums2[]         = {20, 221, 411, 643, 48, 272, 448, 0};
+const int expect2[] = {40, 442, 822, 643, 48, 272, 448, 0};
+int nums3[]         = {371, 265, 500, 124, 117, 0};
+const int expect3[] = {742, 530, 500, 124, 117, 0};
+int docount = 0;
+
+int twice(int i, jmp_buf esc) {
+	if (i >= 500)
+		longjmp(esc, i);
+	return 2 * i;
+}
+
+void donums(int *nums, jmp_buf esc) {
+	int *p;
+
+	docount++;
+	for (p = nums; *p != 0; p++) {
+		*p = twice(*p, esc);
+	}
+}
+
+int cknums(int *nums, const int *expect) {
+	jmp_buf env;
+	int ret;
+
+	ret = setjmp(env);
+	if (ret == 0)
+		donums(nums, env);
+	for (;;) {
+		ASSERT(*nums == *expect);
+		if (*expect == 0)
+			break;
+		nums++;
+		expect++;
+	}
+	return ret;
+}
+
+int main(void) {
+	ASSERT(cknums(nums1, expect1) == 0);
+	ASSERT(cknums(nums2, expect2) == 643);
+	ASSERT(cknums(nums3, expect3) == 500);
+	ASSERT(docount == 3);
+	finished();
+}
diff --git a/tests/plat/structcopy_e.c b/tests/plat/structcopy_e.c
new file mode 100644
index 000000000..74a9e2d30
--- /dev/null
+++ b/tests/plat/structcopy_e.c
@@ -0,0 +1,113 @@
+#include "test.h"
+
+/* ACK's C compiler uses EM's loi, sti, blm, or an inline loop to copy
+ * these structs.  The compiler doesn't call memcpy() or other
+ * functions in libc, so this test passes without linking the CRT.
+ */
+
+struct c5 {       /* not a whole number of words */
+	char one[5];
+};
+
+struct ii {       /* two words */
+	int one;
+	int two;
+};
+
+struct iii {      /* three words */
+	int one;
+	int two;
+	int three;
+};
+
+int equal5(char *a, char *b) {  /* a, b must have 5 characters */
+	int i;
+
+	for (i = 0; i < 5; i++)
+		if (a[i] != b[i]) return 0;
+	return 1;
+}
+
+struct c5 make_c5(char *str) {  /* str must have 5 characters */
+	struct c5 out;
+	int i;
+
+	for (i = 0; i < 5; i++)
+		out.one[i] = str[i];
+	return out;
+}
+
+struct ii make_ii(int i, int j) {
+	struct ii out;
+
+	out.one = i;
+	out.two = j;
+	return out;
+}
+
+struct iii make_iii(struct ii in, int k) {
+	struct iii out;
+
+	out.one = in.one;
+	out.two = in.two;
+	out.three = k;
+	return out;
+}
+
+struct c5 rotate_left_c5(struct c5 in) {
+	int i;
+	char c = in.one[0];
+
+	/* Modifies our copy of _in_, not caller's copy. */
+	for (i = 0; i < 4; i++)
+		in.one[i] = in.one[i + 1];
+	in.one[4] = c;
+	return in;
+}
+
+struct iii rotate_left_iii(struct iii in) {
+	int i = in.one;
+
+	/* Modifies our copy of _in_, not caller's copy. */
+	in.one = in.two;
+	in.two = in.three;
+	in.three = i;
+	return in;
+}
+
+/* Bypasses the CRT, so there's no stdio. */
+void _m_a_i_n(void) {
+	struct c5 earth, heart, dup_heart, rol_heart;
+	struct ii pair, dup_pair;
+	struct iii triple, dup_triple, rol_triple;
+
+	earth = make_c5("earth");
+	heart = make_c5("heart");
+	dup_heart = heart;
+	rol_heart = rotate_left_c5(heart);
+	ASSERT(equal5(earth.one, "earth"));
+	ASSERT(equal5(heart.one, "heart"));
+	ASSERT(equal5(dup_heart.one, "heart"));
+	ASSERT(equal5(rol_heart.one, "earth"));
+
+	pair = make_ii(29, 31);
+	dup_pair = pair;
+	triple = make_iii(pair, -9);
+	dup_triple = triple;
+	rol_triple = rotate_left_iii(triple);
+	ASSERT(pair.one == 29);
+	ASSERT(pair.two == 31);
+	ASSERT(dup_pair.one == 29);
+	ASSERT(dup_pair.two == 31);
+	ASSERT(triple.one == 29);
+	ASSERT(triple.two == 31);
+	ASSERT(triple.three == -9);
+	ASSERT(dup_triple.one == 29);
+	ASSERT(dup_triple.two == 31);
+	ASSERT(dup_triple.three == -9);
+	ASSERT(rol_triple.one == 31);
+	ASSERT(rol_triple.two == -9);
+	ASSERT(rol_triple.three == 29);
+
+	finished();
+}
diff --git a/util/ego/build.lua b/util/ego/build.lua
index 864447550..78895f508 100644
--- a/util/ego/build.lua
+++ b/util/ego/build.lua
@@ -3,6 +3,7 @@ local function build_ego(name)
 		name = name,
 		srcs = { "./"..name.."/*.c" },
 		deps = {
+			"./"..name.."/*.h",
 			"util/ego/share+lib",
 			"modules/src/em_data+lib",
 			"h+emheaders",
diff --git a/util/ego/ca/ca.c b/util/ego/ca/ca.c
index 095736665..1bf73d24a 100644
--- a/util/ego/ca/ca.c
+++ b/util/ego/ca/ca.c
@@ -72,6 +72,7 @@ proc_p* p_out;
 			{
 				/* register message without arguments */
 				oldline(l);
+				continue;
 			}
 			else
 			{
diff --git a/util/ego/cs/cs.c b/util/ego/cs/cs.c
index dfcccbbf7..068ddc3dc 100644
--- a/util/ego/cs/cs.c
+++ b/util/ego/cs/cs.c
@@ -25,7 +25,7 @@
 
 int Scs; /* Number of optimizations found. */
 
-STATIC cs_clear()
+STATIC void cs_clear()
 {
 	clr_avails();
 	clr_entities();
@@ -74,9 +74,7 @@ STATIC void cs_optimize(void *vp)
 	}
 }
 
-main(argc, argv)
-	int	argc;
-	char	*argv[];
+int main(int argc, char *argv[])
 {
 	Scs = 0;
 	go(argc, argv, no_action, cs_optimize, cs_machinit, no_action);
diff --git a/util/ego/cs/cs.h b/util/ego/cs/cs.h
index c749427a5..7a2ebde7b 100644
--- a/util/ego/cs/cs.h
+++ b/util/ego/cs/cs.h
@@ -88,12 +88,13 @@ struct occur {
 #define UNAIR_OP	6
 #define BINAIR_OP	7
 #define TERNAIR_OP	8
-#define KILL_ENTITY	9
-#define SIDE_EFFECTS	10
-#define FIDDLE_STACK	11
-#define IGNORE		12
-#define HOPELESS	13
-#define BBLOCK_END	14
+#define REMAINDER	9
+#define KILL_ENTITY	10
+#define SIDE_EFFECTS	11
+#define FIDDLE_STACK	12
+#define IGNORE		13
+#define HOPELESS	14
+#define BBLOCK_END	15
 
 struct avail {
 	avail_p	av_before;	/* Ptr to earlier discovered expressions. */
diff --git a/util/ego/cs/cs_aux.c b/util/ego/cs/cs_aux.c
index 337deeda7..aeb582c9b 100644
--- a/util/ego/cs/cs_aux.c
+++ b/util/ego/cs/cs_aux.c
@@ -11,8 +11,7 @@
 #include "cs.h"
 #include "cs_entity.h"
 
-offset array_elemsize(vn)
-	valnum vn;
+offset array_elemsize(valnum vn)
 {
 	/* Vn is the valuenumber of an entity that points to
 	 * an array-descriptor. The third element of this descriptor holds
@@ -36,14 +35,12 @@ offset array_elemsize(vn)
 	return aoff(enp->en_ext->o_dblock->d_values, 2);
 }
 
-occur_p occ_elem(i)
-	Lindex i;
+occur_p occ_elem(Lindex i)
 {
 	return (occur_p) Lelem(i);
 }
 
-entity_p en_elem(i)
-	Lindex i;
+entity_p en_elem(Lindex i)
 {
 	return (entity_p) Lelem(i);
 }
@@ -54,14 +51,14 @@ entity_p en_elem(i)
 
 STATIC valnum val_no;
 
-valnum newvalnum()
+valnum newvalnum(void)
 {
 	/* Return a completely new value number. */
 
 	return ++val_no;
 }
 
-start_valnum()
+void start_valnum(void)
 {
 	/* Restart value numbering. */
 
diff --git a/util/ego/cs/cs_aux.h b/util/ego/cs/cs_aux.h
index 11950540e..1ce9373a0 100644
--- a/util/ego/cs/cs_aux.h
+++ b/util/ego/cs/cs_aux.h
@@ -3,28 +3,28 @@
  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
  * See the copyright notice in the ACK home directory, in the file "Copyright".
  */
-extern offset	array_elemsize();	/* (valnum vm)
+extern offset	array_elemsize(valnum vn);
+					/*
 					 * Returns the size of array-elements,
 					 * if vn is the valuenumber of the
 					 * address of an array-descriptor.
 					 */
 
-extern occur_p	occ_elem();		/* (Lindex i)
+extern occur_p	occ_elem(Lindex i);	/*
 					 * Returns a pointer to the occurrence
 					 * of which i is an index in a set.
 					 */
 
-extern entity_p	en_elem();		/* (Lindex i)
+extern entity_p	en_elem(Lindex i);	/*
 					 * Returns a pointer to the entity
 					 * of which i is an index in a set.
 					 */
 
-extern valnum	newvalnum();		/* ()
+extern valnum	newvalnum(void);	/*
 					 * Returns a completely new
 					 * value number.
 					 */
 
-extern		start_valnum();		/* ()
+extern void	start_valnum(void);	/*
 					 * Restart value numbering.
 					 */
-
diff --git a/util/ego/cs/cs_avail.c b/util/ego/cs/cs_avail.c
index 1f766a85c..b28cc496a 100644
--- a/util/ego/cs/cs_avail.c
+++ b/util/ego/cs/cs_avail.c
@@ -22,8 +22,7 @@
 
 avail_p avails; /* The list of available expressions. */
 
-STATIC bool commutative(instr)
-	int instr;
+STATIC bool commutative(int instr)
 {
 	/* Is instr a commutative operator? */
 
@@ -37,9 +36,7 @@ STATIC bool commutative(instr)
 	}
 }
 
-STATIC bool same_avail(kind, avp1, avp2)
-	byte kind;
-	avail_p avp1, avp2;
+STATIC bool same_avail(byte kind, avail_p avp1, avail_p avp2)
 {
 	/* Two expressions are the same if they have the same operator,
 	 * the same size, and their operand(s) have the same value. 
@@ -57,6 +54,7 @@ STATIC bool same_avail(kind, avp1, avp2)
 		case UNAIR_OP:
 			return	avp1->av_operand == avp2->av_operand;
 		case BINAIR_OP:
+		case REMAINDER:
 			if (commutative(avp1->av_instr & BMASK))
 				return	avp1->av_oleft == avp2->av_oleft &&
 					avp1->av_oright == avp2->av_oright
@@ -75,8 +73,7 @@ STATIC bool same_avail(kind, avp1, avp2)
 	/* NOTREACHED */
 }
 
-STATIC void check_local(avp)
-	avail_p avp;
+STATIC void check_local(avail_p avp)
 {
 	/* Check if the local in which the result of avp was stored,
 	 * still holds this result. Update if not.
@@ -89,9 +86,7 @@ STATIC void check_local(avp)
 	}
 }
 
-STATIC entity_p result_local(size, l)
-	offset size;
-	line_p l;
+STATIC entity_p result_local(offset size, line_p l)
 {
 	/* If the result of an expression of size bytes is stored into a
 	 * local for which a registermessage was generated, return a pointer
@@ -114,9 +109,7 @@ STATIC entity_p result_local(size, l)
 	return (entity_p) 0;
 }
 
-STATIC copy_avail(kind, src, dst)
-	int kind;
-	avail_p src, dst;
+STATIC void copy_avail(int kind, avail_p src, avail_p dst)
 {
 	/* Copy some attributes from src to dst. */
 
@@ -132,6 +125,7 @@ STATIC copy_avail(kind, src, dst)
 			dst->av_operand = src->av_operand;
 			break;
 		case BINAIR_OP:
+		case REMAINDER:
 			dst->av_oleft = src->av_oleft;
 			dst->av_oright = src->av_oright;
 			break;
@@ -143,10 +137,7 @@ STATIC copy_avail(kind, src, dst)
 	}
 }
 
-avail_p av_enter(avp, ocp, kind)
-	avail_p avp;
-	occur_p ocp;
-	int kind;
+avail_p av_enter(avail_p avp, occur_p ocp, int kind)
 {
 	/* Put the available expression avp in the list,
 	 * if it is not already there.
@@ -171,7 +162,8 @@ avail_p av_enter(avp, ocp, kind)
 	/* Remember local, if any, that holds result. */
 	if (avp->av_instr != (byte) INSTR(last)) {
 		/* Only possible when instr is the implicit AAR in 
-		 * a LAR or SAR.
+		 * a LAR or SAR, or the implicit DVI in an RMI, or
+		 * DVU in RMU.
 		 */
 		ravp->av_saveloc = (entity_p) 0;
 	} else {
@@ -186,7 +178,7 @@ avail_p av_enter(avp, ocp, kind)
 	return ravp;
 }
 
-clr_avails()
+void clr_avails(void)
 {
 	/* Throw away the information about the available expressions. */
 
diff --git a/util/ego/cs/cs_avail.h b/util/ego/cs/cs_avail.h
index a0515353a..3364be2a2 100644
--- a/util/ego/cs/cs_avail.h
+++ b/util/ego/cs/cs_avail.h
@@ -5,7 +5,8 @@
  */
 extern avail_p	avails;		/* The set of available expressions. */
 
-extern avail_p	av_enter();	/* (avail_p avp, occur_p ocp, byte kind)
+extern avail_p	av_enter(avail_p avp, occur_p ocp, int kind);
+				/*
 				 * Puts the available expression in avp
 				 * in the list of available expressions,
 				 * if it is not already there. Add ocp to set of
@@ -18,6 +19,7 @@ extern avail_p	av_enter();	/* (avail_p avp, occur_p ocp, byte kind)
 				 * Returns a pointer into the list.
 				 */
 
-extern		clr_avails();	/* Release all space occupied by the old list
+extern void	clr_avails(void);
+				/* Release all space occupied by the old list
 				 * of available expressions.
 				 */
diff --git a/util/ego/cs/cs_debug.c b/util/ego/cs/cs_debug.c
index bf43d8c12..3d5509ddc 100644
--- a/util/ego/cs/cs_debug.c
+++ b/util/ego/cs/cs_debug.c
@@ -11,14 +11,14 @@
 #include "cs.h"
 #include "cs_aux.h"
 #include "cs_avail.h"
+#include "cs_debug.h"
 #include "cs_entity.h"
 
 #ifdef VERBOSE
 
 extern char em_mnem[]; /* The mnemonics of the EM instructions. */
 
-STATIC void showinstr(lnp)
-	line_p lnp;
+STATIC void showinstr(line_p lnp)
 {
 	/* Makes the instruction in `lnp' human readable. Only lines that
 	 * can occur in expressions that are going to be eliminated are
@@ -49,8 +49,7 @@ STATIC void showinstr(lnp)
 	fprintf(stderr,"\n");
 }
 
-SHOWOCCUR(ocp)
-	occur_p ocp;
+void SHOWOCCUR(occur_p ocp)
 {
 	/* Shows all instructions in an occurrence. */
 
@@ -69,8 +68,7 @@ SHOWOCCUR(ocp)
 
 #ifdef TRACE
 
-SHOWAVAIL(avp)
-	avail_p avp;
+void SHOWAVAIL(avail_p avp)
 {
 	/* Shows an available expression. */
 	showinstr(avp->av_found);
@@ -79,7 +77,7 @@ SHOWAVAIL(avp)
 
 }
 
-OUTAVAILS()
+void OUTAVAILS(void)
 {
 	register avail_p ravp;
 
@@ -110,7 +108,7 @@ STATIC char *enkinds[] = {
 	"ignore mask"
 };
 
-OUTENTITIES()
+void OUTENTITIES(void)
 {
 	register Lindex i;
 
diff --git a/util/ego/cs/cs_debug.h b/util/ego/cs/cs_debug.h
index e45287f9b..2d85ebfe8 100644
--- a/util/ego/cs/cs_debug.h
+++ b/util/ego/cs/cs_debug.h
@@ -5,7 +5,8 @@
  */
 #ifdef VERBOSE
 
-extern SHOWOCCUR();	/* (occur_p ocp)
+extern void SHOWOCCUR(occur_p ocp);
+			/*
 			 * Shows all lines in an occurrence.
 			 */
 
@@ -17,15 +18,18 @@ extern SHOWOCCUR();	/* (occur_p ocp)
 
 #ifdef TRACE
 
-extern OUTAVAILS();	/* ()
+extern void OUTAVAILS(void);
+			/*
 			 * Prints all available expressions.
 			 */
 
-extern OUTENTITIES();	/* ()
+extern void OUTENTITIES(void);
+			/*
 			 * Prints all entities.
 			 */
 
-extern SHOWAVAIL();	/* (avail_p avp)
+extern void SHOWAVAIL(avail_p avp);
+			/*
 			 * Shows an available expression.
 			 */
 
diff --git a/util/ego/cs/cs_elim.c b/util/ego/cs/cs_elim.c
index 0a253830f..b83371416 100644
--- a/util/ego/cs/cs_elim.c
+++ b/util/ego/cs/cs_elim.c
@@ -20,8 +20,7 @@
 #include "cs_partit.h"
 #include "cs_debug.h"
 
-STATIC dlink(l1, l2)
-	line_p l1, l2;
+STATIC void dlink(line_p l1, line_p l2)
 {
 	/* Doubly link the lines in l1 and l2. */
 
@@ -31,11 +30,10 @@ STATIC dlink(l1, l2)
 		l2->l_prev = l1;
 }
 
-STATIC remove_lines(first, last)
-	line_p first, last;
+STATIC void remove_lines(line_p first, line_p last)
 {
 	/* Throw away the lines between and including first and last.
-	 * Don't worry about any pointers; the (must) have been taken care of.
+	 * Don't worry about any pointers; they (must) have been taken care of.
 	 */
 	register line_p lnp, next;
 
@@ -46,8 +44,7 @@ STATIC remove_lines(first, last)
 	}
 }
 
-STATIC bool contained(ocp1, ocp2)
-	occur_p ocp1, ocp2;
+STATIC bool contained(occur_p ocp1, occur_p ocp2)
 {
 	/* Determine whether ocp1 is contained within ocp2. */
 
@@ -61,9 +58,7 @@ STATIC bool contained(ocp1, ocp2)
 	return FALSE;
 }
 
-STATIC delete(ocp, start)
-	occur_p ocp;
-	avail_p start;
+STATIC void delete(occur_p ocp, avail_p start)
 {
 	/* Delete all occurrences that are contained within ocp.
 	 * They must have been entered in the list before start:
@@ -90,10 +85,7 @@ STATIC delete(ocp, start)
 	}
 }
 
-STATIC complete_aar(lnp, instr, descr_vn)
-	line_p lnp;
-	int instr;
-	valnum descr_vn;
+STATIC void complete_aar(line_p lnp, int instr, valnum descr_vn)
 {
 	/* Lnp is an instruction that loads the address of an array-element.
 	 * Instr tells us what effect we should achieve; load (instr is op_lar)
@@ -109,15 +101,50 @@ STATIC complete_aar(lnp, instr, descr_vn)
 	dlink(lnp, lindir);
 }
 
-STATIC replace(ocp, tmp, avp)
-	occur_p ocp;
-	offset tmp;
-	avail_p avp;
+STATIC void complete_dv_as_rm(line_p lnp, avail_p avp, bool first)
+{
+	/* Complete a / b as a % b = a - b * (a / b). For the first
+	 * occurrence, lnp must stack q, where q = a / b. We prepend a
+	 * DUP to change postfix a b / into a b a b /, then append a
+	 * MLI/MLU and SBI/SBU to make a b a b / * -.
+	 *
+	 * For later occurrences, lnp must stack a b q.  We append the
+	 * MLI/MLU and SBI/SBU.
+	 */
+	line_p dv, dup, ml, sb;
+	offset size;
+	bool s;
+
+	size = avp->av_size;
+	s = (avp->av_instr == (byte) op_dvi);
+	assert(s || avp->av_instr == (byte) op_dvu);
+	if (first) {
+		/* Prepend our DUP to avp->av_found, to get before the
+		 * DVI if lnp points to the LOL in DVI STL LOL.
+		 */
+		dup = int_line(2 * size);
+		dup->l_instr = op_dup;
+		dv = avp->av_found;
+		dlink(dv->l_prev, dup);
+		dlink(dup, dv);
+	}
+	ml = int_line(size);
+	sb = int_line(size);
+	ml->l_instr = (s ? op_mli : op_mlu);
+	sb->l_instr = (s ? op_sbi : op_sbu);
+	dlink(sb, lnp->l_next);
+	dlink(ml, sb);
+	dlink(lnp, ml);
+}
+
+STATIC void replace(occur_p ocp, offset tmp, avail_p avp)
 {
 	/* Replace the lines in the occurrence in ocp by a load of the
 	 * temporary with offset tmp.
 	 */
-	register line_p lol, first, last;
+	avail_p ravp;
+	line_p lol, first, last;
+	int instr;
 
 	assert(avp->av_size == ws || avp->av_size == 2*ws);
 
@@ -130,22 +157,58 @@ STATIC replace(ocp, tmp, avp)
 	if (first->l_prev == (line_p) 0) ocp->oc_belongs->b_start = lol;
 	dlink(first->l_prev, lol);
 
-	if (avp->av_instr == (byte) op_aar) {
-		/* There may actually be a LAR or a SAR instruction; in that
-		 * case we have to complete the array-instruction.
-		 */
-		register int instr = INSTR(last);
+	instr = INSTR(last);
+	switch (avp->av_instr & 0377) {
+		case op_aar:
+			/* There may actually be a LAR or a SAR
+			 * instruction; in that case we have to
+			 * complete the array-instruction.
+			 */
+			if (instr != op_aar)
+				complete_aar(lol, instr, avp->av_othird);
+			break;
+		case op_dvi:
+			if (instr == op_rmi)
+				complete_dv_as_rm(lol, avp, FALSE);
+			break;
+		case op_dvu:
+			if (instr == op_rmu)
+				complete_dv_as_rm(lol, avp, FALSE);
+			break;
+	}
 
-		if (instr != op_aar) complete_aar(lol, instr, avp->av_othird);
+	/* Some occurrence rocp of an expression before avp might have
+	 * rocp->oc_lfirst == first.  If so, then we must set
+	 * rocp->oc_lfirst = lol before we throw away first.
+	 *
+	 * This is almost not possible, but it can happen in code with
+	 * expr1 LOI expr2 STI expr2 LOI, where the STI causes both
+	 * LOIs to have the same value number.  Then the first LOI
+	 * might come before the first expr2, so we might replace
+	 * expr2 before we replace expr2 LOI.  Then the occurrence of
+	 * expr2 LOI must not point to the eliminated lines of expr2.
+	 */
+	for (ravp = avp->av_before; ravp != (avail_p) 0;
+	     ravp = ravp->av_before) {
+		/* We only check LOI expressions. */
+		if (ravp->av_instr == op_loi) {
+			occur_p rocp;
+			Lindex i;
+
+			for (i = Lfirst(ravp->av_occurs); i != (Lindex) 0;
+			     i = Lnext(i, ravp->av_occurs)) {
+				rocp = occ_elem(i);
+				if (rocp->oc_lfirst == first)
+					rocp->oc_lfirst = lol;
+			}
+		}
 	}
 
 	/* Throw away the by now useless lines. */
 	remove_lines(first, last);
 }
 
-STATIC append(avp, tmp)
-	avail_p avp;
-	offset tmp;
+STATIC void append(avail_p avp, offset tmp)
 {
 	/* Avp->av_found points to a line with an operator in it. This 
 	 * routine emits a sequence of instructions that saves the result
@@ -155,6 +218,7 @@ STATIC append(avp, tmp)
 	 * within a lar or sar, we must first generate the aar.
 	 */
 	register line_p stl, lol;
+	register int instr;
 
 	assert(avp->av_size == ws || avp->av_size == 2*ws);
 
@@ -167,19 +231,30 @@ STATIC append(avp, tmp)
 	dlink(stl, lol);
 	dlink(avp->av_found, stl);
 
-	if (avp->av_instr == (byte) op_aar) {
-		register int instr = INSTR(avp->av_found);
-
-		if (instr != op_aar) {
-			complete_aar(lol, instr, avp->av_othird);
-			avp->av_found->l_instr = op_aar;
-		}
+	instr = INSTR(avp->av_found);
+	switch (avp->av_instr & 0377) {
+		case op_aar:
+			if (instr != op_aar) {
+				complete_aar(lol, instr, avp->av_othird);
+				avp->av_found->l_instr = op_aar;
+			}
+			break;
+		case op_dvi:
+			if (instr == op_rmi) {
+				complete_dv_as_rm(lol, avp, TRUE);
+				avp->av_found->l_instr = op_dvi;
+			}
+			break;
+		case op_dvu:
+			if (instr == op_rmu) {
+				complete_dv_as_rm(lol, avp, TRUE);
+				avp->av_found->l_instr = op_dvu;
+			}
+			break;
 	}
 }
 
-STATIC set_replace(avp, tmp)
-	avail_p avp;
-	offset tmp;
+STATIC void set_replace(avail_p avp, offset tmp)
 {
 	/* Avp->av_occurs is now a set of occurrences, each of which will be
 	 * replaced by a reference to a local.
@@ -199,8 +274,7 @@ STATIC set_replace(avp, tmp)
 	}
 }
 
-STATIC int reg_score(enp)
-	entity_p enp;
+STATIC int reg_score(entity_p enp)
 {
 	/* Enp is a local that will go into a register.
 	 * We return its score upto now.
@@ -209,10 +283,7 @@ STATIC int reg_score(enp)
 	return regv_arg(enp->en_loc, 4);
 }
 
-STATIC line_p gen_mesreg(off, avp, pp)
-	offset off;
-	avail_p avp;
-	proc_p pp;
+STATIC line_p gen_mesreg(offset off, avail_p avp, proc_p pp)
 {
 	/* Generate a register message for the local that will hold the
 	 * result of the expression in avp, at the appropriate place in
@@ -226,9 +297,7 @@ STATIC line_p gen_mesreg(off, avp, pp)
 	return reg;
 }
 
-STATIC change_score(mes, score)
-	line_p mes;
-	int score;
+STATIC void change_score(line_p mes, int score)
 {
 	/* Change the score in the register message in mes to score. */
 
@@ -242,8 +311,7 @@ STATIC change_score(mes, score)
 	ap->a_a.a_offset = score;
 }
 
-eliminate(pp)
-	proc_p pp;
+void eliminate(proc_p pp)
 {
 	/* Eliminate costly common subexpressions within procedure pp.
 	 * We scan the available expressions in - with respect to time found -
diff --git a/util/ego/cs/cs_elim.h b/util/ego/cs/cs_elim.h
index 4c6a61669..9c7d86477 100644
--- a/util/ego/cs/cs_elim.h
+++ b/util/ego/cs/cs_elim.h
@@ -3,7 +3,8 @@
  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
  * See the copyright notice in the ACK home directory, in the file "Copyright".
  */
-extern eliminate();	/* (proc_p pp)
+extern void eliminate(proc_p pp);
+			/*
 			 * Eliminate some of the recurrences of expressions
 			 * that were found by the valuenumbering
 			 * algorithm.
diff --git a/util/ego/cs/cs_entity.c b/util/ego/cs/cs_entity.c
index e4e49ff9a..a2cd5228d 100644
--- a/util/ego/cs/cs_entity.c
+++ b/util/ego/cs/cs_entity.c
@@ -18,8 +18,7 @@
 
 lset entities; /* Our pseudo symbol-table. */
 
-entity_p find_entity(vn)
-	valnum vn;
+entity_p find_entity(valnum vn)
 {
 	/* Try to find the entity with valuenumber vn. */
 
@@ -33,8 +32,7 @@ entity_p find_entity(vn)
 	return (entity_p) 0;
 }
 
-STATIC bool same_entity(enp1, enp2)
-	entity_p enp1, enp2;
+STATIC bool same_entity(entity_p enp1, entity_p enp2)
 {
 	if (enp1->en_kind != enp2->en_kind) return FALSE;
 	if (enp1->en_size != enp2->en_size) return FALSE;
@@ -69,8 +67,7 @@ STATIC bool same_entity(enp1, enp2)
 	}
 }
 
-STATIC copy_entity(src, dst)
-	entity_p src, dst;
+STATIC void copy_entity(entity_p src, entity_p dst)
 {
 	dst->en_static = src->en_static;
 	dst->en_kind = src->en_kind;
@@ -111,8 +108,7 @@ STATIC copy_entity(src, dst)
 	}
 }
 
-entity_p en_enter(enp)
-	register entity_p enp;
+entity_p en_enter(entity_p enp)
 {
 	/* Put the entity in enp in the entity set, if it is not already there.
 	 * Return pointer to stored entity.
@@ -133,7 +129,7 @@ entity_p en_enter(enp)
 	return new;
 }
 
-clr_entities()
+void clr_entities(void)
 {
 	/* Throw away all pseudo-symboltable information. */
 
diff --git a/util/ego/cs/cs_entity.h b/util/ego/cs/cs_entity.h
index c669efb58..0a222f96e 100644
--- a/util/ego/cs/cs_entity.h
+++ b/util/ego/cs/cs_entity.h
@@ -5,16 +5,19 @@
  */
 extern lset	entities;	/* The pseudo-symboltable. */
 
-extern entity_p	find_entity();	/* (valnum vn)
+extern entity_p	find_entity(valnum vn);
+				/*
 				 * Tries to find an entity with value number vn.
 				 */
 
-extern entity_p	en_enter();	/* (entity_p enp)
+extern entity_p	en_enter(entity_p enp);
+				/*
 				 * Enter the entity in enp in the set of
 				 * entities if it was not already there.
 				 */
 
-extern		clr_entities();	/* ()
+extern void	clr_entities(void);
+				/*
 				 * Release all space occupied by our
 				 * pseudo-symboltable.
 				 */
diff --git a/util/ego/cs/cs_getent.c b/util/ego/cs/cs_getent.c
index ef8694536..144750802 100644
--- a/util/ego/cs/cs_getent.c
+++ b/util/ego/cs/cs_getent.c
@@ -67,8 +67,7 @@ STATIC struct inf_entity {
 #define ENKIND(ip)	ip->inf_used
 #define SIZEINF(ip)	ip->inf_size
 
-STATIC struct inf_entity *getinf(n)
-	int n;
+STATIC struct inf_entity *getinf(int n)
 {
 	struct inf_entity *ip;
 
@@ -78,8 +77,7 @@ STATIC struct inf_entity *getinf(n)
 	return (struct inf_entity *) 0;
 }
 
-entity_p getentity(lnp, l_out)
-	line_p lnp, *l_out;
+entity_p getentity(line_p lnp, line_p *l_out)
 {
 	/* Build the entities where lnp refers to, and enter them.
 	 * If a token needs to be popped, the first line that pushed
diff --git a/util/ego/cs/cs_getent.h b/util/ego/cs/cs_getent.h
index e37e37404..f1c4e955d 100644
--- a/util/ego/cs/cs_getent.h
+++ b/util/ego/cs/cs_getent.h
@@ -3,7 +3,8 @@
  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
  * See the copyright notice in the ACK home directory, in the file "Copyright".
  */
-extern entity_p getentity();	/* (line_p lnp, *l_out)
+extern entity_p getentity(line_p lnp, line_p *l_out);
+				/*
 				 * Extract the entity lnp refers and enter it
 				 * in the table of entities. The main entity
 				 * lnp refers to is returned; sometimes there
diff --git a/util/ego/cs/cs_kill.c b/util/ego/cs/cs_kill.c
index 520366f23..fc3144397 100644
--- a/util/ego/cs/cs_kill.c
+++ b/util/ego/cs/cs_kill.c
@@ -16,9 +16,9 @@
 #include "cs_debug.h"
 #include "cs_avail.h"
 #include "cs_entity.h"
+#include "cs_kill.h"
 
-STATIC base_valno(enp)
-	entity_p enp;
+STATIC valnum base_valno(entity_p enp)
 {
 	/* Return the value number of the (base) address of an indirectly
 	 * accessed entity.
@@ -37,8 +37,7 @@ STATIC base_valno(enp)
 	/* NOTREACHED */
 }
 
-STATIC entity_p find_base(vn)
-	valnum vn;
+STATIC entity_p find_base(valnum vn)
 {
 	/* Vn is the valuenumber of the (base) address of an indirectly
 	 * accessed entity. Return the entity that holds this address
@@ -79,8 +78,7 @@ STATIC entity_p find_base(vn)
 	return (entity_p) 0;
 }
 
-STATIC bool obj_overlap(op1, op2)
-	obj_p op1, op2;
+STATIC bool obj_overlap(obj_p op1, obj_p op2)
 {
 	/* Op1 and op2 point to two objects in the same datablock.
 	 * Obj_overlap returns whether these objects might overlap.
@@ -97,8 +95,7 @@ STATIC bool obj_overlap(op1, op2)
 
 #define same_datablock(o1, o2)	((o1)->o_dblock == (o2)->o_dblock)
 
-STATIC bool addr_local(enp)
-	entity_p enp;
+STATIC bool addr_local(entity_p enp)
 {
 	/* Is enp the address of a stack item. */
 
@@ -108,17 +105,14 @@ STATIC bool addr_local(enp)
 		enp->en_kind == ENAARGBASE;
 }
 
-STATIC bool addr_external(enp)
-	entity_p enp;
+STATIC bool addr_external(entity_p enp)
 {
 	/* Is enp the address of an external. */
 
 	return enp != (entity_p) 0 && enp->en_kind == ENAEXTERNAL;
 }
 
-STATIC kill_external(obp, indir)
-	obj_p obp;
-	int indir;
+STATIC void kill_external(obj_p obp, int indir)
 {
 	/* A store is done via the object in obp. If this store is direct
 	 * we kill directly accessed entities in the same data block only
@@ -164,8 +158,7 @@ STATIC kill_external(obp, indir)
 	}
 }
 
-STATIC bool loc_overlap(enp1, enp2)
-	entity_p enp1, enp2;
+STATIC bool loc_overlap(entity_p enp1, entity_p enp2)
 {
 	/* Enp1 and enp2 point to two locals. Loc_overlap returns whether
 	 * they overlap.
@@ -184,9 +177,7 @@ STATIC bool loc_overlap(enp1, enp2)
 			enp1->en_loc + enp1->en_size > enp2->en_loc;
 }
 
-STATIC kill_local(enp, indir)
-	entity_p enp;
-	bool indir;
+STATIC void kill_local(entity_p enp, bool indir)
 {
 	/* This time a store is done into an ENLOCAL. */
 
@@ -234,7 +225,7 @@ STATIC kill_local(enp, indir)
 	}
 }
 
-STATIC void kill_sim()
+STATIC void kill_sim(void)
 {
 	/* A store is done into the ENIGNMASK. */
 
@@ -252,8 +243,7 @@ STATIC void kill_sim()
 	}
 }
 
-kill_direct(enp)
-	entity_p enp;
+void kill_direct(entity_p enp)
 {
 	/* A store will be done into enp. We must forget the values of all the
 	 * entities this one may overlap with.
@@ -274,8 +264,7 @@ kill_direct(enp)
 	}
 }
 
-kill_indir(enp)
-	entity_p enp;
+void kill_indir(entity_p enp)
 {
 	/* An indirect store is done, in an ENINDIR,
 	 * an ENOFFSETTED or an ENARRELEM.
@@ -306,7 +295,7 @@ kill_indir(enp)
 	}
 }
 
-kill_much()
+void kill_much(void)
 {
 	/* Kills all killable entities,
 	 * except the locals for which a registermessage was generated.
@@ -324,8 +313,7 @@ kill_much()
 	}
 }
 
-STATIC bool bad_procflags(pp)
-	proc_p pp;
+STATIC bool bad_procflags(proc_p pp)
 {
 	/* Return whether the flags about the procedure in pp indicate
 	 * that we have little information about it. It might be that
@@ -335,8 +323,7 @@ STATIC bool bad_procflags(pp)
 	return !(pp->p_flags1 & PF_BODYSEEN) || (pp->p_flags1 & PF_CALUNKNOWN);
 }
 
-STATIC kill_globset(s)
-	cset s;
+STATIC void kill_globset(cset s)
 {
 	/* S is a set of global variables that might be changed.
 	 * We act as if a direct store is done into each of them.
@@ -349,8 +336,7 @@ STATIC kill_globset(s)
 	}
 }
 
-kill_call(pp)
-	proc_p pp;
+void kill_call(proc_p pp)
 {
 	/* Kill everything that might be destroyed by calling
 	 * the procedure in pp.
@@ -367,7 +353,7 @@ kill_call(pp)
 	}
 }
 
-kill_all()
+void kill_all(void)
 {
 	/* Kills all entities. */
 
diff --git a/util/ego/cs/cs_kill.h b/util/ego/cs/cs_kill.h
index 6fa6859b8..347e3eb16 100644
--- a/util/ego/cs/cs_kill.h
+++ b/util/ego/cs/cs_kill.h
@@ -3,27 +3,32 @@
  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
  * See the copyright notice in the ACK home directory, in the file "Copyright".
  */
-extern	kill_call();	/* (proc_p pp)
+extern void kill_call(proc_p pp);
+			/*
 			 * Kill all entities that might have an other value
 			 * after execution of the procedure in pp.
 			 */
 
-extern	kill_much();	/* ()
+extern void kill_much(void);
+			/*
 			 * Kill all killable entities except those for which
 			 * a register message was generated.
 			 * Constants, addresses, etc are not killable.
 			 */
 
-extern	kill_indir();	/* (entity_p enp)
+extern void kill_indir(entity_p enp);
+			/*
 			 * Kill all entities that might have an other value
 			 * after indirect assignment to the entity in enp.
 			 */
 
-extern	kill_direct();	/* (entity_p enp)
+extern void kill_direct(entity_p enp);
+			/*
 			 * Kill all entities that might have an other value
 			 * after direct assignment to the entity in enp.
 			 */
 
-extern	kill_all();	/* ()
+extern void kill_all(void);
+			/*
 			 * Kill all entities.
 			 */
diff --git a/util/ego/cs/cs_partit.c b/util/ego/cs/cs_partit.c
index 9a1bde042..b020ebcfa 100644
--- a/util/ego/cs/cs_partit.c
+++ b/util/ego/cs/cs_partit.c
@@ -125,8 +125,8 @@ STATIC struct {
 /* nop */	HOPELESS,	XXX,	XXX,	XXX,	XXX,
 /* rck */	BBLOCK_END,	XXX,	XXX,	XXX,	XXX,
 /* ret */	BBLOCK_END,	XXX,	XXX,	XXX,	XXX,
-/* rmi */	BINAIR_OP,	ARGW,	ARGW,	ARGW,	ANY,
-/* rmu */	BINAIR_OP,	ARGW,	ARGW,	ARGW,	ANY,
+/* rmi */	REMAINDER,	ARGW,	ARGW,	ARGW,	ANY,
+/* rmu */	REMAINDER,	ARGW,	ARGW,	ARGW,	ANY,
 /* rol */	BINAIR_OP,	ARGW,	WS,	ARGW,	ANY,
 /* ror */	BINAIR_OP,	ARGW,	WS,	ARGW,	ANY,
 /* rtt */	BBLOCK_END,	XXX,	XXX,	XXX,	XXX,
@@ -178,8 +178,7 @@ STATIC struct {
 #define AVSIZE(l)	(info[INSTR(l)].i_av)
 #define REGTYPE(n)	(info[n].i_regtype)
 
-int instrgroup(lnp)
-	line_p lnp;
+int instrgroup(line_p lnp)
 {
 	if (INSTR(lnp) == op_lor && SHORT(lnp) == 1) {
 		/* We can't do anything with the stackpointer. */
@@ -192,8 +191,7 @@ int instrgroup(lnp)
 	return GROUP(INSTR(lnp));
 }
 
-bool stack_group(instr)
-	int instr;
+bool stack_group(int instr)
 {
 	/* Is this an instruction that only does something to the top of
 	 * the stack?
@@ -205,14 +203,14 @@ bool stack_group(instr)
 		case UNAIR_OP:
 		case BINAIR_OP:
 		case TERNAIR_OP:
+		case REMAINDER:
 			return TRUE;
 		default:
 			return FALSE;
 	}
 }
 
-STATIC offset argw(lnp)
-	line_p lnp;
+STATIC offset argw(line_p lnp)
 {
 	/* Some EM-instructions have their argument either on the same line,
 	 * or on top of the stack. We give up when the argument is on top of
@@ -228,8 +226,7 @@ STATIC offset argw(lnp)
 	}
 }
 
-offset op11size(lnp)
-	line_p lnp;
+offset op11size(line_p lnp)
 {
 	/* Returns the size of the first argument of
 	 * the unary operator in lnp.
@@ -248,8 +245,7 @@ offset op11size(lnp)
 	/* NOTREACHED */
 }
 
-offset op12size(lnp)
-	line_p lnp;
+offset op12size(line_p lnp)
 {
 	/* Same for first of binary. */
 
@@ -264,8 +260,7 @@ offset op12size(lnp)
 	/* NOTREACHED */
 }
 
-offset op22size(lnp)
-	line_p lnp;
+offset op22size(line_p lnp)
 {
 	switch (OP2SIZE(lnp)) {
 		case ARGW:
@@ -319,8 +314,7 @@ offset op33size(lnp)
 		return ws;
 }
 
-offset avsize(lnp)
-	line_p lnp;
+offset avsize(line_p lnp)
 {
 	/* Returns the size of the result of the instruction in lnp.
 	 * If the instruction is a conversion this size is given on the stack.
@@ -359,8 +353,7 @@ offset avsize(lnp)
 	/* NOTREACHED */
 }
 
-int regtype(instr)
-	byte instr;
+int regtype(byte instr)
 {
 	switch (REGTYPE(instr & BMASK)) {
 		case ANY:
diff --git a/util/ego/cs/cs_partit.h b/util/ego/cs/cs_partit.h
index 27e7a00bc..ffcc321cb 100644
--- a/util/ego/cs/cs_partit.h
+++ b/util/ego/cs/cs_partit.h
@@ -7,53 +7,63 @@
  * "manageable chunks.
  */
 
-extern int	instrgroup();	/* (line_p lnp)
+extern int	instrgroup(line_p lnp);
+				/*
 				 * Return the group into which the instruction
 				 * in lnp belongs to.
 				 */
 
-extern bool	stack_group();	/* (int instr)
+extern bool	stack_group(int instr);
+				/*
 				 * Return whether instr is an instruction that
 				 * only changes the state of the stack, i.e.
 				 * is a "true" operator.
 				 */
 
-extern offset	op11size();	/* (line_p lnp)
+extern offset	op11size(line_p lnp);
+				/*
 				 * Return the size of the operand of the unary
 				 * operator in lnp.
 				 */
 
-extern offset	op12size();	/* (line_p lnp)
+extern offset	op12size(line_p lnp);
+				/*
 				 * Return the size of the first operand of the
 				 * binary operator in lnp.
 				 */
 
-extern offset	op22size();	/* (line_p lnp)
+extern offset	op22size(line_p lnp);
+				/*
 				 * Return the size of the second operand of the
 				 * binary operator in lnp.
 				 */
 
-extern offset	op13size();	/* (line_p lnp)
+extern offset	op13size(line_p lnp);
+				/*
 				 * Return the size of the first operand of the
 				 * ternary operator in lnp.
 				 */
 
-extern offset	op23size();	/* (line_p lnp)
+extern offset	op23size(line_p lnp);
+				/*
 				 * Return the size of the second operand of the
 				 * ternary operator in lnp.
 				 */
 
-extern offset	op33size();	/* (line_p lnp)
+extern offset	op33size(line_p lnp);
+				/*
 				 * Return the size of the third operand of the
 				 * ternary operator in lnp.
 				 */
 
-extern offset	avsize();	/* (line_p lnp)
+extern offset	avsize(line_p lnp);
+				/*
 				 * Return the size of the result of the
 				 * operator in lnp.
 				 */
 
-extern int	regtype();	/* (byte instr)
+extern int	regtype(byte instr);
+				/*
 				 * Return in what kind of machine-register
 				 * the result of instr should be stored:
 				 * pointer, float, or any.
diff --git a/util/ego/cs/cs_profit.c b/util/ego/cs/cs_profit.c
index 259a6114d..2efabcb03 100644
--- a/util/ego/cs/cs_profit.c
+++ b/util/ego/cs/cs_profit.c
@@ -14,6 +14,7 @@
 #include "../share/cset.h"
 #include "../share/lset.h"
 #include "cs.h"
+#include "cs_alloc.h"
 #include "cs_aux.h"
 #include "cs_debug.h"
 #include "cs_avail.h"
@@ -25,10 +26,9 @@ STATIC cset	forbidden;
 STATIC cset	sli_counts;
 STATIC short	LX_threshold;
 STATIC short	AR_limit;
+STATIC bool	RM_to_DV;
 
-STATIC get_instrs(f, s_p)
-	FILE *f;
-	cset *s_p;
+STATIC void get_instrs(FILE *f, cset *s_p)
 {
 	/* Read a set of integers from inputfile f into *s_p.
 	 * Such a set must be delimited by a negative number.
@@ -42,9 +42,7 @@ STATIC get_instrs(f, s_p)
 	}
 }
 
-STATIC choose_cset(f, s_p, max)
-	FILE *f;
-	cset *s_p;
+STATIC void choose_cset(FILE *f, cset *s_p, int max)
 {
 	/* Read two compact sets of integers from inputfile f.
 	 * Choose the first if we optimize with respect to time,
@@ -101,6 +99,12 @@ void cs_machinit(void *vp)
 	fscanf(f, "%d", &space);
 	AR_limit = space;
 
+	/* Read whether to convert a remainder RMI/RMU to a division
+	 * DVI/DVU using the formula a % b = a - b * (a / b).
+	 */
+	fscanf(f, "%d %d", &time, &space);
+	RM_to_DV = time_space_ratio >= 50 ? time : space;
+
 	/* Read for what counts we must not eliminate an SLI instruction
 	 * when it is part of an array-index computation.
 	 */
@@ -115,8 +119,27 @@ void cs_machinit(void *vp)
 	choose_cset(f, &forbidden, sp_lmnem);
 }
 
-STATIC bool sli_no_eliminate(lnp)
-	line_p lnp;
+bool may_become_aar(avail_p avp)
+{
+	/* Check whether it is desirable to treat a LAR or SAR as an
+	 * AAR LOI/STI. This depends on the size of the array-elements.
+	 */
+	offset sz;
+
+	sz = array_elemsize(avp->av_othird);
+	if (sz == UNKNOWN_SIZE)
+		return FALSE;	/* never LOI/STI an unknown size */
+	if (time_space_ratio < 50)	/* optimizing more for space than time */
+		return sz <= AR_limit;	/* only elements within AR_limit */
+	return TRUE;
+}
+
+bool may_become_dv(void)
+{
+	return RM_to_DV;	/* time or space flag, picked in cs_machinit() */
+}
+
+STATIC bool sli_no_eliminate(line_p lnp)
 {
 	/* Return whether the SLI-instruction in lnp is part of
 	 * an array-index computation, and should not be eliminated.
@@ -130,8 +153,7 @@ STATIC bool sli_no_eliminate(lnp)
 		;
 }
 
-STATIC bool gains(avp)
-	avail_p avp;
+STATIC bool gains(avail_p avp)
 {
 	/* Return whether we can gain something, when we eliminate
 	 * an expression such as in avp. We just glue together some
@@ -161,12 +183,12 @@ STATIC bool gains(avp)
 	return TRUE;
 }
 
-STATIC bool okay_lines(avp, ocp)
-	avail_p avp;
-	occur_p ocp;
+STATIC bool okay_lines(avail_p avp, occur_p ocp)
 {
+	/* Check whether all lines in this occurrence can in
+	 * principle be eliminated; no stores, messages, calls etc.
+	 */
 	register line_p lnp, next;
-	offset sz;
 
 	for (lnp = ocp->oc_lfirst; lnp != (line_p) 0; lnp = next) {
 		next = lnp != ocp->oc_llast ? lnp->l_next : (line_p) 0;
@@ -179,18 +201,6 @@ STATIC bool okay_lines(avp, ocp)
 				return FALSE;
 		}
 	}
-	/* All lines in this occurrence can in principle be eliminated;
-	 * no stores, messages, calls etc.
-	 * We now check whether it is desirable to treat a LAR or a SAR
-	 * as an AAR LOI/STI. This depends on the size of the array-elements.
-	 */
-	if (INSTR(ocp->oc_llast) == op_lar || INSTR(ocp->oc_llast) == op_sar) {
-		sz = array_elemsize(avp->av_othird);
-		if (sz == UNKNOWN_SIZE) return FALSE;
-		if (avp->av_instr == (byte) op_aar && time_space_ratio < 50) {
-			return sz <= AR_limit;
-		}
-	}
 	return TRUE;
 }
 
diff --git a/util/ego/cs/cs_profit.h b/util/ego/cs/cs_profit.h
index 7ec5e3c17..3d1972d24 100644
--- a/util/ego/cs/cs_profit.h
+++ b/util/ego/cs/cs_profit.h
@@ -7,6 +7,17 @@ void cs_machinit(void *vp);	/* (FILE *f)
 				 * Read phase-specific information from f.
 				 */
 
+bool may_become_aar(avail_p avp);
+				/*
+				 * Return whether a LAR/SAR may become
+				 * an AAR LOI/STI.
+				 */
+
+bool may_become_dv(void);	/*
+				 * Return whether an RMI/RMU may become
+				 * a DVI/DVU: a % b = a - (a / b * b).
+				 */
+
 bool desirable(avail_p avp);	/*
 				 * Return whether it is desirable to eliminate
 				 * the recurrences of the expression in avp.
diff --git a/util/ego/cs/cs_stack.c b/util/ego/cs/cs_stack.c
index 7927438a5..670955d1e 100644
--- a/util/ego/cs/cs_stack.c
+++ b/util/ego/cs/cs_stack.c
@@ -23,8 +23,7 @@ STATIC token_p		free_token;
 #define Stack_empty()	(free_token == &Stack[0])
 #define Top		(free_token - 1)
 
-Push(tkp)
-	token_p tkp;
+void Push(token_p tkp)
 {
 	if (tkp->tk_size == UNKNOWN_SIZE) {
 		Empty_stack(); /* The contents of the Stack is useless. */
@@ -39,10 +38,7 @@ Push(tkp)
 
 #define WORD_MULTIPLE(n)	((n / ws) * ws + ( n % ws ? ws : 0 ))
 
-void
-Pop(tkp, size)
-	token_p tkp;
-	offset size;
+void Pop(token_p tkp, offset size)
 {
 	/* Pop a token with given size from the valuenumber stack into tkp. */
 
@@ -85,8 +81,7 @@ Pop(tkp, size)
 	}
 }
 
-Dup(lnp)
-	line_p lnp;
+void Dup(line_p lnp)
 {
 	/* Duplicate top bytes on the Stack. */
 
@@ -132,7 +127,7 @@ Dup(lnp)
 	}
 }
 
-clr_stack()
+void clr_stack(void)
 {
 	free_token = &Stack[0];
 }
diff --git a/util/ego/cs/cs_stack.h b/util/ego/cs/cs_stack.h
index 64d59cf90..e5a79b858 100644
--- a/util/ego/cs/cs_stack.h
+++ b/util/ego/cs/cs_stack.h
@@ -3,21 +3,25 @@
  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
  * See the copyright notice in the ACK home directory, in the file "Copyright".
  */
-extern	Push();		/* (token_p tkp)
+extern void Push(token_p tkp);
+			/*
 			 * Push the token in tkp on the fake-stack.
 			 */
 
-extern	Pop();		/* (token_p tkp; offset size)
+extern void Pop(token_p tkp, offset size);
+			/*
 			 * Pop a token of size bytes from the fake-stack
 			 * into tkp. If such a token is not there
 			 * we put a dummy in tkp and adjust the fake-stack.
 			 */
 
-extern	Dup();		/* (line_p lnp)
+extern void Dup(line_p lnp);
+			/*
 			 * Reflect the changes made by the dup-instruction
 			 * in lnp to the EM-stack into the fake-stack.
 			 */
 
-extern	clr_stack();	/* ()
+extern void clr_stack(void);
+			/*
 			 * Clear the fake-stack.
 			 */
diff --git a/util/ego/cs/cs_vnm.c b/util/ego/cs/cs_vnm.c
index a4813411c..435dd4658 100644
--- a/util/ego/cs/cs_vnm.c
+++ b/util/ego/cs/cs_vnm.c
@@ -20,10 +20,9 @@
 #include "cs_kill.h"
 #include "cs_partit.h"
 #include "cs_getent.h"
+#include "cs_profit.h"
 
-STATIC push_entity(enp, lfirst)
-	entity_p enp;
-	line_p lfirst;
+STATIC void push_entity(entity_p enp, line_p lfirst)
 {
 	/* Build token and Push it. */
 
@@ -35,10 +34,8 @@ STATIC push_entity(enp, lfirst)
 	Push(&tk);
 }
 
-STATIC put_expensive_load(bp, lnp, lfirst, enp)
-	bblock_p bp;
-	line_p lnp, lfirst;
-	entity_p enp;
+STATIC void put_expensive_load(bblock_p bp, line_p lnp, line_p lfirst,
+			       entity_p enp)
 {
 	struct avail av;
 	occur_p	ocp;
@@ -52,16 +49,15 @@ STATIC put_expensive_load(bp, lnp, lfirst, enp)
 	av_enter(&av, ocp, EXPENSIVE_LOAD);
 }
 
-STATIC put_aar(bp, lnp, lfirst, enp)
-	bblock_p bp;
-	line_p lnp, lfirst;
-	entity_p enp;
+STATIC void put_aar(bblock_p bp, line_p lnp, line_p lfirst, entity_p enp)
 {
-	/* Enp points to an ENARRELEM. We do as if its address was computed. */
-
+	/* Enter the implicit AAR in a LAR or SAR, where enp points to
+	 * the ENARRELEM, and AAR computes its address.
+	 */
 	struct avail av;
 	occur_p	ocp;
 
+	assert(INSTR(lnp) == op_lar || INSTR(lnp) == op_sar);
 	assert(enp->en_kind == ENARRELEM);
 	av.av_instr = op_aar;
 	av.av_size = ps;
@@ -69,14 +65,17 @@ STATIC put_aar(bp, lnp, lfirst, enp)
 	av.av_osecond = enp->en_index;
 	av.av_othird = enp->en_adesc;
 
-	ocp = newoccur(lfirst, lnp, bp);
-
-	av_enter(&av, ocp, TERNAIR_OP);
+	/* Before we enter an available AAR, we must check whether we
+	 * may convert this LAR/SAR to AAR LOI/STI.  This is so we
+	 * don't LOI/STI a large or unknown size.
+	 */
+	if (may_become_aar(&av)) {
+		ocp = newoccur(lfirst, lnp, bp);
+		av_enter(&av, ocp, TERNAIR_OP);
+	}
 }
 
-STATIC push_avail(avp, lfirst)
-	avail_p avp;
-	line_p lfirst;
+STATIC void push_avail(avail_p avp, line_p lfirst)
 {
 	struct token tk;
 
@@ -86,10 +85,7 @@ STATIC push_avail(avp, lfirst)
 	Push(&tk);
 }
 
-STATIC push_unair_op(bp, lnp, tkp1)
-	bblock_p bp;
-	line_p lnp;
-	token_p tkp1;
+STATIC void push_unair_op(bblock_p bp, line_p lnp, token_p tkp1)
 {
 	struct avail av;
 	occur_p	ocp;
@@ -103,10 +99,7 @@ STATIC push_unair_op(bp, lnp, tkp1)
 	push_avail(av_enter(&av, ocp, UNAIR_OP), tkp1->tk_lfirst);
 }
 
-STATIC push_binair_op(bp, lnp, tkp1, tkp2)
-	bblock_p bp;
-	line_p lnp;
-	token_p tkp1, tkp2;
+STATIC void push_binair_op(bblock_p bp, line_p lnp, token_p tkp1, token_p tkp2)
 {
 	struct avail av;
 	occur_p	ocp;
@@ -121,10 +114,8 @@ STATIC push_binair_op(bp, lnp, tkp1, tkp2)
 	push_avail(av_enter(&av, ocp, BINAIR_OP), tkp1->tk_lfirst);
 }
 
-STATIC push_ternair_op(bp, lnp, tkp1, tkp2, tkp3)
-	bblock_p bp;
-	line_p lnp;
-	token_p tkp1, tkp2, tkp3;
+STATIC void push_ternair_op(bblock_p bp, line_p lnp, token_p tkp1,
+			    token_p tkp2, token_p tkp3)
 {
 	struct avail av;
 	occur_p	ocp;
@@ -140,8 +131,38 @@ STATIC push_ternair_op(bp, lnp, tkp1, tkp2, tkp3)
 	push_avail(av_enter(&av, ocp, TERNAIR_OP), tkp1->tk_lfirst);
 }
 
-STATIC fiddle_stack(lnp)
-	line_p lnp;
+STATIC void push_remainder(bblock_p bp, line_p lnp, token_p tkp1, token_p tkp2)
+{
+	/* Enter the implicit division tkp1 / tkp2,
+	 * then push the remainder tkp1 % tkp2.
+	 */
+	struct avail av;
+	occur_p	ocp;
+
+	assert(INSTR(lnp) == op_rmi || INSTR(lnp) == op_rmu);
+	av.av_size = avsize(lnp);
+	av.av_oleft = tkp1->tk_vn;
+	av.av_oright = tkp2->tk_vn;
+
+	/* Check whether we may convert RMI/RMU to DVI/DVU. */
+	if (may_become_dv()) {
+		/* The division is DVI in RMI, or DVU in RMU. */
+		av.av_instr = (INSTR(lnp) == op_rmi ? op_dvi : op_dvu);
+
+		/* In postfix, a b % becomes a b a b / * -.  We must
+		 * keep a and b on the stack, so the first instruction
+		 * to eliminate is lnp, not tkp1->tk_lfirst.
+		 */
+		ocp = newoccur(lnp, lnp, bp);
+		av_enter(&av, ocp, BINAIR_OP);
+	}
+
+	av.av_instr = INSTR(lnp);	/* now the RMI/RMU itself */
+	ocp = newoccur(tkp1->tk_lfirst, lnp, bp);
+	push_avail(av_enter(&av, ocp, REMAINDER), tkp1->tk_lfirst);
+}
+
+STATIC void fiddle_stack(line_p lnp)
 {
 	/* The instruction in lnp does something to the valuenumber-stack. */
 
@@ -232,8 +253,7 @@ STATIC proc_p find_proc(vn)
 	return (proc_p) 0;
 }
 
-STATIC side_effects(lnp)
-	line_p lnp;
+STATIC void side_effects(line_p lnp)
 {
 	/* Lnp contains a cai or cal instruction. We try to find the callee
 	 * and see what side-effects it has.
@@ -255,8 +275,7 @@ STATIC side_effects(lnp)
 	}
 }
 
-hopeless(instr)
-	int instr;
+STATIC void hopeless(int instr)
 {
 	/* The effect of `instr' is too difficult to
 	 * compute. We assume worst case behaviour.
@@ -281,8 +300,7 @@ hopeless(instr)
 	}
 }
 
-vnm(bp)
-	bblock_p bp;
+void vnm(bblock_p bp)
 {
 	register line_p lnp;
 	register entity_p rep;
@@ -331,6 +349,11 @@ vnm(bp)
 				Pop(&tk1, op13size(lnp));
 				push_ternair_op(bp, lnp, &tk1, &tk2, &tk3);
 				break;
+			case REMAINDER:
+				Pop(&tk2, op22size(lnp));
+				Pop(&tk1, op12size(lnp));
+				push_remainder(bp, lnp, &tk1, &tk2);
+				break;
 			case KILL_ENTITY:
 				kill_direct(rep);
 				break;
diff --git a/util/ego/cs/cs_vnm.h b/util/ego/cs/cs_vnm.h
index 0fbce5d72..0c86a77e8 100644
--- a/util/ego/cs/cs_vnm.h
+++ b/util/ego/cs/cs_vnm.h
@@ -3,7 +3,8 @@
  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
  * See the copyright notice in the ACK home directory, in the file "Copyright".
  */
-extern vnm();	/* (bblock_p bp)
+extern void vnm(bblock_p bp);
+		/*
 		 * Performs the valuenumbering algorithm on the basic
 		 * block in bp.
 		 */
diff --git a/util/ego/descr/em22.descr b/util/ego/descr/em22.descr
index f995d631c..d9c39226b 100644
--- a/util/ego/descr/em22.descr
+++ b/util/ego/descr/em22.descr
@@ -78,6 +78,7 @@ cheap operations: -1
                   -1
 lexical tresholds: 1 1
 indirection limit: 8
+convert remainder to division?: no no
 do not eliminate sli if index on shiftcounts:   -1
                                                 -1
 forbidden operators: -1 -1
diff --git a/util/ego/descr/em24.descr b/util/ego/descr/em24.descr
index a95751170..cbe0ab5c3 100644
--- a/util/ego/descr/em24.descr
+++ b/util/ego/descr/em24.descr
@@ -78,6 +78,7 @@ cheap operations: -1
                   -1
 lexical tresholds: 1 1
 indirection limit: 8
+convert remainder to division?: no no
 do not eliminate sli if index on shiftcounts:   -1
                                                 -1
 forbidden operators: -1 -1
diff --git a/util/ego/descr/em44.descr b/util/ego/descr/em44.descr
index 117f26591..b6dbebba3 100644
--- a/util/ego/descr/em44.descr
+++ b/util/ego/descr/em44.descr
@@ -78,6 +78,7 @@ cheap operations: -1
                   -1
 lexical tresholds: 1 1
 indirection limit: 8
+convert remainder to division?: no no
 do not eliminate sli if index on shiftcounts:   -1
                                                 -1
 forbidden operators: -1 -1
diff --git a/util/ego/descr/i386.descr b/util/ego/descr/i386.descr
index 264151a60..d5a2014bf 100644
--- a/util/ego/descr/i386.descr
+++ b/util/ego/descr/i386.descr
@@ -93,6 +93,7 @@ cheap operations: op_cii op_cui op_ciu op_cuu -1
                   op_cii op_cui op_ciu op_cuu -1
 lexical tresholds: 1 1
 indirection limit: 8
+convert remainder to division?: no no
 do not eliminate sli if index on shiftcounts:   -1
                                                 -1
 forbidden operators: -1 -1
diff --git a/util/ego/descr/i86.descr b/util/ego/descr/i86.descr
index 8be3ec23e..9b27cf840 100644
--- a/util/ego/descr/i86.descr
+++ b/util/ego/descr/i86.descr
@@ -93,6 +93,7 @@ cheap operations: op_cii op_cui op_ciu op_cuu -1
                   op_cii op_cui op_ciu op_cuu -1
 lexical tresholds: 1 1
 indirection limit: 8
+convert remainder to division?: no no
 do not eliminate sli if index on shiftcounts:   -1
                                                 -1
 forbidden operators: -1 -1
diff --git a/util/ego/descr/m68020.descr b/util/ego/descr/m68020.descr
index 9d2f46b2b..f568e00e2 100644
--- a/util/ego/descr/m68020.descr
+++ b/util/ego/descr/m68020.descr
@@ -102,6 +102,7 @@ cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1
 		  op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1
 lexical tresholds: 1 1
 indirection limit: 8
+convert remainder to division?: no no
 do not eliminate sli if index on shiftcounts:	1 2 3 -1
 						1 2 3 -1
 forbidden operators: -1 -1
diff --git a/util/ego/descr/m68k2.descr b/util/ego/descr/m68k2.descr
index 58e433db8..6b144cba0 100644
--- a/util/ego/descr/m68k2.descr
+++ b/util/ego/descr/m68k2.descr
@@ -99,6 +99,7 @@ addressing modes: op_adp op_lof op_ldf op_loi op_dch op_lpb -1
 cheap operations: -1 -1
 lexical tresholds: 1 1
 indirection limit: 8
+convert remainder to division?: no no
 do not eliminate sli if index on shiftcounts:	-1
 						-1
 forbidden operators: -1 -1
diff --git a/util/ego/descr/m68k4.descr b/util/ego/descr/m68k4.descr
index 8e1da4c5e..6b9d23dfa 100644
--- a/util/ego/descr/m68k4.descr
+++ b/util/ego/descr/m68k4.descr
@@ -102,6 +102,7 @@ cheap operations: op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1
 		  op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1
 lexical tresholds: 1 1
 indirection limit: 8
+convert remainder to division?: no no
 do not eliminate sli if index on shiftcounts:	-1
 						-1
 forbidden operators: -1 -1
diff --git a/util/ego/descr/pdp.descr b/util/ego/descr/pdp.descr
index e73b3aaf1..ec8f3abca 100644
--- a/util/ego/descr/pdp.descr
+++ b/util/ego/descr/pdp.descr
@@ -92,6 +92,7 @@ cheap operations: op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif -1
 		  op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif -1
 lexical tresholds: 1 1
 indirection limit: 8
+convert remainder to division?: no no
 do not eliminate sli if index on shiftcounts:	-1
 						-1
 forbidden operators: -1 -1
diff --git a/util/ego/descr/powerpc.descr b/util/ego/descr/powerpc.descr
index e59990ea1..cf613e96c 100644
--- a/util/ego/descr/powerpc.descr
+++ b/util/ego/descr/powerpc.descr
@@ -102,7 +102,7 @@ register save costs:
 	17 -> (102,136)
 	18 -> (108,144)
 	19 -> (114,152)
-        20 -> (120,160)
+	20 -> (120,160)
 	21 -> (126,168)
 	22 -> (132,176)
 	23 -> (138,184)
@@ -137,10 +137,11 @@ reduce sli if shift count larger than:  0
 first time then space:
 addressing modes: op_ads op_adp op_lof op_ldf op_loi op_dch op_lpb -1
 		  op_ads op_adp op_lof op_ldf op_loi op_dch op_lpb -1
-cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 
+cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1
 		  op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1
 lexical tresholds: 1 1
 indirection limit: 8
+convert remainder to division?: yes yes
 do not eliminate sli if index on shiftcounts:	-1
 						-1
 forbidden operators: -1 -1
diff --git a/util/ego/descr/sparc.descr b/util/ego/descr/sparc.descr
index 978c39ba3..79c33decb 100644
--- a/util/ego/descr/sparc.descr
+++ b/util/ego/descr/sparc.descr
@@ -100,6 +100,7 @@ cheap operations: op_cuu op_ciu op_cui op_cii -1
                   op_cuu op_ciu op_cui op_cii -1
 lexical tresholds: 1 1
 indirection limit: 8
+convert remainder to division?: no no
 do not eliminate sli if index on shiftcounts:   -1
                                                 -1
 forbidden operators: -1 -1
diff --git a/util/ego/descr/vax4.descr b/util/ego/descr/vax4.descr
index 5a39ea759..beaf0c427 100644
--- a/util/ego/descr/vax4.descr
+++ b/util/ego/descr/vax4.descr
@@ -113,6 +113,7 @@ cheap operations: op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif
 		  op_cmi op_cmu op_cmf op_cms op_cmp -1
 lexical thresholds: 1 1
 indirection limit: 8
+convert remainder to division?: no no
 do not eliminate sli if index on shiftcounts:	1 2 3 -1
 						1 2 3 -1
 forbidden operators: -1 -1
diff --git a/util/ego/share/aux.h b/util/ego/share/aux.h
index 6a6770469..db2d3f8da 100644
--- a/util/ego/share/aux.h
+++ b/util/ego/share/aux.h
@@ -36,7 +36,7 @@ line_p reg_mes(offset tmp, short size, int typ, int score);
 bool dom(bblock_p b1, bblock_p b2);
 				/*
 				 * See if b1 dominates b2. Note that a
-				 * block always * dominates itself.
+				 * block always dominates itself.
 				 */
 bblock_p common_dom(bblock_p a, bblock_p b);
 				/*
diff --git a/util/ego/share/build.lua b/util/ego/share/build.lua
index ab1068d2c..5ca714897 100644
--- a/util/ego/share/build.lua
+++ b/util/ego/share/build.lua
@@ -48,6 +48,7 @@ clibrary {
 		"./init_glob.c",
 	},
 	deps = {
+		"./*.h",
 		"+classdefs_h",
 		"+pop_push_h",
 		"h+emheaders",
@@ -57,5 +58,3 @@ clibrary {
 		["+cflags"] = {"-DVERBOSE", "-DNOTCOMPACT"}
 	}
 }
-
-
diff --git a/util/ego/share/debug.c b/util/ego/share/debug.c
index 81080f7cf..56514d149 100644
--- a/util/ego/share/debug.c
+++ b/util/ego/share/debug.c
@@ -45,7 +45,7 @@ void error(const char *s, ...)
 void OUTTRACE(const char *s, int n)
 {
 	fprintf(stderr,"> ");
-	vfprintf(stderr,s,n);
+	fprintf(stderr,s,n);
 	fprintf(stderr,"\n");
 }
 #endif
diff --git a/util/ego/share/files.c b/util/ego/share/files.c
index e45f9b7fb..2dd10b25f 100644
--- a/util/ego/share/files.c
+++ b/util/ego/share/files.c
@@ -9,9 +9,11 @@
  */
 
 #include <stdio.h>
+#include "types.h"
+#include "debug.h"
 #include "files.h"
 
-struct files* findfiles(int argc, const char** argv)
+struct files* findfiles(int argc, char * const *argv)
 {
 	static struct files files;
 
diff --git a/util/ego/share/files.h b/util/ego/share/files.h
index 46b19917a..ab2076ea1 100644
--- a/util/ego/share/files.h
+++ b/util/ego/share/files.h
@@ -33,11 +33,11 @@ struct files
 
 	/* The rest of the arguments. */
 
-	const char** argv;
+	char * const *argv;
 	int argc;
 };
 
-struct files* findfiles(int argc, const char** argv);
+struct files* findfiles(int argc, char * const *argv);
 
 FILE *openfile(const char *name, const char *mode);
 				/*
diff --git a/util/ego/share/get.c b/util/ego/share/get.c
index 94c7aabe2..a433b0946 100644
--- a/util/ego/share/get.c
+++ b/util/ego/share/get.c
@@ -285,7 +285,7 @@ dblock_p getdtable(const char *dname)
 
 /* getbblocks */
 
-STATIC argstring(short length, argb_p abp)
+STATIC void argstring(short length, argb_p abp)
 {
 
 	while (length--) {
diff --git a/util/ego/share/global.h b/util/ego/share/global.h
index f97df2fa2..4121a5b85 100644
--- a/util/ego/share/global.h
+++ b/util/ego/share/global.h
@@ -40,13 +40,13 @@ extern int ws;		/* word size	*/
 
 #define UNKNOWN_SIZE (-1)
 
-extern proc_p curproc;  /* current procedure */
+extern proc_p curproc;	/* current procedure */
 
-extern char *filename; /* name of current input file */
+extern char *filename;	/* name of current input file */
 
 extern lset mesregs;	/* set of MES ms_reg pseudos */
 
-extern short time_space_ratio; /* 0   if optimizing for space only,
+extern short time_space_ratio;	/* 0   if optimizing for space only,
 				 * 100 if optimizing for time only,
 				 * else something 'in between'.
 				 */
diff --git a/util/ego/share/go.c b/util/ego/share/go.c
index 9a2107d3d..0ccd3c6e9 100644
--- a/util/ego/share/go.c
+++ b/util/ego/share/go.c
@@ -42,7 +42,7 @@ STATIC void mach_init(char* machfile, void (*phase_machinit)(void *))
 	fclose(f);
 }
 
-void go(int argc, const char** argv,
+void go(int argc, char * const *argv,
 	void (*initialize)(void *), void (*optimize)(void *),
 	void (*phase_machinit)(void *), void (*proc_flag)(void *))
 {
diff --git a/util/ego/share/go.h b/util/ego/share/go.h
index 3bb8c1f54..55f1b48e8 100644
--- a/util/ego/share/go.h
+++ b/util/ego/share/go.h
@@ -22,7 +22,7 @@
  * and 'optimize' is called with the current procedure
  * as parameter.
  */
-void go(int argc, const char** argv,
+void go(int argc, char * const *argv,
 	void (*initialize)(void *null),
 	void (*optimize)(void *),	/* (proc_p *p) */
 	void (*phase_machinit)(void *),	/* (FILE *f) */
diff --git a/util/ego/share/types.h b/util/ego/share/types.h
index cae4d6074..cabc5818d 100644
--- a/util/ego/share/types.h
+++ b/util/ego/share/types.h
@@ -46,7 +46,7 @@ typedef struct elemholder *lset;
 typedef struct bitvector  *cset;
 typedef elem_p Lindex;
 typedef short  Cindex;
-typedef char   *Lelem_t;
+typedef void   *Lelem_t;
 typedef short  Celem_t;
 
 typedef union pext_t *pext_p;
diff --git a/util/ego/sp/sp.c b/util/ego/sp/sp.c
index 8538d3dfb..051281d7e 100644
--- a/util/ego/sp/sp.c
+++ b/util/ego/sp/sp.c
@@ -65,9 +65,8 @@ STATIC void sp_machinit(void *vp)
 	}
 	fscanf(f,"%d",&globl_sp_allowed);
 }
-comb_asps(l1,l2,b)
-	line_p l1,l2;
-	bblock_p b;
+
+STATIC void comb_asps(line_p l1, line_p l2, bblock_p b)
 {
 	assert(INSTR(l1) == op_asp);
 	assert(INSTR(l2) == op_asp);
@@ -78,11 +77,7 @@ comb_asps(l1,l2,b)
 	rm_line(l1,b);
 }
 	
-
-
-
-stack_pollution(b)
-	bblock_p b;
+STATIC void stack_pollution(bblock_p b)
 {
 	/* For every pair of successive ASP instructions in basic
 	 * block b, try to combine the two into one ASP.
@@ -134,8 +129,7 @@ stack_pollution(b)
 	} while (asp != (line_p) 0);
 }
 
-STATIC bool block_save(b)
-	bblock_p b;
+STATIC bool block_save(bblock_p b)
 {
 
 	register line_p l;
@@ -159,10 +153,7 @@ STATIC bool block_save(b)
 	return stack_diff >= 0;
 }
 
-
-
-STATIC mark_pred(b)
-	bblock_p b;
+STATIC void mark_pred(bblock_p b)
 {
 	Lindex i;
 	bblock_p x;
@@ -176,12 +167,7 @@ STATIC mark_pred(b)
 	}
 }
 
-
-
-
-
-STATIC mark_unsave_blocks(p)
-	proc_p p;
+STATIC void mark_unsave_blocks(proc_p p)
 {
 	register bblock_p b;
 
@@ -193,8 +179,7 @@ STATIC mark_unsave_blocks(p)
 	}
 }
 
-
-void sp_optimize(void *vp)
+STATIC void sp_optimize(void *vp)
 {
 	proc_p p = vp;
 	register bblock_p b;
@@ -206,21 +191,13 @@ void sp_optimize(void *vp)
 	}
 }
 
-
-
-
-main(argc,argv)
-	int argc;
-	char *argv[];
+int main(int argc, char *argv[])
 {
 	go(argc,argv,no_action,sp_optimize,sp_machinit,no_action);
 	report("stack adjustments deleted",Ssp);
 	exit(0);
 }
 
-
-
-
 /***** DEBUGGING:
 
 debug_stack_pollution(p)
diff --git a/util/ego/ud/ud.c b/util/ego/ud/ud.c
index c0fe613fd..087337144 100644
--- a/util/ego/ud/ud.c
+++ b/util/ego/ud/ud.c
@@ -269,13 +269,13 @@ pr_localtab() {
 	short i;
 	local_p lc;
 
-	printf("LOCAL-TABLE (%d)\n\n",nrlocals);
+	fprintf(stderr,"LOCAL-TABLE (%d)\n\n",nrlocals);
 	for (i = 1; i <= nrlocals; i++) {
 		lc = locals[i];
-		printf("LOCAL %d\n",i);
-		printf("	offset= %ld\n",lc->lc_off);
-		printf("	size=   %d\n",lc->lc_size);
-		printf("	flags=  %d\n",lc->lc_flags);
+		fprintf(stderr,"LOCAL %d\n",i);
+		fprintf(stderr,"\toffset= %ld\n",lc->lc_off);
+		fprintf(stderr,"\tsize=   %d\n",lc->lc_size);
+		fprintf(stderr,"\tflags=  %d\n",lc->lc_flags);
 	}
 }
 
@@ -284,12 +284,13 @@ pr_globals()
 	dblock_p d;
 	obj_p obj;
 
-	printf("GLOBALS (%d)\n\n",nrglobals);
-	printf("ID	GLOBNR\n");
+	fprintf(stderr,"GLOBALS (%d)\n\n",nrglobals);
+	fprintf(stderr,"ID\tGLOBNR\n");
 	for (d = fdblock; d != (dblock_p) 0; d = d->d_next) {
 		for (obj = d->d_objlist; obj != (obj_p) 0; obj = obj->o_next) {
 			if (obj->o_globnr != 0) {
-			   printf("%d	%d\n", obj->o_id,obj->o_globnr);
+				fprintf(stderr,"%d\t%d\n",
+				    obj->o_id,obj->o_globnr);
 			}
 		}
 	}
@@ -302,20 +303,20 @@ pr_defs()
 	short i;
 	line_p l;
 
-	printf("DEF TABLE\n\n");
+	fprintf(stderr,"DEF TABLE\n\n");
 	for (i = 1; i <= nrexpldefs; i++) {
 		l = defs[i];
-		printf("%d	%s ",EXPL_TO_DEFNR(i),
+		fprintf(stderr,"%d\t%s ",EXPL_TO_DEFNR(i),
 			&em_mnem[(INSTR(l)-sp_fmnem)*4]);
 		switch(TYPE(l)) {
 			case OPSHORT:
-				printf("%d\n",SHORT(l));
+				fprintf(stderr,"%d\n",SHORT(l));
 				break;
 			case OPOFFSET:
-				printf("%ld\n",OFFSET(l));
+				fprintf(stderr,"%ld\n",OFFSET(l));
 				break;
 			case OPOBJECT:
-				printf("%d\n",OBJ(l)->o_id);
+				fprintf(stderr,"%d\n",OBJ(l)->o_id);
 				break;
 			default:
 				assert(FALSE);
@@ -331,13 +332,13 @@ pr_set(name,k,s,n)
 {
 	short i;
 
-	printf("%s(%d) =	{",name,k);
+	fprintf(stderr,"%s(%d) =\t{",name,k);
 	for (i = 1; i <= n; i++) {
 		if (Cis_elem(i,s)) {
-			printf("%d ",i);
+			fprintf(stderr,"%d ",i);
 		}
 	}
-	printf ("}\n");
+	fprintf(stderr,"}\n");
 }
 
 pr_blocks(p)
@@ -347,7 +348,7 @@ pr_blocks(p)
 	short n;
 
 	for (b = p->p_start; b != 0; b = b->b_next) {
-		printf ("\n");
+		fprintf(stderr,"\n");
 		n = b->b_id;
 		pr_set("GEN",n,GEN(b),nrdefs);
 		pr_set("KILL",n,KILL(b),nrdefs);
@@ -361,10 +362,10 @@ pr_copies()
 {
 	short i;
 
-	printf("\nCOPY TABLE\n\n");
+	fprintf(stderr,"\nCOPY TABLE\n\n");
 	for (i = 1; i <= nrdefs; i++) {
 		if (def_to_copynr[i] != 0) {
-			printf("%d	%d\n",i,def_to_copynr[i]);
+			fprintf(stderr,"%d\t%d\n",i,def_to_copynr[i]);
 		}
 	}
 }
@@ -376,7 +377,7 @@ pr_cblocks(p)
 	short n;
 
 	for (b = p->p_start; b != 0; b = b->b_next) {
-		printf ("\n");
+		fprintf(stderr,"\n");
 		n = b->b_id;
 		pr_set("CGEN",n,C_GEN(b),nrcopies);
 		pr_set("CKILL",n,C_KILL(b),nrcopies);
diff --git a/util/misc/convert.c b/util/misc/convert.c
index ec38761fa..9bdc12011 100644
--- a/util/misc/convert.c
+++ b/util/misc/convert.c
@@ -16,8 +16,10 @@ static char rcsid[] = "$Id$";
 	linked.
 */
 
+#include <stdarg.h>
 #include <stdlib.h>
 #include "system.h"
+#include "print.h"
 #include "em_pseu.h"
 #include "em_mnem.h"
 #include "em_spec.h"
@@ -30,8 +32,11 @@ char *filename;			/* Name of input file */
 int errors;			/* Number of errors */
 extern char *C_error;
 
-main(argc,argv)
-	char **argv;
+void error(const char *, ...);
+void fatal(const char *, ...);
+
+int
+main(int argc, char **argv)
 {
 	struct e_instr buf;
 	register struct e_instr *p = &buf;
@@ -66,27 +71,41 @@ main(argc,argv)
 	}
 	C_close();
 	EM_close();
-	exit(errors);
+	exit(errors ? 1 : 0);
 }
 
 /* VARARGS */
-error(s,a1,a2,a3,a4)
-	char *s;
+void
+error(const char *s, ...)
 {
+	va_list ap;
+	va_start(ap, s);
 	fprint(STDERR,
 		"%s, line %d: ",
 		filename ? filename : "standard input",
 		EM_lineno);
-	fprint(STDERR,s,a1,a2,a3,a4);
+	doprnt(STDERR, s, ap);
 	fprint(STDERR, "\n");
 	errors++;
+	va_end(ap);
 }
 
 /* VARARGS */
-fatal(s,a1,a2,a3,a4)
-	char *s;
+void
+fatal(const char *s, ...)
 {
+	va_list ap;
+	va_start(ap, s);
 	if (C_busy()) C_close();
-	error(s,a1,a2,a3,a4);
+	/* error() is variadic and cannot be handed a va_list, so the
+	 * message is formatted here the same way error() does it. */
+	fprint(STDERR,
+		"%s, line %d: ",
+		filename ? filename : "standard input",
+		EM_lineno);
+	doprnt(STDERR, s, ap);
+	fprint(STDERR, "\n");
+	errors++;
+	va_end(ap);
 	exit(1);
 }