Merge pull request #73 from kernigh/kernigh-pr

better code from PowerPC ncg and mcg
2018-03-13 13:57:28 +01:00 · 2018-03-13 13:57:28 +01:00 · aabf0bdd69
parent 4cb4bdc85f 85fcbde22f
commit aabf0bdd69
143 changed files with 4153 additions and 2001 deletions
--- a/mach/i80/libem/cii.s
+++ b/mach/i80/libem/cii.s
@ -65,19 +65,19 @@
 	jmp 3f		! done
 !if destination size < source size only:
-shrink:	mov l,c		! load source size in hl
+shrink:	mov l,b		! load destination size in hl
 	mvi h,0
 	dad sp
 	mov d,h
-	mov e,l		! de points just above source
+	mov e,l		! de points just above lowest bytes of source
-	mov l,b		! load destination size in hl
+	mov l,c		! load source size in hl
 	mvi h,0
 	dad sp		! hl points just above "destination"
 1:	dcx d		! move upwards
 	dcx h
-	mov a,m
+	ldax d
-	stax d
+	mov m,a
 	dcr b
 	jnz 1b
 	sphl
--- a/mach/i80/libem/rol4.s
+++ b/mach/i80/libem/rol4.s
@ -25,8 +25,8 @@
 	mov e,a
 	mov a,b
-	ral
+1:	ral
-1:	mov a,l
+	mov a,l
 	ral
 	mov l,a
 	mov a,h
--- a/mach/i80/libem/ror4.s
+++ b/mach/i80/libem/ror4.s
@ -25,8 +25,8 @@
 	mov e,a
 	mov a,l
-	rar
+1:	rar
-1:	mov a,b
+	mov a,b
 	rar
 	mov b,a
 	mov a,c
--- a/mach/i80/ncg/table
+++ b/mach/i80/ncg/table
@ -385,8 +385,9 @@ gen dad de
 pat loi $1>=512
 kills ALL
-uses dereg={const2,$1}
+/* 'uses dereg={const2,$1}' fails to kill de. */
-gen Call {label,".loi"}
+gen lxi de,{const2,$1}
    Call {label,".loi"}
 pat los $1==2
 with dereg
@ -597,8 +598,8 @@ gen 1:
 pat sti
 kills ALL
-uses dereg={const2,$1}
+gen lxi de,{const2,$1}
-gen Call {label,".sti"}
+    Call {label,".sti"}
 pat sts $1==2
 with dereg
@ -702,23 +703,24 @@ gen Call {label,".mli4"}
 pat dvi $1==2
 kills ALL
-uses areg={const1,129}
+/* 'uses areg={const1,129}' fails to kill a. */
-gen Call {label,".dvi2"}		yields de
+gen mvi a,{const1,129}
    Call {label,".dvi2"}		yields de
 pat dvi $1==4
 kills ALL
-uses areg={const1,129}
+gen mvi a,{const1,129}
-gen Call {label,".dvi4"}
+    Call {label,".dvi4"}
 pat rmi $1==2
 kills ALL
-uses areg={const1,128}
+gen mvi a,{const1,128}
-gen Call {label,".dvi2"}		yields de
+    Call {label,".dvi2"}		yields de
 pat rmi $1==4
 kills ALL
-uses areg={const1,128}
+gen mvi a,{const1,128}
-gen Call {label,".dvi4"}
+    Call {label,".dvi4"}
 pat ngi $1==2
 with hl_or_de
@ -749,13 +751,13 @@ gen Call {label,".sli4"}
 pat sri $1==2
 kills ALL
-uses areg={const1,1}
+gen mvi a,{const1,1}
-gen Call {label,".sri2"}		yields de
+    Call {label,".sri2"}		yields de
 pat sri $1==4
 kills ALL
-uses areg={const1,1}
+gen mvi a,{const1,1}
-gen Call {label,".sri4"}
+    Call {label,".sri4"}
 /********************************************/
 /* Group 4: Unsigned arithmetic		    */
@ -775,23 +777,23 @@ gen Call {label,".mli4"}
 pat dvu $1==2
 kills ALL
-uses areg={const1,1}
+gen mvi a,{const1,1}
-gen Call {label,".dvi2"}		yields de
+    Call {label,".dvi2"}		yields de
 pat dvu $1==4
 kills ALL
-uses areg={const1,1}
+gen mvi a,{const1,1}
-gen Call {label,".dvi4"}
+    Call {label,".dvi4"}
 pat rmu $1==2
 kills ALL
-uses areg={const1,0}
+gen mvi a,{const1,0}
-gen Call {label,".dvi2"}		yields de
+    Call {label,".dvi2"}		yields de
 pat rmu $1==4
 kills ALL
-uses areg={const1,0}
+gen mvi a,{const1,0}
-gen Call {label,".dvi4"}
+    Call {label,".dvi4"}
 pat slu						leaving sli $1
@ -802,13 +804,13 @@ gen move %1.1, %1.2
 pat sru $1==2
 kills ALL
-uses areg={const1,0}
+gen mvi a,{const1,0}
-gen Call {label,".sri2"}		yields de
+    Call {label,".sri2"}		yields de
 pat sru $1==4
 kills ALL
-uses areg={const1,0}
+gen mvi a,{const1,0}
-gen Call {label,".sri4"}
+    Call {label,".sri4"}
 /********************************************/
@ -1047,8 +1049,8 @@ with hlreg
 pat cii
 kills ALL
-uses areg={const1,1}
+gen mvi a,{const1,1}
-gen Call {label,".cii"}
+    Call {label,".cii"}
 pat loc loc ciu					leaving loc $1 loc $2 cuu
 pat loc loc cui					leaving loc $1 loc $2 cuu
@ -1081,8 +1083,8 @@ with hl_or_de
 pat cuu
 kills ALL
-uses areg={const1,0}
+gen mvi a,{const1,0}
-gen Call {label,".cii"}
+    Call {label,".cii"}
 pat cfi
 kills ALL
@ -1128,8 +1130,8 @@ gen mov a,%1.2
 pat and defined($1)
 kills ALL
-uses dereg={const2,$1}
+gen lxi de,{const2,$1}
-gen Call {label,".and"}
+    Call {label,".and"}
 pat and !defined($1)
 with dereg
@ -1156,8 +1158,8 @@ gen mov a,%1.2
 pat ior defined($1)
 kills ALL
-uses dereg={const2,$1}
+gen lxi de,{const2,$1}
-gen Call {label,".ior"}
+    Call {label,".ior"}
 pat ior !defined($1)
 with dereg
@ -1184,8 +1186,8 @@ gen mov a,%1.2
 pat xor defined($1)
 kills ALL
-uses dereg={const2,$1}
+gen lxi de,{const2,$1}
-gen Call {label,".xor"}
+    Call {label,".xor"}
 pat xor !defined($1)
 with dereg
@ -1204,8 +1206,8 @@ gen mov a,%1.2
 pat com defined($1)
 kills ALL
-uses dereg={const2,$1}
+gen lxi de,{const2,$1}
-gen Call {label,".com"}
+    Call {label,".com"}
 pat com !defined($1)
 with dereg
@ -1269,8 +1271,8 @@ gen Call {label,".inn2"}		yields de
 pat inn defined($1)
 kills ALL
-uses dereg={const2,$1}
+gen lxi de,{const2,$1}
-gen Call {label,".inn"}			yields de
+    Call {label,".inn"}			yields de
 pat inn !defined($1)
 with dereg
@ -1284,8 +1286,8 @@ gen Call {label,".set2"}		yields de
 pat set defined($1)
 kills ALL
-uses dereg={const2,$1}
+gen lxi de,{const2,$1}
-gen Call {label,".set"}
+    Call {label,".set"}
 pat set !defined($1)
 with dereg
@ -1402,8 +1404,8 @@ pat cmi $1==2					leaving sbi 2
 pat cmi $1==4
 kills ALL
-uses areg={const1,1}
+gen mvi a,{const1,1}
-gen Call {label,".cmi4"}		yields de
+    Call {label,".cmi4"}		yields de
 pat cmf $1==4
 kills ALL
@ -1436,15 +1438,15 @@ gen mov a,%2.1
 pat cmu $1==4
 kills ALL
-uses areg={const1,0}
+gen mvi a,{const1,0}
-gen Call {label,".cmi4"}		yields de
+    Call {label,".cmi4"}		yields de
 pat cms $1==2					leaving cmi 2
 pat cms defined($1)
 kills ALL
-uses dereg={const2,$1}
+gen lxi de,{const2,$1}
-gen Call {label,".cms"}			yields de
+    Call {label,".cms"}			yields de
 pat cms !defined($1)
 with dereg
@ -1936,8 +1938,8 @@ gen dad sp
 pat blm
 kills ALL
-uses dereg={const2,$1}
+gen lxi de,{const2,$1}
-gen Call {label,".blm"}
+    Call {label,".blm"}
 pat bls
 with dereg
@ -1962,8 +1964,8 @@ with src1or2 src1or2			yields %2 %1 %2 %1
 pat dup
 kills ALL
-uses dereg={const2,$1}
+gen lxi de,{const2,$1}
-gen Call {label,".dup"}
+    Call {label,".dup"}
 pat dus $1==2
 with dereg
@ -1975,8 +1977,8 @@ with src1or2 src1or2			yields %1 %2
 pat exg defined($1)
 kills ALL
-uses dereg={const2,1}
+gen lxi de,{const2,$1}
-gen Call {label,".exg"}
+    Call {label,".exg"}
 pat fil
 uses hlreg={label,$1}
--- a/mach/i86/ncg/table
+++ b/mach/i86/ncg/table
@ -2292,7 +2292,7 @@ with CXREG REG REG
      rcl %3,{ANYCON,1}
      adc %2,{ANYCON,0}
      loop {label, 2b}
-      1:
+      1:				yields %3 %2
 pat loc ror $1==1 && $2==2
 with REG
@ -2311,7 +2311,7 @@ with CXREG REG REG
      rcl %3,{ANYCON,1}
      adc %2,{ANYCON,0}
      loop {label, 2b}
-      1:
+      1:				yields %3 %2
 /*******************************************************************
 *  Group 10 : Set Instructions                                    *
--- a/mach/powerpc/as/mach2.c
+++ b/mach/powerpc/as/mach2.c
@ -47,11 +47,15 @@
 %token <y_word> OP_FRT_FRB_C
 %token <y_word> OP_FRT_RA_D
 %token <y_word> OP_FRT_RA_RB
 %token <y_word> OP_L
 %token <y_word> OP_LEV
 %token <y_word> OP_LIA
 %token <y_word> OP_LIL
 %token <y_word> OP_LI32
 %token <y_word> OP_RA_RB
 %token <y_word> OP_RA_RB_TH
 %token <y_word> OP_RA_RS_C
 %token <y_word> OP_RA_RS_RA_C
 %token <y_word> OP_RA_RS_RB_C
 %token <y_word> OP_RA_RS_RB_MB5_ME5_C
 %token <y_word> OP_RA_RS_RB_MB6_C
@ -61,14 +65,14 @@
 %token <y_word> OP_RA_RS_SH6_MB6_C
 %token <y_word> OP_RA_RS_UI
 %token <y_word> OP_RA_RS_UI_CC
 %token <y_word> OP_RS
 %token <y_word> OP_RS_FXM
 %token <y_word> OP_RS_RA
 %token <y_word> OP_RS_RA_D
 %token <y_word> OP_RS_RA_DS
 %token <y_word> OP_RS_RA_NB
 %token <y_word> OP_RS_RA_RB
-%token <y_word> OP_RS_RA_RB_C
+%token <y_word> OP_RS_RA_RB_CC
 %token <y_word> OP_RS_RA_RA_C
 %token <y_word> OP_RS_RB
 %token <y_word> OP_RS_SPR
 %token <y_word> OP_RS_SR
@ -104,4 +108,5 @@
 %type <y_word> c
 %type <y_word> e16 negate16 u8 u7 u6 u5 u4 u2 u1
-%type <y_word> opt_bh cr_opt nb ds bda bdl lia lil spr_num
+%type <y_word> opt_bh cr_opt nb ds bda bdl lia lil
 %type <y_word> spr_num tbr_num opt_tbr
--- a/mach/powerpc/as/mach3.c
+++ b/mach/powerpc/as/mach3.c
@ -103,6 +103,10 @@
 0,     OP_HA,                0,                                        "ha16",
 0,     OP_LO,                 0,                                       "lo16",
 /* The next page numbers are from PowerPC User Instruction Set
 * Architecture, Book I, Version 2.01.
 */
 /* Branch processor instructions (page 20) */
 0,     OP_LIL,                18<<26 | 0<<1 | 0<<0,                    "b",
@ -128,7 +132,7 @@
 0,     OP_BT_BA_BB,           19<<26 | 417<<1,                         "crorc",
 0,     OP_BF_BFA,             19<<26 | 0<<1,                           "mcrf",
-/* extended mnemonics for bc, bcctr, bclr */
+/* extended mnemonics for bc, bcctr, bclr (page 144) */
 0,     OP_BH,       19<<26 | 20<<21 | 528<<1 | 0<<0,            "bctr",
 0,     OP_BH,       19<<26 | 20<<21 | 528<<1 | 1<<0,            "bctrl",
 0,     OP_BDL,      16<<26 | 16<<21 | 0<<1 | 0<<0,              "bdnz",
@ -186,7 +190,7 @@
 0,     OP_BI_BH,    19<<26 | 12<<21 | 16<<1 | 0<<0,             "btlr",
 0,     OP_BI_BH,    19<<26 | 12<<21 | 16<<1 | 1<<0,             "btlrl",
-/* extended m with condition in BI */
+/* extended m with condition in BI (page 146) */
 0,     OP_BICR_BDL,  16<<26 | 12<<21 | 2<<16 | 0<<1 | 0<<0,     "beq",
 0,     OP_BICR_BDA,  16<<26 | 12<<21 | 2<<16 | 1<<1 | 0<<0,     "beqa",
 0,     OP_BICR_BH,   19<<26 | 12<<21 | 2<<16 | 528<<1 | 0<<0,   "beqctr",
@ -284,7 +288,7 @@
 0,     OP_BICR_BH,   19<<26 | 12<<21 | 3<<16 | 16<<1 | 0<<0,    "bunlr",
 0,     OP_BICR_BH,   19<<26 | 12<<21 | 3<<16 | 16<<1 | 1<<0,    "bunlrl",
-/* extended m for cr logic */
+/* extended m for cr logic (page 147) */
 0,     OP_BT_BT_BT,  19<<26 | 289<<1,                           "crset",
 0,     OP_BT_BT_BT,  19<<26 | 193<<1,                           "crclr",
 0,     OP_BT_BA_BA,  19<<26 | 449<<1,                           "crmove",
@ -377,12 +381,12 @@
 0,     OP_RT_RA_C,            31<<26 | 0<<10 | 104<<1,                 "neg",
 0,     OP_RT_RA_C,            31<<26 | 1<<10 | 104<<1,                 "nego",
-/* extended m for addition */
+/* extended m for addition (pages 153, 154) */
 0,     OP_RT_RA_D,            14<<26,                       "la",
 0,     OP_RT_SI,              14<<26 | 0<<16,               "li",
 0,     OP_RT_SI,              15<<26 | 0<<16,               "lis",
-/* extended m for subtraction */
+/* extended m for subtraction (pages 147, 148) */
 0,     OP_RT_RB_RA_C,         31<<26 | 0<<10 | 40<<1,       "sub",
 0,     OP_RT_RB_RA_C,         31<<26 | 1<<10 | 40<<1,       "subo",
 0,     OP_RT_RB_RA_C,         31<<26 | 0<<10 | 8<<1,        "subc",
@ -418,7 +422,7 @@
 0,     OP_BF_L_RA_UI,         10<<26,                                  "cmpli",
 0,     OP_BF_L_RA_RB,         31<<26 | 32<<1,                          "cmpl",
-/* extended m for comparison */
+/* extended m for comparison (page 149) */
 0,     OP_BF_RA_SI,           11<<26 | 1<<21,               "cmpdi",
 0,     OP_BF_RA_RB,           31<<26 | 1<<21 | 0<<1,        "cmpd",
 0,     OP_BF_RA_UI,           10<<26 | 1<<21,               "cmpldi",
@ -434,7 +438,7 @@
 0,     OP_TO_RA_RB,           31<<26 | 68<<1,                          "td",
 0,     OP_TO_RA_RB,           31<<26 | 4<<1,                           "tw",
-/* extended m for traps */
+/* extended m for traps (page 150) */
 0,     OP_TOX_RA_RB,          31<<26 | 4<<21 | 68<<1,       "tdeq",
 0,     OP_TOX_RA_SI,          2<<26 | 4<<21,                "tdeqi",
 0,     OP_TOX_RA_RB,          31<<26 | 12<<21 | 68<<1,      "tdge",
@ -518,11 +522,10 @@
 0,     OP_RA_RS_C,            31<<26 | 58<<1,                          "cntlzd",
 0,     OP_RA_RS_C,            31<<26 | 26<<1,                          "cntlzw",
-/* extended m using logic */
+/* extended m using logic (pages 153, 154) */
-0,     OP_RS_RA_RA_C,         31<<26 | 444<<1,              "mr",
+0,     OP_RA_RS_RA_C,         31<<26 | 444<<1,              "mr",
 0,     OP,                    24<<26,                       "nop",
-0,     OP_RS_RA_RA_C,         31<<26 | 124<<1,              "not",
+0,     OP_RA_RS_RA_C,         31<<26 | 124<<1,              "not",
 0,     OP,                    26<<26,                       "xnop",
 /* page 69 */
 0,     OP_RA_RS_SH6_MB6_C,     30<<26 | 0<<2,               "rldicl",
@ -535,7 +538,7 @@
 0,     OP_RA_RS_SH6_MB6_C,     30<<26 | 3<<2,               "rldimi",
 0,     OP_RA_RS_SH5_MB5_ME5_C, 20<<26,                      "rlwimi",
-/* extended m for doubleword rotation */
+/* extended m for doubleword rotation (page 151) */
 0,     OP_clrlsldi,           30<<26 | 2<<2,                "clrlsldi",
 0,     OP_clrldi,             30<<26 | 0<<2,                "clrldi",
 0,     OP_clrrdi,             30<<26 | 1<<2,                "clrrdi",
@ -548,7 +551,7 @@
 0,     OP_sldi,               30<<26 | 1<<2,                "sldi",
 0,     OP_srdi,               30<<26 | 0<<2,                "srdi",
-/* extended m for word rotation */
+/* extended m for word rotation (page 152) */
 0,     OP_clrlslwi,           21<<26,                       "clrlslwi",
 0,     OP_clrlwi,             21<<26,                       "clrlwi",
 0,     OP_clrrwi,             21<<26,                       "clrrwi",
@ -573,21 +576,25 @@
 0,     OP_RA_RS_RB_C,         31<<26 | 792<<1,              "sraw",
 /* page 78 */
-0,     OP_RS_SPR,             31<<26 | 467<<1,                         "mtspr",
+0,     OP_RS_SPR,             31<<26 | 467<<1,              "mtspr",
-0,     OP_RT_SPR,             31<<26 | 339<<1,                         "mfspr",
+0,     OP_RT_SPR,             31<<26 | 339<<1,              "mfspr",
-0,     OP_RS_FXM,             31<<26 | 0<<21 | 144<<1,                 "mtcrf",
+0,     OP_RS_FXM,             31<<26 | 0<<20 | 144<<1,      "mtcrf",
-0,     OP_RT,                 31<<26 | 0<<21 | 19<<1,                  "mfcr",
+0,     OP_RT,                 31<<26 | 0<<20 | 19<<1,       "mfcr",
-/* extended m for special purpose registers */
+/* extended m for special purpose registers (page 153) */
 0,     OP_RT,       31<<26 | 9<<16 | 0<<11 | 339<<1,        "mfctr",
 0,     OP_RT,       31<<26 | 8<<16 | 0<<11 | 339<<1,        "mflr",
 0,     OP_RT,       31<<26 | 1<<16 | 0<<11 | 339<<1,        "mfxer",
-0,     OP_RT,       31<<26 | 9<<16 | 0<<11 | 467<<1,        "mtctr",
+0,     OP_RS,       31<<26 | 9<<16 | 0<<11 | 467<<1,        "mtctr",
-0,     OP_RT,       31<<26 | 8<<16 | 0<<11 | 467<<1,        "mtlr",
+0,     OP_RS,       31<<26 | 8<<16 | 0<<11 | 467<<1,        "mtlr",
-0,     OP_RT,       31<<26 | 1<<16 | 0<<11 | 467<<1,        "mtxer",
+0,     OP_RS,       31<<26 | 1<<16 | 0<<11 | 467<<1,        "mtxer",
 /* extended m for condition register (page 154) */
 0,     OP_RS,       31<<26 | 0<<20 | 255<<12 | 144<<1,      "mtcr",
 /* Floating point instructions (page 83) */
 /* page 98 */
 0,     OP_FRT_RA_D,           48<<26,                                  "lfs",
 0,     OP_FRT_RA_RB,          31<<26 | 535<<1,                         "lfsx",
 0,     OP_FRT_RA_D,           49<<26,                                  "lfsu",
@ -606,6 +613,7 @@
 0,     OP_FRS_RA_RB,          31<<26 | 759<<1,                         "stfdux",
 0,     OP_FRS_RA_RB,          31<<26 | 983<<1,                         "stfiwx",
 /* page 104 */
 0,     OP_FRT_FRB_C,          63<<26 | 72<<1,                          "fmr",
 0,     OP_FRT_FRB_C,          63<<26 | 40<<1,                          "fneg",
 0,     OP_FRT_FRB_C,          63<<26 | 264<<1,                         "fabs",
@ -629,6 +637,7 @@
 0,     OP_FRT_FRA_FRC_FRB_C,  63<<26 | 30<<1,                          "fnmsub",
 0,     OP_FRT_FRA_FRC_FRB_C,  59<<26 | 30<<1,                          "fnmsubs",
 /* page 109 */
 0,     OP_FRT_FRB_C,          63<<26 | 12<<1,                          "frsp",
 0,     OP_FRT_FRB_C,          63<<26 | 814<<1,                         "fctid",
 0,     OP_FRT_FRB_C,          63<<26 | 815<<1,                         "fctidz",
@ -652,4 +661,31 @@
 0,     OP_FRT_FRB_C,          63<<26 | 26<<1,                          "frsqrte",
 0,     OP_FRT_FRA_FRC_FRB_C,  63<<26 | 23<<1,                          "fsel",
-/* page 98 */
+/* Storage control instructions (Book II, page 15) */
 /* Book II, page 17 */
 0,     OP_RA_RB,              31<<26 | 982<<1,              "icbi",
 0,     OP_RA_RB_TH /* page 35 */,          31<<26 | 278<<1, "dcbt",
 0,     OP_RA_RB,              31<<26 | 246<<1,              "dcbtst",
 0,     OP_RA_RB,              31<<26 | 1014<<1,             "dcbz",
 0,     OP_RA_RB,              31<<26 | 54<<1,               "dcbst",
 0,     OP_RA_RB,              31<<26 | 86<<1,               "dcbf",
 0,     OP,                    19<<26 | 150<<1,              "isync",
 0,     OP_RT_RA_RB,           31<<26 | 20<<1,               "lwarx",
 0,     OP_RT_RA_RB,           31<<26 | 84<<1,               "ldarx",
 /* Store conditional: stwcx. is extended opcode 150 and stdcx. is 214.
  * Both always have the Rc bit (1<<0) set; the mandatory "." is parsed
  * by the OP_RS_RA_RB_CC rule.
  */
 0,     OP_RS_RA_RB_CC,        31<<26 | 150<<1 | 1<<0,       "stwcx",
 0,     OP_RS_RA_RB_CC,        31<<26 | 214<<1 | 1<<0,       "stdcx",
 0,     OP_L,                  31<<26 | 598<<1,              "sync",
 0,     OP,                    31<<26 | 1<<21 | 598<<1,      "lwsync",
 0,     OP,                    31<<26 | 2<<21 | 598<<1,      "ptesync",
 0,     OP,                    31<<26 | 854<<1,              "eieio",
 /* Time base (Book II, page 30) */
 0,     OP_RT_TBR,   31<<26 | 371<<1,                        "mftb",
 0,     OP_RT,       31<<26 | 8<<11 | 13<<16 | 371<<1,       "mftbu",
 /* External control (Book II, page 33) */
 0,     OP_RT_RA_RB,           31<<26 | 310<<1,              "eciwx",
 0,     OP_RS_RA_RB,           31<<26 | 438<<1,              "ecowx",
--- a/mach/powerpc/as/mach4.c
+++ b/mach/powerpc/as/mach4.c
@ -42,7 +42,23 @@ operation
 	| OP_FRT_RA_D          FPR ',' e16 '(' GPR ')'    { emit_hl($1 | ($2<<21) | ($6<<16) | $4); }
 	| OP_FRT_RA_RB         FPR ',' GPR ',' GPR        { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); }
 	| OP_FRT_C             c FPR                      { emit4($1 | $2 | ($3<<21)); }
-	| OP_RA_RS_C           c GPR ',' GPR              { emit4($1 | $2 | ($5<<21) | ($3<<16)); }
+	| OP_L                              { emit4($1); }
 	| OP_L                 u2           { emit4($1 | ($2<<21)); }
 	| OP_LEV                            { emit4($1); }
 	| OP_LEV               u7           { emit4($1 | ($2<<5)); }
 	| OP_RA_RB             GPR ',' GPR
 	{ emit4($1 | ($2<<16) | ($4<<11)); }
 	| OP_RA_RB_TH          GPR ',' GPR opt_bh
 	{ emit4($1 | $5 | ($2<<16) | ($4<<11)); }
 	/*
 	 * For instructions with "mnemonic RS, RA, ..."
 	 * OP_RA_RS_... swaps RS and RA to (RA<<21) || (RS<<16)
 	 * OP_RS_RA_... keeps RS and RA as (RS<<21) || (RA<<16)
 	 */
 	| OP_RA_RS_C           c GPR ',' GPR
 	{ emit4($1 | $2 | ($5<<21) | ($3<<16)); }
 	| OP_RA_RS_RA_C        c GPR ',' GPR
 	{ emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); }
 	| OP_RA_RS_RB_C        c GPR ',' GPR ',' GPR
 	{ emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); }
 	| OP_RA_RS_RB_MB5_ME5_C c GPR ',' GPR ',' GPR ',' u5 ',' u5
@ -75,20 +91,19 @@ operation
 	| OP_RT_RB_RA_C        c GPR ',' GPR ',' GPR      { emit4($1 | $2 | ($3<<21) | ($7<<16) | ($5<<11)); }
 	| OP_RT_SI             GPR ',' e16                { emit_hl($1 | ($2<<21) | $4); }
 	| OP_RT_SPR            GPR ',' spr_num            { emit4($1 | ($2<<21) | ($4<<11)); }
 	| OP_RT_TBR            GPR opt_tbr                { emit4($1 | ($2<<21) | ($3<<11)); }
 	| OP_RS                GPR                        { emit4($1 | ($2<<21)); }
 	| OP_RS_FXM            u7 ',' GPR                 { emit4($1 | ($4<<21) | ($2<<12)); }
 	| OP_RS_RA_D           GPR ',' e16 '(' GPR ')'    { emit_hl($1 | ($2<<21) | ($6<<16) | $4); }
 	| OP_RS_RA_DS          GPR ',' ds '(' GPR ')'     { emit_hl($1 | ($2<<21) | ($6<<16) | $4); }
 	| OP_RS_RA_NB          GPR ',' GPR ',' nb         { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); }
 	| OP_RS_RA_RB          GPR ',' GPR ',' GPR        { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); }
-	| OP_RS_RA_RB_C        c GPR ',' GPR ',' GPR      { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); }
+	| OP_RS_RA_RB_CC       C GPR ',' GPR ',' GPR      { emit4($1 | ($3<<21) | ($5<<16) | ($7<<11)); }
 	| OP_RS_RA_RA_C        c GPR ',' GPR              { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); }
 	| OP_RS_SPR            spr_num ',' GPR            { emit4($1 | ($4<<21) | ($2<<11)); }
 	| OP_TO_RA_RB          u5 ',' GPR ',' GPR         { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); }
 	| OP_TO_RA_SI          u5 ',' GPR ',' e16         { emit_hl($1 | ($2<<21) | ($4<<16) | $6); }
 	| OP_TOX_RA_RB         GPR ',' GPR                { emit4($1 | ($2<<16) | ($4<<11)); }
 	| OP_TOX_RA_SI         GPR ',' e16                { emit_hl($1 | ($2<<16) | $4); }
 	| OP_LEV                                          { emit4($1); }
 	| OP_LEV               u7                         { emit4($1 | ($2<<5)); }
 	| OP_LIA               lia                        { emit4($1 | $2); }
 	| OP_LIL               lil                        { emit4($1 | $2); }
 	| OP_LI32              li32                       /* emitted in subrule */
@ -298,7 +313,7 @@ u2
 	}
 	;
-/* Optional comma, branch hint. */
+/* Optional comma, branch hint (or touch hint). */
 opt_bh
 	: /* nothing */         { $$ = 0; }
 	| ',' u2                { $$ = ($2<<11); }
@ -409,13 +424,28 @@ lia
 	}
 	;
 /*
 * Instructions "mfspr", "mtspr", and "mftb" encode the 10-bit special
 * purpose register (spr) or time base register (tbr) by swapping the
 * low 5 bits with the high 5 bits.  The value from an SPR token has
 * already been swapped.
 */
 spr_num
-	: SPR { $$ = $1; }
+	: SPR     { $$ = $1; }
-	| absexp
+	| tbr_num { $$ = $1; }
 	;
 opt_tbr
 	: /* nothing */         { $$ = 8 | (12<<5); }
 	| ',' tbr_num           { $$ = $2; }
 	;
 tbr_num
 	: absexp
 	{
 		if (($1 < 0) || ($1 > 0x3ff))
-			serror("spr number out of range");
+			serror("10-bit unsigned value out of range");
 		/* mfspr, mtspr swap the low and high 5 bits */
 		$$ = ($1 >> 5) | (($1 & 0x1f) << 5);
 	}
 	;
--- a/mach/powerpc/libem/aar4.s
+++ b/mach/powerpc/libem/aar4.s
@ -8,21 +8,17 @@
 .define .aar4
 .aar4:
 	lis r0, hi16[.trap_earray]
 	ori r0, r0, lo16[.trap_earray]
 	mtspr ctr, r0            ! load CTR with trap address
 	lwz r4, 0(sp)            ! r4 = address of descriptor
 	lwz r5, 4(sp)            ! r5 = index
 	lwz r6, 8(sp)            ! r6 = address of array
 	lwz r0, 0(r4)
 	subf. r5, r0, r5         ! subtract lower bound from index
-	bltctr                   ! check lower bound
+	blt .trap_earray         ! check lower bound
 	lwz r0, 4(r4)
 	cmplw r5, r0
-	bgtctr                   ! check upper bound
+	bgt .trap_earray         ! check upper bound
 	lwz r3, 8(r4)            ! r3 = size of element
 	mullw r5, r5, r3         ! scale index by size
@ -30,3 +26,7 @@
 	stw r6, 8(sp)            ! push address of element
 	addi sp, sp, 8
 	blr
 .trap_earray:
 	li r3, 0                 ! EARRAY = 0 in h/em_abs.h
 	b .trp
--- a/mach/powerpc/libem/bls4.s
+++ b/mach/powerpc/libem/bls4.s
@ -0,0 +1,19 @@
 .sect .text
 ! Does a block move of words between non-overlapping buffers.
 !  Stack: ( src dst len -- )
 ! len is a byte count; len / 4 words are copied from src to dst in
 ! ascending address order.  All three arguments are popped.
 ! Uses r3, r4, r5 and CTR.
 ! NOTE(review): if len < 4 then CTR is loaded with 0, and bdnz
 ! decrements CTR before testing it, so the loop would not stop after
 ! one pass -- confirm that callers never pass such a length.
 .define .bls4
 .bls4:
 	lwz	r3, 0(sp)	! r3 = len (bytes)
 	lwz	r4, 4(sp)	! r4 = dst
 	lwz	r5, 8(sp)	! r5 = src
 	addi	sp, sp, 12	! pop all three arguments
 	srwi	r3, r3, 2	! r3 = len / 4 = number of words
 	mtspr	ctr, r3		! ctr = number of words to copy
 	addi	r5, r5, -4	! bias both pointers down by one word, because
 	addi	r4, r4, -4	!   lwzu/stwu add 4 before each access
 1:	lwzu	r3, 4(r5)	! r3 = next source word, r5 += 4
 	stwu	r3, 4(r4)	! store it to destination, r4 += 4
 	bdnz	1b		! loop ctr times
 	blr
--- a/mach/powerpc/libem/build.lua
+++ b/mach/powerpc/libem/build.lua
@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do
 	acklibrary {
 		name = "lib_"..plat,
 		srcs = {
-			"./*.s", -- rm ret.s
+			"./*.s", -- dus4.s
 		},
 		vars = { plat = plat },
 		deps = {
@ -15,4 +15,3 @@ for _, plat in ipairs(vars.plats) do
 		}
 	}
 end
--- a/mach/powerpc/libem/cfu8.s
+++ b/mach/powerpc/libem/cfu8.s
@ -1,3 +1,5 @@
 .sect .text; .sect .rom; .sect .data; .sect .bss
 .sect .text
 ! Converts a 64-bit double into a 32-bit unsigned integer.
@ -6,32 +8,42 @@
 .define .cfu8
 .cfu8:
-	lis r3, ha16[.fd_00000000]
+	lfd f1, 0(sp)                   ! f1 = value to convert
-	lfd f0, lo16[.fd_00000000](r3) ! f0 = 0.0
+	lis r3, ha16[.fs_80000000]
-
+	lfs f2, lo16[.fs_80000000](r3)  ! f2 = 2**31
-	lfd f1, 0(sp)            ! value to be converted
+	fsub   f1, f1, f2
-
+	fctiwz f1, f1         ! convert value - 2**31
-	lis r3, ha16[.fd_FFFFFFFF]
+	stfd   f1, 0(sp)
-	lfd f3, lo16[.fd_FFFFFFFF](r3) ! f3 = 0xFFFFFFFF
+	lwz   r3, 4(sp)
-
+	xoris r3, r3, 0x8000  ! add 2**31
-	lis r3, ha16[.fd_80000000]
+	stw   r3, 4(sp)
-	lfd f4, lo16[.fd_80000000](r3) ! f4 = 0x80000000
+	addi  sp, sp, 4
 	fsel f2, f1, f1, f0
 	fsub f5, f3, f1
 	fsel f2, f5, f2, f3
 	fsub f5, f2, f4
 	fcmpu cr0, f2, f4
 	fsel f2, f5, f5, f2
 	fctiwz f2, f2
 	stfd f2, 0(sp)
 	addi sp, sp, 4
 	bltlr
 	lwz r3, 0(sp)
 	xoris r3, r3, 0x8000
 	stw r3, 0(sp)
 	blr
 .sect .rom
 .fs_80000000:
 	!float 2.147483648e+9 sz 4
 	.data1 0117,00,00,00
 ! Freescale and IBM provide an example using fsel to select value or
 ! value - 2**31 for fctiwz.  The following code adapts Freescale's
 ! _Programming Environments Manual for 32-Bit Implementations of the
 ! PowerPC Architecture_, section C.3.2, pdf page 557.
 !
 ! Given f2 = value clamped from 0 to 2**32 - 1, f4 = 2**31, then
 !	fsub	f5, f2, f4
 !	fcmpu	cr2, f2, f4
 !	fsel	f2, f5, f5, f2
 !	fctiwz	f2, f2
 !	stfdu	f2, 0(sp)
 !	lwz	r3, 4(sp)
 !	blt	cr2, 1f
 !	xoris	r3, r3, 0x8000
 ! 1: yields r3 = the converted value.
 !
 ! Debian's clang 3.5.0-10 and gcc 4.9.2-10 don't clamp the value
 ! before conversion.  They avoid fsel and use the conditional branch
 ! to pick between 2 fctiwz instructions.
 !
 ! PowerPC 601 lacks fsel (but kernel might trap and emulate fsel).
 ! PowerPC 603, 604, G3, G4, G5 have fsel.
--- a/mach/powerpc/libem/csa.s
+++ b/mach/powerpc/libem/csa.s
@ -13,22 +13,21 @@
 	lwz r4, 4(sp)
 	addi sp, sp, 8
-	lwz r5, 0(r3)            ! load default
+	lwz r5, 0(r3)            ! r5 = default target
 	mtspr ctr, r5
-	lwz r5, 4(r3)            ! fetch lower bound
+	lwz r6, 4(r3)            ! fetch lower bound
-	subf. r4, r5, r4         ! adjust value
+	subf. r4, r6, r4         ! adjust value
-	bltctr                   ! jump to default if out of range
+	blt 1f                   ! jump to default if out of range
-	lwz r5, 8(r3)            ! fetch range
+	lwz r6, 8(r3)            ! fetch range
-	cmplw r4, r5
+	cmplw r4, r6
-	bgtctr                   ! jump to default if out of range
+	bgt 1f                   ! jump to default if out of range
 	addi r3, r3, 12          ! skip header
 	slwi r4, r4, 2           ! scale value (<<2)
-	lwzx r5, r3, r4          ! load target
+	lwzx r5, r3, r4          ! r5 = new target
 	mtspr ctr, r5
-	or. r5, r5, r5           ! test it
+1:	mtspr ctr, r5
 	mr. r5, r5               ! test it
 	bnectr                   ! jump to target if non-zero
 	b .trap_ecase            ! otherwise trap
--- a/mach/powerpc/libem/csb.s
+++ b/mach/powerpc/libem/csb.s
@ -13,23 +13,20 @@
 	lwz r4, 4(sp)
 	addi sp, sp, 8
-	lwz r5, 0(r3)            ! load default
+	lwz r5, 0(r3)            ! r5 = default target
 	mtspr ctr, r5
 	lwz r6, 4(r3)            ! fetch count
-
+	mr. r6, r6               ! skip loop if count is zero
-1:
+	beq 3f                   !   (needed by Modula-2 "CASE i OF END")
-	or. r6, r6, r6           ! test count
+	mtspr ctr, r6
-	beqctr                   ! exit if zero
+1:	lwzu r7, 8(r3)           ! fetch target index, increment pointer
 	addi r6, r6, -1          ! otherwise decrement
 	lwzu r7, 8(r3)           ! fetch target index, increment pointer
 	cmpw r4, r7              ! compare with value
-	bne 1b                   ! if not equal, go again
+	beq 2f
 	bdnz 1b                  ! if not equal, go again
 	b 3f
-	lwz r7, 4(r3)            ! fetch target address
+2:	lwz r5, 4(r3)            ! r5 = new target
-	mtspr ctr, r7
+3:	mtspr ctr, r5
-
+	mr. r5, r5               ! test target
 	or. r7, r7, r7           ! test it
 	bnectr                   ! jump to target if non-zero
 	b .trap_ecase            ! otherwise trap
--- a/mach/powerpc/libem/dus4.s
+++ b/mach/powerpc/libem/dus4.s
@ -0,0 +1,16 @@
 .sect .text
 ! Duplicates some words on top of stack.
 !  Stack: ( a size -- a a )
 ! size is the size of a in bytes; size / 4 words are copied.
 ! Uses r3, r4, r5 and CTR.
 .define .dus4
 .dus4:
 	lwz	r3, 0(sp)	! r3 = size
 	addi	sp, sp, 4	! drop size; sp now points at a
 	srwi	r4, r3, 2
 	mtspr	ctr, r4		! ctr = size / 4 = number of words
 	add	r5, sp, r3	! r5 = pointer just past the end of a
 1:	lwzu	r4, -4(r5)	! fetch words of a from its far end back toward sp
 	stwu	r4, -4(sp)	! push each word, building the copy below a
 	bdnz	1b		! loop ctr times
 	blr
--- a/mach/powerpc/libem/exg.s
+++ b/mach/powerpc/libem/exg.s
@ -0,0 +1,22 @@
 .sect .text
 ! Exchange top two values on stack.
 !   Stack: ( a b size -- b a )
 ! size is the size in bytes of each value; a and b are the same size
 ! and are swapped in place, one word at a time.
 ! Uses r3 through r7 and CTR.
 .define .exg
 .exg:
 	lwz	r3, 0(sp)		! r3 = size
 	srwi	r7, r3, 2
 	mtspr	ctr, r7			! ctr = size / 4
 	mr	r4, sp			! r4 = pointer before value b
 	add	r5, r4, r3		! r5 = pointer before value a
 	! Loop to swap each pair of words.
 1:	lwzu	r6, 4(r4)		! r6 = next word of b, r4 += 4
 	lwzu	r7, 4(r5)		! r7 = matching word of a, r5 += 4
 	stw	r6, 0(r5)		! b's word goes into a's slot
 	stw	r7, 0(r4)		! a's word goes into b's slot
 	bdnz	1b			! loop ctr times
 	addi	sp, sp, 4		! drop size from stack
 	blr
--- a/mach/powerpc/libem/fd_80000000.s
+++ b/mach/powerpc/libem/fd_80000000.s
@ -1,10 +0,0 @@
 .sect .text; .sect .rom; .sect .data; .sect .bss
 .sect .rom
 ! Contains a handy double-precision 0x80000000.
 .define .fd_80000000
 .fd_80000000:
 	!float 2.147483648e+9 sz 8
 	.data1 0101,0340,00,00,00,00,00,00
--- a/mach/powerpc/libem/fd_FFFFFFFF.s
+++ b/mach/powerpc/libem/fd_FFFFFFFF.s
@ -1,10 +0,0 @@
 .sect .text; .sect .rom; .sect .data; .sect .bss
 .sect .rom
 ! Contains a handy double-precision 0xFFFFFFFF.
 .define .fd_FFFFFFFF
 .fd_FFFFFFFF:
 	!float 4.294967295e+9 sz 8
 	.data1 0101,0357,0377,0377,0377,0340,00,00
--- a/mach/powerpc/libem/fef4.s
+++ b/mach/powerpc/libem/fef4.s
@ -0,0 +1,48 @@
 .sect .text
 ! Split a single-precision float into fraction and exponent, like
 ! frexpf(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp
 !
 ! Stack: ( single -- fraction exponent )
 !
 ! The fraction overwrites the input word and the exponent is pushed
 ! on top of it.  Infinity and NaN are stored back unchanged as the
 ! fraction.  Uses r3, r5, r6 and cr0.
 .define .fef4
 .fef4:
 	lwz r3, 0(sp)			! r3 = word of float bits
 	! IEEE single = sign * 1.fraction * 2**(exponent - 127)
 	!   sign  exponent  fraction
 	!   0     1..8      9..31
 	!
 	! IEEE exponent = 126 in [0.5, 1) or (-1, -0.5].
 	extrwi. r6, r3, 8, 1		! r6 = IEEE exponent
 	beq 3f				! jump if zero or denormalized
 	cmpwi r6, 255			! IEEE exponent 255 = infinity or NaN
 	addi r5, r6, -126		! r5 = our exponent
 	! addi does not change cr0, so beq still tests the cmpwi.
 	beq 2f				! jump if infinity or NaN
 	! fall through if normalized
 	! Put fraction in [0.5, 1) or (-1, -0.5].
 1:	li r6, 126
 	insrwi r3, r6, 8, 1		! IEEE exponent = 126
 	! fall through
 2:	stw r3, 0(sp)			! push fraction
 	stwu r5, -4(sp)			! push exponent
 	blr
 	! Got denormalized number or zero, probably zero.
 	! If zero, then exponent must also be zero.
 3:	extrwi. r6, r3, 23, 9		! r6 = fraction
 	bne 4f				! jump if not zero
 	li r5, 0			! exponent = 0
 	b 2b				! push the zero unchanged
 	! Got denormalized number = 0.fraction * 2**-126
 4:	cntlzw r5, r6			! r5 = leading zero bits in fraction
 	addi r5, r5, -8			! r5 = shift that moves top 1 bit to bit 8
 	slw r6, r6, r5			! shift left to make 1.fraction
 	insrwi r3, r6, 23, 9		! set new fraction
 	! Our exponent for IEEE exponent 1 is -126 + 1; each bit of
 	! left shift lowers it by one.
 	li r6, -126 + 1
 	subf r5, r5, r6			! r5 = our exponent
 	b 1b				! go set IEEE exponent = 126
--- a/mach/powerpc/libem/fef8.s
+++ b/mach/powerpc/libem/fef8.s
@ -3,7 +3,7 @@
 .sect .text
 ! Split a double-precision float into fraction and exponent, like
-! frexp(3) in C.
+! frexp(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp
 !
 ! Stack: ( double -- fraction exponent )
@ -12,42 +12,41 @@
 	lwz r3, 0(sp)			! r3 = high word (bits 0..31)
 	lwz r4, 4(sp)			! r4 = low word (bits 32..63)
-	! IEEE double-precision format:
+	! IEEE double = sign * 1.fraction * 2**(exponent - 1023)
 	!   sign  exponent  fraction
 	!   0     1..11     12..63
 	!
-	! To get fraction in [0.5, 1) or (-1, -0.5], we subtract 1022
+	! IEEE exponent = 1022 in [0.5, 1) or (-1, -0.5].
 	! from the IEEE exponent.
 	extrwi. r6, r3, 11, 1		! r6 = IEEE exponent
-	addi r5, r6, -1022		! r5 = our exponent
+	beq 3f				! jump if zero or denormalized
 	beq 2f				! jump if zero or denormalized
 	cmpwi r6, 2047
-	beq 1f				! jump if infinity or NaN
+	addi r5, r6, -1022		! r5 = our exponent
 	beq 2f				! jump if infinity or NaN
 	! fall through if normalized
-	! Put fraction in [0.5, 1) or (-1, -0.5] by setting its
+	! Put fraction in [0.5, 1) or (-1, -0.5].
-	! IEEE exponent to 1022.
+1:	li r6, 1022
-	rlwinm r3, r3, 0, 12, 0		! clear old exponent
+	insrwi r3, r6, 11, 1		! IEEE exponent = 1022
 	oris r3, r3, 1022 << 4		! set new exponent
 	! fall through
-1:	stw r3, 0(sp)
+2:	stw r3, 0(sp)
 	stw r4, 4(sp)			! push fraction
 	stwu r5, -4(sp)			! push exponent
 	blr
-2:	! Got denormalized number or zero, probably zero.
+	! Got denormalized number or zero, probably zero.
-	extrwi r6, r3, 22, 12
+	! If zero, then exponent must also be zero.
 3:	extrwi r6, r3, 20, 12
 	or. r6, r6, r4			! r6 = high|low fraction
-	bne 3f				! jump if not zero
+	bne 4f				! jump if not zero
 	li r5, 0			! exponent = 0
-	b 1b
+	b 2b
-3:	! Got denormalized number, not zero.
+	! Got denormalized number = 0.fraction * 2**-1022
-	lfd f0, 0(sp)
+4:	lfd f0, 0(sp)
-	lis r6, ha16[_2_64]
+	lis r6, ha16[.fs_2_64]
-	lfd f1, lo16[_2_64](r6)
+	lfs f1, lo16[.fs_2_64](r6)
 	fmul f0, f0, f1			! multiply it by 2**64
 	stfd f0, 0(sp)
 	lwz r3, 0(sp)
@ -57,7 +56,6 @@
 	b 1b
 .sect .rom
-_2_64:
+.fs_2_64:
-	! (double) 2**64
+	!float 1.84467440737095516e+19 sz 4
-	.data4 0x43f00000
+	.data1 0137,0200,00,00
 	.data4 0x00000000
--- a/mach/powerpc/libem/fif4.s
+++ b/mach/powerpc/libem/fif4.s
@ -0,0 +1,64 @@
 .sect .text
 ! Multiplies two single-precision floats, then splits the product into
 ! fraction and integer, both as floats, like modff(3) in C,
 ! http://en.cppreference.com/w/c/numeric/math/modf
 !
 ! Stack: ( a b -- fraction integer )
 !
 ! The two 4-byte results overwrite the two 4-byte arguments, so sp is
 ! left unchanged: integer at 0(sp), fraction at 4(sp).
 ! Uses r3, r5, r6, f1 and f2, and changes cr0.
 .define .fif4
 .fif4:
 	lfs f1, 4(sp)
 	lfs f2, 0(sp)
 	fmuls f1, f1, f2		! f1 = a * b
 	stfs f1, 0(sp)
 	lwz r3, 0(sp)			! r3 = word of float bits
 	! IEEE single = sign * 1.fraction * 2**(exponent - 127)
 	!   sign  exponent  fraction
 	!   0     1..8      9..31
 	!
 	! Subtract 127 from the IEEE exponent.  If the result is from
 	! 0 to 23, then the IEEE fraction has that many integer bits.
 	extrwi r5, r3, 8, 1		! r5 = IEEE exponent
 	addic. r5, r5, -127		! r5 = nr of integer bits
 	blt 3f				! branch if no integer
 	cmpwi r5, 24
 	bge 4f				! branch if no fraction
 	! fall through if integer with fraction
 	! f1 has r5 = 0 to 23 integer bits in the IEEE fraction.
 	! There are 23 - r5 fraction bits.
 	li r6, 23
 	subf r6, r5, r6
 	! Shifting right then left by the same count zeroes the low
 	! r6 bits and keeps everything above them.
 	srw r3, r3, r6
 	slw r3, r3, r6			! clear fraction in word
 	! fall through
 1:	stw r3, 0(sp)
 	lfs f2, 0(sp)			! f2 = integer, reloaded from the word
 	fsubs f1, f1, f2		! fraction = value - integer
 2:	stfs f1, 4(sp)			! push fraction
 	stfs f2, 0(sp)			! push integer
 	blr
 	! f1 is a fraction without integer (or zero).
 	! Then integer is zero with same sign.
 3:	extlwi r3, r3, 1, 0		! extract sign bit
 	stfs f1, 4(sp)			! push fraction
 	stw r3, 0(sp)			! push integer = zero with sign
 	blr
 	! f1 is an integer without fraction (or infinity or NaN).
 	! Unless NaN, then fraction is zero with same sign.
 4:	fcmpu cr0, f1, f1		! unordered only if f1 is NaN
 	bun cr0, 5f
 	extlwi r3, r3, 1, 0		! extract sign bit
 	stw r3, 4(sp)			! push fraction = zero with sign
 	stfs f1, 0(sp)			! push integer
 	blr
 	! f1 is NaN, so both fraction and integer are NaN.
 5:	fmr f2, f1
 	b 2b
--- a/mach/powerpc/libem/fif8.s
+++ b/mach/powerpc/libem/fif8.s
@ -1,7 +1,8 @@
 .sect .text
 ! Multiplies two double-precision floats, then splits the product into
-! fraction and integer, like modf(3) in C.  On entry:
+! fraction and integer, both as floats, like modf(3) in C,
 ! http://en.cppreference.com/w/c/numeric/math/modf
 !
 ! Stack: ( a b -- fraction integer )
@ -14,20 +15,18 @@
 	lwz r3, 0(sp)			! r3 = high word
 	lwz r4, 4(sp)			! r4 = low word
-	! IEEE double-precision format:
+	! IEEE double = sign * 1.fraction * 2**(exponent - 1023)
 	!   sign  exponent  fraction
 	!   0     1..11     12..63
 	!
 	! Subtract 1023 from the IEEE exponent.  If the result is from
 	! 0 to 51, then the IEEE fraction has that many integer bits.
 	! (IEEE has an implicit 1 before its fraction.  If the IEEE
 	! fraction has 0 integer bits, we still have an integer.)
 	extrwi r5, r3, 11, 1		! r5 = IEEE exponent
 	addic. r5, r5, -1023		! r5 = nr of integer bits
-	blt 4f				! branch if no integer
+	blt 3f				! branch if no integer
 	cmpwi r5, 52
-	bge 5f				! branch if no fraction
+	bge 4f				! branch if no fraction
 	cmpwi r5, 21
 	bge 6f				! branch if large integer
 	! fall through if small integer
@ -44,22 +43,38 @@
 1:	stw r3, 0(sp)
 	stw r4, 4(sp)
 	lfd f2, 0(sp)			! integer = high word, low word
-2:	fsub f1, f1, f2			! fraction = value - integer
+	fsub f1, f1, f2			! fraction = value - integer
-3:	stfd f1, 8(sp)			! push fraction
+2:	stfd f1, 8(sp)			! push fraction
 	stfd f2, 0(sp)			! push integer
 	blr
-4:	! f1 is a fraction without integer.
+	! f1 is a fraction without integer (or zero).
-	fsub f2, f1, f1			! integer = zero
+	! Then integer is zero with same sign.
-	b 3b
+3:	extlwi r3, r3, 1, 0		! extract sign bit
 	li r4, 0
 	stfd f1, 8(sp)			! push fraction
 	stw r4, 4(sp)
 	stw r3, 0(sp)			! push integer = zero with sign
 	blr
-5:	! f1 is an integer without fraction (or infinity or NaN).
+	! f1 is an integer without fraction (or infinity or NaN).
-	fmr f2, f1			! integer = f1
+	! Unless NaN, then fraction is zero with same sign.
 4:	fcmpu cr0, f1, f1		! integer = f1
 	bun cr0, 5f
 	extlwi r3, r3, 1, 0		! extract sign bit
 	li r4, 0
 	stw r4, 12(sp)
 	stw r3, 8(sp)			! push fraction = zero with sign
 	stfd f1, 0(sp)			! push integer
 	blr
 	! f1 is NaN, so both fraction and integer are NaN.
 5:	fmr f2, f1
 	b 2b
-6:	! f1 has r5 = 21 to 51 to integer bits.
+	! f1 has r5 = 21 to 51 integer bits.
 	! Low word has 52 - r5 fraction bits.
-	li r6, 52
+6:	li r6, 52
 	subf r6, r5, r6
 	srw r4, r4, r6
 	slw r4, r4, r6			! clear fraction in low word
--- a/mach/powerpc/libem/inn.s
+++ b/mach/powerpc/libem/inn.s
@ -5,6 +5,9 @@
 /* Tests a bit in a bitset on the stack.
 *
 * Stack: ( bitset bitnum setsize -- bool )
 *
 * Some back ends push false if bitnum is too large.  We don't because
 * the compilers tend to pass a small enough bitnum.
 */
 .define .inn
--- a/mach/powerpc/libem/rck.s
+++ b/mach/powerpc/libem/rck.s
@ -2,6 +2,9 @@
 ! Bounds check. Traps if the value is out of range.
 !  Stack: ( value descriptor -- value )
 !
 ! This ".rck" only works with 4-byte integers.  The name is ".rck" and
 ! not ".rck4" because many back ends only do rck with the word size.
 .define .rck
 .rck:
@ -18,3 +21,7 @@
    bgt .trap_erange
    blr
 .trap_erange:
    li r3, 1       ! ERANGE = 1 in h/em_abs.h
    b .trp
--- a/mach/powerpc/libem/set.s
+++ b/mach/powerpc/libem/set.s
@ -2,6 +2,9 @@
 ! Create singleton set.
 !  Stack: ( bitnumber size -- set )
 !
 ! Some back ends trap ESET if bitnumber is out of range.  We don't
 ! because the compilers tend to pass a valid bitnumber.
 .define .set
 .set:
--- a/mach/powerpc/libem/trp.s
+++ b/mach/powerpc/libem/trp.s
@ -0,0 +1,56 @@
 .sect .text
 ! .trap_ecase: raises the ECASE trap by loading its number into r3
 ! and falling through into .trp.
 .define .trap_ecase
 .trap_ecase:
 	li	r3, 20			! ECASE = 20 in h/em_abs.h
 	! FALLTHROUGH to .trp
 ! Raises an EM trap.
 ! Expects r3 = trap number.
 !
 ! Returns at once if the trap number is at most 15 and its bit is set
 ! in .ignmask.  Otherwise calls the routine stored in .trppc, after
 ! resetting .trppc to 0, with the trap number at 0(sp); returns to
 ! our caller when that routine returns.  If .trppc is 0, writes
 ! "TRAP!\n" to file descriptor 2 and calls __exit(1).
 ! Uses r0, r4, r5, r6 and ctr, and changes cr0.
 .define .trp
 .trp:
 	cmplwi	r3, 15			! traps > 15 can't be ignored
 	bgt	1f
 	lis	r4, ha16[.ignmask]
 	lwz	r4, lo16[.ignmask](r4)	! load ignore mask
 	srw	r4, r4, r3		! move bit for this trap to bit 0
 	andi.	r4, r4, 1
 	bnelr				! return if ignoring trap
 1:	lis	r4, ha16[.trppc]
 	lwz	r5, lo16[.trppc](r4)	! r5 = user trap routine
 	mr.	r5, r5			! set cr0 from r5 to test for 0
 	beq	2f			! if no user trap routine, bail out
 	mtspr	ctr, r5
 	mfspr	r6, lr
 	li	r0, 0
 	! Frame of 8 bytes: trap number at 0(sp), our lr at 4(sp).
 	stwu	r3, -8(sp)		! push trap number
 	stw	r0, lo16[.trppc](r4)	! reset trap routine
 	stw	r6, 4(sp)		! save old lr
 	bctrl				! call trap routine
 	lwz	r0, 4(sp)
 	mtspr	lr, r0
 	addi	sp, sp, 8		! retract over stack usage
 	blr
 2:	! No trap handler.  Write error message, exit.
 	! Arguments go on the stack: fd at 0(sp), buffer at 4(sp),
 	! length at 8(sp).
 	li	r3, 2
 	stwu	r3, -12(sp)
 	lis	r4, ha16[message]
 	addi	r4, r4, lo16[message]
 	li	r5, 6
 	stw	r4, 4(sp)
 	stw	r5, 8(sp)
 	bl	_write			! write(2, message, 6)
 	li	r3, 1
 	stw	r3, 0(sp)		! reuse the slot at 0(sp) for the status
 	bl	__exit			! _exit(1)
 .sect .rom
 message:
 	.ascii "TRAP!\n"
--- a/mach/powerpc/mcg/table
+++ b/mach/powerpc/mcg/table
@ -241,6 +241,9 @@ PATTERNS
    out:(int)reg = ANY.I
        cost 1;
    out:(long)reg = ANY.L
        cost 1;
    out:(int)reg = COPYF.I(in:(float)reg)
        emit "stfsu %in, -4(sp)"
        emit "lwz %out, 0(sp)"
@ -306,10 +309,21 @@ PATTERNS
 		emit "lwz %out, %addr"
 		cost 4;
 #if 0
    /* FIXME: Doesn't work because %out.0 and %addr might share a
     * register, so it corrupts %addr before it loads %out.1. */
    out:(long)reg = LOAD.L(addr:address)
        emit "lwz %out.0, 4+%addr"
        emit "lwz %out.1, 0+%addr"
        cost 8;
 #else
    /* Works, but costs an extra instruction. */
    out:(long)reg = LOAD.L(addr:address)
        emit "la %out.1, %addr"
        emit "lwz %out.0, 4(%out.1)"
        emit "lwz %out.1, 0(%out.1)"
        cost 12;
 #endif
 	out:(int)ushort0 = LOADH.I(addr:address)
 		emit "lhz %out, %addr"
@ -566,6 +580,13 @@ PATTERNS
        emit "! COMPARESI.I(cr, 0)"
        cost 4;
    /* Unsigned compare of two longs: compare the .1 halves first and
     * let the .0 halves decide only when the .1 halves are equal.
     * NOTE(review): .1 looks like the high word (LOAD.L reads it from
     * offset 0) -- confirm.  "bne" names no CR field, so this relies
     * on %cr being cr0 -- confirm. */
    cr:(cr)cr = COMPAREUL.I(left:(long)reg, right:(long)reg)
        emit "cmpl %cr, 0, %left.1, %right.1"
        emit "bne 1f"
        emit "cmpl %cr, 0, %left.0, %right.0"
        emit "1:"
        cost 12;
 /* Booleans */
--- a/mach/powerpc/ncg/mach.c
+++ b/mach/powerpc/ncg/mach.c
@ -10,8 +10,13 @@
 #include <limits.h>
 #include <stdint.h>
 #include <stb.h>
 static int writing_stabs = 0;
 #ifdef REGVARS
 static long framesize;
 #endif
 void
 con_part(int sz, word w)
@ -51,32 +56,42 @@ con_mult(word sz)
 #define FL_MSB_AT_LOW_ADDRESS	1
 #include <con_float>
 /*
  * Writes a procedure prologue to codefile: copies lr into r0, lowers
  * sp by framesize + 8, stores the old fp at framesize(sp) and the
  * saved lr at framesize + 4(sp), then points fp at sp + framesize.
  */
 static void
 emit_prolog(void)
 {
 	fprintf(codefile, "mfspr r0, lr\n");
 	fprintf(codefile, "addi sp, sp, %ld\n", -framesize - 8);
 	fprintf(codefile, "stw fp, %ld(sp)\n", framesize);
 	fprintf(codefile, "stw r0, %ld(sp)\n", framesize + 4);
 	fprintf(codefile, "addi fp, sp, %ld\n", framesize);
 }
 void
 prolog(full nlocals)
 {
-	framesize = nlocals;
+	/*
 	 * For N_LSYM and N_PSYM stabs, we want gdb to use fp, not sp.
 	 * The trick is to use "stwu sp, _(sp)" then "addi fp, sp, 0"
 	 * before we save lr with "stw r0, _(sp)".
 	 *
 	 * Tried with Apple's gdb-696.  Refer to
 	 *  - gdb-696/src/gdb/rs6000-tdep.c, skip_prologue(), line 1101
 	 *  - gdb-696/src/gdb/macosx/ppc-macosx-frameinfo.c,
 	 *    ppc_parse_instructions(), line 717
 	 * https://opensource.apple.com/release/developer-tools-25.html
 	 */
 	fprintf(codefile, "mfspr r0, lr\n");
 	if (writing_stabs) {
 		fprintf(codefile, "stwu sp, -8(sp)\n");  /* for gdb */
 		fprintf(codefile, "stw fp, 0(sp)\n");
 	} else
 		fprintf(codefile, "stwu fp, -8(sp)\n");
 	fprintf(codefile, "addi fp, sp, 0\n");           /* for gdb */
 	fprintf(codefile, "stw r0, 4(sp)\n");
 #ifdef REGVARS
-	/* f_regsave() will call emit_prolog() */
+	framesize = nlocals;
 	/* regsave() increases framesize; f_regsave() adjusts sp. */
 #else
-	emit_prolog();
+	if (nlocals)
 		fprintf(codefile, "addi sp, sp, %ld\n", -nlocals);
 #endif
 }
 void
 mes(word type)
 {
-	int argt ;
+	int argt, a1, a2 ;
 	switch ( (int)type ) {
 	case ms_ext :
@ -91,6 +106,41 @@ mes(word type)
 				break ;
 			}
 		}
 	case ms_stb:
 		argt = getarg(str_ptyp | cst_ptyp);
 		if (argt == sp_cstx)
 			fputs(".symb \"\", ", codefile);
 		else {
 			fprintf(codefile, ".symb \"%s\", ", str);
 			argt = getarg(cst_ptyp);
 		}
 		a1 = argval;
 		argt = getarg(cst_ptyp);
 		a2 = argval;
 		argt = getarg(cst_ptyp|nof_ptyp|sof_ptyp|ilb_ptyp|pro_ptyp);
 		if (a1 == N_PSYM) {
 			/* Change offset from AB into offset from
 			   the frame pointer.
 			*/
 			argval += 8;
 		}
 		fprintf(codefile, "%s, 0x%x, %d\n", strarg(argt), a1, a2);
 		argt = getarg(end_ptyp);
 		break;
 	case ms_std:
 		writing_stabs = 1;  /* set by first "mes 13,...,100,0" */
 		argt = getarg(str_ptyp | cst_ptyp);
 		if (argt == sp_cstx)
 			str[0] = '\0';
 		else {
 			argt = getarg(cst_ptyp);
 		}
 		swtxt();
 		fprintf(codefile, ".symd \"%s\", 0x%x,", str, (int) argval);
 		argt = getarg(cst_ptyp);
 		fprintf(codefile, "%d\n", (int) argval);
 		argt = getarg(end_ptyp);
 		break;
 	default :
 		while ( getarg(any_ptyp) != sp_cend ) ;
 		break ;
@ -196,7 +246,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf)
 	for (reg = 31; reg >= 0; reg--) {
 		if (savedf[reg] != LONG_MIN) {
 			offset -= 8;
-			fprintf(codefile, "%s f%d, %ld(fp)\n",
+			fprintf(codefile, "%s f%d,%ld(fp)\n",
 				opf, reg, offset);
 		}
 	}
@ -213,7 +263,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf)
 		while (reg > 0 && savedi[reg - 1] != LONG_MIN)
 			reg--;
 		offset -= (32 - reg) * 4;
-		fprintf(codefile, "%s r%d, %ld(fp)\n", opm, reg, offset);
+		fprintf(codefile, "%s r%d,%ld(fp)\n", opm, reg, offset);
 	} else
 		reg = 32;
@ -221,7 +271,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf)
 	for (reg--; reg >= 0; reg--) {
 		if (savedi[reg] != LONG_MIN) {
 			offset -= 4;
-			fprintf(codefile, "%s r%d, %ld(fp)\n",
+			fprintf(codefile, "%s r%d,%ld(fp)\n",
 				ops, reg, offset);
 		}
 	}
@ -232,7 +282,8 @@ f_regsave(void)
 {
 	int reg;
-	emit_prolog();
+	if (framesize)
 		fprintf(codefile, "addi sp, sp, %ld\n", -framesize);
 	saveloadregs("stw", "stmw", "stfd");
 	/*
--- a/mach/powerpc/ncg/table
+++ b/mach/powerpc/ncg/table
--- a/mach/powerpc/top/table
+++ b/mach/powerpc/top/table
@ -1,12 +1,14 @@
-/* PowerPC desciptor table for ACK target optimizer */
+/* PowerPC table for ACK target optimizer */
-MAXOP 3;
+MAXOP 5;
 LABEL_STARTER '.';
 %%;
 L1, L2, L3, L4, L5  { not_using_sp(VAL) };
 RNZ                 { strcmp(VAL, "r0") };  /* not r0 */
 UP                  { positive(VAL) };
 X, Y, Z             { TRUE };
 %%;
@ -16,10 +18,74 @@ X, Y, Z             { TRUE };
 addi  RNZ, RNZ, 0            -> ;
 addis RNZ, RNZ, 0            -> ;
 addi RNZ, RNZ, X : addi RNZ, RNZ, Y { plus(X, Y, Z) }
                             -> addi RNZ, RNZ, Z ;
 /* Lower "addi sp, sp, UP" by lifting other instructions, looking for
 * chances to merge or delete _addi_ instructions, and assuming that
 * the code generator uses "sp" not "r1".
 */
 addi sp, sp, UP : ANY L1                 { lift(ANY) }
                             -> ANY L1                 : addi sp, sp, UP ;
 addi sp, sp, UP : ANY L1, L2             { lift(ANY) }
                             -> ANY L1, L2             : addi sp, sp, UP ;
 addi sp, sp, UP : ANY L1, L2, L3         { lift(ANY) }
                             -> ANY L1, L2, L3         : addi sp, sp, UP ;
 addi sp, sp, UP : ANY L1, L2, L3, L4     { lift(ANY) }
                             -> ANY L1, L2, L3, L4     : addi sp, sp, UP ;
 addi sp, sp, UP : ANY L1, L2, L3, L4, L5 { lift(ANY) }
                             -> ANY L1, L2, L3, L4, L5 : addi sp, sp, UP ;
 addi sp, sp, UP : lmw Y, L1 { Y[0]=='r' && atoi(Y+1)>1 }
                             -> lmw Y, L1 : addi sp, sp, UP ;
 /* Merge _addi_ when popping from the stack. */
 addi sp, sp, X : lwz L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
                             -> lwz L1, Z(sp) : addi sp, sp, X ;
 addi sp, sp, X : lfs L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
                             -> lfs L1, Z(sp) : addi sp, sp, X ;
 addi sp, sp, X : lfd L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
                             -> lfd L1, Z(sp) : addi sp, sp, X ;
 /* Lower or delete _addi_ when pushing to the stack. */
 addi sp, sp, X : stwu  L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
                             -> stw  L1, Z(sp) : addi sp, sp, Z ;
 addi sp, sp, X : stfsu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
                             -> stfs L1, Z(sp) : addi sp, sp, Z ;
 addi sp, sp, X : stfdu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
                             -> stfd L1, Z(sp) : addi sp, sp, Z ;
 addi sp, sp, 4 : stfdu L1, -8(sp) -> stfdu L1, -4(sp) ;
 /* Delete _addi_ when setting the stack pointer. */
 addi sp, sp, X : addi sp, L1, Y   -> addi sp, L1, Y ;
 addi sp, sp, X : lwz sp, L1       -> lwz sp, L1 ;
 or X, Y, Y                   -> mr X, Y ;
 or. X, Y, Y                  -> mr. X, Y ;
 mr X, X                      -> ;
 fmr X, X                     -> ;
-or X, Y, Z : or. X, X, X     -> or. X, Y, Z ;
+add X, Y, Z   : mr. X, X     -> add. X, Y, Z ;
 and X, Y, Z   : mr. X, X     -> and. X, Y, Z ;
 andc X, Y, Z  : mr. X, X     -> andc. X, Y, Z ;
 divw X, Y, Z  : mr. X, X     -> divw. X, Y, Z ;
 divwu X, Y, Z : mr. X, X     -> divwu. X, Y, Z ;
 /* extsb and extsh take only two operands (destination, source). */
 extsb X, Y    : mr. X, X     -> extsb. X, Y ;
 extsh X, Y    : mr. X, X     -> extsh. X, Y ;
 eqv X, Y, Z   : mr. X, X     -> eqv. X, Y, Z ;
 mullw X, Y, Z : mr. X, X     -> mullw. X, Y, Z ;
 nand X, Y, Z  : mr. X, X     -> nand. X, Y, Z ;
 nor X, Y, Z   : mr. X, X     -> nor. X, Y, Z ;
 or X, Y, Z    : mr. X, X     -> or. X, Y, Z ;
 orc X, Y, Z   : mr. X, X     -> orc. X, Y, Z ;
 slw X, Y, Z   : mr. X, X     -> slw. X, Y, Z ;
 slwi X, Y, Z  : mr. X, X     -> slwi. X, Y, Z ;
 subf X, Y, Z  : mr. X, X     -> subf. X, Y, Z ;
 sraw X, Y, Z  : mr. X, X     -> sraw. X, Y, Z ;
 srawi X, Y, Z : mr. X, X     -> srawi. X, Y, Z ;
 srw X, Y, Z   : mr. X, X     -> srw. X, Y, Z ;
 srwi X, Y, Z  : mr. X, X     -> srwi. X, Y, Z ;
 xor X, Y, Z   : mr. X, X     -> xor. X, Y, Z ;
 b X : labdef X               -> labdef X ;
@ -27,3 +93,98 @@ b X : labdef X               -> labdef X ;
 /* LT=0, GT=1, EQ=2, OV=3 */
 %%;
 /* Reports whether c may appear in a word: 0-9, A-Z, a-z or '_'. */
 static int isword(char c) {
 	if (c == '_')
 		return 1;
 	if (c >= '0' && c <= '9')
 		return 1;
 	if (c >= 'A' && c <= 'Z')
 		return 1;
 	return c >= 'a' && c <= 'z';
 }
 /* Returns 0 if operand _s_ mentions the stack pointer, spelled "sp"
  * or "r1" as a whole word; returns 1 otherwise. */
 int not_using_sp(const char *s) {
 	const char *p;
 	int at_start = 1;	/* is *p the first character of a word? */
 	for (p = s; *p != '\0'; p++) {
 		if (at_start) {
 			int is_sp = (p[0] == 's' && p[1] == 'p');
 			int is_r1 = (p[0] == 'r' && p[1] == '1');
 			/* The name must also end here, so "r10" or
 			 * "spam" do not count. */
 			if ((is_sp || is_r1) && !isword(p[2]))
 				return 0;
 		}
 		at_start = !isword(*p);
 	}
 	return 1;
 }
 /* Is _s_, taken in full, a decimal integer greater than zero? */
 int positive(const char *s) {
 	char *rest;
 	long value = strtol(s, &rest, 10);
 	if (*s == '\0')
 		return 0;	/* empty operand */
 	if (*rest != '\0')
 		return 0;	/* trailing text after the number */
 	return value > 0;
 }
 /* Instructions to lift(), sorted in strcmp() order.  These are from
  * ../ncg/table, minus branch instructions.
  *
  * lift() looks names up with bsearch(), so the order is significant:
  * an entry out of place may never be found.  In particular "stwu"
  * sorts before "stwx".
  */
 const char *liftables[] = {
 	"add", "add.", "addi",
 	"and", "andc", "andi.", "andis.",
 	"cmp", "cmpi", "cmpl", "cmpli",
 	"cmplw", "cmplwi", "cmpw", "cmpwi",
 	"divw", "divwu", "eqv", "extlwi", "extrwi", "extsb", "extsh",
 	"fadd", "fadds", "fcmpo", "fctiwz", "fdiv", "fdivs",
 	"fmr", "fmul", "fmuls", "fneg", "frsp", "fsub", "fsubs",
 	"lbz", "lbzx",
 	"lfd", "lfdu", "lfdx", "lfs", "lfsu", "lfsx",
 	"lha", "lhax", "lhz", "lhzx",
 	"li", "lis", "lwz", "lwzu", "lwzx",
 	"mfcr", "mfspr", "mr", "mr.", "mtspr", "mullw",
 	"nand", "neg", "nor", "or", "or.", "ori", "oris",
 	"rlwinm", "rlwnm", "rotlwi", "rotrwi",
 	"slw", "slwi", "sraw", "srawi", "srw", "srwi",
 	"stb", "stbx",
 	"stfd", "stfdu", "stfdx", "stfs", "stfsu", "stfsx",
 	"sth", "sthx", "stw", "stwu", "stwx",
 	"subf", "xor", "xori", "xoris",
 };
 /* bsearch() comparator: each argument points to a (const char *). */
 static int liftcmp(const void *a, const void *b) {
 	const char *const *lhs = a;
 	const char *const *rhs = b;
 	return strcmp(*lhs, *rhs);
 }
 /* May we lift instruction _s_ above "addi SP, SP, X"?
  *
  * Returns 1 if the mnemonic _s_ is listed in liftables[], else 0.
  * bsearch() yields a pointer, so compare it with NULL instead of
  * converting the pointer to int.
  */
 int lift(const char *s) {
 	return bsearch(&s, liftables,
 	    sizeof(liftables) / sizeof(liftables[0]),
 	    sizeof(liftables[0]), liftcmp) != NULL;
 }
 /* Is l within the range of a signed 16-bit integer? */
 static int fits16(long l) {
 	if (l < -32768)
 		return 0;
 	return l <= 32767;
 }
 /* Tries sum = a + b with signed 16-bit integers.
  *
  * _a_ and _b_ must each be, in full, a decimal integer that fits in
  * 16 bits.  On success, writes the decimal sum into the buffer at
  * _sum_ (at most 7 bytes, enough for "-32768" and its NUL) and
  * returns 1.  Returns 0 without writing to _sum_ if an operand is
  * not such an integer, or if the sum does not fit in 16 bits.
  */
 int plus(const char *a, const char *b, const char *sum)
 {
 	long la, lb, lsum;
 	char *end;
 	la = strtol(a, &end, 10);
 	if (*a == '\0' || *end != '\0' || !fits16(la))
 		return 0;
 	lb = strtol(b, &end, 10);
 	if (*b == '\0' || *end != '\0' || !fits16(lb))
 		return 0;
 	lsum = la + lb;
 	if (!fits16(lsum))
 		return 0;
 	/* _sum_ is the output buffer despite its const qualifier; the
 	 * explicit cast avoids handing a const pointer to snprintf(),
 	 * which expects char *. */
 	snprintf((char *)sum, 7, "%ld", lsum);
 	return 1;
 }
--- a/mach/proto/mcg/main.c
+++ b/mach/proto/mcg/main.c
@ -42,13 +42,14 @@ int main(int argc, char* const argv[])
    const char* inputfilename = NULL;
    const char* outputfilename = NULL;
    FILE* output;
    int i;
    program_name = argv[0];
    opterr = 1;
    for (;;)
    {
-        int c = getopt(argc, argv, "-d:D:C:o:");
+        int c = getopt(argc, argv, "d:D:C:o:");
        if (c == -1)
            break;
@ -79,18 +80,20 @@ int main(int argc, char* const argv[])
                    fatal("already specified an output file");
                outputfilename = optarg;
                break;
            case 1:
                if (inputfilename)
                    fatal("unexpected argument '%s'", optarg);
                inputfilename = optarg;
        }
    }
    for (i = optind; i < argc; i++)
    {
        if (inputfilename)
            fatal("unexpected argument '%s'", argv[i]);
        inputfilename = argv[i];
    }
    symbol_init();
-	if (!EM_open((char*) inputfilename))
+    if (!EM_open((char*) inputfilename))
-		fatal("couldn't open input '%s': %s",
+        fatal("couldn't open input '%s': %s",
            inputfilename ? inputfilename : "<stdin>", EM_error);
    if (outputfilename)
--- a/mach/proto/mcg/treebuilder.c
+++ b/mach/proto/mcg/treebuilder.c
@ -274,7 +274,7 @@ static struct ir* store(int size, struct ir* address, int offset, struct ir* val
    else
        opcode = IR_STORE;
-    if (offset > 0)
+    if (offset != 0)
        address = new_ir2(
            IR_ADD, EM_pointersize,
            address, new_wordir(offset)
@ -304,7 +304,7 @@ static struct ir* load(int size, struct ir* address, int offset)
    else
        opcode = IR_LOAD;
-    if (offset > 0)
+    if (offset != 0)
        address = new_ir2(
            IR_ADD, EM_pointersize,
            address, new_wordir(offset)
@ -416,6 +416,31 @@ static void helper_function(const char* name)
    );
 }
 static void helper_function_with_arg(const char* name, struct ir* arg)
 {
     /* Passes one argument to a helper function by (ab)using
      * IR_SETRET to place it in a register, then calls the helper.
      *
      * FIXME:  As of January 2018, mach/powerpc/libem takes an
      * argument in register r3 only for ".los4", ".sts4", ".trp".
      * This is an accident.  Should the argument be on the stack, or
      * should other helpers use a register? */
     struct ir* set_argument;
     struct ir* call_helper;

     materialise_stack();

     /* First the register set-up... */
     set_argument = new_ir1(IR_SETRET, arg->size, arg);
     appendir(set_argument);

     /* ...then the call itself. */
     call_helper = new_ir1(IR_CALL, 0, new_labelir(name));
     appendir(call_helper);
 }
 static void insn_simple(int opcode)
 {
    switch (opcode)
@ -437,6 +462,7 @@ static void insn_simple(int opcode)
        case op_cii: simple_convert(IR_FROMSI); break;
        case op_ciu: simple_convert(IR_FROMSI); break;
        case op_cui: simple_convert(IR_FROMUI); break;
        case op_cuu: simple_convert(IR_FROMUI); break;
        case op_cfu: simple_convert(IR_FROMUF); break;
        case op_cfi: simple_convert(IR_FROMSF); break;
        case op_cif: simple_convert(IR_FROMSI); break;
@ -496,10 +522,12 @@ static void insn_simple(int opcode)
        case op_lim:
        {
            /* Traps use only 16 bits of .ignmask, but we keep an
             * entire word, even if a word has more than 2 bytes. */
            push(
-                new_ir1(
+                load(
-                    (EM_wordsize == 2) ? IR_LOAD : IR_LOADH, EM_wordsize,
+                    EM_wordsize,
-                    new_labelir(".ignmask")
+                    new_labelir(".ignmask"), 0
                )
            );
            break;
@ -507,26 +535,34 @@ static void insn_simple(int opcode)
        case op_sim:
        {
            sequence_point();
            appendir(
-                new_ir2(
+                store(
-                    (EM_wordsize == 2) ? IR_STORE : IR_STOREH, EM_wordsize,
+                    EM_wordsize,
-                    new_labelir(".ignmask"),
+                    new_labelir(".ignmask"), 0,
                    pop(EM_wordsize)
                )
            );
            break;
        }
-        case op_trp: helper_function(".trp"); break;
+        case op_trp:
            helper_function_with_arg(".trp", pop(EM_wordsize));
            break;
        case op_sig:
        {
            struct ir* label = new_labelir(".trppc");
            struct ir* value = pop(EM_pointersize);
            push(
                load(
                    EM_pointersize,
                    label, 0
                )
            );
            appendir(
                store(
                    EM_pointersize,
-                    new_labelir(".trppc"), 0,
+                    label, 0,
                    value
                )
            );
@ -539,12 +575,13 @@ static void insn_simple(int opcode)
            break;
        }
-        /* FIXME: These instructions are really complex and barely used
+        case op_and: helper_function(".and"); break;
-         * (Modula-2 and Pascal set support, I believe). Leave them until
+        case op_ior: helper_function(".ior"); break;
-         * later. */
+        case op_xor: helper_function(".xor"); break;
-        case op_set: helper_function(".unimplemented_set"); break;
+        case op_com: helper_function(".com"); break;
-        case op_ior: helper_function(".unimplemented_ior"); break;
+        case op_cms: helper_function(".cms"); break;
-
+        case op_set: helper_function(".set"); break;
        case op_inn: helper_function(".inn"); break;
        case op_dch:
            push(
@ -670,6 +707,31 @@ static void simple_alu2(int opcode, int size, int irop, const char* fallback)
    }
 }
 /* Pops two operands of the given size and pushes the IR for a
  * rotate, built from two opposite shifts ORed together.  irop is the
  * shift in the direction of the rotate and irop_reverse the shift in
  * the other direction.  Sizes above two words are a fatal error. */
 static void rotate(int opcode, int size, int irop, int irop_reverse)
 {
     if (size > (2*EM_wordsize))
         fatal("treebuilder: can't do opcode %s with size %d", em_mnem[opcode - sp_fmnem], size);
     else
     {
         struct ir* right = pop(size);
         struct ir* left = pop(size);
         struct ir* bits = new_wordir(8 * size);
         /* a rol b -> (a << b) | (a >> (bits - b)), where bits is
          * 8 * size (32 when size is 4).  Both left and right are
          * referenced twice in the tree pushed below. */
         push(
             new_ir2(
                 IR_OR, size,
                 new_ir2(irop, size, left, right),
                 new_ir2(
                     irop_reverse, size,
                     left,
                     new_ir2(IR_SUB, size, bits, right)
                 )
             )
         );
     }
 }
 static struct ir* extract_block_refs(struct basicblock* bb)
 {
    struct ir* outir = NULL;
@ -720,26 +782,28 @@ static struct ir* ptradd(struct ir* address, int offset)
        );
 }
-static void blockmove(struct ir* dest, struct ir* src, struct ir* size)
+static struct ir* walk_static_chain(int level)
 {
-    /* memmove stack: ( size src dest -- ) */
+    struct ir* ir;
    push(size);
    push(src);
    push(dest);
-    materialise_stack();
+    /* The static chain, when it exists, is the first argument of each
-    appendir(
+     * procedure.  The chain begins with the current frame at level 0,
-        new_ir1(
+     * and continues until we reach the outermost procedure. */
-            IR_CALL, 0,
+    ir = new_ir0(
-            new_labelir("memmove")
+        IR_GETFP, EM_pointersize
        )
    );
    appendir(
        new_ir1(
            IR_STACKADJUST, EM_pointersize,
            new_wordir(EM_pointersize*2 + EM_wordsize)
        )
    );
    while (level--)
    {
        /* Walk to the next frame pointer. */
        ir = load(
            EM_pointersize,
            new_ir1(
                IR_FPTOAB, EM_pointersize,
                ir
            ), 0
        );
    }
    return ir;
 }
 static void insn_ivalue(int opcode, arith value)
@ -765,8 +829,10 @@ static void insn_ivalue(int opcode, arith value)
        case op_and: simple_alu2(opcode, value, IR_AND, ".and"); break;
        case op_ior: simple_alu2(opcode, value, IR_OR, ".ior"); break;
-        case op_xor: simple_alu2(opcode, value, IR_EOR, NULL); break;
+        case op_xor: simple_alu2(opcode, value, IR_EOR, ".xor"); break;
        case op_com: simple_alu1(opcode, value, IR_NOT, ".com"); break;
        case op_rol: rotate(opcode, value, IR_LSL, IR_LSR); break;
        case op_ror: rotate(opcode, value, IR_LSR, IR_LSL); break;
        case op_adf: simple_alu2(opcode, value, IR_ADDF, NULL); break;
        case op_sbf: simple_alu2(opcode, value, IR_SUBF, NULL); break;
@ -774,12 +840,23 @@ static void insn_ivalue(int opcode, arith value)
        case op_dvf: simple_alu2(opcode, value, IR_DIVF, NULL); break;
        case op_ngf: simple_alu1(opcode, value, IR_NEGF, NULL); break;
-        case op_cmu: /* fall through */
+        case op_cms:
-        case op_cms: push(tristate_compare(value, IR_COMPAREUI)); break;
+            if (value > (2*EM_wordsize))
            {
                push(new_wordir(value));
                helper_function(".cms");
                break;
            }
            /* fall through */
        case op_cmu: push(tristate_compare(value, IR_COMPAREUI)); break;
        case op_cmi: push(tristate_compare(value, IR_COMPARESI)); break;
        case op_cmf: push(tristate_compare(value, IR_COMPAREF)); break;
-        case op_rck: helper_function(".rck"); break;
+        case op_rck:
            if (value != EM_wordsize)
                fatal("'rck %d' not supported", value);
            helper_function(".rck");
            break;
        case op_set: push(new_wordir(value)); helper_function(".set"); break;
        case op_inn: push(new_wordir(value)); helper_function(".inn"); break;
@ -930,26 +1007,24 @@ static void insn_ivalue(int opcode, arith value)
            if (value > (EM_wordsize*2))
            {
-                /* We're going to need to do multiple stores; fix the address
+                /* We're going to need to do multiple loads; fix the address
                 * so it'll go into a register and we can do maths on it. */
                appendir(ptr);
            }
            /* Stack grows down.  Load backwards. */
            while (value > 0)
            {
                int s = EM_wordsize*2;
                if (value < s)
                    s = value;
-
+                value -= s;
                push(
                    load(
                        s,
-                        ptr, offset
+                        ptr, value
                    )
                );
                value -= s;
                offset += s;
            }
            assert(value == 0);
@ -1099,7 +1174,12 @@ static void insn_ivalue(int opcode, arith value)
        case op_dup:
        {
            sequence_point();
-            if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize))
+            if (value > (2*EM_wordsize))
            {
                push(new_wordir(value));
                helper_function(".dus4");
            }
            else if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize))
            {
                struct ir* v1 = pop(EM_wordsize);
                struct ir* v2 = pop(EM_wordsize);
@ -1117,12 +1197,30 @@ static void insn_ivalue(int opcode, arith value)
            break;
        }
        case op_dus:
        {
            if (value != EM_wordsize)
                fatal("'dus %d' not supported", value);
            helper_function(".dus4");
            break;
        }
        case op_exg:
        {
-            struct ir* v1 = pop(value);
+            if (value > (2*EM_wordsize))
-            struct ir* v2 = pop(value);
+            {
-            push(v1);
+                push(
-            push(v2);
+                    new_wordir(value)
                );
                helper_function(".exg");
            }
            else
            {
                struct ir* v1 = pop(value);
                struct ir* v2 = pop(value);
                push(v1);
                push(v2);
            }
            break;
        }
@ -1285,53 +1383,19 @@ static void insn_ivalue(int opcode, arith value)
        }
        case op_lxl:
-        {
+            push(
-            struct ir* ir;
+                walk_static_chain(value)
            /* Walk the static chain. */
            ir = new_ir0(
                IR_GETFP, EM_pointersize
            );
            while (value--)
            {
                ir = new_ir1(
                    IR_CHAINFP, EM_pointersize,
                    ir
                );
            }
            push(ir);
            break;
        }
        case op_lxa:
        {
            struct ir* ir;
            /* Walk the static chain. */
            ir = new_ir0(
                IR_GETFP, EM_pointersize
            );
            while (value--)
            {
                ir = new_ir1(
                    IR_CHAINFP, EM_pointersize,
                    ir
                );
            }
            push(
                new_ir1(
                    IR_FPTOAB, EM_pointersize,
-                    ir
+                    walk_static_chain(value)
                )
            );
            break;
        }
        case op_fef:
        {
@ -1394,6 +1458,7 @@ static void insn_ivalue(int opcode, arith value)
                    break;
                case 1:
                    materialise_stack();
                    push(
                        appendir(
                            new_ir0(
@ -1403,10 +1468,6 @@ static void insn_ivalue(int opcode, arith value)
                    );
                    break;
                case 2:
                    helper_function(".unimplemented_lor_2");
                    break;
                default:
                    fatal("'lor %d' not supported", value);
            }
@ -1436,10 +1497,6 @@ static void insn_ivalue(int opcode, arith value)
                    );
                    break;
                case 2:
                    helper_function(".unimplemented_str_2");
                    break;
                default:
                    fatal("'str %d' not supported", value);
            }
@ -1448,100 +1505,27 @@ static void insn_ivalue(int opcode, arith value)
        }
        case op_blm:
-        {
+            push(new_wordir(value));
-            /* Input stack: ( src dest -- ) */
+            helper_function(".bls4");
            struct ir* dest = pop(EM_pointersize);
            struct ir* src = pop(EM_pointersize);
            blockmove(dest, src, new_wordir(value));
            break;
        }
        case op_bls:
-        {
+            if (value != EM_wordsize)
-            /* Input stack: ( src dest size -- ) */
+                fatal("'bls %d' not supported", value);
-            struct ir* dest = pop(EM_pointersize);
+            helper_function(".bls4");
            struct ir* src = pop(EM_pointersize);
            struct ir* size = pop(EM_wordsize);
            blockmove(dest, src, size);
            break;
        }
        case op_los:
-        {
+            if (value != EM_wordsize)
-            /* Copy an arbitrary amount to the stack. */
+                fatal("'los %d' not supported", value);
-            struct ir* bytes = pop(EM_wordsize);
+            helper_function_with_arg(".los4", pop(EM_wordsize));
            struct ir* address = pop(EM_pointersize);
            materialise_stack();
            appendir(
                new_ir1(
                    IR_STACKADJUST, EM_pointersize,
                    new_ir1(
                        IR_NEG, EM_wordsize,
                        bytes
                    )
                )
            );
            push(
                new_ir0(
                    IR_GETSP, EM_pointersize
                )
            );
            push(address);
            push(bytes);
            materialise_stack();
            appendir(
                new_ir1(
                    IR_CALL, 0,
                    new_labelir("memcpy")
                )
            );
            appendir(
                new_ir1(
                    IR_STACKADJUST, EM_pointersize,
                    new_wordir(EM_pointersize*2 + EM_wordsize)
                )
            );
            break;
        }
        case op_sts:
-        {
+            if (value != EM_wordsize)
-            /* Copy an arbitrary amount from the stack. */
+                fatal("'sts %d' not supported", value);
-            struct ir* bytes = pop(EM_wordsize);
+            helper_function_with_arg(".sts4", pop(EM_wordsize));
            struct ir* dest = pop(EM_pointersize);
            struct ir* src;
            materialise_stack();
            src = appendir(
                    new_ir0(
                        IR_GETSP, EM_pointersize
                    )
                );
            push(dest);
            push(src);
            push(bytes);
            materialise_stack();
            appendir(
                new_ir1(
                    IR_CALL, 0,
                    new_labelir("memcpy")
                )
            );
            appendir(
                new_ir1(
                    IR_STACKADJUST, EM_pointersize,
                    new_ir2(
                        IR_ADD, EM_wordsize,
                        new_wordir(EM_pointersize*2 + EM_wordsize),
                        bytes
                    )
                )
            );
            break;
        }
        case op_lin:
        {
@ -1677,17 +1661,17 @@ static void insn_lvalue(int opcode, const char* label, arith offset)
        case op_gto:
        {
-            struct ir* descriptor = pop(EM_pointersize);
+            struct ir* descriptor = address_of_external(label, offset);
            appendir(
                new_ir1(
-                    IR_SETSP, EM_pointersize,
+                    IR_SETFP, EM_pointersize,
                    load(EM_pointersize, descriptor, EM_pointersize*2)
                )
            );
            appendir(
                new_ir1(
-                    IR_SETFP, EM_pointersize,
+                    IR_SETSP, EM_pointersize,
                    load(EM_pointersize, descriptor, EM_pointersize*1)
                )
            );
--- a/mach/proto/ncg/subr.c
+++ b/mach/proto/ncg/subr.c
@ -518,7 +518,7 @@ int split(token_p tp, int *ip, int ply, int toplevel) {
 	int tpl;
 	for (cp=c2coercs;cp->c2_texpno>=0; cp++) {
-		if (!match(tp,&machsets[cp->c2_texpno],0))
+		if (!match(tp,&machsets[cp->c2_texpno],cp->c2_expr))
 			continue;
 		ok=1;
 		for (i=0; ok && i<cp->c2_nsplit;i++) {
--- a/man/powerpc_as.6
+++ b/man/powerpc_as.6
@ -1,33 +1,136 @@
-.TH POWERPC_AS 1
+.TH POWERPC_AS 1 2018-03-07
 .ad
 .SH NAME
 powerpc_as \- assembler for PowerPC
 .SH SYNOPSIS
 as [options] argument ...
 .SH DESCRIPTION
 This assembler is made with the general framework
 described in \fIuni_ass\fP(6).
-
+.PP
 It can assemble the instructions from Book I and Book II of PowerPC
 version 2.01.
 This includes the branch, integer, and floating point instructions
 from Book I; and the cache, synchronization, and time base
 instructions from Book II.
 .PP
 There is no support for other instructions, such as supervisor-mode
 instructions or vector instructions.
 There is some support for 64-bit integer instructions, but the
 assembler only has 32-bit symbols.
 .SH SYNTAX
-Most 32-bit integer and floating point instructions are supported, but not many
+.SS general purpose registers
-short form instructions. Instructions which take 16-bit operands can additionally
+There are 32 GPRs from \fBr0\fP to \fBr31\fP.
-use the following special functions:
+In this assembler, \fBsp\fP is an alias for \fBr1\fP, and \fBfp\fP is
-
+an alias for \fBr2\fP, because \fIack\fP uses r1 as the stack pointer
-.IP hi16[value], ha16[value]
+and r2 as the frame pointer.
-Returns the high half of the value of the expression; if the value is not absolute,
+Other compilers don't use r2 as the frame pointer.
-also generates the appropriate fixup. Use of either of these \fImust\fR be followed,
+.PP
-in the next instruction, by the corresponding use of \fBlo16[]\fR. Use \fBhi16[]\fR
+GPR syntax requires a register name, not a number.
-if the low half is going to interpret its payload as an unsigned value, and
+For example, \(oqaddi\ r5,\ r4,\ 1\(cq works, but
-\fBha16[]\fR if it will be interpreted as a signed value (so that the high half can
+\(oqaddi\ 5,\ 4,\ 1\(cq is a syntax error.
-be adjusted to match).
+.PP
-
+Certain instructions ignore the contents of \fBr0\fP and use zero.
-.IP lo16[]
+This happens when using r0 as the second operand of \fIaddi\fP or
-Returns the low half of the value of the expression. No fixup is generated. Use of
+\fIaddis\fP, or when addressing \(oqexpr(r0)\(cq or
-\fBlo16[]\fR must come in the instruction immediately after a use of \fBhi16[]\fR or
+\(oqr0,\ gpr\(cq.
-\fBha16[]\fR.
+The syntax is still the name r0, not the number 0.
-
+.SS floating point registers
 There are 32 FPRs from \fBf0\fP to \fBf31\fP.
 Each FPR has 64 bits and can hold a single-precision or
 double-precision number.
 FPR syntax requires a register name, not a number.
 .SS special purpose registers
 The three named SPRs are \fBctr\fP (count register), \fBlr\fP (link
 register), and \fBxer\fP (exception register).
 \(oqmfspr\(cq and \(oqmtspr\(cq allow these names or a number.
 .SS condition register
 There is a 32-bit condition register, where bit 0 is most significant,
 and bit 31 is least significant.
 This gets split into 8 registers of 4 bits each, from \fBcr0\fP (with
 bits 0 to 3) to \fBcr7\fP (with bits 28 to 31).
 Some instructions use the names cr0 to cr7, others use a bit numbered
 0 to 31, and others use all 32 bits.
 .SS addressing modes
 \(oqexpr(gpr)\(cq addresses \fIexpr\fP + the contents of \fIgpr\fP,
 except that \(oqexpr(r0)\(cq addresses \fIexpr\fP\ +\ 0.
 A few instructions, like \(oqstwu\(cq, also update \fIgpr\fP by
 setting it to the address.
 .PP
 \(oqgprA,\ gprB\(cq in certain instructions addresses the contents of
 \fIgprA\fP + the contents of \fIgprB\fP, except that \(oqr0,\ gprB\(cq
 addresses 0\ +\ the contents of \fIgprB\fP.
 .SS 16-bit operands
 Some instructions have a 16-bit operand.
 This can be a bare \fIexpr\fP (whose value must fit in 16 bits, either
 signed or unsigned), or it can be one of these special functions:
 .IP "hi16[expr], ha16[expr]"
 Returns the high half of the 32-bit value of the expression.
 If the low half is negative (from 0x8000 to 0xffff),
 then \fBha16[]\fP adjusts the high half by adding 1.
 Use \fBhi16[]\fP if the instruction with \fBlo16[]\fP is going to
 interpret its operand as an unsigned value, or \fBha16[]\fP if it will
 interpret it as signed.
 .IP
 If \fIexpr\fP is not absolute, then the assembler must generate a
 fixup for the linker.
 The fixup only works if the instruction is
 \(oqaddis gpr, r0, hx16[expr]\(cq or \(oqlis gpr, hx16[expr]\(cq,
 where \fBhx16\fP stands for either \fBhi16\fP or \fBha16\fP.
 .IP lo16[expr]
 Returns the low half of the 32-bit value of the expression.
 .SS short forms
 Some instructions have short forms using extended mnemonics (or
 simplified mnemonics) like \fIli\fP, \fIsrwi\fP, and many others.
 .IP "li r6, 789"
 is short for: addi r6, r0, 789
 .IP "srwi r3, r4, 2"
 is short for: rlwinm r3, r4, 30, 2, 31
 .PP
 This assembler doesn't support extended mnemonics with branch
 prediction, such as \fIblt+\fP or \fIbne-\fP.
 It always parses \(oq+\(cq and \(oq-\(cq as operators,
 never as part of a mnemonic.
 .SH EXAMPLES
 There are two ways to load r3 with _symbol\ =\ 0x1234abcd.
 One way is
 .PP
 .nf
   lis  r3, hi16[_symbol]
   ori  r3, r3, lo16[_symbol]  ! r3 = 0x12340000 | 0x0000abcd
 .fi
 .PP
 The other way is
 .PP
 .nf
   lis  r3, ha16[_symbol]
   addi r3, r3, lo16[_symbol]  ! r3 = 0x12350000 + 0xffffabcd
 .fi
 .PP
 The next code adds 1 to a global variable.
 .PP
 .nf
   lis  r3, ha16[_var]
   lwz  r4, lo16[_var](r3)
   addi r4, r4, 1
   stw  r4, lo16[_var](r3)
 .fi
 .SH "SEE ALSO"
 uni_ass(6),
 ack(1)
 .PP
 Freescale Semiconductor, \fIProgramming Environments Manual for 32-Bit
 Implementations of the PowerPC Architecture\fP, Rev. 3, September 2005.
 .PP
 IBM, \fIPowerPC User Instruction Set Architecture, Book I\fP, Version
 2.01, September 2003.
 .PP
 IBM, \fIPowerPC Virtual Environment Architecture, Book II\fP, Version
 2.01, December 2003.
 .SH CAVEATS
 Beware that not every processor can run every instruction.
 The 32-bit processors can't run 64-bit instructions like \fIlwa\fP,
 \fIstd\fP, and \fIfctid\fP.
 The PowerPC 601 can't run \fIstfiwx\fP, \fIfres\fP, \fIfrsqrte\fP,
 or \fIfsel\fP.
 Many models, like the PowerPC G4, can't run \fIfsqrt\fP or
 \fIfsqrts\fP.
--- a/modules/src/em_code/insert.c
+++ b/modules/src/em_code/insert.c
@ -99,20 +99,19 @@ C_out_parts(pp)
 		}
 		else {
 			/* copy the chunk to output */
 #ifdef INCORE
 			register char *s = C_BASE + pp->pp_begin;
 			char *se = C_BASE + pp->pp_end;
 			while (s < se) {
 				put(*s++);
 			}
 #else
 			register long b = pp->pp_begin;
 			while (b < pp->pp_end) {
 #ifdef INCORE
 				/* C_BASE is not constant, put() may
 				   move C_BASE, so each iteration of
 				   this loop must read C_BASE again.
 				*/
 				put(C_BASE[b++]);
 #else
 				put(getbyte(b++));
 			}
 #endif
 			}
 		}
 		prev = pp;
 		pp = pp->pp_next;
--- a/modules/src/object/wr_ranlib.c
+++ b/modules/src/object/wr_ranlib.c
@ -10,16 +10,27 @@ wr_ranlib(fd, ran, cnt1)
 	struct ranlib	*ran;
 	long	cnt1;
 {
-	{
+	struct ranlib *r;
-		register long cnt = cnt1;
+	long cnt, val;
-		register struct ranlib *r = ran;
+	char *c;
-		register char *c = (char *) r;
-		while (cnt--) {
+	/*
-			put4(r->ran_off,c); c += 4;
+	 * We overwrite the structs in r with the bytes in c, so we
-			put4(r->ran_pos,c); c += 4;
+	 * don't need to allocate another buffer.
-			r++;
+	 *
-		}
+	 * put4(r->ran_off, c) can fail if r->ran_off and c overlap in
 	 * memory, if this is a big-endian machine.  It tries to swap
 	 * the bytes from big to little endian, but overwrites some
 	 * bytes before reading them.  To prevent this, we must copy
 	 * each value before we overwrite it.
 	 */
 	r = ran;
 	c = (char *)r;
 	cnt = cnt1;
 	while (cnt--) {
 		val = r->ran_off; put4(val, c); c += 4;
 		val = r->ran_pos; put4(val, c); c += 4;
 		r++;
 	}
 	wr_bytes(fd, (char *) ran, cnt1 * SZ_RAN);
 }
--- a/modules/src/print/doprnt.c
+++ b/modules/src/print/doprnt.c
@ -16,7 +16,7 @@
 	%d = int
 $ */
 void
-doprnt(File *fp, char *fmt, va_list argp)
+doprnt(File *fp, const char *fmt, va_list argp)
 {
 	char buf[SSIZE];
--- a/modules/src/print/format.c
+++ b/modules/src/print/format.c
@ -35,7 +35,7 @@ integral(int c)
 	%d = int
 $ */
 int
-_format(char *buf, char *fmt, va_list argp)
+_format(char *buf, const char *fmt, va_list argp)
 {
 	register char *pf = fmt;
 	register char *pb = buf;
--- a/modules/src/print/fprint.c
+++ b/modules/src/print/fprint.c
@ -17,7 +17,7 @@
 $ */
 /*VARARGS*/
 void
-fprint(File *fp, char *fmt, ...)
+fprint(File *fp, const char *fmt, ...)
 {
 	va_list args;
 	char buf[SSIZE];
--- a/modules/src/print/print.c
+++ b/modules/src/print/print.c
@ -17,7 +17,7 @@
 $ */
 /*VARARGS*/
 void
-print(char *fmt, ...)
+print(const char *fmt, ...)
 {
 	va_list args;
 	char buf[SSIZE];
--- a/modules/src/print/print.h
+++ b/modules/src/print/print.h
@ -9,10 +9,10 @@
 #include <stdarg.h>
-void print(char *fmt, ...);
+void print(const char *fmt, ...);
-void fprint(File *f, char *fmt, ...);
+void fprint(File *f, const char *fmt, ...);
-void doprnt(File *f, char *fmt, va_list ap);
+void doprnt(File *f, const char *fmt, va_list ap);
-int _format(char *buf, char *fmt, va_list ap);
+int _format(char *buf, const char *fmt, va_list ap);
-char *sprint(char *buf, char *fmt, ...);
+char *sprint(char *buf, const char *fmt, ...);
 #endif /* __PRINT_INCLUDED__ */
--- a/modules/src/print/sprint.c
+++ b/modules/src/print/sprint.c
@ -17,7 +17,7 @@
 $ */
 /*VARARGS*/
 char *
-sprint(char *buf, char *fmt, ...)
+sprint(char *buf, const char *fmt, ...)
 {
 	va_list args;
--- a/plat/linux/libsys/errno.s
+++ b/plat/linux/libsys/errno.s
@ -1,28 +0,0 @@
 #
 ! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/errno.s,v $
 ! $State: Exp $
 ! $Revision: 1.1 $
 ! Declare segments (the order is important).
 .sect .text
 .sect .rom
 .sect .data
 .sect .bss
 #define D(e) .define e; e
 .sect .data
 ! Define various ACK error numbers. Note that these are *not* ANSI C
 ! errnos, and are used for different purposes.
 D(ERANGE)         = 1
 D(ESET)           = 2
 D(EIDIVZ)         = 6
 D(EHEAP)          = 17
 D(EILLINS)        = 18
 D(EODDZ)          = 19
 D(ECASE)          = 20
 D(EBADMON)        = 25
--- a/plat/linux/libsys/syscalls.h
+++ b/plat/linux/libsys/syscalls.h
@ -174,6 +174,12 @@
 #define __NR_mremap 163
 #define __NR_setresuid 164
 #define __NR_getresuid 165
 /*
 * i386, m68020, powerpc use different numbers after 165.
 * This file only has the numbers for i386.
 */
 #if defined(__i386)
 #define __NR_vm86 166
 #define __NR_query_module 167
 #define __NR_poll 168
@ -324,5 +330,6 @@
 #define concat(x, y) x##y
 #define MAPPED_SYSCALL(p, n) .define concat(p,n); concat(p,n): xor eax, eax; movb al, concat(__NR_,n); jmp __mapped_syscall
 #endif /* __i386 */
 #endif
--- a/plat/linux386/libsys/build.lua
+++ b/plat/linux386/libsys/build.lua
@ -6,6 +6,7 @@ acklibrary {
        "plat/linux/libsys/*.s",
    },
 	deps = {
 		"plat/linux/libsys/*.h",
 		"lang/cem/libcc.ansi/headers+headers",
 		"plat/linux386/include+headers",
 	},
--- a/plat/linux386/libsys/trapno.s
+++ b/plat/linux386/libsys/trapno.s
@ -0,0 +1,13 @@
 #define D(e) .define e; e
 ! Define various EM trap numbers needed by mach/i386/libem.
 ! Note that these are *not* ANSI C errnos.
 D(ERANGE)         = 1
 D(ESET)           = 2
 D(EIDIVZ)         = 6
 D(EHEAP)          = 17
 D(EILLINS)        = 18
 D(EODDZ)          = 19
 D(ECASE)          = 20
 D(EBADMON)        = 25
--- a/plat/linux68k/libsys/build.lua
+++ b/plat/linux68k/libsys/build.lua
@ -6,6 +6,7 @@ acklibrary {
        "plat/linux/libsys/*.s",
    },
 	deps = {
 		"plat/linux/libsys/*.h",
 		"lang/cem/libcc.ansi/headers+headers",
 		"plat/linux68k/include+headers",
 	},
--- a/plat/linuxppc/boot.s
+++ b/plat/linuxppc/boot.s
@ -32,7 +32,7 @@ begtext:
 	lwz r3, 0(sp)            ! r3 = argc
 	addi r4, sp, 4           ! r4 = argv
-	rlwinm r5, r3, 32-2, 2, 31 ! shift left 2 bits
+	slwi r5, r3, 2           ! r5 = 4 * argc (shift left, not right)
 	add r5, r5, r4 
-	addi r5, r5, 8           ! r5 = env
+	addi r5, r5, 4           ! r5 = env = argv + 4*(argc + 1), past argv's NULL
--- a/plat/linuxppc/descr
+++ b/plat/linuxppc/descr
@ -19,7 +19,7 @@ var PLATFORM=linuxppc
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
 var CPP_F=-D__unix
 var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x10000054
-var MACHOPT_F=-m3
+var MACHOPT_F=-m2
 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr
 # Override the setting in fe so that files compiled for linuxppc can see
--- a/plat/linuxppc/libsys/_syscall.s
+++ b/plat/linuxppc/libsys/_syscall.s
@ -12,16 +12,7 @@
 .sect .text
-EINVAL = 22
+#define EINVAL 22
 #define IFFALSE 4
 #define IFTRUE 12
 #define ALWAYS 20
 #define LT 0
 #define GT 1
 #define EQ 2
 #define OV 3
 ! Perform a Linux system call.
@ -32,21 +23,21 @@ __syscall:
 	lwz r4, 8(sp)
 	lwz r5, 12(sp)
 	sc 0
-	bclr IFFALSE, OV, 0
+	bnslr
 	! On error, r3 contains the errno.	
 	! It just so happens that errnos 1-34 are the same in Linux as in ACK.
-	cmpi cr0, 0, r3, 1
+	cmpwi r3, 1
-	bc IFTRUE, LT, 2f
+	blt 2f
-	cmpi cr0, 0, r3, 34
+	cmpwi r3, 34
-	bc IFTRUE, GT, 2f
+	bgt 2f
 3:
-	li32 r4, _errno
+	lis r4, ha16[_errno]
-	stw r3, 0(r4)
+	stw r3, lo16[_errno](r4)
-	addi r3, r0, -1
+	li r3, -1
-	bclr ALWAYS, 0, 0
+	blr
 2:
-	addi r3, r0, EINVAL
+	li r3, EINVAL
 	b 3b
--- a/plat/linuxppc/libsys/build.lua
+++ b/plat/linuxppc/libsys/build.lua
@ -4,12 +4,10 @@ acklibrary {
 		"./_syscall.s",
 		"./sigaction.s",
 		"./signal.c",
 		"./trap.s",
 		"plat/linux/libsys/_exit.c",
 		"plat/linux/libsys/_hol0.s",
 		"plat/linux/libsys/close.c",
 		"plat/linux/libsys/creat.c",
 		"plat/linux/libsys/errno.s",
 		"plat/linux/libsys/execve.c",
 		"plat/linux/libsys/getpid.c",
 		"plat/linux/libsys/gettimeofday.c",
@ -26,6 +24,7 @@ acklibrary {
 		"plat/linux/libsys/write.c",
 	},
 	deps = {
 		"plat/linux/libsys/*.h",
 		"lang/cem/libcc.ansi/headers+headers",
 		"plat/linuxppc/include+headers",
 	},
--- a/plat/linuxppc/libsys/sigaction.s
+++ b/plat/linuxppc/libsys/sigaction.s
@ -1,156 +1,194 @@
 #define __NR_sigaction		67
-#define SIG_BLOCK		0
+#define __NR_sigprocmask	126
 #define SIG_SETMASK		2
 #define MAXSIG			32
-/* offsets into our stack frame */
+/* offsets into struct sigaction */
-#define mynew	16	/* new sigaction */
+#define sa_handler	0	/* in union with sa_sigaction */
-#define mynset	32	/* new signal set */
+#define sa_mask		4
-#define myoset	36	/* old signal set */
+#define sa_flags	8
-#define mysave	40
+#define sa_restorer	12
-#define mysize	56
+
 /* offsets from stack pointer */
 #define mynewact	16	/* struct sigaction */
 #define myoldact	32
 #define newmask		64	/* signal set */
 #define oldmask		68
 #define oldhandler	72
 #define myret		76
 #define savelr		80
 #define signum		84	/* first argument */
 #define newact		88
 #define oldact		92
 .sect .text; .sect .rodata; .sect .data; .sect .bss
 /*
 * Linux calls signal handlers with arguments in registers, but the
 * ACK expects arguments on the stack.  This sigaction() uses a
- * "bridge" to move the arguments.
+ * "bridge" to move the arguments, but it has these limitations:
 *
 *  - If the caller passes a bad pointer, this sigaction() causes
 *    SIGBUS or SIGSEGV instead of setting errno = EFAULT.
 *
 *  - This sigaction() only works with signals 1 to 31, not with
 *    real-time signals 32 to 64.
 *
 *  - This sigaction() is not safe for multiple threads.
 *
 * int sigaction(int signum, const struct sigaction *newact,
 *		 struct sigaction *oldact);
 */
 .sect .text
 .define _sigaction
 _sigaction:
 	mflr	r0
-	subi	r1, r1, mysize
+	li	r3, __NR_sigprocmask
-	stw	r31, mysave+8(r1)
+	stwu	r3, -signum(sp)		/* keep 0(sp) = __NR_sigprocmask */
-	stw	r30, mysave+4(r1)
+	stw	r0, savelr(sp)
-	stw	r29, mysave(r1)
+
-	stw	r0, mysave+12(r1)
+	/* Copy newact to stack (before blocking SIGBUS, SIGSEGV). */
-	li	r3, 0
+	lwz	r3, newact(sp)
-	stw	r3, mynset(r1)	   	! mynset = 0
+	mr.	r3, r3
-	lwz	r29, mysize(r1)		! r29 = signal number
+	beq	1f			/* skip if newact == NULL */
-	lwz	r30, mysize+4(r1)	! r30 = new action
+	lwz	r4, sa_handler(r3)
-	lwz	r31, mysize+8(r1)	! r31 = old action
+	lwz	r5, sa_mask(r3)
 	lwz	r6, sa_flags(r3)
 	lwz	r7, sa_restorer(r3)
 	stw	r4, mynewact+sa_handler(sp)
 	stw	r5, mynewact+sa_mask(sp)
 	stw	r6, mynewact+sa_flags(sp)
 	stw	r7, mynewact+sa_restorer(sp)
 	/*
-	 * If the new action is non-NULL, the signal number is in
+	 * Block all signals to prevent a race.  After we set sharray,
-	 * range 1 to MAXSIG, and the new handler is not SIG_DFL 0
+	 * we must call the kernel's sigaction before the next signal
-	 * or SIG_IGN 1, then we interpose our bridge.
+	 * handler runs.  This prevents two problems:
 	 *
 	 *  - The bridge might call the new handler while the kernel
 	 *    uses the mask and flags of the old handler.
 	 *
 	 *  - The signal handler might call sigaction() and destroy
 	 *    sharray.  We must block all signals because any signal
 	 *    handler might call sigaction() for our signal.
 	 */
-	cmpwi	cr0, r30, 0
+1:	li	r4, SIG_SETMASK
-	subi	r7, r29, 1		! r7 = index in handlers
+	li	r5, -1			/* mask signals 1 to 32 */
-	cmplwi	cr7, r7, MAXSIG		! unsigned comparison
+	stw	r5, newmask(sp)
-	beq	cr0, kernel
+	la	r5, newmask(sp)
-	bge	cr7, kernel
+	la	r6, oldmask(sp)
-	lwz	r3, 0(r30)		! r3 = new handler
+	stw	r4, 4(sp)		/* kept 0(sp) = __NR_sigprocmask */
-	clrrwi.	r3, r3, 1
+	stw	r5, 8(sp)
-	beq	cr0, kernel
+	stw	r6, 12(sp)
 	/*
 	 * Block the signal while we build the bridge.  Prevents a
 	 * race if a signal arrives after we change the bridge but
 	 * before we change the action in the kernel.
 	 */
 	li	r4, 1
 	slw	r4, r4, r7
 	stw	r4, mynset(r1)		! mynmask = 1 << (signal - 1)
 	li	r3, SIG_BLOCK
 	la	r4, mynset(r1)
 	la	r5, myoset(r1)
 	stw	r3, 0(r1)
 	stw	r4, 4(r1)
 	stw	r5, 8(r1)
 	bl	_sigprocmask
 	/*
 	 * Point our bridge to the new signal handler.  Then copy the
 	 * new sigaction but point it to our bridge.
 	 */
 	lis	r6, hi16[handlers]
 	ori	r6, r6, lo16[handlers]
 	subi	r7, r29, 1
 	slwi	r7, r7, 2
 	lwz	r3, 0(r30)		! r3 = new handler
 	stwx	r3, r6, r7		! put it in array of handlers
 	lis	r3, hi16[bridge]
 	ori	r3, r3, lo16[bridge]
 	lwz	r4, 4(r30)
 	lwz	r5, 8(r30)
 	lwz	r6, 12(r30)
 	stw	r3, mynew(r1)		! sa_handler or sa_sigaction
 	stw	r4, mynew+4(r1)		! sa_mask
 	stw	r5, mynew+8(r1)		! sa_flags
 	stw	r6, mynew+12(r1)	! sa_restorer
 	la	r30, mynew(r1)
 kernel:
 	li	r3, __NR_sigaction
 	stw	r3, 0(r1)
 	stw	r29, 4(r1)
 	stw	r30, 8(r1)
 	stw	r31, 12(r1)
 	bl	__syscall
 	/*
-	 * If we blocked the signal, then restore the old signal mask.
+	 * If the signal number is in range 1 to 31, and the new
 	 * handler is not SIG_DFL 0 or SIG_IGN 1, then we interpose
 	 * our bridge.
 	 */
-	lwz	r3, mynset(r1)
+	lwz	r4, signum(sp)		/* keep r4 = signum */
-	cmpwi	cr0, r3, 0
+	addi	r5, r4, -1
-	beq	cr0, fixold
+	cmplwi	r5, 30
-	li	r3, SIG_SETMASK
+	bgt	2f			/* skip if out of range */
-	la	r4, myoset(r1)
+
-	li	r5, 0
+	slwi	r5, r5, 2		/* r5 = sharray index */
-	stw	r3, 0(r1)
+	lis	r6, ha16[sharray]
-	stw	r4, 4(r1)
+	la	r6, lo16[sharray](r6)	/* r6 = sharray */
-	stw	r5, 8(r1)
+	lwzx	r0, r6, r5
-	bl	_sigprocmask
+	stw	r0, oldhandler(sp)	/* remember old handler */
-	/*
+	lwz	r0, newact(sp)
-	 * If the old sigaction is non-NULL and points to our bridge,
+	mr.	r0, r0
-	 * then point it to the signal handler.
+	beq	2f			/* skip if newact == NULL */
-	 */
+
-fixold:
+	lwz	r3, mynewact+sa_handler(sp)
-	cmpwi	cr0, r31, 0
+	cmplwi	r3, 2			/* r3 = new handler */
-	beq	cr0, leave
+	blt	2f			/* skip if SIG_DFL or SIG_IGN */
-	lis	r3, hi16[bridge]
+
-	ori	r3, r3, lo16[bridge]
+	stwx	r3, r6, r5		/* put new handler in sharray */
-	lwz	r4, 0(r31)
+	lis	r3, ha16[sigbridge]
-	cmpw	cr0, r3, r4
+	la	r3, lo16[sigbridge](r3)
-	bne	cr0, leave
+	stw	r3, mynewact+sa_handler(sp)
-	lis	r6, hi16[handlers]
+
-	ori	r6, r6, lo16[handlers]
+	/* Call the kernel's sigaction. */
-	subi	r7, r29, 1
+	/* sigaction(signum, &mynewact or NULL, &myoldact or NULL) */
-	slwi	r7, r7, 2
+2:	li	r3, __NR_sigaction
-	lwzx	r3, r6, r7	! get it from array of handlers
+	lwz	r0, newact(sp)
-	stw	r3, 0(r31)	! put it in old sigaction
+	mr.	r0, r0
-leave:
+	beq	3f
-	lwz	r0, mysave+12(r1)
+	la	r5, mynewact(sp)
-	lwz	r29, mysave(r1)
+	b	4f
-	lwz	r30, mysave+4(r1)
+3:	li	r5, 0
-	lwz	r31, mysave+8(r1)
+4:	lwz	r0, oldact(sp)
-	addi	r1, r1, mysize
+	mr.	r0, r0
 	beq	5f
 	la	r6, myoldact(sp)
 	b	6f
 5:	li	r6, 0
 6:	stw	r3, 0(sp)
 	stw	r4, 4(sp)		/* kept r4 = signum */
 	stw	r5, 8(sp)
 	stw	r6, 12(sp)
 	bl	__syscall
 	stw	r3, myret(sp)
 	/* Unblock signals by restoring old signal mask. */
 	li	r3, __NR_sigprocmask
 	li	r4, SIG_SETMASK
 	la	r5, oldmask(sp)
 	li	r6, 0
 	stw	r3, 0(sp)
 	stw	r4, 4(sp)
 	stw	r5, 8(sp)
 	stw	r6, 12(sp)
 	bl	__syscall
 	/* Copy oldact from stack (after unblocking BUS, SEGV). */
 	lwz	r3, oldact(sp)
 	mr.	r3, r3
 	beq	8f			/* skip if oldact == NULL */
 	lwz	r4, myoldact+sa_handler(sp)
 	lis	r5, ha16[sigbridge]
 	la	r5, lo16[sigbridge](r5)
 	cmpw	r4, r5
 	bne	7f
 	lwz	r4, oldhandler(sp)
 7:	lwz	r5, myoldact+sa_mask(sp)
 	lwz	r6, myoldact+sa_flags(sp)
 	lwz	r7, myoldact+sa_restorer(sp)
 	stw	r4, sa_handler(r3)
 	stw	r5, sa_mask(r3)
 	stw	r6, sa_flags(r3)
 	stw	r7, sa_restorer(r3)
 8:	lwz	r0, savelr(sp)
 	lwz	r3, myret(sp)
 	addi	sp, sp, signum
 	mtlr	r0
-	blr			! return from sigaction
+	blr
 /*
- * Linux calls bridge(signum) or bridge(signum, info, context) with
+ * Linux calls sigbridge(signum) or sigbridge(signum, info, context)
- * arguments in registers r3, r4, r5.
+ * with arguments in registers r3, r4, r5.
 */
-bridge:
+sigbridge:
 	mflr	r0
-	subi	r1, r1, 16
+	stwu	r3, -16(sp)	/* signal number */
+	stw	r4, 4(sp)	/* info */
+	stw	r5, 8(sp)	/* context */
 	stw	r0, 12(r1)
-	stw	r3, 0(r1)	! signal number
-	stw	r4, 4(r1)	! info
-	stw	r5, 8(r1)	! context
-	lis	r6, hi16[handlers]
+	lis	r6, ha16[sharray - 4]	/* ha16: la adds a signed low half */
-	ori	r6, r6, lo16[handlers]
+	la	r6, lo16[sharray - 4](r6)	/* bias by -4: signals count from 1 */
-	subi	r7, r3, 1
+	slwi	r7, r3, 2	/* r7 = 4 * signum; shift only once */
-	slwi	r7, r7, 2
 	lwzx	r6, r6, r7
 	mtctr	r6
-	bctrl			! call our signal handler
+	bctrl			/* call our signal handler */
-	lwz	r0, 12(r1)
+	lwz	r0, 12(sp)
 	addi	r1, r1, 16
 	mtlr	r0
-	blr			! return from bridge
+	blr			/* sigreturn(2) */
 .sect .bss
-handlers:
+sharray:
-	.space 4 * MAXSIG	! array of signal handlers
+	.space 4 * 31		/* handlers for signals 1 to 31 */
--- a/plat/linuxppc/libsys/trap.s
+++ b/plat/linuxppc/libsys/trap.s
@ -1,112 +0,0 @@
 #
 ! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/_syscall.s,v $
 ! $State: Exp $
 ! $Revision: 1.1 $
 ! Declare segments (the order is important).
 .sect .text
 .sect .rom
 .sect .data
 .sect .bss
 .sect .text
 #define IFFALSE 4
 #define IFTRUE 12
 #define ALWAYS 20
 #define LT 0
 #define GT 1
 #define EQ 2
 #define OV 3
 EARRAY	=  0
 ERANGE	=  1
 ESET	=  2
 EIOVFL	=  3
 EFOVFL	=  4
 EFUNFL	=  5
 EIDIVZ	=  6
 EFDIVZ	=  7
 EIUND	=  8
 EFUND	=  9
 ECONV	= 10
 ESTACK  = 16
 EHEAP	= 17
 EILLINS = 18
 EODDZ	= 19
 ECASE	= 20
 EMEMFLT	= 21
 EBADPTR = 22
 EBADPC  = 23
 EBADLAE = 24
 EBADMON = 25
 EBADLIN = 26
 EBADGTO = 27
 EUNIMPL = 63		! unimplemented em-instruction called
 ! EM trap handling.
 .define .trap_ecase
 .trap_ecase:
 	addi r3, r0, ECASE
 	b .trap
 .define .trap_earray
 .trap_earray:
 	addi r3, r0, EARRAY
 	b .trap
 .define .trap_erange
 .trap_erange:
 	addi r3, r0, ERANGE
 	b .trap
 .define .trp
 .define .trap
 .trp:
 .trap:
 	cmpi cr0, 0, r3, 15      ! traps >15 can't be ignored
 	bc IFTRUE, LT, 1f
 	addi r4, r0, 1
 	rlwnm r4, r4, r3, 0, 31  ! calculate trap bit
 	li32 r5, .ignmask
 	lwz r5, 0(r5)            ! load ignore mask
 	and. r4, r4, r5          ! compare
 	bclr IFFALSE, EQ, 0      ! return if non-zero
 1:
 	li32 r4, .trppc
 	lwz r5, 0(r4)            ! load user trap routine
 	or. r5, r5, r5           ! test
 	bc IFTRUE, EQ, fatal     ! if no user trap routine, bail out
 	addi r0, r0, 0
 	stw r0, 0(r4)            ! reset trap routine
 	mfspr r0, lr
 	stwu r0, -4(sp)          ! save old lr
 	stwu r3, -4(sp)
 	mtspr ctr, r5
 	bcctrl ALWAYS, 0, 0      ! call trap routine
 	lwz r0, 4(sp)            ! load old lr again
 	addi sp, sp, 8           ! retract over stack usage
 	bclr ALWAYS, 0, 0        ! return
 fatal:
 	addi r3, r0, 1
 	li32 r4, message
 	addi r5, r0, 6
 	addi r0, r0, 4           ! write()
 	sc 0
 	addi r0, r0, 1           ! exit()
 	sc 0
 .sect .rom
 message:
 	.ascii "TRAP!\n"
--- a/plat/osx386/boot.s
+++ b/plat/osx386/boot.s
@ -58,8 +58,6 @@ begdata:
 .sect .bss
 begbss:
 .define hol0
 .comm hol0, 8                ! line number and filename (for debugging)
 .define _errno
 .comm _errno, 4              ! Posix errno storage
--- a/plat/osx386/libsys/build.lua
+++ b/plat/osx386/libsys/build.lua
@ -19,7 +19,8 @@ acklibrary {
 		"./sigaction.s",
 		"./stat.s",
 		"./write.s",
-		"plat/linux/libsys/errno.s",
+		"plat/linux/libsys/_hol0.s",
 		"plat/linux386/libsys/trapno.s",
 		"plat/osx/libsys/brk.c",
 		"plat/osx/libsys/creat.c",
 		"plat/osx/libsys/isatty.c",
--- a/plat/osxppc/boot.s
+++ b/plat/osxppc/boot.s
@ -29,7 +29,7 @@ begtext:
 	lwz r3, 0(sp)            ! r3 = argc
 	addi r4, sp, 4           ! r4 = argv
-	rlwinm r5, r3, 32-2, 2, 31 ! shift left 2 bits
+	slwi r5, r3, 2           ! r5 = 4 * argc (shift left, not right)
 	add r5, r5, r4
-	addi r5, r5, 8           ! r5 = env
+	addi r5, r5, 4           ! r5 = env = argv + 4*(argc + 1), past argv's NULL
@ -49,8 +49,6 @@ begdata:
 .sect .bss
 begbss:
 .define hol0
 .comm hol0, 8                ! line number and filename (for debugging)
 .define _errno
 .comm _errno, 4              ! Posix errno storage
--- a/plat/osxppc/descr
+++ b/plat/osxppc/descr
@ -10,16 +10,17 @@ var l={w}
 var la={w}
 var f={w}
 var fa={w}
 # Size 8 has alignment 4 in Mac OS, 8 in Linux.
 var d=8
-var da={d}
+var da=4
 var x=8
-var xa={x}
+var xa=4
 var ARCH=powerpc
 var PLATFORM=osxppc
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
 var CPP_F=-D__unix
 var ALIGN=-a0:4 -a1:4 -a2:4096 -a3:4 -b0:0x129c
-var MACHOPT_F=-m3
+var MACHOPT_F=-m2
 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr
 # Override the setting in fe so that files compiled for osxppc can see
--- a/plat/osxppc/libsys/build.lua
+++ b/plat/osxppc/libsys/build.lua
@ -19,7 +19,7 @@ acklibrary {
 		"./sigaction.s",
 		"./stat.s",
 		"./write.s",
-		"plat/linuxppc/libsys/trap.s",
+		"plat/linux/libsys/_hol0.s",
 		"plat/osx/libsys/brk.c",
 		"plat/osx/libsys/creat.c",
 		"plat/osx/libsys/isatty.c",
--- a/plat/osxppc/libsys/set_errno.s
+++ b/plat/osxppc/libsys/set_errno.s
@ -1,7 +1,7 @@
 .sect .text
 ! .set_errno: store the error code passed in r3 into _errno, then
 ! return -1 in r3.  The new code uses r4 as scratch for the address.
 .define .set_errno
 .set_errno:
-	li32 r10, _errno
+	lis r4, ha16[_errno]		! r4 = high part of the address of _errno
-	stw r3, 0(r10)		! set errno
+	stw r3, lo16[_errno](r4)	! set errno
-	addi r3, r0, -1		! return -1
+	li r3, -1			! return -1
-	bclr 20, 0, 0
+	blr				! return to the caller
--- a/plat/qemuppc/descr
+++ b/plat/qemuppc/descr
@ -19,11 +19,8 @@ var PLATFORM=qemuppc
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
 var CPP_F=-D__unix
 var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x01000000
-var C_LIB={PLATFORMDIR}/libc-ansi.a
+var MACHOPT_F=-m2
-# bitfields reversed for compatibility with (g)cc.
+var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr
 var CC_ALIGN=-Vr
 var OLD_C_LIB={C_LIB}
 var MACHOPT_F=
 # Override the setting in fe so that files compiled for qemuppc can see
 # the platform-specific headers.
--- a/plat/qemuppc/libsys/trap.s
+++ b/plat/qemuppc/libsys/trap.s
@ -1,65 +0,0 @@
 #
 ! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/_syscall.s,v $
 ! $State: Exp $
 ! $Revision: 1.1 $
 ! Declare segments (the order is important).
 .sect .text
 .sect .rom
 .sect .data
 .sect .bss
 .sect .text
 #define IFFALSE 4
 #define IFTRUE 12
 #define ALWAYS 20
 #define LT 0
 #define GT 1
 #define EQ 2
 #define OV 3
 EARRAY	=  0
 ERANGE	=  1
 ESET	=  2
 EIOVFL	=  3
 EFOVFL	=  4
 EFUNFL	=  5
 EIDIVZ	=  6
 EFDIVZ	=  7
 EIUND	=  8
 EFUND	=  9
 ECONV	= 10
 ESTACK  = 16
 EHEAP	= 17
 EILLINS = 18
 EODDZ	= 19
 ECASE	= 20
 EMEMFLT	= 21
 EBADPTR = 22
 EBADPC  = 23
 EBADLAE = 24
 EBADMON = 25
 EBADLIN = 26
 EBADGTO = 27
 EUNIMPL = 63		! unimplemented em-instruction called
 ! Trap entry points.  Each specific entry just branches to the common
 ! handler; .trp and .trap are two names for the same address.
 .define .trap_ecase
 .trap_ecase:
 	b .trp
 .define .trap_earray
 .trap_earray:
 	b .trp
 .define .trap_erange
 .trap_erange:
 	b .trap
 ! Common handler: no message and no exit, it only loops on itself.
 .define .trp
 .define .trap
 .trp:
 .trap:
 	b .trp					! spin forever
--- a/tests/plat/_dummy_e.c
+++ b/tests/plat/_dummy_e.c
@ -1,6 +1,6 @@
 #include "test.h"
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT(0 == 0);
--- a/tests/plat/bss_e.c
+++ b/tests/plat/bss_e.c
@ -0,0 +1,27 @@
 #include "test.h"
 /*
 * EM puts these variables in BSS.  Their initial values must be zero.
 * Some platforms, like Linux, clear the BSS before they run the
 * program.  For other platforms, like pc86, we clear the BSS in
 * boot.s before we call _m_a_i_n.
 */
 char c;
 int array[9000];
 short s;
 /*
  * Entry point; runs without the CRT, so stdio is unavailable.
  * Asserts that c, every element of array, and s all start out zero.
  */
 void _m_a_i_n(void)
 {
 	int nonzero = 0;
 	int idx = 0;
 	ASSERT(c == 0);
 	/* Count the nonzero elements instead of asserting on each one. */
 	while (idx < sizeof(array) / sizeof(array[0])) {
 		nonzero += (array[idx] != 0);
 		idx++;
 	}
 	ASSERT(nonzero == 0);
 	ASSERT(s == 0);
 	finished();
 }
--- a/tests/plat/bugs/bug-62-notvar_var_e.c
+++ b/tests/plat/bugs/bug-62-notvar_var_e.c
@ -40,7 +40,7 @@ void c(int i, int tru, int fal) {
  ASSERT((i != i) == fal);
 }
-/* Bypasses the CRT. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void) {
  a();
  b();
--- a/tests/plat/build.lua
+++ b/tests/plat/build.lua
@ -9,12 +9,14 @@ definerule("plat_testsuite",
 		-- Remember this is executed from the caller's directory; local
 		-- target names will resolve there.
 		local testfiles = filenamesof(
 			-- added structcopy_e.c
 			"tests/plat/*.c",
 			"tests/plat/*.e",
 			"tests/plat/*.p",
 			"tests/plat/b/*.b",
-			"tests/plat/bugs/bug-22-inn_mod.mod",
+			"tests/plat/bugs/*.c",
-			"tests/plat/bugs/bug-62-notvar_var_e.c"
+			"tests/plat/bugs/*.mod",
 			"tests/plat/m2/*.mod"
 		)
 		acklibrary {
--- a/tests/plat/csa_e.c
+++ b/tests/plat/csa_e.c
@ -11,7 +11,7 @@ int csa(int i)
    }
 }
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT(csa(0) == 0);
--- a/tests/plat/csb_e.c
+++ b/tests/plat/csb_e.c
@ -11,7 +11,7 @@ int csa(int i)
    }
 }
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT(csa(0) == 0);
--- a/tests/plat/doublecmp_e.c
+++ b/tests/plat/doublecmp_e.c
@ -4,7 +4,7 @@
 double one = 1.0;
 double zero = 0.0;
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT(zero == zero);
--- a/tests/plat/dup_e.e
+++ b/tests/plat/dup_e.e
@ -0,0 +1,139 @@
 #
    mes 2, EM_WSIZE, EM_PSIZE
 /*
 * Tests _dup_ and _dus_ by loading 20 bytes from _src_, then making
 * and checking some duplicates.  The compilers might never _dup_ or
 * _dus_ with large sizes, so the compilers might work even if this
 * test fails.  You can cheat this test if _cms_ always pushes zero.
 */
    exa src
    exa size
 src
    con 3593880729I4, 782166578I4, 4150666996I4, 2453272937I4, 3470523049I4
 size
    con 20I2
    exp $check
    exp $_m_a_i_n
    pro $_m_a_i_n, 0
    /* Push 3 copies of src on stack. */
    lae src
    loi 20        /* 1st copy */
    dup 20        /* 2nd copy */
    /* Load the 2-byte count from size and widen it to a word for dus. */
    lae size
    loi 2
    loc 2
    loc EM_WSIZE
    cuu           /* unsigned convert, 2 bytes to EM_WSIZE bytes */
    dus EM_WSIZE  /* 3rd copy */
    cal $check    /* $check reads the copies at lal 0, lal 20, lal 40 */
    cal $finished
    end /* $_m_a_i_n */
    pro $check, 4 * EM_PSIZE + EM_WSIZE
 #define p1    (-1 * EM_PSIZE)
 #define p2    (-2 * EM_PSIZE)
 #define p3    (-3 * EM_PSIZE)
 #define p4    (-4 * EM_PSIZE)
 #define i     (p4 - EM_WSIZE)
    /* Set pointers to all 4 copies. */
    lae src
    lal p4
    sti EM_PSIZE  /* p4 = src */
    lal 0
    lal p3
    sti EM_PSIZE  /* p3 = 3rd copy */
    lal 20
    lal p2
    sti EM_PSIZE  /* p2 = 2nd copy */
    lal 40
    lal p1
    sti EM_PSIZE  /* p1 = 1st copy */
    /* Loop 20 times to verify each byte. */
    loc 0
    stl i
 4
    lal p4
    loi EM_PSIZE
    loi 1         /* byte from src */
    lal p3
    loi EM_PSIZE
    loi 1         /* byte from 3rd copy */
    cms EM_WSIZE
    zeq *3
    loc (3 * 256)
    lol i
    adi EM_WSIZE  /* 0x300 + i */
    loc EM_WSIZE
    loc 4
    cuu
    cal $fail
    asp 4
 3
    lal p4
    loi EM_PSIZE
    loi 1         /* byte from src */
    lal p2
    loi EM_PSIZE
    loi 1         /* byte from 2nd copy */
    cms EM_WSIZE
    zeq *2
    loc (2 * 256)
    lol i
    adi EM_WSIZE  /* 0x200 + i */
    loc EM_WSIZE
    loc 4
    cuu
    cal $fail
    asp 4
 2
    lal p4
    loi EM_PSIZE
    loi 1         /* byte from src */
    lal p1
    loi EM_PSIZE
    loi 1         /* byte from 1st copy */
    cms EM_WSIZE
    zeq *1
    loc (1 * 256)
    lol i
    adi EM_WSIZE  /* 0x100 + i */
    loc EM_WSIZE
    loc 4
    cuu
    cal $fail
    asp 4
 1
    lal p4
    loi EM_PSIZE
    adp 1
    lal p4
    sti EM_PSIZE  /* increment p4 */
    lal p3
    loi EM_PSIZE
    adp 1
    lal p3
    sti EM_PSIZE  /* increment p3 */
    lal p2
    loi EM_PSIZE
    adp 1
    lal p2
    sti EM_PSIZE  /* increment p2 */
    lal p1
    loi EM_PSIZE
    adp 1
    lal p1
    sti EM_PSIZE  /* increment p1 */
    inl i
    lol i
    loc 20
    blt *4        /* loop 20 times */
    ret 0
    end /* $check */
--- a/tests/plat/exg_e.e
+++ b/tests/plat/exg_e.e
@ -0,0 +1,83 @@
 #
    mes 2, EM_WSIZE, EM_PSIZE
 /*
 * Tests _exg_ by loading 40 bytes from _src_, then exchanging 20 and
 * 20 bytes, and checking the result.  The compilers might never _exg_
 * large sizes, so the compilers might work even if this test fails.
 * You can cheat this test if _cms_ always pushes zero.
 */
    exa src
 src
    con 1539465570I4, 1344465418I4, 1317578918I4, 1163467696I4, 2645261331I4
    con 3981585269I4, 1433968975I4, 4256886989I4, 4114909542I4, 1817334375I4
    exp $check
    exp $_m_a_i_n
    pro $_m_a_i_n, 0
    lae src
    loi 40        /* push all 40 bytes of src */
    exg 20        /* exchange the two 20-byte halves on the stack */
    cal $check    /* $check reads the exchanged bytes starting at lal 0 */
    cal $finished
    end /* $_m_a_i_n */
    /*
     * Compares src byte by byte against the exchanged copy found at
     * lal 0.  p2 walks src from its start; p1 walks the copy starting at
     * offset 20 and wraps to offset 0 once i reaches 20.  Calls $fail
     * with i converted to 4 bytes on the first differing byte.
     */
    pro $check, 2 * EM_PSIZE + EM_WSIZE
 #define p1    (-1 * EM_PSIZE)
 #define p2    (-2 * EM_PSIZE)
 #define i     (p2 - EM_WSIZE)
    lae src
    lal p2
    sti EM_PSIZE  /* p2 = src */
    lal 0
    adp 20
    lal p1
    sti EM_PSIZE  /* p1 = exchanged copy + 20 */
    /* Loop 40 times to verify each byte. */
    loc 0
    stl i
 1
    lal p2
    loi EM_PSIZE
    loi 1         /* byte from src */
    lal p1
    loi EM_PSIZE
    loi 1         /* byte from exchanged copy */
    cms EM_WSIZE
    zeq *2        /* bytes match, skip the failure report */
    lol i
    loc EM_WSIZE
    loc 4
    cuu           /* widen i to the 4 bytes passed to $fail */
    cal $fail
    asp 4
 2
    lal p2
    loi EM_PSIZE
    adp 1
    lal p2
    sti EM_PSIZE  /* increment p2 */
    lal p1
    loi EM_PSIZE  /* p1 */
    inl i
    /* When i reaches 20, p1 would reach end of exchanged copy. */
    lol i
    loc 20
    beq *3
    adp 1         /* p1 + 1 */
    bra *4
 3
    adp -39       /* p1 - 39, beginning of exchanged copy */
 4
    lal p1
    sti EM_PSIZE  /* move p1 */
    lol i
    loc 40
    blt *1        /* loop while i < 40 */
    ret 0
    end /* $check */
--- a/tests/plat/from_d_to_si_e.c
+++ b/tests/plat/from_d_to_si_e.c
@ -8,7 +8,7 @@ double minusone = -1.0;
 double big = (double)INT_MAX;
 double minusbig = (double)INT_MIN;
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((int)zero == 0);
--- a/tests/plat/from_d_to_ui_e.c
+++ b/tests/plat/from_d_to_ui_e.c
@ -6,7 +6,7 @@ double one = 1.0;
 double zero = 0.0;
 double big = (double)UINT_MAX;
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((unsigned int)zero == 0);
--- a/tests/plat/from_si_to_d_e.c
+++ b/tests/plat/from_si_to_d_e.c
@ -8,7 +8,7 @@ int minusone = -1;
 int big = INT_MAX;
 int minusbig = INT_MIN;
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((double)zero == 0.0);
--- a/tests/plat/from_ui_to_d_e.c
+++ b/tests/plat/from_ui_to_d_e.c
@ -6,7 +6,7 @@ unsigned int one_u = 1;
 unsigned int zero_u = 0;
 unsigned int big_u = UINT_MAX;
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((double)zero_u == 0.0);
--- a/tests/plat/inn_e.e
+++ b/tests/plat/inn_e.e
@ -14,6 +14,9 @@
    zeq *1
    loc __LINE__
    loc EM_WSIZE
    loc 4
    cuu
    cal $fail
    ass EM_WSIZE
 1
@ -31,6 +34,9 @@
    zne *2
    loc __LINE__
    loc EM_WSIZE
    loc 4
    cuu
    cal $fail
    ass EM_WSIZE
 2
@ -49,6 +55,9 @@
    zeq *3
    loc __LINE__
    loc EM_WSIZE
    loc 4
    cuu
    cal $fail
    ass EM_WSIZE
 3
@ -67,11 +76,12 @@
    zne *4
    loc __LINE__
    loc EM_WSIZE
    loc 4
    cuu
    cal $fail
    ass EM_WSIZE
 4
    cal $finished
    ret 0
    end
--- a/tests/plat/intadd_e.c
+++ b/tests/plat/intadd_e.c
@ -6,7 +6,7 @@ int one = 1;
 int zero = 0;
 int minusone = -1;
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((two + one)      == 3);
--- a/tests/plat/intcmp_e.c
+++ b/tests/plat/intcmp_e.c
@ -4,7 +4,7 @@
 int one = 1;
 int zero = 0;
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT(zero == zero);
--- a/tests/plat/intdiv_e.c
+++ b/tests/plat/intdiv_e.c
@ -6,7 +6,7 @@ int two = 2;
 int one = 1;
 int zero = 0;
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((three / two) == 1);
--- a/tests/plat/intrem_e.c
+++ b/tests/plat/intrem_e.c
@ -6,7 +6,7 @@ int two = 2;
 int one = 1;
 int zero = 0;
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((three % two) == 1);
--- a/tests/plat/intshift_e.c
+++ b/tests/plat/intshift_e.c
@ -6,7 +6,7 @@ int one = 1;
 int zero = 0;
 int minusone = -1;
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((one     <<zero) == 1);
--- a/tests/plat/intsub_e.c
+++ b/tests/plat/intsub_e.c
@ -7,7 +7,7 @@ int one = 1;
 int zero = 0;
 int minusone = -1;
-/* Bypasses the CRT, so there's no stdio or BSS initialisation. */
+/* Bypasses the CRT, so there's no stdio. */
 void _m_a_i_n(void)
 {
    ASSERT((two - one) == 1);
--- a/tests/plat/lib/test.c
+++ b/tests/plat/lib/test.c
@ -5,7 +5,7 @@
 void finished(void)
 {
    static const char s[] = "@@FINISHED\n";
-    write(1, s, sizeof(s));
+    write(1, s, sizeof(s)-1);
    _exit(0);
 }
@ -16,7 +16,7 @@ void writehex(uint32_t code)
    do
    {
-        *--p = "0123456789abcdef"[code & 0xf];
+        *--p = "0123456789abcdef"[(unsigned int)code & 0xf];
        code >>= 4;
    }
    while (code > 0);
--- a/tests/plat/m2/ConvTest_mod.mod
+++ b/tests/plat/m2/ConvTest_mod.mod
@ -0,0 +1,36 @@
 (*
  * Calls ConvertOctal, ConvertHex, ConvertCardinal and ConvertInteger
  * with a width argument of 6 and compares each result against the
  * expected 6-character string.
  *)
 MODULE ConvTest;
 FROM Conversions IMPORT
  ConvertOctal, ConvertHex, ConvertCardinal, ConvertInteger;
 FROM Strings IMPORT CompareStr;
 FROM Test IMPORT fail, finished;
 (* Asserts a = b, or fails with code. *)
 (* The second comparison presumably catches a CompareStr that reports
  * every pair of strings as equal; confirm. *)
 PROCEDURE A(a, b: ARRAY OF CHAR; code: INTEGER);
 BEGIN
  IF (CompareStr(a, b) # 0) OR (CompareStr(a, "wrong string") = 0) THEN
    fail(code)
  END
 END A;
 VAR
  str: ARRAY [0..15] OF CHAR;
 BEGIN
  (* Octal: fail codes 1..3 *)
  ConvertOctal(  9, 6, str); A("    11", str, 1);
  ConvertOctal( 59, 6, str); A("    73", str, 2);
  ConvertOctal(278, 6, str); A("   426", str, 3);
  (* Hexadecimal: fail codes 11H..13H *)
  ConvertHex(  9, 6, str); A("     9", str, 11H);
  ConvertHex( 59, 6, str); A("    3B", str, 12H);
  ConvertHex(278, 6, str); A("   116", str, 13H);
  (* Cardinal: fail codes 21H..23H *)
  ConvertCardinal(  9, 6, str); A("     9", str, 21H);
  ConvertCardinal( 59, 6, str); A("    59", str, 22H);
  ConvertCardinal(278, 6, str); A("   278", str, 23H);
  (* Integer, including one negative value: fail codes 31H..34H *)
  ConvertInteger(   9, 6, str); A("     9", str, 31H);
  ConvertInteger(  59, 6, str); A("    59", str, 32H);
  ConvertInteger( 278, 6, str); A("   278", str, 33H);
  ConvertInteger(-424, 6, str); A("  -424", str, 34H);
  finished;
 END ConvTest.
--- a/tests/plat/m2/NestProc_mod.mod
+++ b/tests/plat/m2/NestProc_mod.mod
@ -0,0 +1,132 @@
 (*
 * Calls nested procedures.  The compiler emits the EM instructions
 * _lxl_ and _lxa_ to access the variables in the statically enclosing
 * procedures.
 *
 * You can cheat this test if a = b is TRUE for any a, b.
 *)
 MODULE NestProc;
 FROM Test IMPORT fail, finished;
 (* Asserts cond, or fails with code. *)
 PROCEDURE A(cond: BOOLEAN; code: INTEGER);
 BEGIN
  IF NOT cond THEN fail(code) END
 END A;
 TYPE
  Set8 = SET OF [0..63];
  (* Box has fields of size 8, 4, and 1. *)
  Box = RECORD
    huge: Set8;
    big: LONGINT;
    small: CHAR;
    tiny: CHAR;
  END;
 (*
  * First nests procedures six levels deep.  Sixth, the innermost one,
  * reads and writes parameters and locals of every enclosing level,
  * and CheckThird verifies the values that Sixth stored.  Returns out,
  * which Sixth fills in.  Fail codes are grouped by level: 3xH for
  * CheckThird, 4xH for Fourth, 5xH for Fifth, 6xH for Sixth.
  *)
 PROCEDURE First(a, b: INTEGER; in: Box): Box;
  VAR c, d: INTEGER;
      out: Box;
  PROCEDURE Second(e: INTEGER);
    VAR f: INTEGER;
    PROCEDURE Third(g: INTEGER);
      VAR h: INTEGER;
      PROCEDURE CheckThird;
      BEGIN
        A(a = 1354, 31H);   (* lxa 3 *)
        A(b = 3385, 32H);
        A(c = 14349, 33H);  (* lxl 3 *)
        A(d = 30989, 34H);
        A(e = 28935, 35H);  (* lxa 2 *)
        A(f = 13366, 36H);  (* lxl 2 *)
        A(g = 7988, 37H);   (* lxa 1 *)
        A(h = 11711, 38H);  (* lxl 1 *)
      END CheckThird;
      PROCEDURE Fourth(i: INTEGER);
        VAR j: INTEGER;
        PROCEDURE Fifth(k: INTEGER);
          VAR l: INTEGER;
          PROCEDURE Sixth(): INTEGER;
          BEGIN
            A(e = 2, 61H);      (* lxa 4 *)
            A(f = 11703, 62H);  (* lxl 4 *)
            (* e is a parameter of Second (lxa); f is its local (lxl). *)
            b := 3385;   (* lxa 5 *)
            d := 30989;  (* lxl 5 *)
            e := 28935;  (* lxa 4 *)
            f := 13366;  (* lxl 4 *)
            CheckThird;
            (* lxa 5 *)
            A(in.huge = Set8{11, 12, 40, 40, 43, 56}, 63H);
            A(in.big = 2130020019D, 64H);
            A(in.small = 300C, 65H);
            A(in.tiny = 175C, 66H);
            (* lxl 5 *)
            out.huge := Set8{8, 19, 36, 41, 47, 62};
            out.big := 385360915D;
            out.small := 366C;
            out.tiny := 131C;
            j := k;  (* lxl 2, lxa 1 *)
            l := i;  (* lxl 1, lxa 2 *)
            RETURN 5217;
          END Sixth;
          PROCEDURE TwiceSixth(): INTEGER;
          BEGIN
            (* lxa and lxl must follow the static chain from Sixth to
             * Fifth, not dynamic chain from Sixth to TwiceSixth. *)
            RETURN 2 * Sixth();
          END TwiceSixth;
        BEGIN (* Fifth *)
          (* Each assertion has its own code so a failure is unambiguous. *)
          A(TwiceSixth() = 10434, 51H);
          A(k = 11567, 52H);
          A(l = 32557, 53H);
        END Fifth;
      BEGIN (* Fourth *)
        Fifth(11567);  (* k *)
        A(i = 32557, 41H);
        A(j = 11567, 42H);
      END Fourth;
    BEGIN (* Third *)
      h := 11711;
      Fourth(32557);  (* i *)
    END Third;
  BEGIN (* Second *)
    f := 11703;
    Third(7988);  (* g *)
  END Second;
 BEGIN (* First *)
  c := 14349;
  d := 17850;
  Second(2);  (* e *)
  RETURN out
 END First;
 VAR
  x: Box;
 BEGIN
  x.huge := Set8{11, 12, 40, 40, 43, 56};
  x.big := 2130020019D;
  x.small := 300C;
  x.tiny := 175C;
  x := First(1354, 19516, x);  (* a, b, in *)
  A(x.huge = Set8{8, 19, 36, 41, 47, 62}, 71H);
  A(x.big = 385360915D, 72H);
  A(x.small = 366C, 73H);
  A(x.tiny = 131C, 74H);
  finished;
 END NestProc.
--- a/tests/plat/m2/OpenArray_mod.mod
+++ b/tests/plat/m2/OpenArray_mod.mod
@ -0,0 +1,59 @@
 (*
 * Passes an open array to a procedure.  The back end must implement
 * some EM instructions for accessing arrays.
 *)
 MODULE OpenArray;
 FROM Test IMPORT fail, finished;
 (* Asserts condition or fails with code. *)
 PROCEDURE A(cond: BOOLEAN; code: INTEGER);
 BEGIN
  IF NOT cond THEN fail(code) END
 END A;
 (* Called as Modify(ary1, 1) or Modify(ary2, 2). *)
 PROCEDURE Modify(VAR ary: ARRAY OF INTEGER; what: INTEGER);
  VAR hi: INTEGER;
 BEGIN
  hi := what * 100H;
  (* Indices must be from 0 to HIGH(ary). *)
  A((what = 1) = (HIGH(ary) = 3), hi + 1);
  A((what = 2) = (HIGH(ary) = 9), hi + 2);
  (* ary[2] must equal ary1[3] or ary2[3]. *)
  A((what = 1) = (ary[2] = 13), hi + 3);
  A((what = 2) = (ary[2] = 37), hi + 4);
  (* Modify some values. *)
  IF HIGH(ary) >= 3 THEN ary[3] := 20 END;
  IF HIGH(ary) >= 6 THEN ary[6] := 40 END;
  IF HIGH(ary) >= 9 THEN ary[9] := 12 END;
 END Modify;
 VAR
  ary1: ARRAY [1..4] OF INTEGER;
  ary2: ARRAY [1..10] OF INTEGER;
 BEGIN
  (* Initialize the arrays. *)
  ary1[1] :=  6; ary1[2] :=  9; ary1[3] := 13; ary1[4] := 49;
  ary2[1] := 56; ary2[2] := 79; ary2[3] := 37; ary2[4] :=  0;
  ary2[5] := 70; ary2[6] := 62; ary2[7] := 64; ary2[8] := 92;
  ary2[9] := 29; ary2[10] := 90;
  (* Pass them as open arrays. *)
  Modify(ary1, 1);
  Modify(ary2, 2);
  (* Check that ary1[4], ary2[4, 7, 10] have been modified. *)
  (* Fail code is 300H + index for ary1 and 400H + index for ary2, so
   * the code alone identifies the element that was wrong. *)
  A(ary1[1] =  6, 301H); A(ary1[2] =  9, 302H); A(ary1[3] = 13, 303H);
  A(ary1[4] = 20, 304H);
  A(ary2[1] = 56, 401H); A(ary2[2] = 79, 402H); A(ary2[3] = 37, 403H);
  A(ary2[4] = 20, 404H); A(ary2[5] = 70, 405H); A(ary2[6] = 62, 406H);
  A(ary2[7] = 40, 407H); A(ary2[8] = 92, 408H); A(ary2[9] = 29, 409H);
  A(ary2[10] = 12, 40AH);
  finished;
 END OpenArray.
--- a/tests/plat/m2/SemaTest_mod.mod
+++ b/tests/plat/m2/SemaTest_mod.mod
@ -0,0 +1,157 @@
 (*
 * Generates some integer sequences.  Each generator is a process that
 * yields integers to the main process.  ACK switches processes by
 * saving and restoring the stack.  It uses _lor_ and _str_ to save
 * and restore the local base and frame pointer.
 *)
 MODULE SemaTest;
 FROM Semaphores IMPORT Sema, NewSema, Down, Up, StartProcess;
 FROM Storage IMPORT ALLOCATE;
 FROM Test IMPORT fail, finished;
 TYPE
  Generator = POINTER TO GeneratorRecord;
  GeneratorRecord = RECORD
    resume: Sema;       (* up when resuming generator *)
    yield: Sema;        (* up when yielding value *)
    value: INTEGER;
  END;
 VAR
  curgen: Generator;    (* current generator *)
  startLock: Sema;      (* down when booting generator *)
  startProc: PROC;
  startSelf: Generator;
 PROCEDURE BootGenerator;
  VAR pr: PROC; self: Generator;
 BEGIN
  pr := startProc;
  self := startSelf;
  Up(startLock);
  Down(self^.resume);   (* wait for first Resume *)
  pr();
 END BootGenerator;
 PROCEDURE StartGenerator(gen: Generator; pr: PROC);
 BEGIN
  gen^.resume := NewSema(0);
  gen^.yield := NewSema(0);
  Down(startLock);
  startProc := pr;
  startSelf := gen;
  StartProcess(BootGenerator, 8192);
 END StartGenerator;
 PROCEDURE Resume(gen: Generator): INTEGER;
  VAR self: Generator;
 BEGIN
  self := curgen;
  curgen := gen;
  Up(gen^.resume);
  Down(gen^.yield);     (* wait for Yield *)
  curgen := self;
  RETURN gen^.value
 END Resume;
 PROCEDURE Yield(i: INTEGER);
  VAR self: Generator;
 BEGIN
  self := curgen;
  self^.value := i;
  Up(self^.yield);      (* curgen becomes invalid *)
  Down(self^.resume);   (* wait for Resume *)
 END Yield;
 PROCEDURE YieldHalfOf(i: INTEGER);
 BEGIN
  Yield(i DIV 2);
 END YieldHalfOf;
 PROCEDURE Triangular;
  (* Yields the triangular numbers, http://oeis.org/A000217 *)
  VAR n: INTEGER;
 BEGIN
  n := 0;
  LOOP
    YieldHalfOf(n * (n + 1));
    INC(n);
  END;
 END Triangular;
 PROCEDURE Pentagonal;
  (* Yields the pentagonal numbers, http://oeis.org/A000326 *)
  VAR n: INTEGER;
 BEGIN
  n := 0;
  LOOP
    YieldHalfOf(n * (3 * n - 1));
    INC(n);
  END;
 END Pentagonal;
 PROCEDURE Odious;
  (* Yields the odious numbers, http://oeis.org/A000069 *)
  VAR b, i, n: INTEGER;
 BEGIN
  n := 1;
  LOOP
    (* b := count bits in n *)
    b := 0;
    i := n;
    WHILE i # 0 DO
      INC(b, i MOD 2);
      i := i DIV 2;
    END;
    IF (b MOD 2) = 1 THEN
      Yield(n);
    END;
    INC(n);
  END;
 END Odious;
 TYPE
  Triple = ARRAY[1..3] OF INTEGER;
 PROCEDURE T(i1, i2, i3: INTEGER): Triple;
  VAR t: Triple;
 BEGIN
  t[1] := i1; t[2] := i2; t[3] := i3; RETURN t
 END T;
 CONST
  two28 = 268435456D;   (* 0x1000_0000 *)
 VAR
  a: ARRAY [0..9] OF Triple;
  tri, pen, odi: Generator;
  i, g1, g2, g3: INTEGER;
 BEGIN
  startLock := NewSema(1);
  ALLOCATE(tri, SIZE(GeneratorRecord));
  ALLOCATE(pen, SIZE(GeneratorRecord));
  ALLOCATE(odi, SIZE(GeneratorRecord));
  StartGenerator(tri, Triangular);
  StartGenerator(pen, Pentagonal);
  StartGenerator(odi, Odious);
  a[0] := T( 0,   0,  1);
  a[1] := T( 1,   1,  2);
  a[2] := T( 3,   5,  4);
  a[3] := T( 6,  12,  7);
  a[4] := T(10,  22,  8);
  a[5] := T(15,  35, 11);
  a[6] := T(21,  51, 13);
  a[7] := T(28,  70, 14);
  a[8] := T(36,  92, 16);
  a[9] := T(45, 117, 19);
  FOR i := 0 TO INTEGER(9) DO
    g1 := Resume(tri);
    g2 := Resume(pen);
    g3 := Resume(odi);
    IF g1 # a[i][1] THEN fail(1D * two28 + LONG(a[i][1])) END;
    IF g2 # a[i][2] THEN fail(2D * two28 + LONG(a[i][2])) END;
    IF g3 # a[i][3] THEN fail(3D * two28 + LONG(a[i][3])) END;
  END;
  finished;
 END SemaTest.
--- a/tests/plat/m2/Set100_mod.mod
+++ b/tests/plat/m2/Set100_mod.mod
@ -0,0 +1,61 @@
 (*
 * Operates on sets of 100 integers.  The compiler emits, and the back
 * end must implement, the EM instructions for large sets.
 *)
 MODULE Set100;
 FROM Test IMPORT fail, finished;
 (* Asserts condition or fails with code. *)
 PROCEDURE A(cond: BOOLEAN; code: INTEGER);
 BEGIN
  IF NOT cond THEN fail(code) END
 END A;
 TYPE
  Num = [1..100];
  NumSet = SET OF Num;
 VAR
  (* VAR, not CONST, so compiler can't do constant operations. *)
  primes, teens, lowevens, eighties, nineties: NumSet;
 CONST
  (* These are the expected results of some set operations. *)
  primeteen = NumSet{13, 17, 19};
  compeighties = NumSet{80..82, 84..88};
  teenxoreven = NumSet{2, 4, 6, 8, 10, 12, 13, 15, 17, 19, 20};
  eightiesnineties = NumSet{80..99};
 (* Checks that some set is equal to the expected result.  Also checks
 * that the set is not equal to the other sets. *)
 PROCEDURE Check(set: NumSet; what: INTEGER);
  VAR hi: INTEGER;
 BEGIN
  hi := what * 100H;
  (* The compiler uses cms in EM to check set equality. *)
  A((what = 1) = (set = primeteen), hi + 1);
  A((what = 2) = (set = compeighties), hi + 2);
  A((what = 3) = (set = teenxoreven), hi + 3);
  A((what = 4) = (set = eightiesnineties), hi + 4);
 END Check;
 PROCEDURE Range(min: Num; max: Num): NumSet;
 BEGIN
  (* The compiler calls LtoUset in lang/m2/libm2/LtoUset.e *)
  RETURN NumSet{min..max}
 END Range;
 BEGIN
  primes := NumSet{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43,
                   47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97};
  teens := NumSet{13, 14, 15, 16, 17, 18, 19};
  lowevens := NumSet{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
  eighties := Range(80, 89);
  nineties := Range(90, 99);
  Check(primes * teens, 1);
  Check(eighties - primes, 2);
  Check(teens / lowevens, 3);
  Check(eighties + nineties, 4);
  finished;
 END Set100.
--- a/tests/plat/m2/StringTest_mod.mod
+++ b/tests/plat/m2/StringTest_mod.mod
@ -0,0 +1,55 @@
 MODULE StringTest;
 FROM Strings IMPORT
  Assign, Insert, Delete, Pos, Copy, Concat, Length, CompareStr;
 FROM Test IMPORT fail, finished;
 (* Asserts condition or fails with code. *)
 PROCEDURE A(cond: BOOLEAN; code: INTEGER);
 BEGIN
  IF NOT cond THEN fail(code) END
 END A;
 VAR
  small: ARRAY [0..3] OF CHAR;
  big: ARRAY [0..99] OF CHAR;
 BEGIN
  (* CompareStr *)
  A(CompareStr("ablaze", "ablaze") = 0, 1);
  A(CompareStr("ablaze", "abloom") < 0, 2);
  A(CompareStr("abloom", "ablaze") > 0, 3);
  A(CompareStr("abloom", "abloom") = 0, 4);
  (* Assign, Insert, Delete *)
  Assign("obsequiosity", small);
  A(CompareStr("obsequiosity", small) > 0, 11H);
  Assign("obsequiosity", big);
  A(CompareStr("obsequiosity", big) = 0, 12H);
  A(big[11] = 'y', 13H);
  A(big[11] # 0C, 14H);
  A(big[12] # 'y', 15H);
  A(big[12] = 0C, 16H);
  Insert(" omnihuman", big, 9);
  A(CompareStr("obsequios omnihumanity", big) = 0, 17H);
  Delete(big, 6, 15);
  A(CompareStr("obsequy", big) = 0, 18H);
  (* Pos, Copy *)
  Assign("Now is the time for all good men to come...", big);
  A(Pos("w", big) = 2, 21H);
  A(Pos("t", big) = 7, 22H);
  A(Pos("ti", big) = 11, 23H);
  A(Pos("men", big) = 29, 24H);
  A(Pos("women", big) > 42, 25H);
  Copy(big, 29, 2, small);
  A(CompareStr("me", small) = 0, 26H);
  (* Concat, Length *)
  Concat("pictorial", "ist", big);
  A(CompareStr("pictorialist", big) = 0, 31H);
  A(Length(big) = 12, 32H);
  Concat("zit", "her", small);
  A(CompareStr("zither", small) > 0, 33H);
  A(Length(small) < 5, 34H);
  finished;
 END StringTest.
--- a/tests/plat/rck_e.e
+++ b/tests/plat/rck_e.e
@ -0,0 +1,186 @@
 #
    mes 2, EM_WSIZE, EM_PSIZE
 /*
 * Uses _rck_ for range checks.  Catches the EM trap if a value is out
 * of range, and continues with the next instruction after _rck_.
 *
 * Some back ends, like i80, ignore _rck_, so this test fails.
 */
 testnr
    con 1         ; test number
 caught
    con 0         ; number of caught traps
    inp $next
    inp $catch
    inp $never
    exp $_m_a_i_n
    pro $_m_a_i_n,0
    lim           ; load ignore mask
    loc 2
    and EM_WSIZE  ; check bit 1 << ERANGE
    zeq *1        ; fail if ignoring ERANGE
 .1
    rom 1I4
    lae .1
    loi 4
    cal $fail
    asp 4
 1
    cal $next     ; increment testnr, catch next trap
    loc 10125
 .2
    rom 4283, 13644
    lae .2
    rck EM_WSIZE  ; testnr 2 in range
    asp EM_WSIZE
    cal $next
    loc 4282
    lae .2
    rck EM_WSIZE  ; testnr 3 out of range
    asp EM_WSIZE
    cal $next
    loc 4283
    lae .2
    rck EM_WSIZE  ; testnr 4 in range
    asp EM_WSIZE
    cal $next
    loc 13644
    lae .2
    rck EM_WSIZE  ; testnr 5 in range
    asp EM_WSIZE
    cal $next
    loc 13655
    lae .2
    rck EM_WSIZE  ; testnr 6 out of range
    asp EM_WSIZE
    cal $next
    loc -13015
 .7
    rom -31344, -1898
    lae .7
    rck EM_WSIZE  ; testnr 7 in range
    asp EM_WSIZE
    cal $next
    loc 8580
 .8
    rom -26315, 4588
    lae .8
    rck EM_WSIZE  ; testnr 8 out of range
    asp EM_WSIZE
    ; The last test raised a trap, so now there is no trap handler.
    lpi $never
    sig           ; push old trap handler
    loc 0
    loc EM_WSIZE
    loc EM_PSIZE
    cuu           ; push NULL pointer
    cmp
    zeq *17       ; fail unless old handler is NULL
 .17
    rom 17I4
    lae .17
    loi 4
    cal $fail
    asp 4
 17
    ; Change the trap handler from $never to $catch.
    lpi $catch
    sig
    lpi $never
    cmp
    zeq *18
 .18
    rom 18I4
    lae .18
    loi 4
    cal $fail
    asp 4
 18
    ; Begin ignoring range traps.
    loc 2         ; 1 << ERANGE
    sim
    loc 18
    ste testnr
    loc 8580
    lae .8
    rck EM_WSIZE  ; testnr 18 out of range but ignored
    ; Fail if we caught the wrong number of traps.
    loe caught
    loc 3
    beq *20
 .20
    rom 20I4
    lae .20
    loi 4
    cal $fail
    asp 4
 20
    cal $finished
    end
    pro $next,0
    ine testnr    ; next test
    lpi $catch
    sig           ; catch next EM trap (only one trap)
    asp EM_PSIZE  ; drop the old handler pointer that sig pushed
    ret 0
    end
    pro $catch,0
    ine caught    ; count this trap
    lol 0         ; load trap number
    loc 1
    beq *1        ; fail if trap != ERANGE
 .101
    rom 257I4
    lae .101
    loi 4
    cal $fail
    ; Wrong type of trap.  _rtt_ might not work, so exit now.
    cal $finished
 1
    ; Fail if the wrong test raised this trap.
    loe testnr
    loc 3
    beq *2
    loe testnr
    loc 6
    beq *2
    loe testnr
    loc 8
    beq *2
    loc 256
    loe testnr
    adi EM_WSIZE  ; 0x100 + testnr
    loc EM_WSIZE
    loc 4
    cuu
    cal $fail
    asp 4
 2
    rtt           ; return from trap handler
    end
    pro $never,0
 .200
    rom 200I4
    lae .200
    loi 4
    cal $fail
    asp 4
    rtt
    end
--- a/tests/plat/rotate_e.e
+++ b/tests/plat/rotate_e.e
@ -0,0 +1,223 @@
 #
    mes 2, EM_WSIZE, EM_PSIZE
 /*
 * Tests _rol_ (rotate left) and _ror_ (rotate right).  Several back
 * ends provide _rol_ and _ror_, but as of year 2017, the compilers
 * and optimizers had never emitted _rol_ or _ror_.
 *
 * By tradition, _rol_ and _ror_ can't rotate values shorter than the
 * word size, or longer than 4 bytes.
 *  - If word size is 2, then try rotating 2-byte and 4-byte values.
 *  - If word size is 4, then try rotating 4-byte values.
 *
 * You can cheat this test if _cmu_ always pushes zero.
 */
 #if EM_WSIZE == 2
 #define LEN2  4
    exa table2
    exa left2
    exa right2
 table2         /* left, right */
    con 12715U2  /*  0,  0 */
    con 25430U2  /*  1, 15 */
    con 43825U2  /*  8,  8 */
    con 39125U2  /* 15,  1 */
 left2
    con 0I2, 1I2, 8I2, 15I2
 right2
    con 0I2, 15I2, 8I2, 1I2
 #endif
 #define LEN4  4
    exa table4
    exa left4
    exa right4
 table4              /* left, right */
    con  437223536U4  /*  0,  0 */
    con  874447072U4  /*  1, 31 */
    con 2154830351U4  /* 16, 16 */
    con  218611768U4  /* 31,  1 */
 left4
    con 0I2, 1I2, 16I2, 31I2
 right4
    con 0I2, 31I2, 16I2, 1I2
    exa val4
    exa val4left7
    exa val4right11
 val4
    con 4283808839U4  /* 0xFF55BC47 */
 val4left7
    con 2866684927U4  /* 0xAADE23FF = val4 rotated left by 7 bits */
 val4right11
    con 2298473143U4  /* 0x88FFEAB7 = val4 rotated right by 11 bits */
    exp $_m_a_i_n
    pro $_m_a_i_n, EM_WSIZE
    /*
     * Rotates each table value left and right by each listed distance
     * and compares the result with the expected table entry.  On a
     * mismatch, calls $fail with the source line number.  The only
     * local is the loop index at offset -EM_WSIZE.
     */
 #define i -EM_WSIZE
 #if EM_WSIZE == 2
    /*
     * Loop for LEN2 items in table2.
     */
    loc 0
    stl i
 1
    lae table2
    loi 2         /* value to rotate */
    lae left2
    lol i
    loc 1
    sli EM_WSIZE  /* index * 2: byte offset of a 2-byte entry */
    ads EM_WSIZE
    loi 2         /* left distance */
    rol 2         /* rotate left */
    lae table2
    lol i
    loc 1
    sli EM_WSIZE
    ads EM_WSIZE
    loi 2         /* expected result */
    cmu 2
    zeq *2
    loc __LINE__
    loc EM_WSIZE
    loc 4
    cuu           /* line number as 4 bytes for $fail */
    cal $fail
    asp 4
 2
    lae table2
    loi 2         /* value to rotate */
    lae right2
    lol i
    loc 1
    sli EM_WSIZE
    ads EM_WSIZE
    loi 2         /* right distance */
    ror 2         /* rotate right */
    lae table2
    lol i
    loc 1
    sli EM_WSIZE
    ads EM_WSIZE
    loi 2         /* expected result */
    cmu 2
    zeq *3
    loc __LINE__
    loc EM_WSIZE
    loc 4
    cuu
    cal $fail
    asp 4
 3
    inl i         /* loop LEN2 times */
    lol i
    loc LEN2
    blt *1
 #endif /* EM_WSIZE == 2 */
    /*
     * Loop for LEN4 items in table4.
     */
    loc 0
    stl i
 4
    lae table4
    loi 4         /* value to rotate */
    lae left4
    lol i
    loc 1
    sli EM_WSIZE  /* index * 2: distances are 2-byte entries */
    ads EM_WSIZE
    loi 2         /* left distance */
    loc 2
    loc EM_WSIZE
    cii           /* convert the 2-byte distance to a word */
    rol 4         /* rotate left */
    lae table4
    lol i
    loc 2
    sli EM_WSIZE  /* index * 4: byte offset of a 4-byte entry */
    ads EM_WSIZE
    loi 4         /* expected result */
    cmu 4
    zeq *5
    loc __LINE__
    loc EM_WSIZE
    loc 4
    cuu           /* line number as 4 bytes for $fail */
    cal $fail
    asp 4
 5
    lae table4
    loi 4         /* value to rotate */
    lae right4
    lol i
    loc 1
    sli EM_WSIZE
    ads EM_WSIZE
    loi 2         /* right distance */
    loc 2
    loc EM_WSIZE
    cii
    ror 4         /* rotate right */
    lae table4
    lol i
    loc 2
    sli EM_WSIZE
    ads EM_WSIZE
    loi 4         /* expected result */
    cmu 4
    zeq *6
    loc __LINE__
    loc EM_WSIZE
    loc 4
    cuu
    cal $fail
    asp 4
 6
    inl i         /* loop LEN4 times */
    lol i
    loc LEN4
    blt *4
    /*
     * Rotate 4-byte values by a constant distance, because this uses
     * different rules in PowerPC ncg.
     */
    lae val4
    loi 4
    loc 7
    rol 4         /* rotate left by 7 bits */
    lae val4left7
    loi 4
    cmu 4
    zeq *7
    loc __LINE__
    loc EM_WSIZE
    loc 4
    cuu
    cal $fail
    asp 4
 7
    lae val4
    loi 4
    loc 11
    ror 4         /* rotate right by 11 bits */
    lae val4right11
    loi 4
    cmu 4
    zeq *8
    loc __LINE__
    loc EM_WSIZE
    loc 4
    cuu
    cal $fail
    asp 4
 8
    cal $finished
    end
--- a/tests/plat/setjmp_c.c
+++ b/tests/plat/setjmp_c.c
@ -0,0 +1,58 @@
 #include <setjmp.h>
 #include "test.h"
 /*
 * Sets i = 2 * i for each i in nums, until i == 0, but stops if
 * 2 * i >= 1000.
 *
 * Uses setjmp() and longjmp() in libc.  For ACK's libc, the back end
 * must provide EM's _gto_, and _gto_ must preserve the function
 * return area.
 */
 int nums1[]         = { 79, 245, 164, 403, 0};
 const int expect1[] = {158, 490, 328, 806, 0};
 int nums2[]         = {20, 221, 411, 643, 48, 272, 448, 0};
 const int expect2[] = {40, 442, 822, 643, 48, 272, 448, 0};
 int nums3[]         = {371, 265, 500, 124, 117, 0};
 const int expect3[] = {742, 530, 500, 124, 117, 0};
 int docount = 0;
 /* Returns 2 * i, unless i >= 500: then it escapes through esc with
  * longjmp(esc, i) and does not return to its caller.
  */
 int twice(int i, jmp_buf esc) {
 	int too_big = (i >= 500);
 	if (too_big) {
 		longjmp(esc, i);
 	}
 	return i * 2;
 }
 /* Counts the call in docount, then replaces each element of nums
  * with twice(element, esc), stopping at the first 0 element.
  * twice() may leave early through esc.
  */
 void donums(int *nums, jmp_buf esc) {
 	int *cur = nums;
 	docount++;
 	while (*cur != 0) {
 		*cur = twice(*cur, esc);
 		cur++;
 	}
 }
 /* Doubles nums in place through donums(), then checks every element
  * of nums against expect, up to and including the terminating 0.
  * Returns 0 if donums() ran to completion, or else the value that
  * twice() passed to longjmp().
  */
 int cknums(int *nums, const int *expect) {
 	jmp_buf env;
 	int ret;
 	ret = setjmp(env);  /* 0 on the direct call, nonzero after longjmp() */
 	if (ret == 0)
 		donums(nums, env);
 	/* Reached after donums() returns or after it escapes via longjmp(). */
 	for (;;) {
 		ASSERT(*nums == *expect);
 		if (*expect == 0)
 			break;
 		nums++;
 		expect++;
 	}
 	return ret;
 }
 /* Runs cknums() on the three arrays and checks its return value. */
 int main(void) {
 	ASSERT(cknums(nums1, expect1) == 0);    /* no element >= 500 */
 	ASSERT(cknums(nums2, expect2) == 643);  /* stops at 643 */
 	ASSERT(cknums(nums3, expect3) == 500);  /* stops at 500 */
 	ASSERT(docount == 3);                   /* one donums() per cknums() */
 	finished();
 }
--- a/tests/plat/structcopy_e.c
+++ b/tests/plat/structcopy_e.c
@ -0,0 +1,113 @@
 #include "test.h"
 /* ACK's C compiler uses EM's loi, sti, blm, or an inline loop to copy
 * these structs.  The compiler doesn't call memcpy() or other
 * functions in libc, so this test passes without linking the CRT.
 */
 /* Holds exactly 5 chars. */
 struct c5 {       /* not a whole number of words */
 	char one[5];
 };
 /* Holds a pair of ints. */
 struct ii {       /* two words */
 	int one;
 	int two;
 };
 /* Holds three ints. */
 struct iii {      /* three words */
 	int one;
 	int two;
 	int three;
 };
 /* Compares the first 5 characters of a and b; both must have at
  * least 5.  Returns 1 if all 5 match, else 0.
  */
 int equal5(char *a, char *b) {
 	int pos = 0;
 	while (pos < 5) {
 		if (a[pos] != b[pos])
 			return 0;
 		pos++;
 	}
 	return 1;
 }
 /* Returns, by value, a struct c5 holding the first 5 chars of str. */
 struct c5 make_c5(char *str) {  /* str must have 5 characters */
 	struct c5 out;
 	int i;
 	for (i = 0; i < 5; i++)
 		out.one[i] = str[i];
 	return out;
 }
 /* Returns, by value, a struct ii with one = i and two = j. */
 struct ii make_ii(int i, int j) {
 	struct ii out;
 	out.one = i;
 	out.two = j;
 	return out;
 }
 /* Returns, by value, a struct iii built from both fields of in
  * (passed by value) followed by k.
  */
 struct iii make_iii(struct ii in, int k) {
 	struct iii out;
 	out.one = in.one;
 	out.two = in.two;
 	out.three = k;
 	return out;
 }
 /* Returns a copy of in with its 5 chars rotated left by one place:
  * each char moves down one index and the old first char goes last.
  */
 struct c5 rotate_left_c5(struct c5 in) {
 	int i;
 	char c = in.one[0];
 	/* Modifies our copy of _in_, not caller's copy. */
 	for (i = 0; i < 4; i++)
 		in.one[i] = in.one[i + 1];
 	in.one[4] = c;
 	return in;
 }
 /* Returns a copy of in with its fields rotated left by one place:
  * (one, two, three) becomes (two, three, one).
  */
 struct iii rotate_left_iii(struct iii in) {
 	int i = in.one;
 	/* Modifies our copy of _in_, not caller's copy. */
 	in.one = in.two;
 	in.two = in.three;
 	in.three = i;
 	return in;
 }
 /* Bypasses the CRT, so there's no stdio. */
 /* Builds structs, copies them by assignment and by passing them to
  * and returning them from functions, then asserts that every copy
  * holds the expected values and that the originals are unchanged.
  */
 void _m_a_i_n(void) {
 	struct c5 earth, heart, dup_heart, rol_heart;
 	struct ii pair, dup_pair;
 	struct iii triple, dup_triple, rol_triple;
 	earth = make_c5("earth");
 	heart = make_c5("heart");
 	dup_heart = heart;                  /* copy by assignment */
 	rol_heart = rotate_left_c5(heart);  /* "heart" rotated left is "earth" */
 	ASSERT(equal5(earth.one, "earth"));
 	ASSERT(equal5(heart.one, "heart"));  /* callee changed only its copy */
 	ASSERT(equal5(dup_heart.one, "heart"));
 	ASSERT(equal5(rol_heart.one, "earth"));
 	pair = make_ii(29, 31);
 	dup_pair = pair;                    /* copy by assignment */
 	triple = make_iii(pair, -9);
 	dup_triple = triple;                /* copy by assignment */
 	rol_triple = rotate_left_iii(triple);
 	ASSERT(pair.one == 29);
 	ASSERT(pair.two == 31);
 	ASSERT(dup_pair.one == 29);
 	ASSERT(dup_pair.two == 31);
 	ASSERT(triple.one == 29);           /* callee changed only its copy */
 	ASSERT(triple.two == 31);
 	ASSERT(triple.three == -9);
 	ASSERT(dup_triple.one == 29);
 	ASSERT(dup_triple.two == 31);
 	ASSERT(dup_triple.three == -9);
 	ASSERT(rol_triple.one == 31);
 	ASSERT(rol_triple.two == -9);
 	ASSERT(rol_triple.three == 29);
 	finished();
 }
--- a/util/ego/build.lua
+++ b/util/ego/build.lua
@ -3,6 +3,7 @@ local function build_ego(name)
 		name = name,
 		srcs = { "./"..name.."/*.c" },
 		deps = {
 			"./"..name.."/*.h",
 			"util/ego/share+lib",
 			"modules/src/em_data+lib",
 			"h+emheaders",
--- a/util/ego/ca/ca.c
+++ b/util/ego/ca/ca.c
@ -72,6 +72,7 @@ proc_p* p_out;
 			{
 				/* register message without arguments */
 				oldline(l);
 				continue;
 			}
 			else
 			{
--- a/util/ego/cs/cs.c
+++ b/util/ego/cs/cs.c
@ -25,7 +25,7 @@
 int Scs; /* Number of optimizations found. */
-STATIC cs_clear()
+STATIC void cs_clear()
 {
 	clr_avails();
 	clr_entities();
@ -74,9 +74,7 @@ STATIC void cs_optimize(void *vp)
 	}
 }
-main(argc, argv)
+int main(int argc, char *argv[])
 	int	argc;
 	char	*argv[];
 {
 	Scs = 0;
 	go(argc, argv, no_action, cs_optimize, cs_machinit, no_action);
--- a/util/ego/cs/cs.h
+++ b/util/ego/cs/cs.h
@ -88,12 +88,13 @@ struct occur {
 #define UNAIR_OP	6
 #define BINAIR_OP	7
 #define TERNAIR_OP	8
-#define KILL_ENTITY	9
+#define REMAINDER	9
-#define SIDE_EFFECTS	10
+#define KILL_ENTITY	10
-#define FIDDLE_STACK	11
+#define SIDE_EFFECTS	11
-#define IGNORE		12
+#define FIDDLE_STACK	12
-#define HOPELESS	13
+#define IGNORE		13
-#define BBLOCK_END	14
+#define HOPELESS	14
 #define BBLOCK_END	15
 struct avail {
 	avail_p	av_before;	/* Ptr to earlier discovered expressions. */
--- a/util/ego/cs/cs_aux.c
+++ b/util/ego/cs/cs_aux.c
@ -11,8 +11,7 @@
 #include "cs.h"
 #include "cs_entity.h"
-offset array_elemsize(vn)
+offset array_elemsize(valnum vn)
 	valnum vn;
 {
 	/* Vn is the valuenumber of an entity that points to
 	 * an array-descriptor. The third element of this descriptor holds
@ -36,14 +35,12 @@ offset array_elemsize(vn)
 	return aoff(enp->en_ext->o_dblock->d_values, 2);
 }
-occur_p occ_elem(i)
+occur_p occ_elem(Lindex i)
 	Lindex i;
 {
 	return (occur_p) Lelem(i);
 }
-entity_p en_elem(i)
+entity_p en_elem(Lindex i)
 	Lindex i;
 {
 	return (entity_p) Lelem(i);
 }
@ -54,14 +51,14 @@ entity_p en_elem(i)
 STATIC valnum val_no;
-valnum newvalnum()
+valnum newvalnum(void)
 {
 	/* Return a completely new value number. */
 	return ++val_no;
 }
-start_valnum()
+void start_valnum(void)
 {
 	/* Restart value numbering. */
--- a/util/ego/cs/cs_aux.h
+++ b/util/ego/cs/cs_aux.h
@ -3,28 +3,28 @@
 * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
 * See the copyright notice in the ACK home directory, in the file "Copyright".
 */
-extern offset	array_elemsize();	/* (valnum vm)
+extern offset	array_elemsize(valnum vm);
 					/*
 					 * Returns the size of array-elements,
 					 * if vn is the valuenumber of the
 					 * address of an array-descriptor.
 					 */
-extern occur_p	occ_elem();		/* (Lindex i)
+extern occur_p	occ_elem(Lindex i);	/*
 					 * Returns a pointer to the occurrence
 					 * of which i is an index in a set.
 					 */
-extern entity_p	en_elem();		/* (Lindex i)
+extern entity_p	en_elem(Lindex i);	/*
 					 * Returns a pointer to the entity
 					 * of which i is an index in a set.
 					 */
-extern valnum	newvalnum();		/* ()
+extern valnum	newvalnum(void);	/*
 					 * Returns a completely new
 					 * value number.
 					 */
-extern		start_valnum();		/* ()
+extern void	start_valnum(void);	/*
 					 * Restart value numbering.
 					 */
--- a/util/ego/cs/cs_avail.c
+++ b/util/ego/cs/cs_avail.c
@ -22,8 +22,7 @@
 avail_p avails; /* The list of available expressions. */
-STATIC bool commutative(instr)
+STATIC bool commutative(int instr)
 	int instr;
 {
 	/* Is instr a commutative operator? */
@ -37,9 +36,7 @@ STATIC bool commutative(instr)
 	}
 }
-STATIC bool same_avail(kind, avp1, avp2)
+STATIC bool same_avail(byte kind, avail_p avp1, avail_p avp2)
 	byte kind;
 	avail_p avp1, avp2;
 {
 	/* Two expressions are the same if they have the same operator,
 	 * the same size, and their operand(s) have the same value. 
@ -57,6 +54,7 @@ STATIC bool same_avail(kind, avp1, avp2)
 		case UNAIR_OP:
 			return	avp1->av_operand == avp2->av_operand;
 		case BINAIR_OP:
 		case REMAINDER:
 			if (commutative(avp1->av_instr & BMASK))
 				return	avp1->av_oleft == avp2->av_oleft &&
 					avp1->av_oright == avp2->av_oright
@ -75,8 +73,7 @@ STATIC bool same_avail(kind, avp1, avp2)
 	/* NOTREACHED */
 }
-STATIC void check_local(avp)
+STATIC void check_local(avail_p avp)
 	avail_p avp;
 {
 	/* Check if the local in which the result of avp was stored,
 	 * still holds this result. Update if not.
@ -89,9 +86,7 @@ STATIC void check_local(avp)
 	}
 }
-STATIC entity_p result_local(size, l)
+STATIC entity_p result_local(offset size, line_p l)
 	offset size;
 	line_p l;
 {
 	/* If the result of an expression of size bytes is stored into a
 	 * local for which a registermessage was generated, return a pointer
@ -114,9 +109,7 @@ STATIC entity_p result_local(size, l)
 	return (entity_p) 0;
 }
-STATIC copy_avail(kind, src, dst)
+STATIC void copy_avail(int kind, avail_p src, avail_p dst)
 	int kind;
 	avail_p src, dst;
 {
 	/* Copy some attributes from src to dst. */
@ -132,6 +125,7 @@ STATIC copy_avail(kind, src, dst)
 			dst->av_operand = src->av_operand;
 			break;
 		case BINAIR_OP:
 		case REMAINDER:
 			dst->av_oleft = src->av_oleft;
 			dst->av_oright = src->av_oright;
 			break;
@ -143,10 +137,7 @@ STATIC copy_avail(kind, src, dst)
 	}
 }
-avail_p av_enter(avp, ocp, kind)
+avail_p av_enter(avail_p avp, occur_p ocp, int kind)
 	avail_p avp;
 	occur_p ocp;
 	int kind;
 {
 	/* Put the available expression avp in the list,
 	 * if it is not already there.
@ -171,7 +162,8 @@ avail_p av_enter(avp, ocp, kind)
 	/* Remember local, if any, that holds result. */
 	if (avp->av_instr != (byte) INSTR(last)) {
 		/* Only possible when instr is the implicit AAR in 
-		 * a LAR or SAR.
+		 * a LAR or SAR, or the implicit DVI in an RMI, or
 		 * DVU in RMU.
 		 */
 		ravp->av_saveloc = (entity_p) 0;
 	} else {
@ -186,7 +178,7 @@ avail_p av_enter(avp, ocp, kind)
 	return ravp;
 }
-clr_avails()
+void clr_avails(void)
 {
 	/* Throw away the information about the available expressions. */
--- a/util/ego/cs/cs_avail.h
+++ b/util/ego/cs/cs_avail.h
@ -5,7 +5,8 @@
 */
 extern avail_p	avails;		/* The set of available expressions. */
-extern avail_p	av_enter();	/* (avail_p avp, occur_p ocp, byte kind)
+extern avail_p	av_enter(avail_p avp, occur_p ocp, int kind);
 				/*
 				 * Puts the available expression in avp
 				 * in the list of available expressions,
 				 * if it is not already there. Add ocp to set of
@ -18,6 +19,7 @@ extern avail_p	av_enter();	/* (avail_p avp, occur_p ocp, byte kind)
 				 * Returns a pointer into the list.
 				 */
-extern		clr_avails();	/* Release all space occupied by the old list
+extern void	clr_avails(void);
 				/* Release all space occupied by the old list
 				 * of available expressions.
 				 */
--- a/Show more
+++ b/Show more