diff --git a/mach/i80/libem/cii.s b/mach/i80/libem/cii.s index 7d091da5e..bf4e7efb8 100644 --- a/mach/i80/libem/cii.s +++ b/mach/i80/libem/cii.s @@ -65,19 +65,19 @@ jmp 3f ! done !if destination size < source size only: -shrink: mov l,c ! load source size in hl +shrink: mov l,b ! load destination size in hl mvi h,0 dad sp mov d,h - mov e,l ! de points just above source - mov l,b ! load destination size in hl + mov e,l ! de points just above lowest bytes of source + mov l,c ! load source size in hl mvi h,0 dad sp ! hl points just above "destination" 1: dcx d ! move upwards dcx h - mov a,m - stax d + ldax d + mov m,a dcr b jnz 1b sphl diff --git a/mach/i80/libem/rol4.s b/mach/i80/libem/rol4.s index e5bb1a83d..41219ea0c 100644 --- a/mach/i80/libem/rol4.s +++ b/mach/i80/libem/rol4.s @@ -25,8 +25,8 @@ mov e,a mov a,b - ral -1: mov a,l +1: ral + mov a,l ral mov l,a mov a,h diff --git a/mach/i80/libem/ror4.s b/mach/i80/libem/ror4.s index e77d8a74c..490c75abc 100644 --- a/mach/i80/libem/ror4.s +++ b/mach/i80/libem/ror4.s @@ -25,8 +25,8 @@ mov e,a mov a,l - rar -1: mov a,b +1: rar + mov a,b rar mov b,a mov a,c diff --git a/mach/i80/ncg/table b/mach/i80/ncg/table index 575820c81..e6d7e02f6 100644 --- a/mach/i80/ncg/table +++ b/mach/i80/ncg/table @@ -385,8 +385,9 @@ gen dad de pat loi $1>=512 kills ALL -uses dereg={const2,$1} -gen Call {label,".loi"} +/* 'uses dereg={const2,$1}' fails to kill de. */ +gen lxi de,{const2,$1} + Call {label,".loi"} pat los $1==2 with dereg @@ -597,8 +598,8 @@ gen 1: pat sti kills ALL -uses dereg={const2,$1} -gen Call {label,".sti"} +gen lxi de,{const2,$1} + Call {label,".sti"} pat sts $1==2 with dereg @@ -702,23 +703,24 @@ gen Call {label,".mli4"} pat dvi $1==2 kills ALL -uses areg={const1,129} -gen Call {label,".dvi2"} yields de +/* 'uses areg={const1,129}' fails to kill a. 
*/ +gen mvi a,{const1,129} + Call {label,".dvi2"} yields de pat dvi $1==4 kills ALL -uses areg={const1,129} -gen Call {label,".dvi4"} +gen mvi a,{const1,129} + Call {label,".dvi4"} pat rmi $1==2 kills ALL -uses areg={const1,128} -gen Call {label,".dvi2"} yields de +gen mvi a,{const1,128} + Call {label,".dvi2"} yields de pat rmi $1==4 kills ALL -uses areg={const1,128} -gen Call {label,".dvi4"} +gen mvi a,{const1,128} + Call {label,".dvi4"} pat ngi $1==2 with hl_or_de @@ -738,7 +740,7 @@ pat loc sli ($1 == 8) && ($2 == 2) with hl_or_de gen move %1.2, %1.1 mvi %1.2, {const1,0} yields %1 - + pat sli $1==2 kills ALL gen Call {label,".sli2"} yields de @@ -749,13 +751,13 @@ gen Call {label,".sli4"} pat sri $1==2 kills ALL -uses areg={const1,1} -gen Call {label,".sri2"} yields de +gen mvi a,{const1,1} + Call {label,".sri2"} yields de pat sri $1==4 kills ALL -uses areg={const1,1} -gen Call {label,".sri4"} +gen mvi a,{const1,1} + Call {label,".sri4"} /********************************************/ /* Group 4: Unsigned arithmetic */ @@ -775,23 +777,23 @@ gen Call {label,".mli4"} pat dvu $1==2 kills ALL -uses areg={const1,1} -gen Call {label,".dvi2"} yields de +gen mvi a,{const1,1} + Call {label,".dvi2"} yields de pat dvu $1==4 kills ALL -uses areg={const1,1} -gen Call {label,".dvi4"} +gen mvi a,{const1,1} + Call {label,".dvi4"} pat rmu $1==2 kills ALL -uses areg={const1,0} -gen Call {label,".dvi2"} yields de +gen mvi a,{const1,0} + Call {label,".dvi2"} yields de pat rmu $1==4 kills ALL -uses areg={const1,0} -gen Call {label,".dvi4"} +gen mvi a,{const1,0} + Call {label,".dvi4"} pat slu leaving sli $1 @@ -799,16 +801,16 @@ pat loc sru ($1 == 8) && ($2 == 2) with hl_or_de gen move %1.1, %1.2 mvi %1.1, {const1,0} yields %1 - + pat sru $1==2 kills ALL -uses areg={const1,0} -gen Call {label,".sri2"} yields de +gen mvi a,{const1,0} + Call {label,".sri2"} yields de pat sru $1==4 kills ALL -uses areg={const1,0} -gen Call {label,".sri4"} +gen mvi a,{const1,0} + Call {label,".sri4"} 
/********************************************/ @@ -1047,8 +1049,8 @@ with hlreg pat cii kills ALL -uses areg={const1,1} -gen Call {label,".cii"} +gen mvi a,{const1,1} + Call {label,".cii"} pat loc loc ciu leaving loc $1 loc $2 cuu pat loc loc cui leaving loc $1 loc $2 cuu @@ -1081,8 +1083,8 @@ with hl_or_de pat cuu kills ALL -uses areg={const1,0} -gen Call {label,".cii"} +gen mvi a,{const1,0} + Call {label,".cii"} pat cfi kills ALL @@ -1128,8 +1130,8 @@ gen mov a,%1.2 pat and defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".and"} +gen lxi de,{const2,$1} + Call {label,".and"} pat and !defined($1) with dereg @@ -1156,8 +1158,8 @@ gen mov a,%1.2 pat ior defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".ior"} +gen lxi de,{const2,$1} + Call {label,".ior"} pat ior !defined($1) with dereg @@ -1184,8 +1186,8 @@ gen mov a,%1.2 pat xor defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".xor"} +gen lxi de,{const2,$1} + Call {label,".xor"} pat xor !defined($1) with dereg @@ -1204,8 +1206,8 @@ gen mov a,%1.2 pat com defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".com"} +gen lxi de,{const2,$1} + Call {label,".com"} pat com !defined($1) with dereg @@ -1269,8 +1271,8 @@ gen Call {label,".inn2"} yields de pat inn defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".inn"} yields de +gen lxi de,{const2,$1} + Call {label,".inn"} yields de pat inn !defined($1) with dereg @@ -1284,8 +1286,8 @@ gen Call {label,".set2"} yields de pat set defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".set"} +gen lxi de,{const2,$1} + Call {label,".set"} pat set !defined($1) with dereg @@ -1402,8 +1404,8 @@ pat cmi $1==2 leaving sbi 2 pat cmi $1==4 kills ALL -uses areg={const1,1} -gen Call {label,".cmi4"} yields de +gen mvi a,{const1,1} + Call {label,".cmi4"} yields de pat cmf $1==4 kills ALL @@ -1412,14 +1414,14 @@ gen Call {label,".cmf4"} pat cmf $1==8 kills ALL gen Call {label,".cmf8"} - + pat cmu $1==2 with 
hl_or_de hl_or_de uses areg gen mov a,%2.1 cmp %1.1 jz {label,2f} - jc {label,1f} + jc {label,1f} 0: lxi %2,{const2,1} jmp {label,3f} @@ -1436,15 +1438,15 @@ gen mov a,%2.1 pat cmu $1==4 kills ALL -uses areg={const1,0} -gen Call {label,".cmi4"} yields de +gen mvi a,{const1,0} + Call {label,".cmi4"} yields de pat cms $1==2 leaving cmi 2 pat cms defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".cms"} yields de +gen lxi de,{const2,$1} + Call {label,".cms"} yields de pat cms !defined($1) with dereg @@ -1936,8 +1938,8 @@ gen dad sp pat blm kills ALL -uses dereg={const2,$1} -gen Call {label,".blm"} +gen lxi de,{const2,$1} + Call {label,".blm"} pat bls with dereg @@ -1962,8 +1964,8 @@ with src1or2 src1or2 yields %2 %1 %2 %1 pat dup kills ALL -uses dereg={const2,$1} -gen Call {label,".dup"} +gen lxi de,{const2,$1} + Call {label,".dup"} pat dus $1==2 with dereg @@ -1975,8 +1977,8 @@ with src1or2 src1or2 yields %1 %2 pat exg defined($1) kills ALL -uses dereg={const2,1} -gen Call {label,".exg"} +gen lxi de,{const2,$1} + Call {label,".exg"} pat fil uses hlreg={label,$1} diff --git a/mach/i86/ncg/table b/mach/i86/ncg/table index ffbd7101e..ce2ac7b87 100644 --- a/mach/i86/ncg/table +++ b/mach/i86/ncg/table @@ -2292,7 +2292,7 @@ with CXREG REG REG rcl %3,{ANYCON,1} adc %2,{ANYCON,0} loop {label, 2b} - 1: + 1: yields %3 %2 pat loc ror $1==1 && $2==2 with REG @@ -2311,7 +2311,7 @@ with CXREG REG REG rcl %3,{ANYCON,1} adc %2,{ANYCON,0} loop {label, 2b} - 1: + 1: yields %3 %2 /******************************************************************* * Group 10 : Set Instructions * diff --git a/mach/powerpc/as/mach2.c b/mach/powerpc/as/mach2.c index 4065334e6..e8e61ea0c 100644 --- a/mach/powerpc/as/mach2.c +++ b/mach/powerpc/as/mach2.c @@ -47,11 +47,15 @@ %token OP_FRT_FRB_C %token OP_FRT_RA_D %token OP_FRT_RA_RB +%token OP_L %token OP_LEV %token OP_LIA %token OP_LIL %token OP_LI32 +%token OP_RA_RB +%token OP_RA_RB_TH %token OP_RA_RS_C +%token OP_RA_RS_RA_C %token 
OP_RA_RS_RB_C %token OP_RA_RS_RB_MB5_ME5_C %token OP_RA_RS_RB_MB6_C @@ -61,14 +65,14 @@ %token OP_RA_RS_SH6_MB6_C %token OP_RA_RS_UI %token OP_RA_RS_UI_CC +%token OP_RS %token OP_RS_FXM %token OP_RS_RA %token OP_RS_RA_D %token OP_RS_RA_DS %token OP_RS_RA_NB %token OP_RS_RA_RB -%token OP_RS_RA_RB_C -%token OP_RS_RA_RA_C +%token OP_RS_RA_RB_CC %token OP_RS_RB %token OP_RS_SPR %token OP_RS_SR @@ -104,4 +108,5 @@ %type c %type e16 negate16 u8 u7 u6 u5 u4 u2 u1 -%type opt_bh cr_opt nb ds bda bdl lia lil spr_num +%type opt_bh cr_opt nb ds bda bdl lia lil +%type spr_num tbr_num opt_tbr diff --git a/mach/powerpc/as/mach3.c b/mach/powerpc/as/mach3.c index 91b088a6a..99507087d 100644 --- a/mach/powerpc/as/mach3.c +++ b/mach/powerpc/as/mach3.c @@ -103,6 +103,10 @@ 0, OP_HA, 0, "ha16", 0, OP_LO, 0, "lo16", +/* The next page numbers are from PowerPC User Instruction Set + * Architecture, Book I, Version 2.01. + */ + /* Branch processor instructions (page 20) */ 0, OP_LIL, 18<<26 | 0<<1 | 0<<0, "b", @@ -128,7 +132,7 @@ 0, OP_BT_BA_BB, 19<<26 | 417<<1, "crorc", 0, OP_BF_BFA, 19<<26 | 0<<1, "mcrf", -/* extended mnemonics for bc, bcctr, bclr */ +/* extended mnemonics for bc, bcctr, bclr (page 144) */ 0, OP_BH, 19<<26 | 20<<21 | 528<<1 | 0<<0, "bctr", 0, OP_BH, 19<<26 | 20<<21 | 528<<1 | 1<<0, "bctrl", 0, OP_BDL, 16<<26 | 16<<21 | 0<<1 | 0<<0, "bdnz", @@ -186,7 +190,7 @@ 0, OP_BI_BH, 19<<26 | 12<<21 | 16<<1 | 0<<0, "btlr", 0, OP_BI_BH, 19<<26 | 12<<21 | 16<<1 | 1<<0, "btlrl", -/* extended m with condition in BI */ +/* extended m with condition in BI (page 146) */ 0, OP_BICR_BDL, 16<<26 | 12<<21 | 2<<16 | 0<<1 | 0<<0, "beq", 0, OP_BICR_BDA, 16<<26 | 12<<21 | 2<<16 | 1<<1 | 0<<0, "beqa", 0, OP_BICR_BH, 19<<26 | 12<<21 | 2<<16 | 528<<1 | 0<<0, "beqctr", @@ -284,7 +288,7 @@ 0, OP_BICR_BH, 19<<26 | 12<<21 | 3<<16 | 16<<1 | 0<<0, "bunlr", 0, OP_BICR_BH, 19<<26 | 12<<21 | 3<<16 | 16<<1 | 1<<0, "bunlrl", -/* extended m for cr logic */ +/* extended m for cr logic (page 147) */ 0, 
OP_BT_BT_BT, 19<<26 | 289<<1, "crset", 0, OP_BT_BT_BT, 19<<26 | 193<<1, "crclr", 0, OP_BT_BA_BA, 19<<26 | 449<<1, "crmove", @@ -377,12 +381,12 @@ 0, OP_RT_RA_C, 31<<26 | 0<<10 | 104<<1, "neg", 0, OP_RT_RA_C, 31<<26 | 1<<10 | 104<<1, "nego", -/* extended m for addition */ +/* extended m for addition (pages 153, 154) */ 0, OP_RT_RA_D, 14<<26, "la", 0, OP_RT_SI, 14<<26 | 0<<16, "li", 0, OP_RT_SI, 15<<26 | 0<<16, "lis", -/* extended m for subtraction */ +/* extended m for subtraction (pages 147, 148) */ 0, OP_RT_RB_RA_C, 31<<26 | 0<<10 | 40<<1, "sub", 0, OP_RT_RB_RA_C, 31<<26 | 1<<10 | 40<<1, "subo", 0, OP_RT_RB_RA_C, 31<<26 | 0<<10 | 8<<1, "subc", @@ -418,7 +422,7 @@ 0, OP_BF_L_RA_UI, 10<<26, "cmpli", 0, OP_BF_L_RA_RB, 31<<26 | 32<<1, "cmpl", -/* extended m for comparison */ +/* extended m for comparison (page 149) */ 0, OP_BF_RA_SI, 11<<26 | 1<<21, "cmpdi", 0, OP_BF_RA_RB, 31<<26 | 1<<21 | 0<<1, "cmpd", 0, OP_BF_RA_UI, 10<<26 | 1<<21, "cmpldi", @@ -434,7 +438,7 @@ 0, OP_TO_RA_RB, 31<<26 | 68<<1, "td", 0, OP_TO_RA_RB, 31<<26 | 4<<1, "tw", -/* extended m for traps */ +/* extended m for traps (page 150) */ 0, OP_TOX_RA_RB, 31<<26 | 4<<21 | 68<<1, "tdeq", 0, OP_TOX_RA_SI, 2<<26 | 4<<21, "tdeqi", 0, OP_TOX_RA_RB, 31<<26 | 12<<21 | 68<<1, "tdge", @@ -518,11 +522,10 @@ 0, OP_RA_RS_C, 31<<26 | 58<<1, "cntlzd", 0, OP_RA_RS_C, 31<<26 | 26<<1, "cntlzw", -/* extended m using logic */ -0, OP_RS_RA_RA_C, 31<<26 | 444<<1, "mr", +/* extended m using logic (pages 153, 154) */ +0, OP_RA_RS_RA_C, 31<<26 | 444<<1, "mr", 0, OP, 24<<26, "nop", -0, OP_RS_RA_RA_C, 31<<26 | 124<<1, "not", -0, OP, 26<<26, "xnop", +0, OP_RA_RS_RA_C, 31<<26 | 124<<1, "not", /* page 69 */ 0, OP_RA_RS_SH6_MB6_C, 30<<26 | 0<<2, "rldicl", @@ -535,7 +538,7 @@ 0, OP_RA_RS_SH6_MB6_C, 30<<26 | 3<<2, "rldimi", 0, OP_RA_RS_SH5_MB5_ME5_C, 20<<26, "rlwimi", -/* extended m for doubleword rotation */ +/* extended m for doubleword rotation (page 151) */ 0, OP_clrlsldi, 30<<26 | 2<<2, "clrlsldi", 0, OP_clrldi, 30<<26 | 0<<2, 
"clrldi", 0, OP_clrrdi, 30<<26 | 1<<2, "clrrdi", @@ -548,7 +551,7 @@ 0, OP_sldi, 30<<26 | 1<<2, "sldi", 0, OP_srdi, 30<<26 | 0<<2, "srdi", -/* extended m for word rotation */ +/* extended m for word rotation (page 152) */ 0, OP_clrlslwi, 21<<26, "clrlslwi", 0, OP_clrlwi, 21<<26, "clrlwi", 0, OP_clrrwi, 21<<26, "clrrwi", @@ -573,21 +576,25 @@ 0, OP_RA_RS_RB_C, 31<<26 | 792<<1, "sraw", /* page 78 */ -0, OP_RS_SPR, 31<<26 | 467<<1, "mtspr", -0, OP_RT_SPR, 31<<26 | 339<<1, "mfspr", -0, OP_RS_FXM, 31<<26 | 0<<21 | 144<<1, "mtcrf", -0, OP_RT, 31<<26 | 0<<21 | 19<<1, "mfcr", +0, OP_RS_SPR, 31<<26 | 467<<1, "mtspr", +0, OP_RT_SPR, 31<<26 | 339<<1, "mfspr", +0, OP_RS_FXM, 31<<26 | 0<<20 | 144<<1, "mtcrf", +0, OP_RT, 31<<26 | 0<<20 | 19<<1, "mfcr", -/* extended m for special purpose registers */ +/* extended m for special purpose registers (page 153) */ 0, OP_RT, 31<<26 | 9<<16 | 0<<11 | 339<<1, "mfctr", 0, OP_RT, 31<<26 | 8<<16 | 0<<11 | 339<<1, "mflr", 0, OP_RT, 31<<26 | 1<<16 | 0<<11 | 339<<1, "mfxer", -0, OP_RT, 31<<26 | 9<<16 | 0<<11 | 467<<1, "mtctr", -0, OP_RT, 31<<26 | 8<<16 | 0<<11 | 467<<1, "mtlr", -0, OP_RT, 31<<26 | 1<<16 | 0<<11 | 467<<1, "mtxer", +0, OP_RS, 31<<26 | 9<<16 | 0<<11 | 467<<1, "mtctr", +0, OP_RS, 31<<26 | 8<<16 | 0<<11 | 467<<1, "mtlr", +0, OP_RS, 31<<26 | 1<<16 | 0<<11 | 467<<1, "mtxer", + +/* extended m for condition register (page 154) */ +0, OP_RS, 31<<26 | 0<<20 | 255<<12 | 144<<1, "mtcr", /* Floating point instructions (page 83) */ +/* page 98 */ 0, OP_FRT_RA_D, 48<<26, "lfs", 0, OP_FRT_RA_RB, 31<<26 | 535<<1, "lfsx", 0, OP_FRT_RA_D, 49<<26, "lfsu", @@ -606,6 +613,7 @@ 0, OP_FRS_RA_RB, 31<<26 | 759<<1, "stfdux", 0, OP_FRS_RA_RB, 31<<26 | 983<<1, "stfiwx", +/* page 104 */ 0, OP_FRT_FRB_C, 63<<26 | 72<<1, "fmr", 0, OP_FRT_FRB_C, 63<<26 | 40<<1, "fneg", 0, OP_FRT_FRB_C, 63<<26 | 264<<1, "fabs", @@ -629,6 +637,7 @@ 0, OP_FRT_FRA_FRC_FRB_C, 63<<26 | 30<<1, "fnmsub", 0, OP_FRT_FRA_FRC_FRB_C, 59<<26 | 30<<1, "fnmsubs", +/* page 109 */ 0, 
OP_FRT_FRB_C, 63<<26 | 12<<1, "frsp", 0, OP_FRT_FRB_C, 63<<26 | 814<<1, "fctid", 0, OP_FRT_FRB_C, 63<<26 | 815<<1, "fctidz", @@ -652,4 +661,31 @@ 0, OP_FRT_FRB_C, 63<<26 | 26<<1, "frsqrte", 0, OP_FRT_FRA_FRC_FRB_C, 63<<26 | 23<<1, "fsel", -/* page 98 */ +/* Storage control instructions (Book II, page 15) */ + +/* Book II, page 17 */ +0, OP_RA_RB, 31<<26 | 982<<1, "icbi", +0, OP_RA_RB_TH /* page 35 */, 31<<26 | 278<<1, "dcbt", +0, OP_RA_RB, 31<<26 | 246<<1, "dcbtst", +0, OP_RA_RB, 31<<26 | 1014<<1, "dcbz", +0, OP_RA_RB, 31<<26 | 54<<1, "dcbst", +0, OP_RA_RB, 31<<26 | 86<<1, "dcbf", +0, OP, 19<<26 | 150<<1, "isync", +0, OP_RT_RA_RB, 31<<26 | 20<<1, "lwarx", +0, OP_RT_RA_RB, 31<<26 | 84<<1, "ldarx", +0, OP_RS_RA_RB_CC, 31<<26 | 150<<1 | 1<<0, "stwcx", +0, OP_RS_RA_RB_CC, 31<<26 | 214<<1 | 1<<0, "stdcx", /* stdcx. is XO 214; XO 150 is stwcx. */ +0, OP_L, 31<<26 | 598<<1, "sync", +0, OP, 31<<26 | 1<<21 | 598<<1, "lwsync", +0, OP, 31<<26 | 2<<21 | 598<<1, "ptesync", +0, OP, 31<<26 | 854<<1, "eieio", + +/* Time base (Book II, page 30) */ + +0, OP_RT_TBR, 31<<26 | 371<<1, "mftb", +0, OP_RT, 31<<26 | 8<<11 | 13<<16 | 371<<1, "mftbu", + +/* External control (Book II, page 33) */ + +0, OP_RT_RA_RB, 31<<26 | 310<<1, "eciwx", +0, OP_RS_RA_RB, 31<<26 | 438<<1, "ecowx", diff --git a/mach/powerpc/as/mach4.c b/mach/powerpc/as/mach4.c index 8a0cca9de..b344ba8ce 100644 --- a/mach/powerpc/as/mach4.c +++ b/mach/powerpc/as/mach4.c @@ -42,7 +42,23 @@ operation | OP_FRT_RA_D FPR ',' e16 '(' GPR ')' { emit_hl($1 | ($2<<21) | ($6<<16) | $4); } | OP_FRT_RA_RB FPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_FRT_C c FPR { emit4($1 | $2 | ($3<<21)); } - | OP_RA_RS_C c GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16)); } + | OP_L { emit4($1); } + | OP_L u2 { emit4($1 | ($2<<21)); } + | OP_LEV { emit4($1); } + | OP_LEV u7 { emit4($1 | ($2<<5)); } + | OP_RA_RB GPR ',' GPR + { emit4($1 | ($2<<16) | ($4<<11)); } + | OP_RA_RB_TH GPR ',' GPR opt_bh + { emit4($1 | $5 | ($2<<16) | ($4<<11)); } + /* + * For 
instructions with "mnemonic RS, RA, ..." + * OP_RA_RS_... swaps RS and RA to (RA<<21) || (RS<<16) + * OP_RS_RA_... keeps RS and RA as (RS<<21) || (RA<<16) + */ + | OP_RA_RS_C c GPR ',' GPR + { emit4($1 | $2 | ($5<<21) | ($3<<16)); } + | OP_RA_RS_RA_C c GPR ',' GPR + { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); } | OP_RA_RS_RB_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); } | OP_RA_RS_RB_MB5_ME5_C c GPR ',' GPR ',' GPR ',' u5 ',' u5 @@ -75,20 +91,19 @@ operation | OP_RT_RB_RA_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($3<<21) | ($7<<16) | ($5<<11)); } | OP_RT_SI GPR ',' e16 { emit_hl($1 | ($2<<21) | $4); } | OP_RT_SPR GPR ',' spr_num { emit4($1 | ($2<<21) | ($4<<11)); } + | OP_RT_TBR GPR opt_tbr { emit4($1 | ($2<<21) | ($3<<11)); } + | OP_RS GPR { emit4($1 | ($2<<21)); } | OP_RS_FXM u7 ',' GPR { emit4($1 | ($4<<21) | ($2<<12)); } | OP_RS_RA_D GPR ',' e16 '(' GPR ')' { emit_hl($1 | ($2<<21) | ($6<<16) | $4); } | OP_RS_RA_DS GPR ',' ds '(' GPR ')' { emit_hl($1 | ($2<<21) | ($6<<16) | $4); } | OP_RS_RA_NB GPR ',' GPR ',' nb { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_RS_RA_RB GPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } - | OP_RS_RA_RB_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); } - | OP_RS_RA_RA_C c GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); } + | OP_RS_RA_RB_CC C GPR ',' GPR ',' GPR { emit4($1 | ($3<<21) | ($5<<16) | ($7<<11)); } | OP_RS_SPR spr_num ',' GPR { emit4($1 | ($4<<21) | ($2<<11)); } | OP_TO_RA_RB u5 ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_TO_RA_SI u5 ',' GPR ',' e16 { emit_hl($1 | ($2<<21) | ($4<<16) | $6); } | OP_TOX_RA_RB GPR ',' GPR { emit4($1 | ($2<<16) | ($4<<11)); } | OP_TOX_RA_SI GPR ',' e16 { emit_hl($1 | ($2<<16) | $4); } - | OP_LEV { emit4($1); } - | OP_LEV u7 { emit4($1 | ($2<<5)); } | OP_LIA lia { emit4($1 | $2); } | OP_LIL lil { emit4($1 | $2); } | OP_LI32 li32 /* emitted in subrule */ @@ -298,7 
+313,7 @@ u2 } ; -/* Optional comma, branch hint. */ +/* Optional comma, branch hint (or touch hint). */ opt_bh : /* nothing */ { $$ = 0; } | ',' u2 { $$ = ($2<<11); } @@ -409,13 +424,28 @@ lia } ; +/* + * Instructions "mfspr", "mtspr", and "mftb" encode the 10-bit special + * purpose register (spr) or time base register (tbr) by swapping the + * low 5 bits with the high 5 bits. The value from an SPR token has + * already been swapped. + */ + spr_num - : SPR { $$ = $1; } - | absexp + : SPR { $$ = $1; } + | tbr_num { $$ = $1; } + ; + +opt_tbr + : /* nothing */ { $$ = 8 | (12<<5); } + | ',' tbr_num { $$ = $2; } + ; + +tbr_num + : absexp { if (($1 < 0) || ($1 > 0x3ff)) - serror("spr number out of range"); - /* mfspr, mtspr swap the low and high 5 bits */ + serror("10-bit unsigned value out of range"); $$ = ($1 >> 5) | (($1 & 0x1f) << 5); } ; diff --git a/mach/powerpc/libem/aar4.s b/mach/powerpc/libem/aar4.s index fc8620d02..08390b081 100644 --- a/mach/powerpc/libem/aar4.s +++ b/mach/powerpc/libem/aar4.s @@ -8,21 +8,17 @@ .define .aar4 .aar4: - lis r0, hi16[.trap_earray] - ori r0, r0, lo16[.trap_earray] - mtspr ctr, r0 ! load CTR with trap address - lwz r4, 0(sp) ! r4 = address of descriptor lwz r5, 4(sp) ! r5 = index lwz r6, 8(sp) ! r6 = address of array lwz r0, 0(r4) subf. r5, r0, r5 ! subtract lower bound from index - bltctr ! check lower bound + blt .trap_earray ! check lower bound lwz r0, 4(r4) cmplw r5, r0 - bgtctr ! check upper bound + bgt .trap_earray ! check upper bound lwz r3, 8(r4) ! r3 = size of element mullw r5, r5, r3 ! scale index by size @@ -30,3 +26,7 @@ stw r6, 8(sp) ! push address of element addi sp, sp, 8 blr + +.trap_earray: + li r3, 0 ! EARRAY = 0 in h/em_abs.h + b .trp diff --git a/mach/powerpc/libem/bls4.s b/mach/powerpc/libem/bls4.s new file mode 100644 index 000000000..a36faca68 --- /dev/null +++ b/mach/powerpc/libem/bls4.s @@ -0,0 +1,19 @@ +.sect .text + +! Does a block move of words between non-overlapping buffers. +! 
Stack: ( src dst len -- ) + +.define .bls4 +.bls4: + lwz r3, 0(sp) ! len + lwz r4, 4(sp) ! dst + lwz r5, 8(sp) ! src + addi sp, sp, 12 + srwi r3, r3, 2 + mtspr ctr, r3 + addi r5, r5, -4 + addi r4, r4, -4 +1: lwzu r3, 4(r5) + stwu r3, 4(r4) + bdnz 1b + blr diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua index 16a03147e..5ed9b52e8 100644 --- a/mach/powerpc/libem/build.lua +++ b/mach/powerpc/libem/build.lua @@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do acklibrary { name = "lib_"..plat, srcs = { - "./*.s", -- rm ret.s + "./*.s", -- dus4.s }, vars = { plat = plat }, deps = { @@ -15,4 +15,3 @@ for _, plat in ipairs(vars.plats) do } } end - diff --git a/mach/powerpc/libem/cfu8.s b/mach/powerpc/libem/cfu8.s index 915f84dd2..710d2a65c 100644 --- a/mach/powerpc/libem/cfu8.s +++ b/mach/powerpc/libem/cfu8.s @@ -1,3 +1,5 @@ +.sect .text; .sect .rom; .sect .data; .sect .bss + .sect .text ! Converts a 64-bit double into a 32-bit unsigned integer. @@ -6,32 +8,42 @@ .define .cfu8 .cfu8: - lis r3, ha16[.fd_00000000] - lfd f0, lo16[.fd_00000000](r3) ! f0 = 0.0 - - lfd f1, 0(sp) ! value to be converted - - lis r3, ha16[.fd_FFFFFFFF] - lfd f3, lo16[.fd_FFFFFFFF](r3) ! f3 = 0xFFFFFFFF - - lis r3, ha16[.fd_80000000] - lfd f4, lo16[.fd_80000000](r3) ! f4 = 0x80000000 - - fsel f2, f1, f1, f0 - fsub f5, f3, f1 - fsel f2, f5, f2, f3 - fsub f5, f2, f4 - fcmpu cr0, f2, f4 - fsel f2, f5, f5, f2 - fctiwz f2, f2 - - stfd f2, 0(sp) - addi sp, sp, 4 - - bltlr - - lwz r3, 0(sp) - xoris r3, r3, 0x8000 - stw r3, 0(sp) - + lfd f1, 0(sp) ! f1 = value to convert + lis r3, ha16[.fs_80000000] + lfs f2, lo16[.fs_80000000](r3) ! f2 = 2**31 + fsub f1, f1, f2 + fctiwz f1, f1 ! convert value - 2**31 + stfd f1, 0(sp) + lwz r3, 4(sp) + xoris r3, r3, 0x8000 ! add 2**31 + stw r3, 4(sp) + addi sp, sp, 4 blr + +.sect .rom +.fs_80000000: + !float 2.147483648e+9 sz 4 + .data1 0117,00,00,00 + +! Freescale and IBM provide an example using fsel to select value or +! 
value - 2**31 for fctiwz. The following code adapts Freescale's +! _Programming Environments Manual for 32-Bit Implementations of the +! PowerPC Architecture_, section C.3.2, pdf page 557. +! +! Given f2 = value clamped from 0 to 2**32 - 1, f4 = 2**31, then +! fsub f5, f2, f4 +! fcmpu cr2, f2, f4 +! fsel f2, f5, f5, f2 +! fctiwz f2, f2 +! stfdu f2, 0(sp) +! lwz r3, 4(sp) +! blt cr2, 1f +! xoris r3, r3, 0x8000 +! 1: yields r3 = the converted value. +! +! Debian's clang 3.5.0-10 and gcc 4.9.2-10 don't clamp the value +! before conversion. They avoid fsel and use the conditional branch +! to pick between 2 fctiwz instructions. +! +! PowerPC 601 lacks fsel (but kernel might trap and emulate fsel). +! PowerPC 603, 604, G3, G4, G5 have fsel. diff --git a/mach/powerpc/libem/csa.s b/mach/powerpc/libem/csa.s index 3898241c4..86d792554 100644 --- a/mach/powerpc/libem/csa.s +++ b/mach/powerpc/libem/csa.s @@ -13,22 +13,21 @@ lwz r4, 4(sp) addi sp, sp, 8 - lwz r5, 0(r3) ! load default - mtspr ctr, r5 - - lwz r5, 4(r3) ! fetch lower bound - subf. r4, r5, r4 ! adjust value - bltctr ! jump to default if out of range + lwz r5, 0(r3) ! r5 = default target - lwz r5, 8(r3) ! fetch range - cmplw r4, r5 - bgtctr ! jump to default if out of range + lwz r6, 4(r3) ! fetch lower bound + subf. r4, r6, r4 ! adjust value + blt 1f ! jump to default if out of range + + lwz r6, 8(r3) ! fetch range + cmplw r4, r6 + bgt 1f ! jump to default if out of range addi r3, r3, 12 ! skip header slwi r4, r4, 2 ! scale value (<<2) - lwzx r5, r3, r4 ! load target - mtspr ctr, r5 + lwzx r5, r3, r4 ! r5 = new target - or. r5, r5, r5 ! test it +1: mtspr ctr, r5 + mr. r5, r5 ! test it bnectr ! jump to target if non-zero b .trap_ecase ! otherwise trap diff --git a/mach/powerpc/libem/csb.s b/mach/powerpc/libem/csb.s index 571bfc210..92c6d096d 100644 --- a/mach/powerpc/libem/csb.s +++ b/mach/powerpc/libem/csb.s @@ -13,23 +13,20 @@ lwz r4, 4(sp) addi sp, sp, 8 - lwz r5, 0(r3) ! 
load default - mtspr ctr, r5 + lwz r5, 0(r3) ! r5 = default target lwz r6, 4(r3) ! fetch count - -1: - or. r6, r6, r6 ! test count - beqctr ! exit if zero - addi r6, r6, -1 ! otherwise decrement - - lwzu r7, 8(r3) ! fetch target index, increment pointer + mr. r6, r6 ! skip loop if count is zero + beq 3f ! (needed by Modula-2 "CASE i OF END") + mtspr ctr, r6 +1: lwzu r7, 8(r3) ! fetch target index, increment pointer cmpw r4, r7 ! compare with value - bne 1b ! if not equal, go again + beq 2f + bdnz 1b ! if not equal, go again + b 3f - lwz r7, 4(r3) ! fetch target address - mtspr ctr, r7 - - or. r7, r7, r7 ! test it +2: lwz r5, 4(r3) ! r5 = new target +3: mtspr ctr, r5 + mr. r5, r5 ! test target bnectr ! jump to target if non-zero b .trap_ecase ! otherwise trap diff --git a/mach/powerpc/libem/dus4.s b/mach/powerpc/libem/dus4.s new file mode 100644 index 000000000..9c751947a --- /dev/null +++ b/mach/powerpc/libem/dus4.s @@ -0,0 +1,16 @@ +.sect .text + +! Duplicates some words on top of stack. +! Stack: ( a size -- a a ) + +.define .dus4 +.dus4: + lwz r3, 0(sp) + addi sp, sp, 4 + srwi r4, r3, 2 + mtspr ctr, r4 + add r5, sp, r3 +1: lwzu r4, -4(r5) + stwu r4, -4(sp) + bdnz 1b + blr diff --git a/mach/powerpc/libem/exg.s b/mach/powerpc/libem/exg.s new file mode 100644 index 000000000..eb631b697 --- /dev/null +++ b/mach/powerpc/libem/exg.s @@ -0,0 +1,22 @@ +.sect .text + +! Exchange top two values on stack. +! Stack: ( a b size -- b a ) + +.define .exg +.exg: + lwz r3, 0(sp) ! r3 = size + srwi r7, r3, 2 + mtspr ctr, r7 ! ctr = size / 4 + mr r4, sp ! r4 = pointer before value b + add r5, r4, r3 ! r5 = pointer before value a + + ! Loop to swap each pair of words. +1: lwzu r6, 4(r4) + lwzu r7, 4(r5) + stw r6, 0(r5) + stw r7, 0(r4) + bdnz 1b ! loop ctr times + + addi sp, sp, 4 ! 
drop size from stack + blr diff --git a/mach/powerpc/libem/fd_80000000.s b/mach/powerpc/libem/fd_80000000.s deleted file mode 100644 index 5c153bba8..000000000 --- a/mach/powerpc/libem/fd_80000000.s +++ /dev/null @@ -1,10 +0,0 @@ -.sect .text; .sect .rom; .sect .data; .sect .bss - -.sect .rom - -! Contains a handy double-precision 0x80000000. - -.define .fd_80000000 -.fd_80000000: - !float 2.147483648e+9 sz 8 - .data1 0101,0340,00,00,00,00,00,00 diff --git a/mach/powerpc/libem/fd_FFFFFFFF.s b/mach/powerpc/libem/fd_FFFFFFFF.s deleted file mode 100644 index 88cf04bd9..000000000 --- a/mach/powerpc/libem/fd_FFFFFFFF.s +++ /dev/null @@ -1,10 +0,0 @@ -.sect .text; .sect .rom; .sect .data; .sect .bss - -.sect .rom - -! Contains a handy double-precision 0xFFFFFFFF. - -.define .fd_FFFFFFFF -.fd_FFFFFFFF: - !float 4.294967295e+9 sz 8 - .data1 0101,0357,0377,0377,0377,0340,00,00 diff --git a/mach/powerpc/libem/fef4.s b/mach/powerpc/libem/fef4.s new file mode 100644 index 000000000..a338ed0a9 --- /dev/null +++ b/mach/powerpc/libem/fef4.s @@ -0,0 +1,48 @@ +.sect .text + +! Split a single-precision float into fraction and exponent, like +! frexpf(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp +! +! Stack: ( single -- fraction exponent ) + +.define .fef4 +.fef4: + lwz r3, 0(sp) ! r3 = word of float bits + + ! IEEE single = sign * 1.fraction * 2**(exponent - 127) + ! sign exponent fraction + ! 0 1..8 9..31 + ! + ! IEEE exponent = 126 in [0.5, 1) or (-1, -0.5]. + + extrwi. r6, r3, 8, 1 ! r6 = IEEE exponent + beq 3f ! jump if zero or denormalized + cmpwi r6, 255 + addi r5, r6, -126 ! r5 = our exponent + beq 2f ! jump if infinity or NaN + ! fall through if normalized + + ! Put fraction in [0.5, 1) or (-1, -0.5]. +1: li r6, 126 + insrwi r3, r6, 8, 1 ! IEEE exponent = 126 + ! fall through + +2: stw r3, 0(sp) ! push fraction + stwu r5, -4(sp) ! push exponent + blr + + ! Got denormalized number or zero, probably zero. + ! If zero, then exponent must also be zero. +3: extrwi. 
r6, r3, 23, 9 ! r6 = fraction + bne 4f ! jump if not zero + li r5, 0 ! exponent = 0 + b 2b + + ! Got denormalized number = 0.fraction * 2**-126 +4: cntlzw r5, r6 + addi r5, r5, -8 + slw r6, r6, r5 ! shift left to make 1.fraction + insrwi r3, r6, 23, 9 ! set new fraction + li r6, -126 + 1 + subf r5, r5, r6 ! r5 = our exponent + b 1b diff --git a/mach/powerpc/libem/fef8.s b/mach/powerpc/libem/fef8.s index 26a962d8b..aff5ea3b6 100644 --- a/mach/powerpc/libem/fef8.s +++ b/mach/powerpc/libem/fef8.s @@ -3,7 +3,7 @@ .sect .text ! Split a double-precision float into fraction and exponent, like -! frexp(3) in C. +! frexp(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp ! ! Stack: ( double -- fraction exponent ) @@ -12,42 +12,41 @@ lwz r3, 0(sp) ! r3 = high word (bits 0..31) lwz r4, 4(sp) ! r4 = low word (bits 32..63) - ! IEEE double-precision format: + ! IEEE double = sign * 1.fraction * 2**(exponent - 1023) ! sign exponent fraction ! 0 1..11 12..63 ! - ! To get fraction in [0.5, 1) or (-1, -0.5], we subtract 1022 - ! from the IEEE exponent. + ! IEEE exponent = 1022 in [0.5, 1) or (-1, -0.5]. extrwi. r6, r3, 11, 1 ! r6 = IEEE exponent - addi r5, r6, -1022 ! r5 = our exponent - beq 2f ! jump if zero or denormalized + beq 3f ! jump if zero or denormalized cmpwi r6, 2047 - beq 1f ! jump if infinity or NaN + addi r5, r6, -1022 ! r5 = our exponent + beq 2f ! jump if infinity or NaN ! fall through if normalized - ! Put fraction in [0.5, 1) or (-1, -0.5] by setting its - ! IEEE exponent to 1022. - rlwinm r3, r3, 0, 12, 0 ! clear old exponent - oris r3, r3, 1022 << 4 ! set new exponent + ! Put fraction in [0.5, 1) or (-1, -0.5]. +1: li r6, 1022 + insrwi r3, r6, 11, 1 ! IEEE exponent = 1022 ! fall through -1: stw r3, 0(sp) +2: stw r3, 0(sp) stw r4, 4(sp) ! push fraction stwu r5, -4(sp) ! push exponent blr -2: ! Got denormalized number or zero, probably zero. - extrwi r6, r3, 22, 12 + ! Got denormalized number or zero, probably zero. + ! 
If zero, then exponent must also be zero. +3: extrwi r6, r3, 20, 12 or. r6, r6, r4 ! r6 = high|low fraction - bne 3f ! jump if not zero + bne 4f ! jump if not zero li r5, 0 ! exponent = 0 - b 1b + b 2b -3: ! Got denormalized number, not zero. - lfd f0, 0(sp) - lis r6, ha16[_2_64] - lfd f1, lo16[_2_64](r6) + ! Got denormalized number = 0.fraction * 2**-1022 +4: lfd f0, 0(sp) + lis r6, ha16[.fs_2_64] + lfs f1, lo16[.fs_2_64](r6) fmul f0, f0, f1 ! multiply it by 2**64 stfd f0, 0(sp) lwz r3, 0(sp) @@ -57,7 +56,6 @@ b 1b .sect .rom -_2_64: - ! (double) 2**64 - .data4 0x43f00000 - .data4 0x00000000 +.fs_2_64: + !float 1.84467440737095516e+19 sz 4 + .data1 0137,0200,00,00 diff --git a/mach/powerpc/libem/fif4.s b/mach/powerpc/libem/fif4.s new file mode 100644 index 000000000..fc29b178c --- /dev/null +++ b/mach/powerpc/libem/fif4.s @@ -0,0 +1,64 @@ +.sect .text + +! Multiplies two single-precision floats, then splits the product into +! fraction and integer, both as floats, like modff(3) in C, +! http://en.cppreference.com/w/c/numeric/math/modf +! +! Stack: ( a b -- fraction integer ) + +.define .fif4 +.fif4: + lfs f1, 4(sp) + lfs f2, 0(sp) + fmuls f1, f1, f2 ! f1 = a * b + stfs f1, 0(sp) + lwz r3, 0(sp) ! r3 = word of float bits + + ! IEEE single = sign * 1.fraction * 2**(exponent - 127) + ! sign exponent fraction + ! 0 1..8 9..31 + ! + ! Subtract 127 from the IEEE exponent. If the result is from + ! 0 to 23, then the IEEE fraction has that many integer bits. + + extrwi r5, r3, 8, 1 ! r5 = IEEE exponent + addic. r5, r5, -127 ! r5 = nr of integer bits + blt 3f ! branch if no integer + cmpwi r5, 24 + bge 4f ! branch if no fraction + ! fall through if integer with fraction + + ! f1 has r5 = 0 to 23 integer bits in the IEEE fraction. + ! There are 23 - r5 fraction bits. + li r6, 23 + subf r6, r5, r6 + srw r3, r3, r6 + slw r3, r3, r6 ! clear fraction in word + ! fall through + +1: stw r3, 0(sp) + lfs f2, 0(sp) ! integer = high word, low word + fsubs f1, f1, f2 ! 
fraction = value - integer +2: stfs f1, 4(sp) ! push fraction + stfs f2, 0(sp) ! push integer + blr + + ! f1 is a fraction without integer (or zero). + ! Then integer is zero with same sign. +3: extlwi r3, r3, 1, 0 ! extract sign bit + stfs f1, 4(sp) ! push fraction + stw r3, 0(sp) ! push integer = zero with sign + blr + + ! f1 is an integer without fraction (or infinity or NaN). + ! Unless NaN, then fraction is zero with same sign. +4: fcmpu cr0, f1, f1 + bun cr0, 5f + extlwi r3, r3, 1, 0 ! extract sign bit + stw r3, 4(sp) ! push fraction = zero with sign + stfs f1, 0(sp) ! push integer + blr + + ! f1 is NaN, so both fraction and integer are NaN. +5: fmr f2, f1 + b 2b diff --git a/mach/powerpc/libem/fif8.s b/mach/powerpc/libem/fif8.s index bce4f8d24..f93a39ac2 100644 --- a/mach/powerpc/libem/fif8.s +++ b/mach/powerpc/libem/fif8.s @@ -1,7 +1,8 @@ .sect .text ! Multiplies two double-precision floats, then splits the product into -! fraction and integer, like modf(3) in C. On entry: +! fraction and integer, both as floats, like modf(3) in C, +! http://en.cppreference.com/w/c/numeric/math/modf ! ! Stack: ( a b -- fraction integer ) @@ -14,20 +15,18 @@ lwz r3, 0(sp) ! r3 = high word lwz r4, 4(sp) ! r4 = low word - ! IEEE double-precision format: + ! IEEE double = sign * 1.fraction * 2**(exponent - 1023) ! sign exponent fraction ! 0 1..11 12..63 ! ! Subtract 1023 from the IEEE exponent. If the result is from ! 0 to 51, then the IEEE fraction has that many integer bits. - ! (IEEE has an implicit 1 before its fraction. If the IEEE - ! fraction has 0 integer bits, we still have an integer.) extrwi r5, r3, 11, 1 ! r5 = IEEE exponent addic. r5, r5, -1023 ! r5 = nr of integer bits - blt 4f ! branch if no integer + blt 3f ! branch if no integer cmpwi r5, 52 - bge 5f ! branch if no fraction + bge 4f ! branch if no fraction cmpwi r5, 21 bge 6f ! branch if large integer ! fall through if small integer @@ -44,22 +43,38 @@ 1: stw r3, 0(sp) stw r4, 4(sp) lfd f2, 0(sp) ! 
integer = high word, low word -2: fsub f1, f1, f2 ! fraction = value - integer -3: stfd f1, 8(sp) ! push fraction + fsub f1, f1, f2 ! fraction = value - integer +2: stfd f1, 8(sp) ! push fraction stfd f2, 0(sp) ! push integer blr -4: ! f1 is a fraction without integer. - fsub f2, f1, f1 ! integer = zero - b 3b + ! f1 is a fraction without integer (or zero). + ! Then integer is zero with same sign. +3: extlwi r3, r3, 1, 0 ! extract sign bit + li r4, 0 + stfd f1, 8(sp) ! push fraction + stw r4, 4(sp) + stw r3, 0(sp) ! push integer = zero with sign + blr -5: ! f1 is an integer without fraction (or infinity or NaN). - fmr f2, f1 ! integer = f1 + ! f1 is an integer without fraction (or infinity or NaN). + ! Unless NaN, then fraction is zero with same sign. +4: fcmpu cr0, f1, f1 ! integer = f1 + bun cr0, 5f + extlwi r3, r3, 1, 0 ! extract sign bit + li r4, 0 + stw r4, 12(sp) + stw r3, 8(sp) ! push fraction = zero with sign + stfd f1, 0(sp) ! push integer + blr + + ! f1 is NaN, so both fraction and integer are NaN. +5: fmr f2, f1 b 2b -6: ! f1 has r5 = 21 to 51 to integer bits. + ! f1 has r5 = 21 to 51 to integer bits. ! Low word has 52 - r5 fraction bits. - li r6, 52 +6: li r6, 52 subf r6, r5, r6 srw r4, r4, r6 slw r4, r4, r6 ! clear fraction in low word diff --git a/mach/powerpc/libem/inn.s b/mach/powerpc/libem/inn.s index 8925e776e..32275c117 100644 --- a/mach/powerpc/libem/inn.s +++ b/mach/powerpc/libem/inn.s @@ -5,6 +5,9 @@ /* Tests a bit in a bitset on the stack. * * Stack: ( bitset bitnum setsize -- bool ) + * + * Some back ends push false if bitnum is too large. We don't because + * the compilers tend to pass a small enough bitnum. */ .define .inn diff --git a/mach/powerpc/libem/rck.s b/mach/powerpc/libem/rck.s index 9008be610..1d07d5711 100644 --- a/mach/powerpc/libem/rck.s +++ b/mach/powerpc/libem/rck.s @@ -2,6 +2,9 @@ ! Bounds check. Traps if the value is out of range. ! Stack: ( value descriptor -- value ) +! +! This ".rck" only works with 4-byte integers. 
The name is ".rck" and +! not ".rck4" because many back ends only do rck with the word size. .define .rck .rck: @@ -18,3 +21,7 @@ bgt .trap_erange blr + +.trap_erange: + li r3, 1 ! ERANGE = 1 in h/em_abs.h + b .trp diff --git a/mach/powerpc/libem/set.s b/mach/powerpc/libem/set.s index 3c4a9e579..8faf84a09 100644 --- a/mach/powerpc/libem/set.s +++ b/mach/powerpc/libem/set.s @@ -2,6 +2,9 @@ ! Create singleton set. ! Stack: ( bitnumber size -- set ) +! +! Some back ends trap ESET if bitnumber is out of range. We don't +! because the compilers tend to pass a valid bitnumber. .define .set .set: diff --git a/mach/powerpc/libem/trp.s b/mach/powerpc/libem/trp.s new file mode 100644 index 000000000..b07afb929 --- /dev/null +++ b/mach/powerpc/libem/trp.s @@ -0,0 +1,56 @@ +.sect .text + +.define .trap_ecase +.trap_ecase: + li r3, 20 ! ECASE = 20 in h/em_abs.h + ! FALLTHROUGH to .trp + +! Raises an EM trap. +! Expects r3 = trap number. + +.define .trp +.trp: + cmplwi r3, 15 ! traps > 15 can't be ignored + bgt 1f + + lis r4, ha16[.ignmask] + lwz r4, lo16[.ignmask](r4) ! load ignore mask + srw r4, r4, r3 + andi. r4, r4, 1 + bnelr ! return if ignoring trap + +1: lis r4, ha16[.trppc] + lwz r5, lo16[.trppc](r4) ! r5 = user trap routine + mr. r5, r5 + beq 2f ! if no user trap routine, bail out + + mtspr ctr, r5 + mfspr r6, lr + li r0, 0 + stwu r3, -8(sp) ! push trap number + stw r0, lo16[.trppc](r4) ! reset trap routine + stw r6, 4(sp) ! save old lr + bctrl ! call trap routine + + lwz r0, 4(sp) + mtspr lr, r0 + addi sp, sp, 8 ! retract over stack usage + blr + +2: ! No trap handler. Write error message, exit. + li r3, 2 + stwu r3, -12(sp) + lis r4, ha16[message] + addi r4, r4, lo16[message] + li r5, 6 + stw r4, 4(sp) + stw r5, 8(sp) + bl _write ! write(2, message, 6) + + li r3, 1 + stw r3, 0(sp) + bl __exit ! 
_exit(1) + +.sect .rom +message: + .ascii "TRAP!\n" diff --git a/mach/powerpc/mcg/table b/mach/powerpc/mcg/table index b72990c36..ca44ce869 100644 --- a/mach/powerpc/mcg/table +++ b/mach/powerpc/mcg/table @@ -237,10 +237,13 @@ PATTERNS SETSP.I(in:(int)reg) emit "mr sp, %in" cost 4; - + out:(int)reg = ANY.I cost 1; + out:(long)reg = ANY.L + cost 1; + out:(int)reg = COPYF.I(in:(float)reg) emit "stfsu %in, -4(sp)" emit "lwz %out, 0(sp)" @@ -306,10 +309,21 @@ PATTERNS emit "lwz %out, %addr" cost 4; +#if 0 + /* FIXME: Doesn't work because %out.0 and %addr might share a + * register, so it corrupts %addr before it loads %out.1. */ out:(long)reg = LOAD.L(addr:address) emit "lwz %out.0, 4+%addr" emit "lwz %out.1, 0+%addr" cost 8; +#else + /* Works, but costs an extra instruction. */ + out:(long)reg = LOAD.L(addr:address) + emit "la %out.1, %addr" + emit "lwz %out.0, 4(%out.1)" + emit "lwz %out.1, 0(%out.1)" + cost 12; +#endif out:(int)ushort0 = LOADH.I(addr:address) emit "lhz %out, %addr" @@ -566,6 +580,13 @@ PATTERNS emit "! 
COMPARESI.I(cr, 0)" cost 4; + cr:(cr)cr = COMPAREUL.I(left:(long)reg, right:(long)reg) + emit "cmpl %cr, 0, %left.1, %right.1" + emit "bne 1f" + emit "cmpl %cr, 0, %left.0, %right.0" + emit "1:" + cost 12; + /* Booleans */ diff --git a/mach/powerpc/ncg/mach.c b/mach/powerpc/ncg/mach.c index b67903b0a..1a1d98d6c 100644 --- a/mach/powerpc/ncg/mach.c +++ b/mach/powerpc/ncg/mach.c @@ -10,8 +10,13 @@ #include #include +#include +static int writing_stabs = 0; + +#ifdef REGVARS static long framesize; +#endif void con_part(int sz, word w) @@ -51,32 +56,42 @@ con_mult(word sz) #define FL_MSB_AT_LOW_ADDRESS 1 #include -static void -emit_prolog(void) -{ - fprintf(codefile, "mfspr r0, lr\n"); - fprintf(codefile, "addi sp, sp, %ld\n", -framesize - 8); - fprintf(codefile, "stw fp, %ld(sp)\n", framesize); - fprintf(codefile, "stw r0, %ld(sp)\n", framesize + 4); - fprintf(codefile, "addi fp, sp, %ld\n", framesize); -} - void prolog(full nlocals) { - framesize = nlocals; + /* + * For N_LSYM and N_PSYM stabs, we want gdb to use fp, not sp. + * The trick is to use "stwu sp, _(sp)" then "addi fp, sp, 0" + * before we save lr with "stw r0, _(sp)". + * + * Tried with Apple's gdb-696. Refer to + * - gdb-696/src/gdb/rs6000-tdep.c, skip_prologue(), line 1101 + * - gdb-696/src/gdb/macosx/ppc-macosx-frameinfo.c, + * ppc_parse_instructions(), line 717 + * https://opensource.apple.com/release/developer-tools-25.html + */ + fprintf(codefile, "mfspr r0, lr\n"); + if (writing_stabs) { + fprintf(codefile, "stwu sp, -8(sp)\n"); /* for gdb */ + fprintf(codefile, "stw fp, 0(sp)\n"); + } else + fprintf(codefile, "stwu fp, -8(sp)\n"); + fprintf(codefile, "addi fp, sp, 0\n"); /* for gdb */ + fprintf(codefile, "stw r0, 4(sp)\n"); #ifdef REGVARS - /* f_regsave() will call emit_prolog() */ + framesize = nlocals; + /* regsave() increases framesize; f_regsave() adjusts sp. 
*/ #else - emit_prolog(); + if (nlocals) + fprintf(codefile, "addi sp, sp, %ld\n", -nlocals); #endif } void mes(word type) { - int argt ; + int argt, a1, a2 ; switch ( (int)type ) { case ms_ext : @@ -91,6 +106,41 @@ mes(word type) break ; } } + case ms_stb: + argt = getarg(str_ptyp | cst_ptyp); + if (argt == sp_cstx) + fputs(".symb \"\", ", codefile); + else { + fprintf(codefile, ".symb \"%s\", ", str); + argt = getarg(cst_ptyp); + } + a1 = argval; + argt = getarg(cst_ptyp); + a2 = argval; + argt = getarg(cst_ptyp|nof_ptyp|sof_ptyp|ilb_ptyp|pro_ptyp); + if (a1 == N_PSYM) { + /* Change offset from AB into offset from + the frame pointer. + */ + argval += 8; + } + fprintf(codefile, "%s, 0x%x, %d\n", strarg(argt), a1, a2); + argt = getarg(end_ptyp); + break; + case ms_std: + writing_stabs = 1; /* set by first "mes 13,...,100,0" */ + argt = getarg(str_ptyp | cst_ptyp); + if (argt == sp_cstx) + str[0] = '\0'; + else { + argt = getarg(cst_ptyp); + } + swtxt(); + fprintf(codefile, ".symd \"%s\", 0x%x,", str, (int) argval); + argt = getarg(cst_ptyp); + fprintf(codefile, "%d\n", (int) argval); + argt = getarg(end_ptyp); + break; default : while ( getarg(any_ptyp) != sp_cend ) ; break ; @@ -196,7 +246,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf) for (reg = 31; reg >= 0; reg--) { if (savedf[reg] != LONG_MIN) { offset -= 8; - fprintf(codefile, "%s f%d, %ld(fp)\n", + fprintf(codefile, "%s f%d,%ld(fp)\n", opf, reg, offset); } } @@ -213,7 +263,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf) while (reg > 0 && savedi[reg - 1] != LONG_MIN) reg--; offset -= (32 - reg) * 4; - fprintf(codefile, "%s r%d, %ld(fp)\n", opm, reg, offset); + fprintf(codefile, "%s r%d,%ld(fp)\n", opm, reg, offset); } else reg = 32; @@ -221,7 +271,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf) for (reg--; reg >= 0; reg--) { if (savedi[reg] != LONG_MIN) { offset -= 4; - fprintf(codefile, "%s r%d, %ld(fp)\n", + fprintf(codefile, "%s 
r%d,%ld(fp)\n", ops, reg, offset); } } @@ -232,7 +282,8 @@ f_regsave(void) { int reg; - emit_prolog(); + if (framesize) + fprintf(codefile, "addi sp, sp, %ld\n", -framesize); saveloadregs("stw", "stmw", "stfd"); /* diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 62e8f62af..82cada71a 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -1,24 +1,52 @@ +/* + * PowerPC table for ncg + * + * David Given created this table. + * George Koehler made many changes in years 2016 to 2018. + * + * This back end provides 4-byte integers, 4-byte floats, and 8-byte + * floats. It should provide enough of EM for the ACK's compilers. + * - It doesn't provide "mon" (monitor call) nor "lor 2", "str 2" + * (heap pointer). Programs should call procedures in libsys to + * make system calls or allocate heap memory. + * - It generates only a few EM traps: + * - EARRAY from aar, lar, sar + * - ERANGE from rck + * - ECASE from csa, csb + * - It uses floating-point registers to move 8-byte values that + * aren't floats. This might cause extra FPU context switches in + * programs that don't use floating point. + * + * The EM stack is less than optimal for PowerPC, and incompatible + * with the calling conventions of other compilers (like gcc). + * - EM and ncg use the stack to pass parameters to procedures. For + * PowerPC, this is probably slower than passing them in registers. + * - This back end misaligns some 8-byte floats, because EM's stack + * has only 4-byte alignment. (This kind of misalignment also + * happened in IBM's AIX and Apple's Mac OS, where data structures + * had 8-byte floats with only 4-byte alignment.) 
+ */ + EM_WSIZE = 4 EM_PSIZE = 4 EM_BSIZE = 8 /* two words saved in call frame */ -INT8 = 1 /* Size of values */ -INT16 = 2 -INT32 = 4 -INT64 = 8 - FP_OFFSET = 0 /* Offset of saved FP relative to our FP */ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ -SL_OFFSET = 8 /* Offset of static link */ #define COMMENT(n) /* comment {LABEL, n} */ - -#define nicesize(x) ((x)==INT8 || (x)==INT16 || (x)==INT32 || (x)==INT64) +#define nicesize(x) ((x)==1 || (x)==2 || (x)==4 || (x)==8) #define smalls(n) sfit(n, 16) #define smallu(n) ufit(n, 16) +/* Finds FRAME_V tokens that overlap myoff, mysize. */ +#define fover(myoff, mysize) (%off+%size>(myoff) && %off<((myoff)+(mysize))) + +/* Checks if we can use {LXFRAME, x}. */ +#define nicelx(x) ((x)>=1 && (x)<=0x8000) + #define lo(n) ((n) & 0xFFFF) #define hi(n) (((n)>>16) & 0xFFFF) @@ -32,6 +60,7 @@ SL_OFFSET = 8 /* Offset of static link */ PROPERTIES GPR /* general-purpose register */ + SPFP /* sp or fp */ REG /* allocatable GPR */ REG3 /* coercion to r3 */ @@ -46,17 +75,25 @@ PROPERTIES REGISTERS /* + * We use r1 as stack pointer and r2 as frame pointer. + * Our assembler has aliases sp -> r1 and fp -> r2. + * + * We preserve r13 to r31 and f14 to f31 across function + * calls to mimic other compilers (like gcc). See + * - http://refspecs.linuxbase.org/elf/elfspec_ppc.pdf + * - https://github.com/ryanarn/powerabi -> chap3-elf32abi.sgml + * - Apple's "32-bit PowerPC Function Calling Conventions" + * * When ncg allocates regvars, it seems to start with the last * register in the first class. To encourage ncg to allocate * them from r31 down, we list them in one class as * r13, r14, ..., r31: GPR, REG regvar(reg_any). */ - r0, sp, fp : GPR. - r3 : GPR, REG, REG3. - - r4, r5, r6, r7, r8, r9, r10, r11, r12 - : GPR, REG. + r0, r12 : GPR. + sp, fp : GPR, SPFP. + r3 : GPR, REG, REG3. + r4, r5, r6, r7, r8, r9, r10, r11 : GPR, REG. 
r13, r14, r15, r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31 @@ -65,7 +102,7 @@ REGISTERS f0 : FPR. f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13 - : FPR, FREG. + : FPR, FREG. f14, f15, f16, f17, f18, f19, f20, f21, f22, f23, f24, f25, f26, f27, f28, f29, f30, f31 @@ -86,8 +123,11 @@ REGISTERS : FSREG regvar(reg_float). lr, ctr : SPR. - cr0 : CR. + cr0 : CR. /* We use cr0, ignore cr1 to cr7. */ + /* The stacking rules can't allocate registers. We use these + * scratch registers to stack tokens. + */ #define RSCRATCH r0 #define FSCRATCH f0 @@ -96,7 +136,7 @@ TOKENS /* Primitives */ - CONST = { INT val; } 4 val. + C /* constant */ = { INT val; } 4 val. LABEL = { ADDR adr; } 4 adr. LABEL_HI = { ADDR adr; } 4 "hi16[" adr "]". LABEL_HA = { ADDR adr; } 4 "ha16[" adr "]". @@ -106,29 +146,39 @@ TOKENS /* Allows us to use regvar() to refer to registers */ - GPR_EXPR = { GPR reg; } 4 reg. - FPR_EXPR = { FPR reg; } 8 reg. + REG_EXPR = { REG reg; } 4 reg. + FREG_EXPR = { FREG reg; } 8 reg. FSREG_EXPR = { FSREG reg; } 4 reg. /* Constants on the stack */ - CONST_N8000 = { INT val; } 4. - CONST_N7FFF_N0001 = { INT val; } 4. - CONST_0000_7FFF = { INT val; } 4. - CONST_8000 = { INT val; } 4. - CONST_8001_FFFF = { INT val; } 4. - CONST_HZ = { INT val; } 4. - CONST_HL = { INT val; } 4. + CONST_N8000 = { INT val; } 4 val. + CONST_N7FFF_N0001 = { INT val; } 4 val. + CONST_0000_7FFF = { INT val; } 4 val. + CONST_8000 = { INT val; } 4 val. + CONST_8001_FFFF = { INT val; } 4 val. + CONST_HI_ZR = { INT val; } 4 val. + CONST_HI_LO = { INT val; } 4 val. /* Expression partial results */ + SEX_B = { GPR reg; } 4. /* sign extension */ + SEX_H = { GPR reg; } 4. + SUM_RIS = { GPR reg; INT offhi; } 4. /* reg + (offhi << 16) */ SUM_RC = { GPR reg; INT off; } 4. /* reg + off */ SUM_RL = { GPR reg; ADDR adr; } 4. /* reg + lo16[adr] */ SUM_RR = { GPR reg1; GPR reg2; } 4. /* reg1 + reg2 */ - SEX_B = { GPR reg; } 4. - SEX_H = { GPR reg; } 4. 
+ SUB_CR = { INT val; GPR reg; } 4. /* val - reg */ + SUB_RR = { GPR reg1; GPR reg2; } 4. /* reg1 - reg2 */ + NEG_R = { GPR reg; } 4. /* -reg */ + MUL_RC = { GPR reg; INT val; } 4. /* reg * val */ + MUL_RR = { GPR reg1; GPR reg2; } 4. /* reg1 * reg2 */ + DIV_RR = { GPR reg1; GPR reg2; } 4. /* reg1 / reg2 signed */ + DIV_RR_U = { GPR reg1; GPR reg2; } 4. /* reg1 / reg2 unsigned */ + +/* Indirect loads and stores */ IND_RC_B = { GPR reg; INT off; } 4 off "(" reg ")". IND_RL_B = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")". @@ -146,15 +196,40 @@ TOKENS IND_RL_D = { GPR reg; ADDR adr; } 8 "lo16[" adr "](" reg ")". IND_RR_D = { GPR reg1; GPR reg2; } 8. - NOT_R = { GPR reg; } 4. +/* Local variables in frame */ - AND_RR = { GPR reg1; GPR reg2; } 4. - OR_RR = { GPR reg1; GPR reg2; } 4. - OR_RIS = { GPR reg; INT valhi; } 4. - OR_RC = { GPR reg; INT val; } 4. - XOR_RR = { GPR reg1; GPR reg2; } 4. - XOR_RIS = { GPR reg; INT valhi; } 4. - XOR_RC = { GPR reg; INT val; } 4. + FRAME_B = { INT level; GPR reg; INT off; INT size; } + 4 off "(" reg ")". + FRAME_H = { INT level; GPR reg; INT off; INT size; } + 4 off "(" reg ")". + FRAME_H_S = { INT level; GPR reg; INT off; INT size; } + 4 off "(" reg ")". + FRAME_W = { INT level; GPR reg; INT off; INT size; } + 4 off "(" reg ")". + FRAME_D = { INT level; GPR reg; INT off; INT size; } + 8 off "(" reg ")". + + LXFRAME = { INT level; } 4. + +/* Bitwise logic */ + + NOT_R = { GPR reg; } 4. /* ~reg */ + AND_RIS = { GPR reg; INT valhi; } 4. + AND_RC = { GPR reg; INT val; } 4. + AND_RR = { GPR reg1; GPR reg2; } 4. + ANDC_RR = { GPR reg1; GPR reg2; } 4. /* reg1 & ~reg2 */ + OR_RIS = { GPR reg; INT valhi; } 4. + OR_RC = { GPR reg; INT val; } 4. + OR_RR = { GPR reg1; GPR reg2; } 4. + ORC_RR = { GPR reg1; GPR reg2; } 4. /* reg1 | ~reg2 */ + XOR_RIS = { GPR reg; INT valhi; } 4. + XOR_RC = { GPR reg; INT val; } 4. + XOR_RR = { GPR reg1; GPR reg2; } 4. + NAND_RR = { GPR reg1; GPR reg2; } 4. 
/* ~(reg1 & reg2) */ + NOR_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 | reg2) */ + EQV_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 ^ reg2) */ + +/* Comparisons */ COND_RC = { GPR reg; INT val; } 4. COND_RR = { GPR reg1; GPR reg2; } 4. @@ -181,28 +256,43 @@ SETS UCONST2 = CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF. /* any constant on stack */ CONST_STACK = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF + - CONST_8000 + CONST_8001_FFFF + CONST_HZ + CONST_HL. + CONST_8000 + CONST_8001_FFFF + + CONST_HI_ZR + CONST_HI_LO. - SUM_ALL = SUM_RC + SUM_RL + SUM_RR. + CONST = C + CONST_STACK. - SEX_ALL = SEX_B + SEX_H. + SET_RC_B = IND_RC_B + IND_RL_B + FRAME_B. + SET_RC_H = IND_RC_H + IND_RL_H + FRAME_H. + SET_RC_H_S = IND_RC_H_S + IND_RL_H_S + FRAME_H_S. + SET_RC_W = IND_RC_W + IND_RL_W + FRAME_W. + SET_RC_D = IND_RC_D + IND_RL_D + FRAME_D. - LOGICAL_ALL = NOT_R + AND_RR + OR_RR + OR_RC + XOR_RR + - XOR_RC. + IND_ALL_B = IND_RC_B + IND_RL_B + IND_RR_B. + IND_ALL_H = IND_RC_H + IND_RL_H + IND_RR_H + + IND_RC_H_S + IND_RL_H_S + IND_RR_H_S. + IND_ALL_W = IND_RC_W + IND_RL_W + IND_RR_W. + IND_ALL_D = IND_RC_D + IND_RL_D + IND_RR_D. + IND_V = IND_ALL_B + IND_ALL_H + IND_ALL_W + IND_ALL_D. - IND_ALL_B = IND_RC_B + IND_RL_B + IND_RR_B. - IND_ALL_H = IND_RC_H + IND_RL_H + IND_RR_H + - IND_RC_H_S + IND_RL_H_S + IND_RR_H_S. - IND_ALL_W = IND_RC_W + IND_RL_W + IND_RR_W. - IND_ALL_D = IND_RC_D + IND_RL_D + IND_RR_D. - IND_ALL_BHW = IND_ALL_B + IND_ALL_H + IND_ALL_W. + FRAME_V = FRAME_B + FRAME_H + FRAME_H_S + FRAME_W + FRAME_D. /* anything killed by sti (store indirect) */ - MEMORY = IND_ALL_BHW + IND_ALL_D. + MEMORY = IND_V + FRAME_V. - /* any stack token that we can easily move to GPR */ - ANY_BHW = REG + CONST_STACK + SEX_ALL + - SUM_ALL + IND_ALL_BHW + LOGICAL_ALL. 
+ /* any integer from stack that we can easily move to GPR */ + INT_W = SPFP + REG + CONST_STACK + SEX_B + SEX_H + + SUM_RIS + SUM_RC + SUM_RL + SUM_RR + + SUB_CR + SUB_RR + NEG_R + + MUL_RC + MUL_RR + DIV_RR + DIV_RR_U + + IND_ALL_B + IND_ALL_H + IND_ALL_W + + FRAME_B + FRAME_H + FRAME_H_S + FRAME_W + + NOT_R + AND_RIS + AND_RC + AND_RR + ANDC_RR + + OR_RIS + OR_RC + OR_RR + ORC_RR + + XOR_RIS + XOR_RC + XOR_RR + NAND_RR + NOR_RR + EQV_RR + + XEQ + XNE + XGT + XGE + XLT + XLE. + + FLOAT_D = FREG + IND_ALL_D + FRAME_D. + FLOAT_W = FSREG + IND_ALL_W + FRAME_W. INSTRUCTIONS @@ -220,7 +310,7 @@ INSTRUCTIONS cost(4, 1) /* space, time */ add GPR:wo, GPR:ro, GPR:ro. - addX "add." GPR:wo, GPR:ro, GPR:ro. + addX "add." GPR:wo:cc, GPR:ro, GPR:ro. addi GPR:wo, GPR:ro, CONST+LABEL_LO:ro. li GPR:wo, CONST:ro. addis GPR:wo, GPR:ro, CONST+LABEL_HI+LABEL_HA:ro. @@ -246,13 +336,13 @@ INSTRUCTIONS bclr CONST:ro, CONST:ro, CONST:ro. blr. bl LABEL:ro. - cmp CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc. + cmp CR:wo, CONST:ro, GPR:ro, GPR:ro kills :cc. cmpw GPR:ro, GPR:ro kills :cc. - cmpi CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc. + cmpi CR:wo, CONST:ro, GPR:ro, CONST:ro kills :cc. cmpwi GPR:ro, CONST:ro kills :cc. - cmpl CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc. + cmpl CR:wo, CONST:ro, GPR:ro, GPR:ro kills :cc. cmplw GPR:ro, GPR:ro kills :cc. - cmpli CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc. + cmpli CR:wo, CONST:ro, GPR:ro, CONST:ro kills :cc. cmplwi GPR:ro, CONST:ro kills :cc. divw GPR:wo, GPR:ro, GPR:ro cost(4, 23). divwu GPR:wo, GPR:ro, GPR:ro cost(4, 23). @@ -263,7 +353,7 @@ INSTRUCTIONS fadds FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5). fcmpo CR:wo, FREG:ro, FREG:ro cost(4, 5). fcmpo CR:wo, FSREG:ro, FSREG:ro cost(4, 5). - fctiwz FREG:wo, FREG:ro. + fctiwz FREG:wo, FREG:ro cost(4, 5). fdiv FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 35). fdivs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 21). fmr FPR:wo, FPR:ro cost(4, 5). 
@@ -272,60 +362,67 @@ INSTRUCTIONS fmuls FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5). fneg FREG+DLOCAL:wo, FREG:ro cost(4, 5). fneg FSREG+LOCAL:wo, FSREG:ro cost(4, 5). - frsp FSREG:wo, FREG:ro cost(4, 5). + frsp FSREG+LOCAL:wo, FREG:ro cost(4, 5). fsub FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5). fsubs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5). - lbz GPR:wo, IND_RC_B+IND_RL_B:ro cost(4, 3). + lbz GPR:wo, SET_RC_B:ro cost(4, 3). lbzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lfd FPR+DLOCAL:wo, IND_RC_D+IND_RL_D:ro cost(4, 5). - lfdu FPR:wo, IND_RC_D:ro cost(4, 5). + lfd FPR+DLOCAL:wo, SET_RC_D:ro cost(4, 5). + lfdu FPR:wo, IND_RC_D:rw cost(4, 5). lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5). - lfs FSREG+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 4). + lfs FSREG+LOCAL:wo, SET_RC_W:ro cost(4, 4). lfsu FSREG:wo, IND_RC_W:rw cost(4, 4). lfsx FSREG:wo, GPR:ro, GPR:ro cost(4, 4). - lha GPR:wo, IND_RC_H_S+IND_RL_H_S:ro cost(4, 3). + lha GPR:wo, SET_RC_H_S:ro cost(4, 3). lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lhz GPR:wo, IND_RC_H+IND_RL_H:ro cost(4, 3). + lhz GPR:wo, SET_RC_H:ro cost(4, 3). lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). + lwz GPR+LOCAL:wo, SET_RC_W:ro cost(4, 3). lwzu GPR:wo, IND_RC_W:rw cost(4, 3). lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lwz GPR+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 3). + mfcr GPR:wo cost(4,2). + mfspr GPR:wo, SPR:ro cost(4, 3). + mtspr SPR:wo, GPR:ro cost(4, 2). + mulli GPR:wo, GPR:ro, CONST:ro cost(4, 3). + mullw GPR:wo, GPR:ro, GPR:ro cost(4, 4). nand GPR:wo, GPR:ro, GPR:ro. neg GPR:wo, GPR:ro. nor GPR:wo, GPR:ro, GPR:ro. - mfcr GPR:wo cost(4,2). - mullw GPR:wo, GPR:ro, GPR:ro cost(4, 4). - mfspr GPR:wo, SPR:ro cost(4, 3). - mtspr SPR:wo, GPR:ro cost(4, 2). or GPR:wo, GPR:ro, GPR:ro. mr GPR:wo, GPR:ro. orX "or." GPR:wo:cc, GPR:ro, GPR:ro. - orX_readonly "or." GPR:ro:cc, GPR:ro, GPR:ro. + mrX_readonly "mr." GPR:ro:cc, GPR:ro. orc GPR:wo, GPR:ro, GPR:ro. ori GPR:wo, GPR:ro, CONST+LABEL_LO:ro. oris GPR:wo, GPR:ro, CONST:ro. 
rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro. extlwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. extrwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. - srwi GPR:wo, GPR:ro, CONST:ro. - slw GPR:wo, GPR:ro, GPR:ro. - subf GPR:wo, GPR:ro, GPR:ro. - sraw GPR:wo, GPR:ro, GPR:ro cost(4, 2). - srawi GPR:wo, GPR:ro, CONST:ro cost(4, 2). - srw GPR:wo, GPR:ro, GPR:ro. - stb GPR:ro, IND_RC_B+IND_RL_B:rw cost(4, 3). + rotlwi GPR+LOCAL:wo, GPR:ro, CONST:ro. + rotrwi GPR+LOCAL:wo, GPR:ro, CONST:ro. + slwi GPR+LOCAL:wo, GPR:ro, CONST:ro. + srwi GPR+LOCAL:wo, GPR:ro, CONST:ro. + rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro. + rotlw GPR+LOCAL:wo, GPR:ro, GPR:ro. + slw GPR+LOCAL:wo, GPR:ro, GPR:ro. + sraw GPR+LOCAL:wo, GPR:ro, GPR:ro /* kills xer */ cost(4, 2). + srawi GPR+LOCAL:wo, GPR:ro, CONST:ro /* kills xer */ cost(4, 2). + srw GPR+LOCAL:wo, GPR:ro, GPR:ro. + stb GPR:ro, SET_RC_B:rw cost(4, 3). stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3). - stfd FPR:ro, IND_RC_D+IND_RL_D:rw cost(4, 4). + stfd FPR:ro, SET_RC_D:rw cost(4, 4). stfdu FPR:ro, IND_RC_D:rw cost(4, 4). stfdx FPR:ro, GPR:ro, GPR:ro cost(4, 4). - stfs FSREG:ro, IND_RC_W+IND_RL_W:rw cost(4, 3). + stfs FSREG:ro, SET_RC_W:rw cost(4, 3). stfsu FSREG:ro, IND_RC_W:rw cost(4, 3). stfsx FSREG:ro, GPR:ro, GPR:ro cost(4, 3). - sth GPR:ro, IND_RC_H+IND_RL_H:rw cost(4, 3). + sth GPR:ro, SET_RC_H:rw cost(4, 3). sthx GPR:ro, GPR:ro, GPR:ro cost(4, 3). - stw GPR:ro, IND_RC_W+IND_RL_W:rw cost(4, 3). + stw GPR:ro, SET_RC_W:rw cost(4, 3). stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3). stwu GPR:ro, IND_RC_W:rw cost(4, 3). + subf GPR:wo, GPR:ro, GPR:ro. + subfic GPR:wo, GPR:ro, CONST:ro /* kills xer */. xor GPR:wo, GPR:ro, GPR:ro. xori GPR:wo, GPR:ro, CONST:ro. xoris GPR:wo, GPR:ro, CONST:ro. 
@@ -347,22 +444,22 @@ MOVES /* Constants */ - from CONST + CONST_STACK smalls(%val) to GPR + from CONST smalls(%val) to GPR gen COMMENT("move CONST->GPR smalls") - li %2, {CONST, %1.val} + li %2, %1 - from CONST + CONST_STACK lo(%val)==0 to GPR + from CONST lo(%val)==0 to GPR gen COMMENT("move CONST->GPR shifted") - lis %2, {CONST, hi(%1.val)} + lis %2, {C, hi(%1.val)} - from CONST + CONST_STACK to GPR + from CONST to GPR gen COMMENT("move CONST->GPR") - lis %2, {CONST, hi(%1.val)} - ori %2, %2, {CONST, lo(%1.val)} - /* Can't use addi %2, %2, {CONST, los(%1.val)} + lis %2, {C, hi(%1.val)} + ori %2, %2, {C, lo(%1.val)} + /* Can't use addi %2, %2, {C, los(%1.val)} * because %2 might be R0. */ from LABEL to GPR @@ -385,10 +482,10 @@ MOVES /* Register + something */ from SUM_RIS to GPR - gen addis %2, %1.reg, {CONST, %1.offhi} + gen addis %2, %1.reg, {C, %1.offhi} from SUM_RC to GPR - gen addi %2, %1.reg, {CONST, %1.off} + gen addi %2, %1.reg, {C, %1.off} from SUM_RL to GPR gen addi %2, %1.reg, {LABEL_LO, %1.adr} @@ -396,9 +493,34 @@ MOVES from SUM_RR to GPR gen add %2, %1.reg1, %1.reg2 +/* Other arithmetic */ + + from SUB_CR to GPR + /* val - reg -> subtract reg from val */ + gen subfic %2, %1.reg, {C, %1.val} + + from SUB_RR to GPR + /* reg1 - reg2 -> subtract reg2 from reg1 */ + gen subf %2, %1.reg2, %1.reg1 + + from NEG_R to GPR + gen neg %2, %1.reg + + from MUL_RC to GPR + gen mulli %2, %1.reg, {C, %1.val} + + from MUL_RR to GPR + gen mullw %2, %1.reg1, %1.reg2 + + from DIV_RR to GPR + gen divw %2, %1.reg1, %1.reg2 + + from DIV_RR_U to GPR + gen divwu %2, %1.reg1, %1.reg2 + /* Read byte */ - from IND_RC_B+IND_RL_B to GPR + from SET_RC_B to GPR gen lbz %2, %1 from IND_RR_B to GPR @@ -406,7 +528,7 @@ MOVES /* Write byte */ - from GPR to IND_RC_B+IND_RL_B + from GPR to SET_RC_B gen stb %1, %2 from GPR to IND_RR_B @@ -414,13 +536,13 @@ MOVES /* Read halfword (short) */ - from IND_RC_H+IND_RL_H to GPR + from SET_RC_H to GPR gen lhz %2, %1 from IND_RR_H to GPR gen lhzx 
%2, %1.reg1, %1.reg2 - from IND_RC_H_S+IND_RL_H_S to GPR + from SET_RC_H_S to GPR gen lha %2, %1 from IND_RR_H_S to GPR @@ -428,7 +550,7 @@ MOVES /* Write halfword */ - from GPR to IND_RC_H+IND_RL_H + from GPR to SET_RC_H gen sth %1, %2 from GPR to IND_RR_H @@ -436,13 +558,13 @@ MOVES /* Read word */ - from IND_RC_W+IND_RL_W to GPR + from SET_RC_W to GPR gen lwz %2, %1 from IND_RR_W to GPR gen lwzx %2, %1.reg1, %1.reg2 - from IND_RC_W+IND_RL_W to FSREG + from SET_RC_W to FSREG gen lfs %2, %1 from IND_RR_W to FSREG @@ -450,13 +572,13 @@ MOVES /* Write word */ - from GPR to IND_RC_W+IND_RL_W + from GPR to SET_RC_W gen stw %1, %2 from GPR to IND_RR_W gen stwx %1, %2.reg1, %2.reg2 - from FSREG to IND_RC_W+IND_RL_W + from FSREG to SET_RC_W gen stfs %1, %2 from FSREG to IND_RR_W @@ -464,7 +586,7 @@ MOVES /* Read double */ - from IND_RC_D+IND_RL_D to FPR + from SET_RC_D to FPR gen lfd %2, %1 from IND_RR_D to FPR @@ -472,37 +594,87 @@ MOVES /* Write double */ - from FPR to IND_RC_D+IND_RL_D + from FPR to SET_RC_D gen stfd %1, %2 from FPR to IND_RR_D gen stfdx %1, %2.reg1, %2.reg2 +/* LXFRAME is a lexical frame from the static chain. We define a move + so "uses REG={LXFRAME, $1}" may find a register with the same + frame, and not repeat the move. This move can't search for a REG + with {LXFRAME, $1-1}, but must always start from fp. The static + chain, if it exists, is the argument at fp + EM_BSIZE. */ + + from LXFRAME %level==1 to REG + gen lwz %2, {IND_RC_W, fp, EM_BSIZE} + from LXFRAME %level==2 to REG + gen lwz %2, {IND_RC_W, fp, EM_BSIZE} + /* PowerPC can't add r0 + EM_BSIZE, + * so %2 must not be r0. 
*/ + lwz %2, {IND_RC_W, %2, EM_BSIZE} + from LXFRAME %level==3 to REG + gen lwz %2, {IND_RC_W, fp, EM_BSIZE} + lwz %2, {IND_RC_W, %2, EM_BSIZE} + lwz %2, {IND_RC_W, %2, EM_BSIZE} + from LXFRAME %level==4 to REG + gen lwz %2, {IND_RC_W, fp, EM_BSIZE} + lwz %2, {IND_RC_W, %2, EM_BSIZE} + lwz %2, {IND_RC_W, %2, EM_BSIZE} + lwz %2, {IND_RC_W, %2, EM_BSIZE} + from LXFRAME to REG /* assuming %level in 2 to 0x8000 */ + gen li %2, {C, %1.level-1} + mtspr ctr, %2 + lwz %2, {IND_RC_W, fp, EM_BSIZE} + 1: lwz %2, {IND_RC_W, %2, EM_BSIZE} + bdnz {LABEL, "1b"} + /* Logicals */ from NOT_R to GPR gen nor %2, %1.reg, %1.reg + from AND_RIS to GPR + gen andisX %2, %1.reg, {C, %1.valhi} + + from AND_RC to GPR + gen andiX %2, %1.reg, {C, %1.val} + from AND_RR to GPR gen and %2, %1.reg1, %1.reg2 + from ANDC_RR to GPR + gen andc %2, %1.reg1, %1.reg2 + + from OR_RIS to GPR + gen oris %2, %1.reg, {C, %1.valhi} + + from OR_RC to GPR + gen ori %2, %1.reg, {C, %1.val} + from OR_RR to GPR gen or %2, %1.reg1, %1.reg2 - from OR_RIS to GPR - gen oris %2, %1.reg, {CONST, %1.valhi} + from ORC_RR to GPR + gen orc %2, %1.reg1, %1.reg2 - from OR_RC to GPR - gen ori %2, %1.reg, {CONST, %1.val} + from XOR_RIS to GPR + gen xoris %2, %1.reg, {C, %1.valhi} + + from XOR_RC to GPR + gen xori %2, %1.reg, {C, %1.val} from XOR_RR to GPR gen xor %2, %1.reg1, %1.reg2 - from XOR_RIS to GPR - gen xoris %2, %1.reg, {CONST, %1.valhi} + from NAND_RR to GPR + gen nand %2, %1.reg1, %1.reg2 - from XOR_RC to GPR - gen xori %2, %1.reg, {CONST, %1.val} + from NOR_RR to GPR + gen nor %2, %1.reg1, %1.reg2 + + from EQV_RR to GPR + gen eqv %2, %1.reg1, %1.reg2 /* Conditions */ @@ -510,7 +682,7 @@ MOVES from COND_RC to GPR gen - cmpwi %1.reg, {CONST, %1.val} + cmpwi %1.reg, {C, %1.val} mfcr %2 from COND_RR to GPR @@ -520,7 +692,7 @@ MOVES from CONDL_RC to GPR gen - cmplwi %1.reg, {CONST, %1.val} + cmplwi %1.reg, {C, %1.val} mfcr %2 from CONDL_RR to GPR @@ -544,75 +716,73 @@ MOVES from XEQ to GPR gen - extrwi %2, %1.reg, {CONST, 
1}, {CONST, 2} + extrwi %2, %1.reg, {C, 1}, {C, 2} from XNE to GPR gen - extrwi %2, %1.reg, {CONST, 1}, {CONST, 2} - xori %2, %2, {CONST, 1} + extrwi %2, %1.reg, {C, 1}, {C, 2} + xori %2, %2, {C, 1} from XGT to GPR gen - extrwi %2, %1.reg, {CONST, 1}, {CONST, 1} + extrwi %2, %1.reg, {C, 1}, {C, 1} from XGE to GPR gen - extrwi %2, %1.reg, {CONST, 1}, {CONST, 0} - xori %2, %2, {CONST, 1} + extrwi %2, %1.reg, {C, 1}, {C, 0} + xori %2, %2, {C, 1} from XLT to GPR gen - extrwi %2, %1.reg, {CONST, 1}, {CONST, 0} + extrwi %2, %1.reg, {C, 1}, {C, 0} from XLE to GPR gen - extrwi %2, %1.reg, {CONST, 1}, {CONST, 1} - xori %2, %2, {CONST, 1} + extrwi %2, %1.reg, {C, 1}, {C, 1} + xori %2, %2, {C, 1} -/* GPR_EXPR exists solely to allow us to use regvar() (which can only +/* REG_EXPR exists solely to allow us to use regvar() (which can only be used in an expression) as a register constant. We can then use - our moves to GPR to set register variables. We define no moves to - LOCAL, so we avoid confusion between GPR and FSREG in LOCAL. */ + our moves to GPR or REG to set register variables. This is easier + than defining moves to LOCAL, and avoids confusion between GPR and + FSREG in LOCAL. */ - from ANY_BHW to GPR_EXPR + from INT_W + LXFRAME to REG_EXPR gen move %1, %2.reg - from FPR+IND_ALL_D to FPR_EXPR + from FLOAT_D to FREG_EXPR gen move %1, %2.reg - from FSREG+IND_ALL_W to FSREG_EXPR + from FLOAT_W to FSREG_EXPR gen move %1, %2.reg TESTS - /* Given orX %1, %1, %1, ncgg says, "Instruction destroys %1, - * not allowed here". We use orX_readonly to trick ncgg. - * - * Using "or." and not "mr." because mach/powerpc/top/table - * was optimizing "or." and not "mr.". + /* Given "mrX %1, %1", ncgg would say, "Instruction destroys + * %1, not allowed here". We use mrX_readonly to trick ncgg. 
*/ to test GPR gen - orX_readonly %1, %1, %1 + mrX_readonly %1, %1 STACKINGRULES - from REG to STACK + from SPFP+REG to STACK gen - COMMENT("stack REG") + COMMENT("stack SPFP+REG") stwu %1, {IND_RC_W, sp, 0-4} - from ANY_BHW-REG to STACK + from INT_W-SPFP-REG to STACK gen - COMMENT("stack ANY_BHW-REG") + COMMENT("stack INT_W-SPFP-REG") move %1, RSCRATCH stwu RSCRATCH, {IND_RC_W, sp, 0-4} - from IND_ALL_D to STACK + from FLOAT_D-FREG to STACK gen - COMMENT("stack IND_ALL_D") + COMMENT("stack FLOAT_D-FREG") move %1, FSCRATCH stfdu FSCRATCH, {IND_RC_D, sp, 0-8} @@ -637,15 +807,18 @@ STACKINGRULES gen bug {LABEL, "STACKING DLOCAL"} - COERCIONS + /* The unstacking coercions emit many "addi sp, sp, X" + * instructions; the target optimizer (top) will merge them. + */ + from STACK uses REG gen COMMENT("coerce STACK->REG") lwz %a, {IND_RC_W, sp, 0} - addi sp, sp, {CONST, 4} + addi sp, sp, {C, 4} yields %a from STACK @@ -653,7 +826,7 @@ COERCIONS gen COMMENT("coerce STACK->FREG") lfd %a, {IND_RC_D, sp, 0} - addi sp, sp, {CONST, 8} + addi sp, sp, {C, 8} yields %a from STACK @@ -661,45 +834,56 @@ COERCIONS gen COMMENT("coerce STACK->FSREG") lfs %a, {IND_RC_W, sp, 0} - addi sp, sp, {CONST, 4} + addi sp, sp, {C, 4} yields %a - from ANY_BHW - uses REG - gen - COMMENT("coerce ANY_BHW->REG") - move %1, %a - yields %a - - /* - * There is no coercion from IND_ALL_D to REG REG, because - * coercions can't allocate registers for intermediate values. + /* "uses REG=%1" may find and reuse a register containing the + * same token. For contrast, "uses REG gen move %1, %a" would + * pick a different register before doing the move. * - * A coercion to split IND_RC_D into two IND_RC_W, without - * allocating an intermediate register, would yield - * {IND_RC_W, %1.val, %1.off+4} - * but %1.off+4 might overflow a signed 16-bit integer. + * "reusing %1" helps when coercing an INT_W token like + * {SUM_RC, r3, 0-4} to REG3, by not stacking the token. 
*/ - from FREG+IND_ALL_D - uses FREG - gen - COMMENT("coerce FREG+IND_ALL_D->FREG") - move %1, %a + from INT_W + uses reusing %1, REG=%1 yields %a - from FSREG+IND_ALL_W - uses FSREG - gen - COMMENT("coerce FSREG+IND_ALL_W->FREG") - move %1, %a + from FLOAT_D + uses reusing %1, FREG=%1 yields %a + from FLOAT_W + uses reusing %1, FSREG=%1 + yields %a + + /* Splitting coercions can't allocate registers. + * PowerPC can't add r0 + constant. Use r12. + */ + + from IND_RC_D %off<=0x7FFA + yields + {IND_RC_W, %1.reg, %1.off+4} + {IND_RC_W, %1.reg, %1.off} + + from IND_RC_D + /* Don't move to %1.reg; it might be a regvar. */ + gen move {SUM_RC, %1.reg, %1.off}, r12 + yields {IND_RC_W, r12, 4} {IND_RC_W, r12, 0} + + from IND_RR_D + gen move {SUM_RR, %1.reg1, %1.reg2}, r12 + yields {IND_RC_W, r12, 4} {IND_RC_W, r12, 0} + + from FRAME_D %off<=0x7FFA + yields + {FRAME_W, %1.level, %1.reg, %1.off+4, 4} + {FRAME_W, %1.level, %1.reg, %1.off, 4} PATTERNS -/* Intrinsics */ +/* Constants */ pat loc $1==(0-0x8000) /* Load constant */ yields {CONST_N8000, $1} @@ -712,76 +896,109 @@ PATTERNS pat loc $1>=0x8001 && $1<=0xFFFF yields {CONST_8001_FFFF, $1} pat loc lo($1)==0 - yields {CONST_HZ, $1} + yields {CONST_HI_ZR, $1} pat loc - yields {CONST_HL, $1} + yields {CONST_HI_LO, $1} - pat dup $1==INT32 /* Duplicate word on top of stack */ - with REG - yields %1 %1 - with FSREG + +/* Stack shuffles */ + + /* The peephole optimizer does: loc $1 ass 4 -> asp $1 + * To optimize multiplication, it uses: dup 8 asp 4 + */ + + pat asp $1==4 /* Adjust stack by constant */ + with exact INT_W+FLOAT_W + /* drop %1 */ + with STACK + gen addi sp, sp, {C, 4} + pat asp smalls($1) + with STACK + gen addi sp, sp, {C, $1} + pat asp lo($1)==0 + with STACK + gen addis sp, sp, {C, his($1)} + pat asp + with STACK + gen + addis sp, sp, {C, his($1)} + addi sp, sp, {C, los($1)} + + pat ass $1==4 /* Adjust stack by variable */ + with REG STACK + gen add sp, sp, %1 + + /* To duplicate a token, we coerce the token into
a register, + * then duplicate the register. This decreases code size. + */ + + pat dup $1==4 /* Duplicate word on top of stack */ + with REG+FSREG yields %1 %1 - pat dup $1==INT64 /* Duplicate double-word on top of stack */ - with REG REG + pat dup $1==8 /* Duplicate double-word */ + with REG+FSREG REG+FSREG yields %2 %1 %2 %1 with FREG yields %1 %1 - pat exg $1==INT32 /* Exchange top two words on stack */ - with REG REG + pat dup /* Duplicate other size */ + leaving + loc $1 + dus 4 + + pat dus $1==4 /* Duplicate variable size */ + with REG STACK + /* ( a size%1 -- a a ) */ + uses REG, REG + gen + srwi %a, %1, {C, 2} + mtspr ctr, %a + add %b, sp, %1 + 1: lwzu %a, {IND_RC_W, %b, 0-4} + stwu %a, {IND_RC_W, sp, 0-4} + bdnz {LABEL, "1b"} + + pat exg $1==4 /* Exchange top two words */ + with INT_W+FLOAT_W INT_W+FLOAT_W yields %1 %2 - pat stl lol $1==$2 /* Store then load local */ + pat exg defined($1) /* Exchange other size */ leaving - dup 4 - stl $1 + loc $1 + cal ".exg" - pat sdl ldl $1==$2 /* Store then load double local */ + pat exg !defined($1) leaving - dup 8 - sdl $1 - - pat lal sti lal loi $1==$3 && $2==$4 /* Store then load local, of a different size */ - leaving - dup INT32 - lal $1 - sti $2 - - pat ste loe $1==$2 /* Store then load external */ - leaving - dup 4 - ste $1 + cal ".exg" /* Type conversions */ - pat loc loc ciu /* signed X -> unsigned X */ + pat loc loc ciu /* signed -> unsigned */ leaving loc $1 loc $2 cuu - pat loc loc cuu $1==$2 /* unsigned X -> unsigned X */ + pat loc loc cui /* unsigned -> signed */ + leaving + loc $1 + loc $2 + cuu + + pat loc loc cuu $1<=4 && $2<=4 /* unsigned -> unsigned */ /* nop */ - pat loc loc cii $1==$2 /* signed X -> signed X */ - /* nop */ + pat loc loc cii $1<=4 && $2<=$1 + /* signed -> signed of smaller or same size, + * no sign extension */ - pat loc loc cui $1==$2 /* unsigned X -> signed X */ - /* nop */ - - pat loc loc cui $1==INT8 && $2==INT32 /* unsigned char -> signed int */ - /* nop */ - - pat loc loc 
cui $1==INT16 && $2==INT32 /* unsigned short -> signed int */ - /* nop */ - - pat loc loc cii $1==INT8 && $2==INT32 /* signed char -> signed int */ + pat loc loc cii $1==1 && $2<=4 /* sign-extend char */ with REG yields {SEX_B, %1} - pat loc loc cii $1==2 && $2==4 /* signed char -> signed short */ + pat loc loc cii $1==2 && $2<=4 /* sign-extend short */ with REG yields {SEX_H, %1} @@ -795,85 +1012,117 @@ PATTERNS uses REG={SUM_RIS, fp, his($1)} yields {SUM_RC, %a, los($1)} + pat lal loi smalls($1) && $2==1 /* Load byte from local */ + yields {FRAME_B, 0, fp, $1, 1} + + /* Load half-word from local and sign-extend */ + pat lal loi loc loc cii smalls($1) && $2==2 && $3==2 && $4==4 + yields {FRAME_H_S, 0, fp, $1, 2} + + pat lal loi smalls($1) && $2==2 /* Load half-word from local */ + yields {FRAME_H, 0, fp, $1, 2} + /* Load word from local */ pat lol inreg($1)==reg_any || inreg($1)==reg_float yields {LOCAL, $1} + pat lol smalls($1) + yields {FRAME_W, 0, fp, $1, 4} pat lol leaving lal $1 loi 4 - /* Load double-word from local */ - pat ldl inreg($1)==reg_float + pat ldl inreg($1)==reg_float /* Load double-word from local */ yields {DLOCAL, $1} + pat ldl smalls($1) && smalls($1+4) + /* smalls($1+4) implies FRAME_D %off<=0x7FFA */ + yields {FRAME_D, 0, fp, $1, 8} pat ldl leaving lal $1 loi 8 - /* Store word to local */ - pat stl inreg($1)==reg_any - with exact ANY_BHW + pat lal sti smalls($1) && $2==1 /* Store byte to local */ + with REG + kills IND_V, FRAME_V %level==0 && fover($1, 1) + gen move %1, {FRAME_B, 0, fp, $1, 1} + + pat lal sti smalls($1) && $2==2 /* Store half-word to local */ + with REG + kills IND_V, FRAME_V %level==0 && fover($1, 2) + gen move %1, {FRAME_H, 0, fp, $1, 2} + + pat stl inreg($1)==reg_any /* Store word to local */ + with exact INT_W /* ncg fails to infer that regvar($1) is dead!
*/ kills regvar($1) - gen move %1, {GPR_EXPR, regvar($1)} + gen move %1, {REG_EXPR, regvar($1)} with STACK gen lwz {LOCAL, $1}, {IND_RC_W, sp, 0} - addi sp, sp, {CONST, 4} + addi sp, sp, {C, 4} pat stl inreg($1)==reg_float - with exact FSREG+IND_ALL_W + with exact FLOAT_W kills regvar_w($1, reg_float) gen move %1, {FSREG_EXPR, regvar_w($1, reg_float)} with STACK gen lfs {LOCAL, $1}, {IND_RC_W, sp, 0} - addi sp, sp, {CONST, 4} + addi sp, sp, {C, 4} + pat stl smalls($1) + with REG+FSREG + kills IND_V, FRAME_V %level==0 && fover($1, 4) + gen move %1, {FRAME_W, 0, fp, $1, 4} pat stl leaving lal $1 sti 4 - /* Store double-word to local */ - pat sdl inreg($1)==reg_float - with exact FREG+IND_ALL_D + pat sdl inreg($1)==reg_float /* Store double-word to local */ + with exact FLOAT_D kills regvar_d($1, reg_float) - gen move %1, {FPR_EXPR, regvar_d($1, reg_float)} + gen move %1, {FREG_EXPR, regvar_d($1, reg_float)} with STACK gen lfd {DLOCAL, $1}, {IND_RC_D, sp, 0} - addi sp, sp, {CONST, 8} + addi sp, sp, {C, 8} + pat sdl smalls($1) && smalls($1+4) + with REG REG + kills IND_V, FRAME_V %level==0 && fover($1, 8) + gen + move %1, {FRAME_W, 0, fp, $1, 4} + move %2, {FRAME_W, 0, fp, $1+4, 4} + with FREG + kills IND_V, FRAME_V %level==0 && fover($1, 8) + gen move %1, {FRAME_D, 0, fp, $1, 8} pat sdl leaving lal $1 sti 8 - /* Load indirect from local */ - pat lil inreg($1)==reg_any - yields {IND_RC_W, regvar($1), 0} - pat lil + pat lil /* Load indirect from local */ leaving lol $1 loi 4 - pat sil /* Save to indirected local */ + pat sil /* Store indirect to local */ leaving lol $1 sti 4 - pat zrl /* Zero local */ + pat zrl /* Zero local */ leaving loc 0 stl $1 - pat inl /* Increment local */ + pat inl /* Increment local */ leaving lol $1 loc 1 adi 4 stl $1 - pat del /* Decrement local */ + pat del /* Decrement local */ leaving lol $1 loc 1 @@ -881,9 +1130,86 @@ PATTERNS stl $1 +/* Local variables of procedures on static chain */ + + /* lxa (lexical argument base) -> lxl (lexical
local base) */ + pat lxa adp nicelx($1) + leaving lxl $1 adp $2+EM_BSIZE + pat lxa lof nicelx($1) + leaving lxl $1 lof $2+EM_BSIZE + pat lxa ldf nicelx($1) + leaving lxl $1 ldf $2+EM_BSIZE + pat lxa stf nicelx($1) + leaving lxl $1 stf $2+EM_BSIZE + pat lxa sdf nicelx($1) + leaving lxl $1 sdf $2+EM_BSIZE + pat lxa nicelx($1) + leaving lxl $1 adp EM_BSIZE + + /* Load locals in statically enclosing procedures */ + pat lxl adp loi nicelx($1) && smalls($2) && $3==1 + uses REG={LXFRAME, $1} + yields {FRAME_B, $1, %a, $2, 1} + pat lxl adp loi loc loc cii nicelx($1) && smalls($2) && + $3==2 && $4==2 && $5==4 + uses REG={LXFRAME, $1} + yields {FRAME_H_S, $1, %a, $2, 2} + pat lxl adp loi nicelx($1) && smalls($2) && $3==2 + uses REG={LXFRAME, $1} + yields {FRAME_H, $1, %a, $2, 2} + pat lxl lof nicelx($1) && smalls($2) + uses REG={LXFRAME, $1} + yields {FRAME_W, $1, %a, $2, 4} + pat lxl ldf nicelx($1) && smalls($2) && smalls($2+4) + uses REG={LXFRAME, $1} + /* smalls($2+4) implies FRAME_D %off<=0x7FFA */ + yields {FRAME_D, $1, %a, $2, 8} + + /* Store locals in statically enclosing procedures */ + pat lxl adp sti nicelx($1) && smalls($2) && $3==1 + with REG + kills IND_V, FRAME_V %level==$1 && fover($2, 1) + uses REG={LXFRAME, $1} + gen move %1, {FRAME_B, $1, %a, $2, 1} + pat lxl adp sti nicelx($1) && smalls($2) && $3==2 + with REG + kills IND_V, FRAME_V %level==$1 && fover($2, 2) + uses REG={LXFRAME, $1} + gen move %1, {FRAME_H, $1, %a, $2, 2} + pat lxl stf nicelx($1) && smalls($2) + with REG+FSREG + kills IND_V, FRAME_V %level==$1 && fover($2, 4) + uses REG={LXFRAME, $1} + gen move %1, {FRAME_W, $1, %a, $2, 4} + pat lxl sdf nicelx($1) && smalls($2) && smalls($2+4) + with REG REG + kills IND_V, FRAME_V %level==$1 && fover($2, 8) + uses REG={LXFRAME, $1} + gen + move %1, {FRAME_W, $1, %a, $2, 4} + move %2, {FRAME_W, $1, %a, $2+4, 4} + with FREG + kills IND_V, FRAME_V %level==$1 && fover($2, 8) + uses REG={LXFRAME, $1} + gen move %1, {FRAME_D, $1, %a, $2, 8} + + pat lxl
nicelx($1) /* Local base on static chain */ + uses REG={LXFRAME, $1} + yields %a /* Can't yield LXFRAME. */ + pat lxl stl nicelx($1) && inreg($2)==reg_any + kills regvar($2) + gen move {LXFRAME, $1}, {REG_EXPR, regvar($2)} + + pat lxl $1==0 /* Our local base */ + yields fp + + pat lxa $1==0 /* Our argument base */ + yields {SUM_RC, fp, EM_BSIZE} + + /* Global variables */ - pat lpi /* Load address of external function */ + pat lpi /* Load address of function */ leaving lae $1 @@ -894,35 +1220,35 @@ PATTERNS pat loe /* Load word external */ leaving lae $1 - loi INT32 + loi 4 pat ste /* Store word external */ leaving lae $1 - sti INT32 + sti 4 pat lde /* Load double-word external */ leaving lae $1 - loi INT64 + loi 8 pat sde /* Store double-word external */ leaving lae $1 - sti INT64 + sti 8 - pat zre /* Zero external */ + pat zre /* Zero external */ leaving loc 0 ste $1 - pat ine /* Increment external */ + pat ine /* Increment external */ leaving loe $1 inc ste $1 - pat dee /* Decrement external */ + pat dee /* Decrement external */ leaving loe $1 dec @@ -934,27 +1260,27 @@ PATTERNS pat lof /* Load word offsetted */ leaving adp $1 - loi INT32 + loi 4 pat ldf /* Load double-word offsetted */ leaving adp $1 - loi INT64 + loi 8 pat stf /* Store word offsetted */ leaving adp $1 - sti INT32 + sti 4 pat sdf /* Store double-word offsetted */ leaving adp $1 - sti INT64 + sti 8 /* Loads and stores */ - pat loi $1==INT8 /* Load byte indirect */ + pat loi $1==1 /* Load byte indirect */ with REG yields {IND_RC_B, %1, 0} with exact SUM_RC @@ -964,8 +1290,8 @@ PATTERNS with exact SUM_RR yields {IND_RR_B, %1.reg1, %1.reg2} - pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32 - /* Load half-word indirect and sign extend */ + /* Load half-word indirect and sign-extend */ + pat loi loc loc cii $1==2 && $2==2 && $3==4 with REG yields {IND_RC_H_S, %1, 0} with exact SUM_RC @@ -975,7 +1301,7 @@ PATTERNS with exact SUM_RR yields {IND_RR_H_S, %1.reg1, %1.reg2} - pat loi $1==INT16 /* 
Load half-word indirect */ + pat loi $1==2 /* Load half-word indirect */ with REG yields {IND_RC_H, %1, 0} with exact SUM_RC @@ -985,7 +1311,7 @@ PATTERNS with exact SUM_RR yields {IND_RR_H, %1.reg1, %1.reg2} - pat loi $1==INT32 /* Load word indirect */ + pat loi $1==4 /* Load word indirect */ with REG yields {IND_RC_W, %1, 0} with exact SUM_RC @@ -995,7 +1321,7 @@ PATTERNS with exact SUM_RR yields {IND_RR_W, %1.reg1, %1.reg2} - pat loi $1==INT64 /* Load double-word indirect */ + pat loi $1==8 /* Load double-word indirect */ with REG yields {IND_RC_D, %1, 0} with exact SUM_RC @@ -1013,10 +1339,9 @@ PATTERNS pat los $1==4 /* Load arbitrary size */ with REG3 STACK kills ALL - gen - bl {LABEL, ".los4"} + gen bl {LABEL, ".los4"} - pat sti $1==INT8 /* Store byte indirect */ + pat sti $1==1 /* Store byte indirect */ with REG REG kills MEMORY gen move %2, {IND_RC_B, %1, 0} @@ -1030,7 +1355,7 @@ PATTERNS kills MEMORY gen move %2, {IND_RR_B, %1.reg1, %1.reg2} - pat sti $1==INT16 /* Store half-word indirect */ + pat sti $1==2 /* Store half-word indirect */ with REG REG kills MEMORY gen move %2, {IND_RC_H, %1, 0} @@ -1044,7 +1369,7 @@ PATTERNS kills MEMORY gen move %2, {IND_RR_H, %1.reg1, %1.reg2} - pat sti $1==INT32 /* Store word indirect */ + pat sti $1==4 /* Store word indirect */ with REG REG+FSREG kills MEMORY gen move %2, {IND_RC_W, %1, 0} @@ -1058,7 +1383,7 @@ PATTERNS kills MEMORY gen move %2, {IND_RR_W, %1.reg1, %1.reg2} - pat sti $1==INT64 /* Store double-word indirect */ + pat sti $1==8 /* Store double-word indirect */ with REG FREG kills MEMORY gen move %2, {IND_RC_D, %1, 0} @@ -1076,26 +1401,6 @@ PATTERNS gen move %2, {IND_RC_W, %1, 0} move %3, {IND_RC_W, %1, 4} - /* - * Next 2 patterns exist because there is no coercion - * from IND_ALL_D to REG REG. 
- */ - with REG IND_RC_D - kills MEMORY - uses REG={SUM_RC, %2.reg, %2.off}, REG, REG - gen - move {IND_RC_W, %a, 0}, %b - move {IND_RC_W, %a, 4}, %c - move %b, {IND_RC_W, %1, 0} - move %c, {IND_RC_W, %1, 4} - with REG IND_RR_D - kills MEMORY - uses REG={SUM_RR, %2.reg1, %2.reg2}, REG, REG - gen - move {IND_RC_W, %a, 0}, %b - move {IND_RC_W, %a, 4}, %c - move %b, {IND_RC_W, %1, 0} - move %c, {IND_RC_W, %1, 4} pat sti /* Store arbitrary size */ leaving @@ -1105,8 +1410,7 @@ PATTERNS pat sts $1==4 /* Store arbitrary size */ with REG3 STACK kills ALL - gen - bl {LABEL, ".sts4"} + gen bl {LABEL, ".sts4"} /* Arithmetic wrappers */ @@ -1151,6 +1455,10 @@ PATTERNS /* Word arithmetic */ + /* Like most back ends, this one doesn't trap EIOVFL, so it + * ignores overflow in signed integers. + */ + pat adi $1==4 /* Add word (second + top) */ with REG REG yields {SUM_RR, %1, %2} @@ -1158,113 +1466,87 @@ PATTERNS yields {SUM_RC, %2, %1.val} with REG CONST2 yields {SUM_RC, %1, %2.val} - with CONST_HZ REG - uses reusing %2, REG={SUM_RIS, %2, his(%1.val)} - yields %a - with REG CONST_HZ - uses reusing %1, REG={SUM_RIS, %1, his(%2.val)} - yields %a - with CONST_STACK-CONST2-CONST_HZ REG + with CONST_HI_ZR REG + yields {SUM_RIS, %2, his(%1.val)} + with REG CONST_HI_ZR + yields {SUM_RIS, %1, his(%2.val)} + with CONST_STACK-CONST2-CONST_HI_ZR REG uses reusing %2, REG={SUM_RIS, %2, his(%1.val)} yields {SUM_RC, %a, los(%1.val)} - with REG CONST_STACK-CONST2-CONST_HZ + with REG CONST_STACK-CONST2-CONST_HI_ZR uses reusing %1, REG={SUM_RIS, %1, his(%2.val)} yields {SUM_RC, %a, los(%2.val)} pat sbi $1==4 /* Subtract word (second - top) */ with REG REG - uses reusing %2, REG - gen - subf %a, %1, %2 - yields %a + uses reusing %1, reusing %2, REG + yields {SUB_RR, %2, %1} with CONST2_WHEN_NEG REG yields {SUM_RC, %2, 0-%1.val} - with CONST_HZ REG - uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} - yields %a - with CONST_STACK-CONST2_WHEN_NEG-CONST_HZ REG + with REG CONST2 + yields {SUB_CR, 
%2.val, %1} + with CONST_HI_ZR REG + yields {SUM_RIS, %2, his(0-%1.val)} + with CONST_STACK-CONST2_WHEN_NEG-CONST_HI_ZR REG uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} yields {SUM_RC, %a, los(0-%1.val)} pat ngi $1==4 /* Negate word */ with REG - uses reusing %1, REG - gen - neg %a, %1 - yields %a + yields {NEG_R, %1} pat mli $1==4 /* Multiply word (second * top) */ + with CONST2 REG + yields {MUL_RC, %2, %1.val} + with REG CONST2 + yields {MUL_RC, %1, %2.val} with REG REG - uses reusing %2, REG - gen - mullw %a, %2, %1 - yields %a + yields {MUL_RR, %2, %1} pat dvi $1==4 /* Divide word (second / top) */ with REG REG - uses reusing %2, REG - gen - divw %a, %2, %1 - yields %a + yields {DIV_RR, %2, %1} - pat dvu $1==4 /* Divide unsigned word (second / top) */ + pat dvu $1==4 /* Divide unsigned word (second / top) */ with REG REG - uses reusing %2, REG - gen - divwu %a, %2, %1 - yields %a + yields {DIV_RR_U, %2, %1} + + /* To calculate a remainder: a % b = a - (a / b * b) */ pat rmi $1==4 /* Remainder word (second % top) */ with REG REG - uses REG - gen - divw %a, %2, %1 - mullw %a, %a, %1 - subf %a, %a, %2 - yields %a + uses REG={DIV_RR, %2, %1}, REG + gen move {MUL_RR, %a, %1}, %b + yields {SUB_RR, %2, %b} - pat rmu $1==4 /* Remainder unsigned word (second % top) */ + pat rmu $1==4 /* Remainder unsigned word (second % top) */ with REG REG - uses REG - gen - divwu %a, %2, %1 - mullw %a, %a, %1 - subf %a, %a, %2 - yields %a + uses REG={DIV_RR_U, %2, %1}, REG + gen move {MUL_RR, %a, %1}, %b + yields {SUB_RR, %2, %b} + + +/* Bitwise logic */ + + /* This back end doesn't know how to combine shifts and + * bitwise ops to emit rlwinm, rlwnm, or rlwimi instructions. 
+ */ pat and $1==4 /* AND word */ with REG NOT_R - uses reusing %1, REG - gen - andc %a, %1, %2.reg - yields %a + yields {ANDC_RR, %1, %2.reg} with NOT_R REG - uses reusing %1, REG - gen - andc %a, %2, %1.reg - yields %a + yields {ANDC_RR, %2, %1.reg} with REG REG yields {AND_RR, %1, %2} with REG UCONST2 - uses reusing %1, REG - gen - andiX %a, %1, {CONST, %2.val} - yields %a + yields {AND_RC, %1, %2.val} with UCONST2 REG - uses reusing %2, REG - gen - andiX %a, %2, {CONST, %1.val} - yields %a - with REG CONST_HZ - uses reusing %1, REG - gen - andisX %a, %1, {CONST, hi(%2.val)} - yields %a - with CONST_HZ REG - uses reusing %2, REG - gen - andisX %a, %2, {CONST, hi(%1.val)} - yields %a + yields {AND_RC, %2, %1.val} + with REG CONST_HI_ZR + yields {AND_RIS, %1, hi(%2.val)} + with CONST_HI_ZR REG + yields {AND_RIS, %2, hi(%1.val)} pat and defined($1) /* AND set */ leaving @@ -1277,31 +1559,23 @@ PATTERNS pat ior $1==4 /* OR word */ with REG NOT_R - uses reusing %1, REG - gen - orc %a, %1, %2.reg - yields %a + yields {ORC_RR, %1, %2.reg} with NOT_R REG - uses reusing %2, REG - gen - orc %a, %2, %1.reg - yields %a + yields {ORC_RR, %2, %1.reg} with REG REG yields {OR_RR, %1, %2} with REG UCONST2 yields {OR_RC, %1, %2.val} with UCONST2 REG yields {OR_RC, %2, %1.val} - with REG CONST_HZ - uses reusing %1, REG={OR_RIS, %1, hi(%2.val)} - yields %a - with CONST_HZ REG - uses reusing %2, REG={OR_RIS, %2, hi(%1.val)} - yields %a - with REG CONST_STACK-UCONST2-CONST_HZ + with REG CONST_HI_ZR + yields {OR_RIS, %1, hi(%2.val)} + with CONST_HI_ZR REG + yields {OR_RIS, %2, hi(%1.val)} + with REG CONST_STACK-UCONST2-CONST_HI_ZR uses reusing %1, REG={OR_RIS, %1, hi(%2.val)} yields {OR_RC, %1, lo(%2.val)} - with CONST_STACK-UCONST2-CONST_HZ REG + with CONST_STACK-UCONST2-CONST_HI_ZR REG uses reusing %2, REG={OR_RIS, %2, hi(%1.val)} yields {OR_RC, %2, lo(%1.val)} @@ -1322,16 +1596,14 @@ PATTERNS yields {XOR_RC, %1, %2.val} with UCONST2 REG yields {XOR_RC, %2, %1.val} - with REG 
CONST_HZ - uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)} - yields %a - with CONST_HZ REG - uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)} - yields %a - with REG CONST_STACK-UCONST2-CONST_HZ + with REG CONST_HI_ZR + yields {XOR_RIS, %1, hi(%2.val)} + with CONST_HI_ZR REG + yields {XOR_RIS, %2, hi(%1.val)} + with REG CONST_STACK-UCONST2-CONST_HI_ZR uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)} yields {XOR_RC, %1, lo(%2.val)} - with CONST_STACK-UCONST2-CONST_HZ REG + with CONST_STACK-UCONST2-CONST_HI_ZR REG uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)} yields {XOR_RC, %2, lo(%1.val)} @@ -1344,22 +1616,13 @@ PATTERNS leaving cal ".xor" - pat com $1==INT32 /* NOT word */ - with AND_RR - uses REG - gen - nand %a, %1.reg1, %1.reg2 - yields %a - with OR_RR - uses REG - gen - nor %a, %1.reg1, %1.reg2 - yields %a - with XOR_RR - uses REG - gen - eqv %a, %1.reg1, %1.reg2 - yields %a + pat com $1==4 /* NOT word */ + with exact AND_RR + yields {NAND_RR, %1.reg1, %1.reg2} + with exact OR_RR + yields {NOR_RR, %1.reg1, %1.reg2} + with exact XOR_RR + yields {EQV_RR, %1.reg1, %1.reg2} with REG yields {NOT_R, %1} @@ -1376,53 +1639,104 @@ PATTERNS leaving loc 0 - pat zer defined($1) /* Create empty set */ + pat zer defined($1) /* Create empty set */ leaving loc $1 cal ".zer" + +/* Shifts and rotations */ + pat sli $1==4 /* Shift left (second << top) */ with CONST_STACK REG uses reusing %2, REG - gen - rlwinm %a, %2, {CONST, (%1.val & 0x1F)}, {CONST, 0}, {CONST, 31-(%1.val & 0x1F)} + gen slwi %a, %2, {C, %1.val & 0x1F} yields %a with REG REG - uses reusing %2, REG - gen - slw %a, %2, %1 + uses reusing %1, reusing %2, REG + gen slw %a, %2, %1 yields %a + pat sli stl $1==4 && inreg($2)==reg_any + with CONST_STACK REG + gen slwi {LOCAL, $2}, %2, {C, %1.val & 0x1F} + with REG REG + gen slw {LOCAL, $2}, %2, %1 - pat sri $1==4 /* Shift right signed (second >> top) */ + pat sri $1==4 /* Shift right signed (second >> top) */ with CONST_STACK REG uses reusing %2, REG - gen - srawi %a, %2, 
{CONST, %1.val & 0x1F} + gen srawi %a, %2, {C, %1.val & 0x1F} yields %a with REG REG - uses reusing %2, REG - gen - sraw %a, %2, %1 + uses reusing %1, reusing %2, REG + gen sraw %a, %2, %1 yields %a + pat sri stl $1==4 && inreg($2)==reg_any + with CONST_STACK REG + gen srawi {LOCAL, $2}, %2, {C, %1.val & 0x1F} + with REG REG + gen sraw {LOCAL, $2}, %2, %1 - pat sru $1==4 /* Shift right unsigned (second >> top) */ + pat sru $1==4 /* Shift right unsigned (second >> top) */ with CONST_STACK REG uses reusing %2, REG - gen - rlwinm %a, %2, {CONST, 32-(%1.val & 0x1F)}, {CONST, (%1.val & 0x1F)}, {CONST, 31} + gen srwi %a, %2, {C, %1.val & 0x1F} yields %a with REG REG - uses reusing %2, REG - gen - srw %a, %2, %1 + uses reusing %1, reusing %2, REG + gen srw %a, %2, %1 yields %a + pat sru stl $1==4 && inreg($2)==reg_any + with CONST_STACK REG + gen srwi {LOCAL, $2}, %2, {C, %1.val & 0x1F} + with REG REG + gen srw {LOCAL, $2}, %2, %1 + + pat rol $1==4 /* Rotate left word */ + with CONST_STACK REG + uses reusing %2, REG + gen rotlwi %a, %2, {C, %1.val & 0x1F} + yields %a + with REG REG + uses reusing %1, reusing %2, REG + gen rotlw %a, %2, %1 + yields %a + pat rol stl $1==4 && inreg($2)==reg_any + with CONST_STACK REG + gen rotlwi {LOCAL, $2}, %2, {C, %1.val & 0x1F} + with REG REG + gen rotlw {LOCAL, $2}, %2, %1 + + /* + * ror 4 -> ngi 4, rol 4 + * because to rotate right by n bits is to rotate left by + * (32 - n), which is to rotate left by -n. PowerPC rotlw + * handles -n as (-n & 0x1F). 
+ */ + + pat ror $1==4 /* Rotate right word */ + with CONST_STACK REG + uses reusing %2, REG + gen rotrwi %a, %2, {C, %1.val & 0x1F} + yields %a + with /* anything */ + leaving + ngi 4 + rol 4 + pat ror stl $1==4 && inreg($2)==reg_any + with CONST_STACK REG + gen rotrwi {LOCAL, $2}, %2, {C, %1.val & 0x1F} + with /* anything */ + leaving + ngi 4 + rol 4 + stl $2 /* Arrays */ pat aar $1==4 /* Address of array element */ - leaving - cal ".aar4" + leaving cal ".aar4" pat lar $1==4 /* Load from array */ with STACK @@ -1483,8 +1797,7 @@ PATTERNS gen test %1 mfcr %a - move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} pat tne /* top = (top != 0) */ with REG @@ -1492,8 +1805,7 @@ PATTERNS gen test %1 mfcr %a - move {XNE, %a}, %a - yields %a + yields {XNE, %a} pat tlt /* top = (top < 0) */ with REG @@ -1501,8 +1813,7 @@ PATTERNS gen test %1 mfcr %a - move {XLT, %a}, %a - yields %a + yields {XLT, %a} pat tle /* top = (top <= 0) */ with REG @@ -1510,8 +1821,7 @@ PATTERNS gen test %1 mfcr %a - move {XLE, %a}, %a - yields %a + yields {XLE, %a} pat tgt /* top = (top > 0) */ with REG @@ -1519,8 +1829,7 @@ PATTERNS gen test %1 mfcr %a - move {XGT, %a}, %a - yields %a + yields {XGT, %a} pat tge /* top = (top >= 0) */ with REG @@ -1528,176 +1837,139 @@ PATTERNS gen test %1 mfcr %a - move {XGE, %a}, %a - yields %a + yields {XGE, %a} pat cmi teq $1==4 /* Signed second == top */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} with CONST2 REG - uses reusing %1, REG={COND_RC, %2, %1.val} - gen move {XEQ, %a}, %a - yields %a + uses reusing %2, REG={COND_RC, %2, %1.val} + yields {XEQ, %a} with REG REG - uses reusing %1, REG={COND_RR, %2, %1} - gen move {XEQ, %a}, %a - yields %a + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} + yields {XEQ, %a} pat cmi tne $1==4 /* Signed second != top */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen move {XNE, %a}, %a - yields %a + yields {XNE, %a} with CONST2 REG - 
uses reusing %1, REG={COND_RC, %2, %1.val} - gen move {XNE, %a}, %a - yields %a + uses reusing %2, REG={COND_RC, %2, %1.val} + yields {XNE, %a} with REG REG - uses reusing %1, REG={COND_RR, %2, %1} - gen move {XNE, %a}, %a - yields %a + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} + yields {XNE, %a} pat cmi tgt $1==4 /* Signed second > top */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen move {XLT, %a}, %a - yields %a + yields {XLT, %a} with CONST2 REG - uses reusing %1, REG={COND_RC, %2, %1.val} - gen move {XGT, %a}, %a - yields %a + uses reusing %2, REG={COND_RC, %2, %1.val} + yields {XGT, %a} with REG REG - uses reusing %1, REG={COND_RR, %2, %1} - gen move {XGT, %a}, %a - yields %a + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} + yields {XGT, %a} pat cmi tge $1==4 /* Signed second >= top */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen move {XLE, %a}, %a - yields %a + yields {XLE, %a} with CONST2 REG - uses reusing %1, REG={COND_RC, %2, %1.val} - gen move {XGE, %a}, %a - yields %a + uses reusing %2, REG={COND_RC, %2, %1.val} + yields {XGE, %a} with REG REG - uses reusing %1, REG={COND_RR, %2, %1} - gen move {XGE, %a}, %a - yields %a + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} + yields {XGE, %a} pat cmi tlt $1==4 /* Signed second < top */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen move {XGT, %a}, %a - yields %a + yields {XGT, %a} with CONST2 REG - uses reusing %1, REG={COND_RC, %2, %1.val} - gen move {XLT, %a}, %a - yields %a + uses reusing %2, REG={COND_RC, %2, %1.val} + yields {XLT, %a} with REG REG - uses reusing %1, REG={COND_RR, %2, %1} - gen move {XLT, %a}, %a - yields %a + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} + yields {XLT, %a} pat cmi tle $1==4 /* Signed second <= top */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen move {XGE, %a}, %a - yields %a + yields {XGE, %a} with CONST2 REG - uses reusing %1, REG={COND_RC, %2, %1.val} - gen move {XLE, 
%a}, %a - yields %a + uses reusing %2, REG={COND_RC, %2, %1.val} + yields {XLE, %a} with REG REG - uses reusing %1, REG={COND_RR, %2, %1} - gen move {XLE, %a}, %a - yields %a + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} + yields {XLE, %a} pat cmu teq $1==4 /* Unsigned second == top */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} with UCONST2 REG - uses reusing %1, REG={CONDL_RC, %2, %1.val} - gen move {XEQ, %a}, %a - yields %a + uses reusing %2, REG={CONDL_RC, %2, %1.val} + yields {XEQ, %a} with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} - gen move {XEQ, %a}, %a - yields %a + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} + yields {XEQ, %a} pat cmu tne $1==4 /* Unsigned second != top */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen move {XNE, %a}, %a - yields %a + yields {XNE, %a} with UCONST2 REG - uses reusing %1, REG={CONDL_RC, %2, %1.val} - gen move {XNE, %a}, %a - yields %a + uses reusing %2, REG={CONDL_RC, %2, %1.val} + yields {XNE, %a} with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} - gen move {XNE, %a}, %a - yields %a + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} + yields {XNE, %a} pat cmu tgt $1==4 /* Unsigned second > top */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen move {XLT, %a}, %a - yields %a + yields {XLT, %a} with UCONST2 REG - uses reusing %1, REG={CONDL_RC, %2, %1.val} - gen move {XGT, %a}, %a - yields %a + uses reusing %2, REG={CONDL_RC, %2, %1.val} + yields {XGT, %a} with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} - gen move {XGT, %a}, %a - yields %a + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} + yields {XGT, %a} pat cmu tge $1==4 /* Unsigned second >= top */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen move {XLE, %a}, %a - yields %a + yields {XLE, %a} with UCONST2 REG - uses reusing %1, REG={CONDL_RC, %2, %1.val} - gen move {XGE, %a}, %a - yields %a + uses 
reusing %2, REG={CONDL_RC, %2, %1.val} + yields {XGE, %a} with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} - gen move {XGE, %a}, %a - yields %a + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} + yields {XGE, %a} pat cmu tlt $1==4 /* Unsigned second < top */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen move {XGT, %a}, %a - yields %a + yields {XGT, %a} with UCONST2 REG - uses reusing %1, REG={CONDL_RC, %2, %1.val} - gen move {XLT, %a}, %a - yields %a + uses reusing %2, REG={CONDL_RC, %2, %1.val} + yields {XLT, %a} with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} - gen move {XLT, %a}, %a - yields %a + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} + yields {XLT, %a} pat cmu tle $1==4 /* Unsigned second <= top */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen move {XGE, %a}, %a - yields %a + yields {XGE, %a} with UCONST2 REG - uses reusing %1, REG={CONDL_RC, %2, %1.val} - gen move {XLE, %a}, %a - yields %a + uses reusing %2, REG={CONDL_RC, %2, %1.val} + yields {XLE, %a} with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} - gen move {XLE, %a}, %a - yields %a + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} + yields {XLE, %a} /* Simple branches */ @@ -1724,11 +1996,11 @@ PATTERNS proc bxx example beq with REG CONST2 STACK gen - cmpwi %1, {CONST, %2.val} + cmpwi %1, %2 bxx[2] {LABEL, $1} with CONST2 REG STACK gen - cmpwi %2, {CONST, %1.val} + cmpwi %2, %1 bxx[1] {LABEL, $1} with REG REG STACK gen @@ -1746,11 +2018,11 @@ PATTERNS proc cmu4zxx example cmu zeq with REG CONST2 STACK gen - cmplwi %1, {CONST, %2.val} + cmplwi %1, %2 bxx[2] {LABEL, $2} with CONST2 REG STACK gen - cmplwi %2, {CONST, %1.val} + cmplwi %2, %1 bxx[1] {LABEL, $2} with REG REG STACK gen @@ -1776,41 +2048,41 @@ PATTERNS * puts gt in the sign bit, to reverse the comparison. 
*/ - pat cmi $1==INT32 /* Signed tristate compare */ + pat cmi $1==4 /* Signed tristate compare */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen rlwinm %a, %a, {CONST, 1}, {CONST, 31}, {CONST, 0} + gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0} yields %a with CONST2 REG uses reusing %2, REG={COND_RC, %2, %1.val} - gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + gen extlwi %a, %a, {C, 2}, {C, 0} yields %a with REG REG - uses reusing %1, REG={COND_RR, %2, %1} - gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} + gen extlwi %a, %a, {C, 2}, {C, 0} yields %a - pat cmu $1==INT32 /* Unsigned tristate compare */ + pat cmu $1==4 /* Unsigned tristate compare */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen rlwinm %a, %a, {CONST, 1}, {CONST, 31}, {CONST, 0} + gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0} yields %a with UCONST2 REG uses reusing %2, REG={CONDL_RC, %2, %1.val} - gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + gen extlwi %a, %a, {C, 2}, {C, 0} yields %a with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} - gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} + gen extlwi %a, %a, {C, 2}, {C, 0} yields %a pat cmp /* Compare pointers */ leaving - cmu INT32 + cmu 4 - pat cms $1==INT32 /* Compare blocks (word sized) */ + pat cms $1==4 /* Compare blocks (word sized) */ leaving - cmi INT32 + cmi 4 pat cms defined($1) leaving @@ -1824,34 +2096,32 @@ PATTERNS /* Other branching and labelling */ + /* During an unconditional jump, if the top element on the + * stack has 4 bytes, then we hold it in register r3. 
+ */ pat lab topeltsize($1)==4 && !fallthrough($1) kills ALL - gen - labeldef $1 - yields r3 + gen labeldef $1 + yields r3 pat lab topeltsize($1)==4 && fallthrough($1) with REG3 STACK - kills ALL - gen - labeldef $1 - yields r3 + kills ALL + gen labeldef $1 + yields r3 - pat lab topeltsize($1)!=4 + pat lab topeltsize($1)!=4 /* Label without r3 */ with STACK - kills ALL - gen - labeldef $1 + kills ALL + gen labeldef $1 - pat bra topeltsize($1)==4 /* Unconditional jump with TOS GPRister */ + pat bra topeltsize($1)==4 /* Branch with r3 */ with REG3 STACK - gen - b {LABEL, $1} + gen b {LABEL, $1} - pat bra topeltsize($1)!=4 /* Unconditional jump without TOS GPRister */ + pat bra topeltsize($1)!=4 /* Branch without r3 */ with STACK - gen - b {LABEL, $1} + gen b {LABEL, $1} /* Miscellaneous */ @@ -1859,8 +2129,7 @@ PATTERNS pat cal /* Call procedure */ with STACK kills ALL - gen - bl {LABEL, $1} + gen bl {LABEL, $1} pat cai /* Call procedure indirect */ with REG STACK @@ -1869,10 +2138,10 @@ PATTERNS mtspr ctr, %1 bctrl. - pat lfr $1==INT32 /* Load function result, word */ + pat lfr $1==4 /* Load function result, word */ yields r3 - pat lfr $1==INT64 /* Load function result, double-word */ + pat lfr $1==8 /* Load function result, double-word */ yields r4 r3 pat ret $1==0 /* Return from procedure */ @@ -1884,18 +2153,25 @@ PATTERNS mtspr lr, r0 lwz r0, {IND_RC_W, fp, 0} /* Free our stack frame. */ - addi sp, fp, {CONST, 8} + addi sp, fp, {C, 8} mr fp, r0 blr. + /* If "ret" coerces STACK to REG3, then top will delete the + * extra "addi sp, sp, 4". + */ + pat ret $1==4 /* Return from procedure, word */ with REG3 leaving ret 0 pat ret $1==8 /* Return from proc, double-word */ - with REG3 REG + with REG3 INT_W gen move %2, r4 leaving ret 0 + with REG3 STACK + gen lwz r4, {IND_RC_W, sp, 0} + leaving ret 0 /* * These rules for blm/bls are wrong if length is zero. 
@@ -1908,14 +2184,15 @@ PATTERNS bls pat bls /* Block move variable length */ - with REG REG REG + with REG SPFP+REG SPFP+REG + /* allows sp as %2, %3 */ /* ( src%3 dst%2 len%1 -- ) */ uses reusing %1, REG, REG, REG gen - srwi %a, %1, {CONST, 2} + srwi %a, %1, {C, 2} mtspr ctr, %a - addi %b, %3, {CONST, 0-4} - addi %c, %2, {CONST, 0-4} + addi %b, %3, {C, 0-4} + addi %c, %2, {C, 0-4} 1: lwzu %a, {IND_RC_W, %b, 4} stwu %a, {IND_RC_W, %c, 4} bdnz {LABEL, "1b"} @@ -1923,14 +2200,12 @@ PATTERNS pat csa /* Array-lookup switch */ with STACK kills ALL - gen - b {LABEL, ".csa"} + gen b {LABEL, ".csa"} pat csb /* Table-lookup switch */ with STACK kills ALL - gen - b {LABEL, ".csb"} + gen b {LABEL, ".csb"} /* EM specials */ @@ -1946,151 +2221,116 @@ PATTERNS ste "hol0" pat lni /* Increment line number */ - leaving - ine "hol0" + leaving ine "hol0" pat lim /* Load EM trap ignore mask */ - leaving - lde ".ignmask" + leaving loe ".ignmask" pat sim /* Store EM trap ignore mask */ + leaving ste ".ignmask" + + pat sig /* Set trap handler, yield old */ leaving - ste ".ignmask" + loe ".trppc" + exg 4 + ste ".trppc" pat trp /* Raise EM trap */ with REG3 kills ALL - gen - bl {LABEL, ".trap"} - - pat sig /* Set trap handler */ - leaving - ste ".trppc" + gen bl {LABEL, ".trp"} pat rtt /* Return from trap */ - leaving - ret 0 + leaving ret 0 - /* - * Lexical local base: lxl 0 yields our fp, lxl n yields the - * fp of the nth statically enclosing procedure. 
+ pat rck $1==4 /* Range check */ + leaving cal ".rck" + + /* Our caller's local base, "lxl 0 dch", appears in + * lang/cem/libcc.ansi/setjmp/setjmp.e, lang/m2/libm2/par_misc.e */ - pat lxl $1==0 - leaving - lor 0 - pat lxl $1==1 - yields {IND_RC_W, fp, SL_OFFSET} - pat lxl $1==2 - uses REG={IND_RC_W, fp, SL_OFFSET} - yields {IND_RC_W, %a, SL_OFFSET} - pat lxl $1==3 - uses REG={IND_RC_W, fp, SL_OFFSET}, reusing %a, REG - gen move {IND_RC_W, %a, SL_OFFSET}, %b - yields {IND_RC_W, %b, SL_OFFSET} - pat lxl $1>=4 && $1<=0x8000 - uses REG={IND_RC_W, fp, SL_OFFSET}, - REG={CONST_0000_7FFF, $1-1} - gen - mtspr ctr, %b - 1: lwz %a, {IND_RC_W, %a, SL_OFFSET} - bdnz {LABEL, "1b"} - yields %a + pat lxl dch $1==0 + yields {IND_RC_W, fp, FP_OFFSET} pat dch /* Dynamic chain: LB -> caller's LB */ with REG yields {IND_RC_W, %1, FP_OFFSET} pat lpb /* LB -> argument base */ - leaving - adp EM_BSIZE - - pat lxa /* Lexical argument base */ - leaving - lxl $1 - lpb + leaving adp EM_BSIZE + /* "gto" must preserve the function result for "lfr", so + * longjmp() can pass the return value to setjmp(). + * - See lang/cem/libcc.ansi/setjmp/setjmp.e + * + * Must preserve r3 and r4, so no "uses REG". + * PowerPC can't add r0 + constant. Use r12. + */ pat gto /* longjmp */ with STACK - uses REG gen - move {LABEL, $1}, %a - move {IND_RC_W, %a, 8}, fp - move {IND_RC_W, %a, 4}, sp - move {IND_RC_W, %a, 0}, %a - mtspr ctr, %a + move {LABEL, $1}, r12 + move {IND_RC_W, r12, 8}, fp + move {IND_RC_W, r12, 4}, sp + move {IND_RC_W, r12, 0}, r12 + mtspr ctr, r12 bctr. 
pat lor $1==0 /* Load local base */ - uses REG - gen - move fp, %a - yields %a + leaving lxl 0 pat lor $1==1 /* Load stack pointer */ - uses REG - gen - move sp, %a - yields %a + with STACK + yields sp + + /* Next few patterns for "lor 1" appear in + * lang/m2/libm2/par_misc.e + */ + pat lor adp $1==1 && smalls($2) /* sp + constant */ + with STACK + yields {SUM_RC, sp, $2} + + /* Subtract stack pointer by doing %1 - (sp - 4) + * because sp - 4 would point to %1. + */ + pat lor sbs loc adu $1==1 && $2==4 && $4==4 + with REG STACK + uses reusing %1, REG + gen subf %a, sp, %1 + yields %a + leaving loc $3+4 adu 4 + pat lor sbs $1==1 && $2==4 + with REG STACK + uses reusing %1, REG + gen subf %a, sp, %1 + yields {SUM_RC, %a, 4} pat str $1==0 /* Store local base */ - with REG - gen - move %1, fp - - pat str $1==1 /* Store stack pointer */ - with REG - gen - move %1, sp - - pat loc ass $1==4 && $2==4 /* Drop 4 bytes from stack */ - with exact REG - /* nop */ + with INT_W + gen move %1, fp with STACK gen - addi sp, sp, {CONST, 4} + lwz fp, {IND_RC_W, sp, 0} + addi sp, sp, {C, 4} - pat ass $1==4 /* Adjust stack by variable amount */ - with CONST2 STACK - gen - move {SUM_RC, sp, %1.val}, sp - with CONST_HZ STACK - gen - move {SUM_RC, sp, his(%1.val)}, sp - with CONST_STACK-CONST2-CONST_HZ STACK - gen - move {SUM_RC, sp, his(%1.val)}, sp - move {SUM_RC, sp, los(%1.val)}, sp - with REG STACK - gen - move {SUM_RR, sp, %1}, sp - - pat asp /* Adjust stack by constant amount */ - leaving - loc $1 - ass 4 - - pat lae rck $2==4 /* Range check */ - with REG + pat str $1==1 /* Store stack pointer */ + with INT_W kills ALL - gen - cmpwi %1, {CONST, rom($1, 1)} - blt {LABEL, ".trap_erange"} - cmpwi %1, {CONST, rom($1, 2)} - bgt {LABEL, ".trap_erange"} - yields %1 + gen move %1, sp + with STACK + kills ALL + gen lwz sp, {IND_RC_W, sp, 0} /* Single-precision floating-point */ - pat zrf $1==INT32 /* Push zero */ - leaving - loe ".fs_00000000" + pat zrf $1==4 /* Push zero */ + leaving loe 
".fs_00000000" pat adf $1==4 /* Add single */ with FSREG FSREG - uses reusing %1, FSREG - gen - fadds %a, %2, %1 + uses reusing %1, reusing %2, FSREG + gen fadds %a, %2, %1 yields %a pat adf stl $1==4 && inreg($2)==reg_float with FSREG FSREG @@ -2098,9 +2338,8 @@ PATTERNS pat sbf $1==4 /* Subtract single */ with FSREG FSREG - uses reusing %1, FSREG - gen - fsubs %a, %2, %1 + uses reusing %1, reusing %2, FSREG + gen fsubs %a, %2, %1 yields %a pat sbf stl $1==4 && inreg($2)==reg_float with FSREG FSREG @@ -2108,79 +2347,79 @@ PATTERNS pat mlf $1==4 /* Multiply single */ with FSREG FSREG - uses reusing %1, FSREG - gen - fmuls %a, %2, %1 + uses reusing %1, reusing %2, FSREG + gen fmuls %a, %2, %1 yields %a pat mlf stl $1==4 && inreg($2)==reg_float with FSREG FSREG gen fmuls {LOCAL, $2}, %2, %1 - pat dvf $1==INT32 /* Divide single */ + pat dvf $1==4 /* Divide single */ with FSREG FSREG - uses reusing %1, FSREG - gen - fdivs %a, %2, %1 + uses reusing %1, reusing %2, FSREG + gen fdivs %a, %2, %1 yields %a pat dvf stl $1==4 && inreg($2)==reg_float with FSREG FSREG gen fdivs {LOCAL, $2}, %2, %1 - pat ngf $1==INT32 /* Negate single */ + pat ngf $1==4 /* Negate single */ with FSREG uses reusing %1, FSREG - gen - fneg %a, %1 + gen fneg %a, %1 yields %a pat ngf stl $1==4 && inreg($2)==reg_float with FSREG gen fneg {LOCAL, $2}, %1 - pat cmf $1==INT32 /* Compare single */ + /* When a or b is NaN, then a < b, a <= b, a > b, a >= b + * should all be false. We can't make them false, because + * - EM's _cmf_ is only for ordered comparisons. + * - The peephole optimizer assumes (a < b) == !(a >= b). + * + * We do make a == b false and a != b true, by checking the + * eq (equal) bit or un (unordered) bit in cr0. + */ + + pat cmf $1==4 /* Compare single */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + /* Extract lt, gt, un; put lt in sign bit. 
*/ + gen andisX %a, %a, {C, 0xd000} yields %a pat cmf teq $1==4 /* Single second == top */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} pat cmf tne $1==4 /* Single second == top */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen move {XNE, %a}, %a - yields %a + yields {XNE, %a} pat cmf tgt $1==4 /* Single second > top */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen move {XGT, %a}, %a - yields %a + yields {XGT, %a} pat cmf tge $1==4 /* Single second >= top */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen move {XGE, %a}, %a - yields %a + yields {XGE, %a} pat cmf tlt $1==4 /* Single second < top */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen move {XLT, %a}, %a - yields %a + yields {XLT, %a} pat cmf tle $1==4 /* Single second <= top */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen move {XLE, %a}, %a - yields %a + yields {XLE, %a} proc cmf4zxx example cmf zeq with FSREG FSREG STACK - uses REG gen fcmpo cr0, %2, %1 bxx* {LABEL, $2} @@ -2193,12 +2432,11 @@ PATTERNS pat cmf zlt $1==4 call cmf4zxx("blt") pat cmf zle $1==4 call cmf4zxx("ble") - pat loc loc cff $1==INT32 && $2==INT64 /* Convert single to double */ + pat loc loc cff $1==4 && $2==8 /* Convert single to double */ with FSREG yields %1.1 - /* Convert single to signed int */ - pat loc loc cfi $1==4 && $2==4 + pat loc loc cfi $1==4 && $2==4 /* Single to signed int */ leaving loc 4 loc 8 @@ -2207,8 +2445,7 @@ PATTERNS loc 4 cfi - /* Convert single to unsigned int */ - pat loc loc cfu $1==4 && $2==4 + pat loc loc cfu $1==4 && $2==4 /* Single to unsigned int */ leaving loc 4 loc 8 @@ -2217,8 +2454,7 @@ PATTERNS loc 4 cfu - /* Convert signed int to single */ - pat loc loc cif $1==4 && $2==4 + pat loc loc cif $1==4 && $2==4 /* Signed int to single */ leaving loc 4 loc 8 @@ -2227,8 +2463,7 @@ PATTERNS loc 4 cff - /* Convert unsigned int to single */ - pat loc loc cuf $1==4 && $2==4 + pat loc loc cuf $1==4 && $2==4 /* Unsigned int to single */ 
leaving loc 4 loc 8 @@ -2237,18 +2472,23 @@ PATTERNS loc 4 cff + pat fef $1==4 /* Split fraction, exponent */ + leaving cal ".fef4" + + /* Multiply two singles, then split fraction, integer */ + pat fif $1==4 + leaving cal ".fif4" + /* Double-precision floating-point */ - pat zrf $1==INT64 /* Push zero */ - leaving - lde ".fd_00000000" + pat zrf $1==8 /* Push zero */ + leaving lde ".fd_00000000" pat adf $1==8 /* Add double */ with FREG FREG - uses reusing %1, FREG - gen - fadd %a, %2, %1 + uses reusing %1, reusing %2, FREG + gen fadd %a, %2, %1 yields %a pat adf sdl $1==8 && inreg($2)==reg_float with FREG FREG @@ -2256,9 +2496,8 @@ PATTERNS pat sbf $1==8 /* Subtract double */ with FREG FREG - uses reusing %1, FREG - gen - fsub %a, %2, %1 + uses reusing %1, reusing %2, FREG + gen fsub %a, %2, %1 yields %a pat sbf sdl $1==8 && inreg($2)==reg_float with FREG FREG @@ -2266,9 +2505,8 @@ PATTERNS pat mlf $1==8 /* Multiply double */ with FREG FREG - uses reusing %1, FREG - gen - fmul %a, %2, %1 + uses reusing %1, reusing %2, FREG + gen fmul %a, %2, %1 yields %a pat mlf sdl $1==8 && inreg($2)==reg_float with FREG FREG @@ -2276,9 +2514,8 @@ PATTERNS pat dvf $1==8 /* Divide double */ with FREG FREG - uses reusing %1, FREG - gen - fdiv %a, %2, %1 + uses reusing %1, reusing %2, FREG + gen fdiv %a, %2, %1 yields %a pat dvf sdl $1==8 && inreg($2)==reg_float with FREG FREG @@ -2287,58 +2524,53 @@ PATTERNS pat ngf $1==8 /* Negate double */ with FREG uses reusing %1, FREG - gen - fneg %a, %1 + gen fneg %a, %1 yields %a pat ngf sdl $1==8 && inreg($2)==reg_float with FREG gen fneg {DLOCAL, $2}, %1 - pat cmf $1==INT64 /* Compare double */ + /* To compare NaN, see comment above pat cmf $1==4 */ + + pat cmf $1==8 /* Compare double */ with FREG FREG uses REG={COND_FD, %2, %1} - gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + /* Extract lt, gt, un; put lt in sign bit. 
*/ + gen andisX %a, %a, {C, 0xd000} yields %a pat cmf teq $1==8 /* Double second == top */ with FREG FREG uses REG={COND_FD, %2, %1} - gen move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} - pat cmf tne $1==8 /* Single second == top */ + pat cmf tne $1==8 /* Double second != top */ with FREG FREG uses REG={COND_FD, %2, %1} - gen move {XNE, %a}, %a - yields %a + yields {XNE, %a} pat cmf tgt $1==8 /* Double second > top */ with FREG FREG uses REG={COND_FD, %2, %1} - gen move {XGT, %a}, %a - yields %a + yields {XGT, %a} pat cmf tge $1==8 /* Double second >= top */ with FREG FREG uses REG={COND_FD, %2, %1} - gen move {XGE, %a}, %a - yields %a + yields {XGE, %a} pat cmf tlt $1==8 /* Double second < top */ with FREG FREG uses REG={COND_FD, %2, %1} - gen move {XLT, %a}, %a - yields %a + yields {XLT, %a} pat cmf tle $1==8 /* Double second <= top */ with FREG FREG uses REG={COND_FD, %2, %1} - gen move {XLE, %a}, %a - yields %a + yields {XLE, %a} proc cmf8zxx example cmf zeq with FREG FREG STACK - uses REG gen fcmpo cr0, %2, %1 bxx* {LABEL, $2} @@ -2351,42 +2583,37 @@ PATTERNS pat cmf zlt $1==8 call cmf8zxx("blt") pat cmf zle $1==8 call cmf8zxx("ble") - pat loc loc cff $1==INT64 && $2==INT32 /* Convert double to single */ + /* Convert double to single */ + /* reg_float pattern must be first, or it goes unused!
*/ + pat loc loc cff stl $1==8 && $2==4 && inreg($4)==reg_float + with FREG + gen frsp {LOCAL, $4}, %1 + pat loc loc cff $1==8 && $2==4 with FREG uses reusing %1, FSREG - gen - frsp %a, %1 + gen frsp %a, %1 yields %a - /* Convert double to signed int */ - pat loc loc cfi $1==8 && $2==4 + pat loc loc cfi $1==8 && $2==4 /* Double to signed int */ with FREG STACK uses reusing %1, FREG gen fctiwz %a, %1 stfdu %a, {IND_RC_D, sp, 0-8} - addi sp, sp, {CONST, 4} + addi sp, sp, {C, 4} - /* Convert double to unsigned int */ - pat loc loc cfu $1==8 && $2==4 - leaving - cal ".cfu8" + pat loc loc cfu $1==8 && $2==4 /* Double to unsigned int */ + leaving cal ".cfu8" - /* Convert signed int to double */ - pat loc loc cif $1==4 && $2==8 - leaving - cal ".cif8" + pat loc loc cif $1==4 && $2==8 /* Signed int to double */ + leaving cal ".cif8" - /* Convert unsigned int to double */ - pat loc loc cuf $1==4 && $2==8 - leaving - cal ".cuf8" + pat loc loc cuf $1==4 && $2==8 /* Unsigned int to double */ + leaving cal ".cuf8" pat fef $1==8 /* Split fraction, exponent */ - leaving - cal ".fef8" + leaving cal ".fef8" /* Multiply two doubles, then split fraction, integer */ pat fif $1==8 - leaving - cal ".fif8" + leaving cal ".fif8" diff --git a/mach/powerpc/top/table b/mach/powerpc/top/table index fdec03b2e..196cae128 100644 --- a/mach/powerpc/top/table +++ b/mach/powerpc/top/table @@ -1,12 +1,14 @@ -/* PowerPC desciptor table for ACK target optimizer */ +/* PowerPC table for ACK target optimizer */ -MAXOP 3; +MAXOP 5; LABEL_STARTER '.'; %%; +L1, L2, L3, L4, L5 { not_using_sp(VAL) }; RNZ { strcmp(VAL, "r0") }; /* not r0 */ +UP { positive(VAL) }; X, Y, Z { TRUE }; %%; @@ -16,10 +18,74 @@ X, Y, Z { TRUE }; addi RNZ, RNZ, 0 -> ; addis RNZ, RNZ, 0 -> ; +addi RNZ, RNZ, X : addi RNZ, RNZ, Y { plus(X, Y, Z) } + -> addi RNZ, RNZ, Z ; + +/* Lower "addi sp, sp, UP" by lifting other instructions, looking for + * chances to merge or delete _addi_ instructions, and assuming that + * the code generator 
uses "sp" not "r1". + */ +addi sp, sp, UP : ANY L1 { lift(ANY) } + -> ANY L1 : addi sp, sp, UP ; +addi sp, sp, UP : ANY L1, L2 { lift(ANY) } + -> ANY L1, L2 : addi sp, sp, UP ; +addi sp, sp, UP : ANY L1, L2, L3 { lift(ANY) } + -> ANY L1, L2, L3 : addi sp, sp, UP ; +addi sp, sp, UP : ANY L1, L2, L3, L4 { lift(ANY) } + -> ANY L1, L2, L3, L4 : addi sp, sp, UP ; +addi sp, sp, UP : ANY L1, L2, L3, L4, L5 { lift(ANY) } + -> ANY L1, L2, L3, L4, L5 : addi sp, sp, UP ; +addi sp, sp, UP : lmw Y, L1 { Y[0]=='r' && atoi(Y+1)>1 } + -> lmw Y, L1 : addi sp, sp, UP ; + +/* Merge _addi_ when popping from the stack. */ +addi sp, sp, X : lwz L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> lwz L1, Z(sp) : addi sp, sp, X ; +addi sp, sp, X : lfs L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> lfs L1, Z(sp) : addi sp, sp, X ; +addi sp, sp, X : lfd L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> lfd L1, Z(sp) : addi sp, sp, X ; + +/* Lower or delete _addi_ when pushing to the stack. */ +addi sp, sp, X : stwu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> stw L1, Z(sp) : addi sp, sp, Z ; +addi sp, sp, X : stfsu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> stfs L1, Z(sp) : addi sp, sp, Z ; +addi sp, sp, X : stfdu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> stfd L1, Z(sp) : addi sp, sp, Z ; +addi sp, sp, 4 : stfdu L1, -8(sp) -> stfdu L1, -4(sp) ; + +/* Delete _addi_ when setting the stack pointer. */ +addi sp, sp, X : addi sp, L1, Y -> addi sp, L1, Y ; +addi sp, sp, X : lwz sp, L1 -> lwz sp, L1 ; + +or X, Y, Y -> mr X, Y ; +or. X, Y, Y -> mr. X, Y ; + mr X, X -> ; fmr X, X -> ; -or X, Y, Z : or. X, X, X -> or. X, Y, Z ; +add X, Y, Z : mr. X, X -> add. X, Y, Z ; +and X, Y, Z : mr. X, X -> and. X, Y, Z ; +andc X, Y, Z : mr. X, X -> andc. X, Y, Z ; +divw X, Y, Z : mr. X, X -> divw. X, Y, Z ; +divwu X, Y, Z : mr. X, X -> divwu. X, Y, Z ; +extsb X, Y, Z : mr. X, X -> extsb. X, Y, Z ; +extsh X, Y, Z : mr. X, X -> extsh. X, Y, Z ; +eqv X, Y, Z : mr. X, X -> eqv. X, Y, Z ; +mullw X, Y, Z : mr. 
X, X -> mullw. X, Y, Z ; +nand X, Y, Z : mr. X, X -> nand. X, Y, Z ; +nor X, Y, Z : mr. X, X -> nor. X, Y, Z ; +or X, Y, Z : mr. X, X -> or. X, Y, Z ; +orc X, Y, Z : mr. X, X -> orc. X, Y, Z ; +slw X, Y, Z : mr. X, X -> slw. X, Y, Z ; +slwi X, Y, Z : mr. X, X -> slwi. X, Y, Z ; +subf X, Y, Z : mr. X, X -> subf. X, Y, Z ; +sraw X, Y, Z : mr. X, X -> sraw. X, Y, Z ; +srawi X, Y, Z : mr. X, X -> srawi. X, Y, Z ; +srw X, Y, Z : mr. X, X -> srw. X, Y, Z ; +srwi X, Y, Z : mr. X, X -> srwi. X, Y, Z ; +xor X, Y, Z : mr. X, X -> xor. X, Y, Z ; b X : labdef X -> labdef X ; @@ -27,3 +93,98 @@ b X : labdef X -> labdef X ; /* LT=0, GT=1, EQ=2, OV=3 */ %%; + +/* Is it a word character? 0-9A-Za-z_ */ +static int isword(char c) { + return + (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || (c == '_'); +} + +/* Does operand _s_ not use the stack pointer? */ +int not_using_sp(const char *s) { + int boundary; + + boundary = 1; + while (*s) { + if (boundary && + ((s[0]=='s' && s[1]=='p') || (s[0]=='r' && s[1]=='1')) && + !isword(s[2])) + return 0; + boundary = !isword(*s); + s++; + } + return 1; +} + + +int positive(const char *s) { + long n; + char *end; + + n = strtol(s, &end, 10); + return *s != '\0' && *end == '\0' && n > 0; +} + + +/* Instructions to lift(), sorted in strcmp() order. These are from + * ../ncg/table, minus branch instructions. 
+ */ +const char *liftables[] = { + "add", "add.", "addi", + "and", "andc", "andi.", "andis.", + "cmp", "cmpi", "cmpl", "cmpli", + "cmplw", "cmplwi", "cmpw", "cmpwi", + "divw", "divwu", "eqv", "extlwi", "extrwi", "extsb", "extsh", + "fadd", "fadds", "fcmpo", "fctiwz", "fdiv", "fdivs", + "fmr", "fmul", "fmuls", "fneg", "frsp", "fsub", "fsubs", + "lbz", "lbzx", + "lfd", "lfdu", "lfdx", "lfs", "lfsu", "lfsx", + "lha", "lhax", "lhz", "lhzx", + "li", "lis", "lwz", "lwzu", "lwzx", + "mfcr", "mfspr", "mr", "mr.", "mtspr", "mullw", + "nand", "neg", "nor", "or", "or.", "ori", "oris", + "rlwinm", "rlwnm", "rotlwi", "rotrwi", + "slw", "slwi", "sraw", "srawi", "srw", "srwi", + "stb", "stbx", + "stfd", "stfdu", "stfdx", "stfs", "stfsu", "stfsx", + "sth", "sthx", "stw", "stwu", "stwx", + "subf", "xor", "xori", "xoris", +}; + +static int liftcmp(const void *a, const void *b) { /* bsearch() comparator; a and b point to (const char *) */ + return strcmp(*(const char **)a, *(const char **)b); +} + +/* May we lift instruction _s_ above "addi SP, SP, X"? Yields 1 when _s_ is in liftables[] (which bsearch() needs in strcmp() order), else 0. */ +int lift(const char *s) { + return bsearch(&s, liftables, + sizeof(liftables) / sizeof(liftables[0]), + sizeof(liftables[0]), liftcmp) != 0; +} + + +/* Does it fit a signed 16-bit integer? */ +static int fits16(long l) { + return l >= -32768 && l <= 32767; +} + +/* Tries sum = a + b with signed 16-bit integers.
On success, writes the decimal sum into _sum_ (up to 7 bytes with the terminator) and returns 1; returns 0 if a or b is not a plain decimal number or any value is out of range. */ +int plus(const char *a, const char *b, char *sum) +{ + long la, lb, lsum; + char *end; + + la = strtol(a, &end, 10); + if (*a == '\0' || *end != '\0' || !fits16(la)) + return 0; + lb = strtol(b, &end, 10); + if (*b == '\0' || *end != '\0' || !fits16(lb)) + return 0; + + lsum = la + lb; + if (!fits16(lsum)) + return 0; + snprintf(sum, 7, "%ld", lsum); + return 1; +} diff --git a/mach/proto/mcg/main.c b/mach/proto/mcg/main.c index cf8a4435f..aa0fa4816 100644 --- a/mach/proto/mcg/main.c +++ b/mach/proto/mcg/main.c @@ -42,13 +42,14 @@ int main(int argc, char* const argv[]) const char* inputfilename = NULL; const char* outputfilename = NULL; FILE* output; + int i; program_name = argv[0]; opterr = 1; for (;;) { - int c = getopt(argc, argv, "-d:D:C:o:"); + int c = getopt(argc, argv, "d:D:C:o:"); if (c == -1) break; @@ -79,20 +80,22 @@ int main(int argc, char* const argv[]) fatal("already specified an output file"); outputfilename = optarg; break; - - case 1: - if (inputfilename) - fatal("unexpected argument '%s'", optarg); - inputfilename = optarg; } } + for (i = optind; i < argc; i++) + { + if (inputfilename) + fatal("unexpected argument '%s'", argv[i]); + inputfilename = argv[i]; + } + symbol_init(); - if (!EM_open((char*) inputfilename)) - fatal("couldn't open input '%s': %s", + if (!EM_open((char*) inputfilename)) + fatal("couldn't open input '%s': %s", inputfilename ?
inputfilename : "", EM_error); - + if (outputfilename) { outputfile = fopen(outputfilename, "w"); diff --git a/mach/proto/mcg/treebuilder.c b/mach/proto/mcg/treebuilder.c index eed770170..ac811fc14 100644 --- a/mach/proto/mcg/treebuilder.c +++ b/mach/proto/mcg/treebuilder.c @@ -274,7 +274,7 @@ static struct ir* store(int size, struct ir* address, int offset, struct ir* val else opcode = IR_STORE; - if (offset > 0) + if (offset != 0) address = new_ir2( IR_ADD, EM_pointersize, address, new_wordir(offset) @@ -304,7 +304,7 @@ static struct ir* load(int size, struct ir* address, int offset) else opcode = IR_LOAD; - if (offset > 0) + if (offset != 0) address = new_ir2( IR_ADD, EM_pointersize, address, new_wordir(offset) @@ -416,6 +416,31 @@ static void helper_function(const char* name) ); } +static void helper_function_with_arg(const char* name, struct ir* arg) +{ + /* Abuses IR_SETRET to set a register to pass one argument to a + * helper function. + * + * FIXME: As of January 2018, mach/powerpc/libem takes an + * argument in register r3 only for ".los4", ".sts4", ".trp". + * This is an accident. Should the argument be on the stack, or + * should other helpers use a register? */ + + materialise_stack(); + appendir( + new_ir1( + IR_SETRET, arg->size, + arg + ) + ); + appendir( + new_ir1( + IR_CALL, 0, + new_labelir(name) + ) + ); +} + static void insn_simple(int opcode) { switch (opcode) @@ -437,6 +462,7 @@ static void insn_simple(int opcode) case op_cii: simple_convert(IR_FROMSI); break; case op_ciu: simple_convert(IR_FROMSI); break; case op_cui: simple_convert(IR_FROMUI); break; + case op_cuu: simple_convert(IR_FROMUI); break; case op_cfu: simple_convert(IR_FROMUF); break; case op_cfi: simple_convert(IR_FROMSF); break; case op_cif: simple_convert(IR_FROMSI); break; @@ -496,10 +522,12 @@ static void insn_simple(int opcode) case op_lim: { + /* Traps use only 16 bits of .ignmask, but we keep an + * entire word, even if a word has more than 2 bytes. 
*/ push( - new_ir1( - (EM_wordsize == 2) ? IR_LOAD : IR_LOADH, EM_wordsize, - new_labelir(".ignmask") + load( + EM_wordsize, + new_labelir(".ignmask"), 0 ) ); break; @@ -507,26 +535,34 @@ static void insn_simple(int opcode) case op_sim: { - sequence_point(); appendir( - new_ir2( - (EM_wordsize == 2) ? IR_STORE : IR_STOREH, EM_wordsize, - new_labelir(".ignmask"), + store( + EM_wordsize, + new_labelir(".ignmask"), 0, pop(EM_wordsize) ) ); break; } - case op_trp: helper_function(".trp"); break; + case op_trp: + helper_function_with_arg(".trp", pop(EM_wordsize)); + break; case op_sig: { + struct ir* label = new_labelir(".trppc"); struct ir* value = pop(EM_pointersize); + push( + load( + EM_pointersize, + label, 0 + ) + ); appendir( store( EM_pointersize, - new_labelir(".trppc"), 0, + label, 0, value ) ); @@ -539,12 +575,13 @@ static void insn_simple(int opcode) break; } - /* FIXME: These instructions are really complex and barely used - * (Modula-2 and Pascal set support, I believe). Leave them until - * later. 
*/ - case op_set: helper_function(".unimplemented_set"); break; - case op_ior: helper_function(".unimplemented_ior"); break; - + case op_and: helper_function(".and"); break; + case op_ior: helper_function(".ior"); break; + case op_xor: helper_function(".xor"); break; + case op_com: helper_function(".com"); break; + case op_cms: helper_function(".cms"); break; + case op_set: helper_function(".set"); break; + case op_inn: helper_function(".inn"); break; case op_dch: push( @@ -670,6 +707,31 @@ static void simple_alu2(int opcode, int size, int irop, const char* fallback) } } +static void rotate(int opcode, int size, int irop, int irop_reverse) +{ + if (size > (2*EM_wordsize)) + fatal("treebuilder: can't do opcode %s with size %d", em_mnem[opcode - sp_fmnem], size); + else + { + struct ir* right = pop(size); + struct ir* left = pop(size); + struct ir* bits = new_wordir(8 * size); + + /* a rol b -> (a << b) | (a >> (32 - b)) */ + push( + new_ir2( + IR_OR, size, + new_ir2(irop, size, left, right), + new_ir2( + irop_reverse, size, + left, + new_ir2(IR_SUB, size, bits, right) + ) + ) + ); + } +} + static struct ir* extract_block_refs(struct basicblock* bb) { struct ir* outir = NULL; @@ -720,26 +782,28 @@ static struct ir* ptradd(struct ir* address, int offset) ); } -static void blockmove(struct ir* dest, struct ir* src, struct ir* size) +static struct ir* walk_static_chain(int level) { - /* memmove stack: ( size src dest -- ) */ - push(size); - push(src); - push(dest); + struct ir* ir; - materialise_stack(); - appendir( - new_ir1( - IR_CALL, 0, - new_labelir("memmove") - ) - ); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_wordir(EM_pointersize*2 + EM_wordsize) - ) + /* The static chain, when it exists, is the first argument of each + * procedure. The chain begins with the current frame at level 0, + * and continues until we reach the outermost procedure. 
*/ + ir = new_ir0( + IR_GETFP, EM_pointersize ); + while (level--) + { + /* Walk to the next frame pointer. */ + ir = load( + EM_pointersize, + new_ir1( + IR_FPTOAB, EM_pointersize, + ir + ), 0 + ); + } + return ir; } static void insn_ivalue(int opcode, arith value) @@ -765,8 +829,10 @@ static void insn_ivalue(int opcode, arith value) case op_and: simple_alu2(opcode, value, IR_AND, ".and"); break; case op_ior: simple_alu2(opcode, value, IR_OR, ".ior"); break; - case op_xor: simple_alu2(opcode, value, IR_EOR, NULL); break; + case op_xor: simple_alu2(opcode, value, IR_EOR, ".xor"); break; case op_com: simple_alu1(opcode, value, IR_NOT, ".com"); break; + case op_rol: rotate(opcode, value, IR_LSL, IR_LSR); break; + case op_ror: rotate(opcode, value, IR_LSR, IR_LSL); break; case op_adf: simple_alu2(opcode, value, IR_ADDF, NULL); break; case op_sbf: simple_alu2(opcode, value, IR_SUBF, NULL); break; @@ -774,12 +840,23 @@ static void insn_ivalue(int opcode, arith value) case op_dvf: simple_alu2(opcode, value, IR_DIVF, NULL); break; case op_ngf: simple_alu1(opcode, value, IR_NEGF, NULL); break; - case op_cmu: /* fall through */ - case op_cms: push(tristate_compare(value, IR_COMPAREUI)); break; + case op_cms: + if (value > (2*EM_wordsize)) + { + push(new_wordir(value)); + helper_function(".cms"); + break; + } + /* fall through */ + case op_cmu: push(tristate_compare(value, IR_COMPAREUI)); break; case op_cmi: push(tristate_compare(value, IR_COMPARESI)); break; case op_cmf: push(tristate_compare(value, IR_COMPAREF)); break; - case op_rck: helper_function(".rck"); break; + case op_rck: + if (value != EM_wordsize) + fatal("'rck %d' not supported", value); + helper_function(".rck"); + break; case op_set: push(new_wordir(value)); helper_function(".set"); break; case op_inn: push(new_wordir(value)); helper_function(".inn"); break; @@ -930,26 +1007,24 @@ static void insn_ivalue(int opcode, arith value) if (value > (EM_wordsize*2)) { - /* We're going to need to do multiple stores; 
fix the address + /* We're going to need to do multiple loads; fix the address * so it'll go into a register and we can do maths on it. */ appendir(ptr); } + /* Stack grows down. Load backwards. */ while (value > 0) { int s = EM_wordsize*2; if (value < s) s = value; - + value -= s; push( load( s, - ptr, offset + ptr, value ) ); - - value -= s; - offset += s; } assert(value == 0); @@ -1099,7 +1174,12 @@ static void insn_ivalue(int opcode, arith value) case op_dup: { sequence_point(); - if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize)) + if (value > (2*EM_wordsize)) + { + push(new_wordir(value)); + helper_function(".dus4"); + } + else if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize)) { struct ir* v1 = pop(EM_wordsize); struct ir* v2 = pop(EM_wordsize); @@ -1117,12 +1197,30 @@ static void insn_ivalue(int opcode, arith value) break; } + case op_dus: + { + if (value != EM_wordsize) + fatal("'dus %d' not supported", value); + helper_function(".dus4"); + break; + } + case op_exg: { - struct ir* v1 = pop(value); - struct ir* v2 = pop(value); - push(v1); - push(v2); + if (value > (2*EM_wordsize)) + { + push( + new_wordir(value) + ); + helper_function(".exg"); + } + else + { + struct ir* v1 = pop(value); + struct ir* v2 = pop(value); + push(v1); + push(v2); + } break; } @@ -1285,53 +1383,19 @@ static void insn_ivalue(int opcode, arith value) } case op_lxl: - { - struct ir* ir; - - /* Walk the static chain. */ - - ir = new_ir0( - IR_GETFP, EM_pointersize + push( + walk_static_chain(value) ); - - while (value--) - { - ir = new_ir1( - IR_CHAINFP, EM_pointersize, - ir - ); - } - - push(ir); break; - } case op_lxa: - { - struct ir* ir; - - /* Walk the static chain. 
*/ - - ir = new_ir0( - IR_GETFP, EM_pointersize - ); - - while (value--) - { - ir = new_ir1( - IR_CHAINFP, EM_pointersize, - ir - ); - } - push( new_ir1( IR_FPTOAB, EM_pointersize, - ir + walk_static_chain(value) ) ); break; - } case op_fef: { @@ -1394,6 +1458,7 @@ static void insn_ivalue(int opcode, arith value) break; case 1: + materialise_stack(); push( appendir( new_ir0( @@ -1403,10 +1468,6 @@ static void insn_ivalue(int opcode, arith value) ); break; - case 2: - helper_function(".unimplemented_lor_2"); - break; - default: fatal("'lor %d' not supported", value); } @@ -1436,10 +1497,6 @@ static void insn_ivalue(int opcode, arith value) ); break; - case 2: - helper_function(".unimplemented_str_2"); - break; - default: fatal("'str %d' not supported", value); } @@ -1448,100 +1505,27 @@ static void insn_ivalue(int opcode, arith value) } case op_blm: - { - /* Input stack: ( src dest -- ) */ - struct ir* dest = pop(EM_pointersize); - struct ir* src = pop(EM_pointersize); - blockmove(dest, src, new_wordir(value)); + push(new_wordir(value)); + helper_function(".bls4"); break; - } case op_bls: - { - /* Input stack: ( src dest size -- ) */ - struct ir* dest = pop(EM_pointersize); - struct ir* src = pop(EM_pointersize); - struct ir* size = pop(EM_wordsize); - blockmove(dest, src, size); + if (value != EM_wordsize) + fatal("'bls %d' not supported", value); + helper_function(".bls4"); break; - } case op_los: - { - /* Copy an arbitrary amount to the stack. 
*/ - struct ir* bytes = pop(EM_wordsize); - struct ir* address = pop(EM_pointersize); - - materialise_stack(); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_ir1( - IR_NEG, EM_wordsize, - bytes - ) - ) - ); - - push( - new_ir0( - IR_GETSP, EM_pointersize - ) - ); - push(address); - push(bytes); - materialise_stack(); - appendir( - new_ir1( - IR_CALL, 0, - new_labelir("memcpy") - ) - ); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_wordir(EM_pointersize*2 + EM_wordsize) - ) - ); + if (value != EM_wordsize) + fatal("'los %d' not supported", value); + helper_function_with_arg(".los4", pop(EM_wordsize)); break; - } case op_sts: - { - /* Copy an arbitrary amount from the stack. */ - struct ir* bytes = pop(EM_wordsize); - struct ir* dest = pop(EM_pointersize); - struct ir* src; - - materialise_stack(); - src = appendir( - new_ir0( - IR_GETSP, EM_pointersize - ) - ); - - push(dest); - push(src); - push(bytes); - materialise_stack(); - appendir( - new_ir1( - IR_CALL, 0, - new_labelir("memcpy") - ) - ); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_ir2( - IR_ADD, EM_wordsize, - new_wordir(EM_pointersize*2 + EM_wordsize), - bytes - ) - ) - ); + if (value != EM_wordsize) + fatal("'sts %d' not supported", value); + helper_function_with_arg(".sts4", pop(EM_wordsize)); break; - } case op_lin: { @@ -1677,17 +1661,17 @@ static void insn_lvalue(int opcode, const char* label, arith offset) case op_gto: { - struct ir* descriptor = pop(EM_pointersize); + struct ir* descriptor = address_of_external(label, offset); appendir( new_ir1( - IR_SETSP, EM_pointersize, + IR_SETFP, EM_pointersize, load(EM_pointersize, descriptor, EM_pointersize*2) ) ); appendir( new_ir1( - IR_SETFP, EM_pointersize, + IR_SETSP, EM_pointersize, load(EM_pointersize, descriptor, EM_pointersize*1) ) ); diff --git a/mach/proto/ncg/subr.c b/mach/proto/ncg/subr.c index 0feb54f30..0dc045973 100644 --- a/mach/proto/ncg/subr.c +++ b/mach/proto/ncg/subr.c @@ -518,7 +518,7 @@ 
int split(token_p tp, int *ip, int ply, int toplevel) { int tpl; for (cp=c2coercs;cp->c2_texpno>=0; cp++) { - if (!match(tp,&machsets[cp->c2_texpno],0)) + if (!match(tp,&machsets[cp->c2_texpno],cp->c2_expr)) continue; ok=1; for (i=0; ok && ic2_nsplit;i++) { diff --git a/man/powerpc_as.6 b/man/powerpc_as.6 index 8198d6bce..f6bb90818 100644 --- a/man/powerpc_as.6 +++ b/man/powerpc_as.6 @@ -1,33 +1,136 @@ -.TH POWERPC_AS 1 +.TH POWERPC_AS 1 2018-03-07 .ad .SH NAME powerpc_as \- assembler for PowerPC - .SH SYNOPSIS as [options] argument ... - .SH DESCRIPTION This assembler is made with the general framework described in \fIuni_ass\fP(6). - +.PP +It can assemble the instructions from Book I and Book II of PowerPC +version 2.01. +This includes the branch, integer, and floating point instructions +from Book I; and the cache, synchronization, and time base +instructions from Book II. +.PP +There is no support for other instructions, such as supervisor-mode +instructions or vector instructions. +There is some support for 64-bit integer instructions, but the +assembler only has 32-bit symbols. .SH SYNTAX -Most 32-bit integer and floating point instructions are supported, but not many -short form instructions. Instructions which take 16-bit operands can additionally -use the following special functions: - -.IP hi16[value], ha16[value] -Returns the high half of the value of the expression; if the value is not absolute, -also generates the appropriate fixup. Use of either of these \fImust\fR be followed, -in the next instruction, by the corresponding use of \fBlo16[]\fR. Use \fBhi16[]\fR -if the low half is going to interpret its payload as an unsigned value, and -\fBha16[]\fR if it will be interpreted as a signed value (so that the high half can -be adjusted to match). - -.IP lo16[] -Returns the low half of the value of the expression. No fixup is generated. Use of -\fBlo16[]\fR must come in the instruction immediately after a use of \fBhi16[]\fR or -\fBha16[]\fR. 
- +.SS general purpose registers +There are 32 GPRs from \fBr0\fP to \fBr31\fP. +In this assembler, \fBsp\fP is an alias for \fBr1\fP, and \fBfp\fP is +an alias for \fBr2\fP, because \fIack\fP uses r1 as the stack pointer +and r2 as the frame pointer. +Other compilers don't use r2 as the frame pointer. +.PP +GPR syntax requires a register name, not a number. +For example, \(oqaddi\ r5,\ r4,\ 1\(cq works, but +\(oqaddi\ 5,\ 4,\ 1\(cq is a syntax error. +.PP +Certain instructions ignore the contents of \fBr0\fP and use zero. +This happens when using r0 as the second operand of \fIaddi\fP or +\fIaddis\fP, or when addressing \(oqexpr(r0)\(cq or +\(oqr0,\ gpr\(cq. +The syntax is still the name r0, not the number 0. +.SS floating point registers +There are 32 FPRs from \fBf0\fP to \fBf31\fP. +Each FPR has 64 bits and can hold a single-precision or +double-precision number. +FPR syntax requires a register name, not a number. +.SS special purpose registers +The three named SPRs are \fBctr\fP (count register), \fBlr\fP (link +register), and \fBxer\fP (exception register). +\(oqmfspr\(cq and \(oqmtspr\(cq allow these names or a number. +.SS condition register +There is a 32-bit condition register, where bit 0 is most significant, +and bit 31 is least significant. +This gets split into 8 registers of 4 bits each, from \fBcr0\fP (with +bits 0 to 3) to \fBcr7\fP (with bits 28 to 31). +Some instructions use the names cr0 to cr7, others use a bit numbered +0 to 31, and others use all 32 bits. +.SS addressing modes +\(oqexpr(gpr)\(cq addresses \fIexpr\fP + the contents of \fIgpr\fP, +except that \(oqexpr(r0)\(cq addresses \fIexpr\fP\ +\ 0. +A few instructions, like \(oqstwu\(cq, also update \fIgpr\fP by +setting it to the address. +.PP +\(oqgprA,\ gprB\(cq in certain instructions addresses the contents of +\fIgprA\fP + the contents of \fIgprB\fP, except that \(oqr0,\ gprB\(cq +addresses 0\ +\ the contents of \fIgprB\fP. 
+.SS 16-bit operands +Some instructions have a 16-bit operand. +This can be a bare \fIexpr\fP (which must fit signed or unsigned +16 bits), or it can be one of these special functions: +.IP "hi16[expr], ha16[expr]" +Returns the high half of the 32-bit value of the expression. +If the low half is negative (from 0x8000 to 0xffff), +then \fBha16[]\fP adjusts the high half by adding 1. +Use \fBhi16[]\fP if the instruction with \fBlo16[]\fP is going to +interpret its operand as an unsigned value, or \fBha16[]\fP if it will +interpret it as signed. +.IP +If \fIexpr\fP is not absolute, then the assembler must generate a +fixup for the linker. +The fixup only works if the instruction is +\(oqaddis gpr, r0, hx16[expr]\(cq or \(oqlis gpr, hx16[expr]\(cq. +.IP lo16[expr] +Returns the low half of the 32-bit value of the expression. +.SS short forms +Some instructions have short forms using extended mnemonics (or +simplified mnemonics) like \fIli\fP, \fIsrwi\fP, and many others. +.IP "li r6, 789" +is short for: addi r6, r0, 789 +.IP "srwi r3, r4, 2" +is short for: rlwinm r3, r4, 30, 2, 31 +.PP +This assembler doesn't support extended mnemonics with branch +prediction, such as \fIblt+\fP or \fIbne-\fP. +It always parses \(oq+\(cq and \(oq-\(cq as operators, +never as part of a mnemonic. +.SH EXAMPLES +There are two ways to load r3 with _symbol\ =\ 0x1234abcd. +One way is +.PP +.nf + lis r3, hi16[_symbol] + ori r3, r3, lo16[_symbol] ! r3 = 0x12340000 | 0x0000abcd +.fi +.PP +The other way is +.PP +.nf + lis r3, ha16[_symbol] + addi r3, r3, lo16[_symbol] ! r3 = 0x12350000 + 0xffffabcd +.fi +.PP +The next code adds 1 to a global variable. +.PP +.nf + lis r3, ha16[_var] + lwz r4, lo16[_var](r3) + addi r4, r4, 1 + stw r4, lo16[_var](r3) +.fi .SH "SEE ALSO" uni_ass(6), ack(1) +.PP +Freescale Semiconductor, \fIProgramming Environments Manual for 32-Bit +Implementations of the PowerPC Architecture\fP, Rev. 3, September 2005. 
+.PP +IBM, \fIPowerPC User Instruction Set Architecture, Book I\fP, Version +2.01, September 2003. +.PP +IBM, \fIPowerPC Virtual Environment Architecture, Book II\fP, Version +2.01, December 2003. +.SH CAVEATS +Beware that not every processor can run every instruction. +The 32-bit processors can't run 64-bit instructions like \fIlwa\fP, +\fIstd\fP, and \fIfctid\fP. +The PowerPC 601 can't run \fIstfiwx\fP, nor \fIfres\fP, \fIfrsqrte\fP, +\fIfsel\fP. +Many models, like the PowerPC G4, can't run \fIfsqrt\fP nor +\fIfsqrts\fP. diff --git a/modules/src/em_code/insert.c b/modules/src/em_code/insert.c index 36950c3ea..00c628dcb 100644 --- a/modules/src/em_code/insert.c +++ b/modules/src/em_code/insert.c @@ -99,20 +99,19 @@ C_out_parts(pp) } else { /* copy the chunk to output */ -#ifdef INCORE - register char *s = C_BASE + pp->pp_begin; - char *se = C_BASE + pp->pp_end; - - while (s < se) { - put(*s++); - } -#else register long b = pp->pp_begin; while (b < pp->pp_end) { +#ifdef INCORE + /* C_BASE is not constant, put() may + move C_BASE, so each iteration of + this loop must read C_BASE again. + */ + put(C_BASE[b++]); +#else put(getbyte(b++)); - } #endif + } } prev = pp; pp = pp->pp_next; diff --git a/modules/src/object/wr_ranlib.c b/modules/src/object/wr_ranlib.c index 91274d71c..b515ffb3b 100644 --- a/modules/src/object/wr_ranlib.c +++ b/modules/src/object/wr_ranlib.c @@ -10,16 +10,27 @@ wr_ranlib(fd, ran, cnt1) struct ranlib *ran; long cnt1; { - { - register long cnt = cnt1; - register struct ranlib *r = ran; - register char *c = (char *) r; + struct ranlib *r; + long cnt, val; + char *c; - while (cnt--) { - put4(r->ran_off,c); c += 4; - put4(r->ran_pos,c); c += 4; - r++; - } + /* + * We overwrite the structs in r with the bytes in c, so we + * don't need to allocate another buffer. + * + * put4(r->ran_off, c) can fail if r->ran_off and c overlap in + * memory, if this is a big-endian machine. 
It tries to swap + * the bytes from big to little endian, but overwrites some + * bytes before reading them. To prevent this, we must copy + * each value before we overwrite it. + */ + r = ran; + c = (char *)r; + cnt = cnt1; + while (cnt--) { + val = r->ran_off; put4(val, c); c += 4; + val = r->ran_pos; put4(val, c); c += 4; + r++; } wr_bytes(fd, (char *) ran, cnt1 * SZ_RAN); } diff --git a/modules/src/print/doprnt.c b/modules/src/print/doprnt.c index a77b7d2c1..1d888e570 100644 --- a/modules/src/print/doprnt.c +++ b/modules/src/print/doprnt.c @@ -16,7 +16,7 @@ %d = int $ */ void -doprnt(File *fp, char *fmt, va_list argp) +doprnt(File *fp, const char *fmt, va_list argp) { char buf[SSIZE]; diff --git a/modules/src/print/format.c b/modules/src/print/format.c index 2ad920bc8..e03717918 100644 --- a/modules/src/print/format.c +++ b/modules/src/print/format.c @@ -35,7 +35,7 @@ integral(int c) %d = int $ */ int -_format(char *buf, char *fmt, va_list argp) +_format(char *buf, const char *fmt, va_list argp) { register char *pf = fmt; register char *pb = buf; diff --git a/modules/src/print/fprint.c b/modules/src/print/fprint.c index c401858a9..6b5b8a389 100644 --- a/modules/src/print/fprint.c +++ b/modules/src/print/fprint.c @@ -17,7 +17,7 @@ $ */ /*VARARGS*/ void -fprint(File *fp, char *fmt, ...) +fprint(File *fp, const char *fmt, ...) { va_list args; char buf[SSIZE]; diff --git a/modules/src/print/print.c b/modules/src/print/print.c index cd9346e98..2e1256a54 100644 --- a/modules/src/print/print.c +++ b/modules/src/print/print.c @@ -17,7 +17,7 @@ $ */ /*VARARGS*/ void -print(char *fmt, ...) +print(const char *fmt, ...) 
{ va_list args; char buf[SSIZE]; diff --git a/modules/src/print/print.h b/modules/src/print/print.h index 56372376a..974e4bf1b 100644 --- a/modules/src/print/print.h +++ b/modules/src/print/print.h @@ -9,10 +9,10 @@ #include -void print(char *fmt, ...); -void fprint(File *f, char *fmt, ...); -void doprnt(File *f, char *fmt, va_list ap); -int _format(char *buf, char *fmt, va_list ap); -char *sprint(char *buf, char *fmt, ...); +void print(const char *fmt, ...); +void fprint(File *f, const char *fmt, ...); +void doprnt(File *f, const char *fmt, va_list ap); +int _format(char *buf, const char *fmt, va_list ap); +char *sprint(char *buf, const char *fmt, ...); #endif /* __PRINT_INCLUDED__ */ diff --git a/modules/src/print/sprint.c b/modules/src/print/sprint.c index d88b47e69..7c9dbf9b0 100644 --- a/modules/src/print/sprint.c +++ b/modules/src/print/sprint.c @@ -17,7 +17,7 @@ $ */ /*VARARGS*/ char * -sprint(char *buf, char *fmt, ...) +sprint(char *buf, const char *fmt, ...) { va_list args; diff --git a/plat/linux/libsys/errno.s b/plat/linux/libsys/errno.s deleted file mode 100644 index 550fd6d7c..000000000 --- a/plat/linux/libsys/errno.s +++ /dev/null @@ -1,28 +0,0 @@ -# -! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/errno.s,v $ -! $State: Exp $ -! $Revision: 1.1 $ - -! Declare segments (the order is important). - -.sect .text -.sect .rom -.sect .data -.sect .bss - -#define D(e) .define e; e - -.sect .data - -! Define various ACK error numbers. Note that these are *not* ANSI C -! errnos, and are used for different purposes. 
- -D(ERANGE) = 1 -D(ESET) = 2 -D(EIDIVZ) = 6 -D(EHEAP) = 17 -D(EILLINS) = 18 -D(EODDZ) = 19 -D(ECASE) = 20 -D(EBADMON) = 25 - diff --git a/plat/linux/libsys/syscalls.h b/plat/linux/libsys/syscalls.h index 19d5543c6..8bddcc0ee 100644 --- a/plat/linux/libsys/syscalls.h +++ b/plat/linux/libsys/syscalls.h @@ -174,6 +174,12 @@ #define __NR_mremap 163 #define __NR_setresuid 164 #define __NR_getresuid 165 + +/* + * i386, m68020, powerpc use different numbers after 165. + * This file only has the numbers for i386. + */ +#if defined(__i386) #define __NR_vm86 166 #define __NR_query_module 167 #define __NR_poll 168 @@ -324,5 +330,6 @@ #define concat(x, y) x##y #define MAPPED_SYSCALL(p, n) .define concat(p,n); concat(p,n): xor eax, eax; movb al, concat(__NR_,n); jmp __mapped_syscall +#endif /* __i386 */ #endif diff --git a/plat/linux386/libsys/build.lua b/plat/linux386/libsys/build.lua index a4d2d7447..7de7b4061 100644 --- a/plat/linux386/libsys/build.lua +++ b/plat/linux386/libsys/build.lua @@ -6,6 +6,7 @@ acklibrary { "plat/linux/libsys/*.s", }, deps = { + "plat/linux/libsys/*.h", "lang/cem/libcc.ansi/headers+headers", "plat/linux386/include+headers", }, diff --git a/plat/linux386/libsys/trapno.s b/plat/linux386/libsys/trapno.s new file mode 100644 index 000000000..4996de338 --- /dev/null +++ b/plat/linux386/libsys/trapno.s @@ -0,0 +1,13 @@ +#define D(e) .define e; e + +! Define various EM trap numbers needed by mach/i386/libem. +! Note that these are *not* ANSI C errnos. 
+ +D(ERANGE) = 1 +D(ESET) = 2 +D(EIDIVZ) = 6 +D(EHEAP) = 17 +D(EILLINS) = 18 +D(EODDZ) = 19 +D(ECASE) = 20 +D(EBADMON) = 25 diff --git a/plat/linux68k/libsys/build.lua b/plat/linux68k/libsys/build.lua index ded71cdd1..c17436517 100644 --- a/plat/linux68k/libsys/build.lua +++ b/plat/linux68k/libsys/build.lua @@ -6,6 +6,7 @@ acklibrary { "plat/linux/libsys/*.s", }, deps = { + "plat/linux/libsys/*.h", "lang/cem/libcc.ansi/headers+headers", "plat/linux68k/include+headers", }, diff --git a/plat/linuxppc/boot.s b/plat/linuxppc/boot.s index 2da5dd556..33b2abd61 100644 --- a/plat/linuxppc/boot.s +++ b/plat/linuxppc/boot.s @@ -32,7 +32,7 @@ begtext: lwz r3, 0(sp) ! r3 = argc addi r4, sp, 4 ! r4 = argv - rlwinm r5, r3, 32-2, 2, 31 ! shift left 2 bits + srwi r5, r3, 2 add r5, r5, r4 addi r5, r5, 8 ! r5 = env diff --git a/plat/linuxppc/descr b/plat/linuxppc/descr index 1bbb9fbd9..7f6f8fc02 100644 --- a/plat/linuxppc/descr +++ b/plat/linuxppc/descr @@ -19,7 +19,7 @@ var PLATFORM=linuxppc var PLATFORMDIR={EM}/share/ack/{PLATFORM} var CPP_F=-D__unix var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x10000054 -var MACHOPT_F=-m3 +var MACHOPT_F=-m2 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr # Override the setting in fe so that files compiled for linuxppc can see diff --git a/plat/linuxppc/libsys/_syscall.s b/plat/linuxppc/libsys/_syscall.s index c7e818830..f60423bea 100644 --- a/plat/linuxppc/libsys/_syscall.s +++ b/plat/linuxppc/libsys/_syscall.s @@ -12,17 +12,8 @@ .sect .text -EINVAL = 22 +#define EINVAL 22 -#define IFFALSE 4 -#define IFTRUE 12 -#define ALWAYS 20 - -#define LT 0 -#define GT 1 -#define EQ 2 -#define OV 3 - ! Perform a Linux system call. .define __syscall @@ -32,21 +23,21 @@ __syscall: lwz r4, 8(sp) lwz r5, 12(sp) sc 0 - bclr IFFALSE, OV, 0 - + bnslr + ! On error, r3 contains the errno. ! It just so happens that errnos 1-34 are the same in Linux as in ACK. 
- cmpi cr0, 0, r3, 1 - bc IFTRUE, LT, 2f - cmpi cr0, 0, r3, 34 - bc IFTRUE, GT, 2f - + cmpwi r3, 1 + blt 2f + cmpwi r3, 34 + bgt 2f + 3: - li32 r4, _errno - stw r3, 0(r4) - addi r3, r0, -1 - bclr ALWAYS, 0, 0 - + lis r4, ha16[_errno] + stw r3, lo16[_errno](r4) + li r3, -1 + blr + 2: - addi r3, r0, EINVAL + li r3, EINVAL b 3b diff --git a/plat/linuxppc/libsys/build.lua b/plat/linuxppc/libsys/build.lua index f7b16b378..f58df16ea 100644 --- a/plat/linuxppc/libsys/build.lua +++ b/plat/linuxppc/libsys/build.lua @@ -4,12 +4,10 @@ acklibrary { "./_syscall.s", "./sigaction.s", "./signal.c", - "./trap.s", "plat/linux/libsys/_exit.c", "plat/linux/libsys/_hol0.s", "plat/linux/libsys/close.c", "plat/linux/libsys/creat.c", - "plat/linux/libsys/errno.s", "plat/linux/libsys/execve.c", "plat/linux/libsys/getpid.c", "plat/linux/libsys/gettimeofday.c", @@ -26,6 +24,7 @@ acklibrary { "plat/linux/libsys/write.c", }, deps = { + "plat/linux/libsys/*.h", "lang/cem/libcc.ansi/headers+headers", "plat/linuxppc/include+headers", }, diff --git a/plat/linuxppc/libsys/sigaction.s b/plat/linuxppc/libsys/sigaction.s index 0509c8e72..1b1cea24a 100644 --- a/plat/linuxppc/libsys/sigaction.s +++ b/plat/linuxppc/libsys/sigaction.s @@ -1,156 +1,194 @@ #define __NR_sigaction 67 -#define SIG_BLOCK 0 +#define __NR_sigprocmask 126 #define SIG_SETMASK 2 -#define MAXSIG 32 -/* offsets into our stack frame */ -#define mynew 16 /* new sigaction */ -#define mynset 32 /* new signal set */ -#define myoset 36 /* old signal set */ -#define mysave 40 -#define mysize 56 +/* offsets into struct sigaction */ +#define sa_handler 0 /* in union with sa_sigaction */ +#define sa_mask 4 +#define sa_flags 8 +#define sa_restorer 12 + +/* offsets from stack pointer */ +#define mynewact 16 /* struct sigaction */ +#define myoldact 32 +#define newmask 64 /* signal set */ +#define oldmask 68 +#define oldhandler 72 +#define myret 76 +#define savelr 80 +#define signum 84 /* first argument */ +#define newact 88 +#define oldact 92 
.sect .text; .sect .rodata; .sect .data; .sect .bss /* * Linux calls signal handlers with arguments in registers, but the * ACK expects arguments on the stack. This sigaction() uses a - * "bridge" to move the arguments. + * "bridge" to move the arguments, but + * + * - If the caller passes a bad pointer, this sigaction() causes + * SIGBUS or SIGSEGV instead of setting errno = EFAULT. + * + * - This sigaction() only works with signals 1 to 31, not with + * real-time signals 32 to 64. + * + * - This sigaction() is not safe for multiple threads. + * + * int sigaction(int signum, const struct sigaction *newact, + * struct sigaction *oldact); */ .sect .text .define _sigaction _sigaction: mflr r0 - subi r1, r1, mysize - stw r31, mysave+8(r1) - stw r30, mysave+4(r1) - stw r29, mysave(r1) - stw r0, mysave+12(r1) - li r3, 0 - stw r3, mynset(r1) ! mynset = 0 - lwz r29, mysize(r1) ! r29 = signal number - lwz r30, mysize+4(r1) ! r30 = new action - lwz r31, mysize+8(r1) ! r31 = old action + li r3, __NR_sigprocmask + stwu r3, -signum(sp) /* keep 0(sp) = __NR_sigprocmask */ + stw r0, savelr(sp) + + /* Copy newact to stack (before blocking SIGBUS, SIGSEGV). */ + lwz r3, newact(sp) + mr. r3, r3 + beq 1f /* skip if newact == NULL */ + lwz r4, sa_handler(r3) + lwz r5, sa_mask(r3) + lwz r6, sa_flags(r3) + lwz r7, sa_restorer(r3) + stw r4, mynewact+sa_handler(sp) + stw r5, mynewact+sa_mask(sp) + stw r6, mynewact+sa_flags(sp) + stw r7, mynewact+sa_restorer(sp) + /* - * If the new action is non-NULL, the signal number is in - * range 1 to MAXSIG, and the new handler is not SIG_DFL 0 - * or SIG_IGN 1, then we interpose our bridge. + * Block all signals to prevent a race. After we set sharray, + * we must call the kernel's sigaction before the next signal + * handler runs. This prevents two problems: + * + * - The bridge might call the new handler while the kernel + * uses the mask and flags of the old handler. + * + * - The signal handler might call sigaction() and destroy + * sharray. 
We must block all signals because any signal + * handler might call sigaction() for our signal. */ - cmpwi cr0, r30, 0 - subi r7, r29, 1 ! r7 = index in handlers - cmplwi cr7, r7, MAXSIG ! unsigned comparison - beq cr0, kernel - bge cr7, kernel - lwz r3, 0(r30) ! r3 = new handler - clrrwi. r3, r3, 1 - beq cr0, kernel - /* - * Block the signal while we build the bridge. Prevents a - * race if a signal arrives after we change the bridge but - * before we change the action in the kernel. - */ - li r4, 1 - slw r4, r4, r7 - stw r4, mynset(r1) ! mynmask = 1 << (signal - 1) - li r3, SIG_BLOCK - la r4, mynset(r1) - la r5, myoset(r1) - stw r3, 0(r1) - stw r4, 4(r1) - stw r5, 8(r1) - bl _sigprocmask - /* - * Point our bridge to the new signal handler. Then copy the - * new sigaction but point it to our bridge. - */ - lis r6, hi16[handlers] - ori r6, r6, lo16[handlers] - subi r7, r29, 1 - slwi r7, r7, 2 - lwz r3, 0(r30) ! r3 = new handler - stwx r3, r6, r7 ! put it in array of handlers - lis r3, hi16[bridge] - ori r3, r3, lo16[bridge] - lwz r4, 4(r30) - lwz r5, 8(r30) - lwz r6, 12(r30) - stw r3, mynew(r1) ! sa_handler or sa_sigaction - stw r4, mynew+4(r1) ! sa_mask - stw r5, mynew+8(r1) ! sa_flags - stw r6, mynew+12(r1) ! sa_restorer - la r30, mynew(r1) -kernel: - li r3, __NR_sigaction - stw r3, 0(r1) - stw r29, 4(r1) - stw r30, 8(r1) - stw r31, 12(r1) +1: li r4, SIG_SETMASK + li r5, -1 /* mask signals 1 to 32 */ + stw r5, newmask(sp) + la r5, newmask(sp) + la r6, oldmask(sp) + stw r4, 4(sp) /* kept 0(sp) = __NR_sigprocmask */ + stw r5, 8(sp) + stw r6, 12(sp) bl __syscall + /* - * If we blocked the signal, then restore the old signal mask. + * If the signal number is in range 1 to 31, and the new + * handler is not SIG_DFL 0 or SIG_IGN 1, then we interpose + * our bridge. 
*/ - lwz r3, mynset(r1) - cmpwi cr0, r3, 0 - beq cr0, fixold - li r3, SIG_SETMASK - la r4, myoset(r1) - li r5, 0 - stw r3, 0(r1) - stw r4, 4(r1) - stw r5, 8(r1) - bl _sigprocmask - /* - * If the old sigaction is non-NULL and points to our bridge, - * then point it to the signal handler. - */ -fixold: - cmpwi cr0, r31, 0 - beq cr0, leave - lis r3, hi16[bridge] - ori r3, r3, lo16[bridge] - lwz r4, 0(r31) - cmpw cr0, r3, r4 - bne cr0, leave - lis r6, hi16[handlers] - ori r6, r6, lo16[handlers] - subi r7, r29, 1 - slwi r7, r7, 2 - lwzx r3, r6, r7 ! get it from array of handlers - stw r3, 0(r31) ! put it in old sigaction -leave: - lwz r0, mysave+12(r1) - lwz r29, mysave(r1) - lwz r30, mysave+4(r1) - lwz r31, mysave+8(r1) - addi r1, r1, mysize + lwz r4, signum(sp) /* keep r4 = signum */ + addi r5, r4, -1 + cmplwi r5, 30 + bgt 2f /* skip if out of range */ + + slwi r5, r5, 2 /* r5 = sharray index */ + lis r6, ha16[sharray] + la r6, lo16[sharray](r6) /* r6 = sharray */ + lwzx r0, r6, r5 + stw r0, oldhandler(sp) /* remember old handler */ + lwz r0, newact(sp) + mr. r0, r0 + beq 2f /* skip if newact == NULL */ + + lwz r3, mynewact+sa_handler(sp) + cmplwi r3, 2 /* r3 = new handler */ + blt 2f /* skip if SIG_DFL or SIG_IGN */ + + stwx r3, r6, r5 /* put new handler in sharray */ + lis r3, ha16[sigbridge] + la r3, lo16[sigbridge](r3) + stw r3, mynewact+sa_handler(sp) + + /* Call the kernel's sigaction. */ + /* sigaction(signum, &mynewact or NULL, &myoldact or NULL) */ +2: li r3, __NR_sigaction + lwz r0, newact(sp) + mr. r0, r0 + beq 3f + la r5, mynewact(sp) + b 4f +3: li r5, 0 +4: lwz r0, oldact(sp) + mr. r0, r0 + beq 5f + la r6, myoldact(sp) + b 6f +5: li r6, 0 +6: stw r3, 0(sp) + stw r4, 4(sp) /* kept r4 = signum */ + stw r5, 8(sp) + stw r6, 12(sp) + bl __syscall + stw r3, myret(sp) + + /* Unblock signals by restoring old signal mask. 
*/ + li r3, __NR_sigprocmask + li r4, SIG_SETMASK + la r5, oldmask(sp) + li r6, 0 + stw r3, 0(sp) + stw r4, 4(sp) + stw r5, 8(sp) + stw r6, 12(sp) + bl __syscall + + /* Copy oldact from stack (after unblocking BUS, SEGV). */ + lwz r3, oldact(sp) + mr. r3, r3 + beq 8f /* skip if oldact == NULL */ + lwz r4, myoldact+sa_handler(sp) + lis r5, ha16[sigbridge] + la r5, lo16[sigbridge](r5) + cmpw r4, r5 + bne 7f + lwz r4, oldhandler(sp) +7: lwz r5, myoldact+sa_mask(sp) + lwz r6, myoldact+sa_flags(sp) + lwz r7, myoldact+sa_restorer(sp) + stw r4, sa_handler(r3) + stw r5, sa_mask(r3) + stw r6, sa_flags(r3) + stw r7, sa_restorer(r3) + +8: lwz r0, savelr(sp) + lwz r3, myret(sp) + addi sp, sp, signum mtlr r0 - blr ! return from sigaction + blr /* - * Linux calls bridge(signum) or bridge(signum, info, context) with - * arguments in registers r3, r4, r5. + * Linux calls sigbridge(signum) or sigbridge(signum, info, context) + * with arguments in registers r3, r4, r5. */ -bridge: +sigbridge: mflr r0 - subi r1, r1, 16 + stwu r3, -16(sp) /* signal number */ + stw r4, 4(sp) /* info */ + stw r5, 8(sp) /* context */ stw r0, 12(r1) - stw r3, 0(r1) ! signal number - stw r4, 4(r1) ! info - stw r5, 8(r1) ! context - lis r6, hi16[handlers] - ori r6, r6, lo16[handlers] - subi r7, r3, 1 - slwi r7, r7, 2 + lis r6, hi16[sharray - 4] + la r6, lo16[sharray - 4](r6) + slwi r7, r3, 2 lwzx r6, r6, r7 mtctr r6 - bctrl ! call our signal handler + bctrl /* call our signal handler */ - lwz r0, 12(r1) + lwz r0, 12(sp) addi r1, r1, 16 mtlr r0 - blr ! return from bridge + blr /* sigreturn(2) */ .sect .bss -handlers: - .space 4 * MAXSIG ! array of signal handlers +sharray: + .space 4 * 31 /* handlers for signals 1 to 31 */ diff --git a/plat/linuxppc/libsys/trap.s b/plat/linuxppc/libsys/trap.s deleted file mode 100644 index 93c5189a4..000000000 --- a/plat/linuxppc/libsys/trap.s +++ /dev/null @@ -1,112 +0,0 @@ -# -! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/_syscall.s,v $ -! $State: Exp $ -! 
$Revision: 1.1 $ - -! Declare segments (the order is important). - -.sect .text -.sect .rom -.sect .data -.sect .bss - -.sect .text - -#define IFFALSE 4 -#define IFTRUE 12 -#define ALWAYS 20 - -#define LT 0 -#define GT 1 -#define EQ 2 -#define OV 3 - -EARRAY = 0 -ERANGE = 1 -ESET = 2 -EIOVFL = 3 -EFOVFL = 4 -EFUNFL = 5 -EIDIVZ = 6 -EFDIVZ = 7 -EIUND = 8 -EFUND = 9 -ECONV = 10 -ESTACK = 16 -EHEAP = 17 -EILLINS = 18 -EODDZ = 19 -ECASE = 20 -EMEMFLT = 21 -EBADPTR = 22 -EBADPC = 23 -EBADLAE = 24 -EBADMON = 25 -EBADLIN = 26 -EBADGTO = 27 -EUNIMPL = 63 ! unimplemented em-instruction called - -! EM trap handling. - -.define .trap_ecase -.trap_ecase: - addi r3, r0, ECASE - b .trap - -.define .trap_earray -.trap_earray: - addi r3, r0, EARRAY - b .trap - -.define .trap_erange -.trap_erange: - addi r3, r0, ERANGE - b .trap - -.define .trp -.define .trap -.trp: -.trap: - cmpi cr0, 0, r3, 15 ! traps >15 can't be ignored - bc IFTRUE, LT, 1f - - addi r4, r0, 1 - rlwnm r4, r4, r3, 0, 31 ! calculate trap bit - li32 r5, .ignmask - lwz r5, 0(r5) ! load ignore mask - and. r4, r4, r5 ! compare - bclr IFFALSE, EQ, 0 ! return if non-zero - -1: - li32 r4, .trppc - lwz r5, 0(r4) ! load user trap routine - or. r5, r5, r5 ! test - bc IFTRUE, EQ, fatal ! if no user trap routine, bail out - - addi r0, r0, 0 - stw r0, 0(r4) ! reset trap routine - - mfspr r0, lr - stwu r0, -4(sp) ! save old lr - - stwu r3, -4(sp) - mtspr ctr, r5 - bcctrl ALWAYS, 0, 0 ! call trap routine - - lwz r0, 4(sp) ! load old lr again - addi sp, sp, 8 ! retract over stack usage - bclr ALWAYS, 0, 0 ! return - -fatal: - addi r3, r0, 1 - li32 r4, message - addi r5, r0, 6 - addi r0, r0, 4 ! write() - sc 0 - - addi r0, r0, 1 ! exit() - sc 0 - -.sect .rom -message: - .ascii "TRAP!\n" diff --git a/plat/osx386/boot.s b/plat/osx386/boot.s index 932a716e9..c10045dd6 100644 --- a/plat/osx386/boot.s +++ b/plat/osx386/boot.s @@ -58,8 +58,6 @@ begdata: .sect .bss begbss: -.define hol0 -.comm hol0, 8 ! 
line number and filename (for debugging) .define _errno .comm _errno, 4 ! Posix errno storage diff --git a/plat/osx386/libsys/build.lua b/plat/osx386/libsys/build.lua index 23e491f7a..3c2e96c3a 100644 --- a/plat/osx386/libsys/build.lua +++ b/plat/osx386/libsys/build.lua @@ -19,7 +19,8 @@ acklibrary { "./sigaction.s", "./stat.s", "./write.s", - "plat/linux/libsys/errno.s", + "plat/linux/libsys/_hol0.s", + "plat/linux386/libsys/trapno.s", "plat/osx/libsys/brk.c", "plat/osx/libsys/creat.c", "plat/osx/libsys/isatty.c", diff --git a/plat/osxppc/boot.s b/plat/osxppc/boot.s index e96198eb4..8b1b7ab75 100644 --- a/plat/osxppc/boot.s +++ b/plat/osxppc/boot.s @@ -29,7 +29,7 @@ begtext: lwz r3, 0(sp) ! r3 = argc addi r4, sp, 4 ! r4 = argv - rlwinm r5, r3, 32-2, 2, 31 ! shift left 2 bits + srwi r5, r3, 2 add r5, r5, r4 addi r5, r5, 8 ! r5 = env @@ -49,8 +49,6 @@ begdata: .sect .bss begbss: -.define hol0 -.comm hol0, 8 ! line number and filename (for debugging) .define _errno .comm _errno, 4 ! Posix errno storage diff --git a/plat/osxppc/descr b/plat/osxppc/descr index 5f416c44c..77fc45260 100644 --- a/plat/osxppc/descr +++ b/plat/osxppc/descr @@ -10,16 +10,17 @@ var l={w} var la={w} var f={w} var fa={w} +# Size 8 has alignment 4 in Mac OS, 8 in Linux. 
var d=8 -var da={d} +var da=4 var x=8 -var xa={x} +var xa=4 var ARCH=powerpc var PLATFORM=osxppc var PLATFORMDIR={EM}/share/ack/{PLATFORM} var CPP_F=-D__unix var ALIGN=-a0:4 -a1:4 -a2:4096 -a3:4 -b0:0x129c -var MACHOPT_F=-m3 +var MACHOPT_F=-m2 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr # Override the setting in fe so that files compiled for osxppc can see diff --git a/plat/osxppc/libsys/build.lua b/plat/osxppc/libsys/build.lua index 072730b7a..cff10f29b 100644 --- a/plat/osxppc/libsys/build.lua +++ b/plat/osxppc/libsys/build.lua @@ -19,7 +19,7 @@ acklibrary { "./sigaction.s", "./stat.s", "./write.s", - "plat/linuxppc/libsys/trap.s", + "plat/linux/libsys/_hol0.s", "plat/osx/libsys/brk.c", "plat/osx/libsys/creat.c", "plat/osx/libsys/isatty.c", diff --git a/plat/osxppc/libsys/set_errno.s b/plat/osxppc/libsys/set_errno.s index e406865a6..beb124a7c 100644 --- a/plat/osxppc/libsys/set_errno.s +++ b/plat/osxppc/libsys/set_errno.s @@ -1,7 +1,7 @@ .sect .text .define .set_errno .set_errno: - li32 r10, _errno - stw r3, 0(r10) ! set errno - addi r3, r0, -1 ! return -1 - bclr 20, 0, 0 + lis r4, ha16[_errno] + stw r3, lo16[_errno](r4) ! set errno + li r3, -1 ! return -1 + blr diff --git a/plat/qemuppc/descr b/plat/qemuppc/descr index f5191b249..9d1a80427 100644 --- a/plat/qemuppc/descr +++ b/plat/qemuppc/descr @@ -19,11 +19,8 @@ var PLATFORM=qemuppc var PLATFORMDIR={EM}/share/ack/{PLATFORM} var CPP_F=-D__unix var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x01000000 -var C_LIB={PLATFORMDIR}/libc-ansi.a -# bitfields reversed for compatibility with (g)cc. -var CC_ALIGN=-Vr -var OLD_C_LIB={C_LIB} -var MACHOPT_F= +var MACHOPT_F=-m2 +var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr # Override the setting in fe so that files compiled for qemuppc can see # the platform-specific headers. diff --git a/plat/qemuppc/libsys/trap.s b/plat/qemuppc/libsys/trap.s deleted file mode 100644 index e00c4d561..000000000 --- a/plat/qemuppc/libsys/trap.s +++ /dev/null @@ -1,65 +0,0 @@ -# -! 
$Source: /cvsroot/tack/Ack/plat/linux386/libsys/_syscall.s,v $ -! $State: Exp $ -! $Revision: 1.1 $ - -! Declare segments (the order is important). - -.sect .text -.sect .rom -.sect .data -.sect .bss - -.sect .text - -#define IFFALSE 4 -#define IFTRUE 12 -#define ALWAYS 20 - -#define LT 0 -#define GT 1 -#define EQ 2 -#define OV 3 - -EARRAY = 0 -ERANGE = 1 -ESET = 2 -EIOVFL = 3 -EFOVFL = 4 -EFUNFL = 5 -EIDIVZ = 6 -EFDIVZ = 7 -EIUND = 8 -EFUND = 9 -ECONV = 10 -ESTACK = 16 -EHEAP = 17 -EILLINS = 18 -EODDZ = 19 -ECASE = 20 -EMEMFLT = 21 -EBADPTR = 22 -EBADPC = 23 -EBADLAE = 24 -EBADMON = 25 -EBADLIN = 26 -EBADGTO = 27 -EUNIMPL = 63 ! unimplemented em-instruction called - -.define .trap_ecase -.trap_ecase: - b .trp - -.define .trap_earray -.trap_earray: - b .trp - -.define .trap_erange -.trap_erange: - b .trap - -.define .trp -.define .trap -.trp: -.trap: - b .trp ! spin forever diff --git a/tests/plat/_dummy_e.c b/tests/plat/_dummy_e.c index 48104b5aa..39262eaaa 100644 --- a/tests/plat/_dummy_e.c +++ b/tests/plat/_dummy_e.c @@ -1,6 +1,6 @@ #include "test.h" -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT(0 == 0); diff --git a/tests/plat/bss_e.c b/tests/plat/bss_e.c new file mode 100644 index 000000000..547e7f7f1 --- /dev/null +++ b/tests/plat/bss_e.c @@ -0,0 +1,27 @@ +#include "test.h" + +/* + * EM puts these variables in BSS. Their initial values must be zero. + * Some platforms, like Linux, clear the BSS before they run the + * program. For other platforms, like pc86, we clear the BSS in + * boot.s before we call _m_a_i_n. + */ +char c; +int array[9000]; +short s; + +/* Bypasses the CRT, so there's no stdio. 
*/ +void _m_a_i_n(void) +{ + int bad, i; + + ASSERT(c == 0); + bad = 0; + for (i = 0; i < sizeof(array) / sizeof(array[0]); i++) { + if(array[i]) + bad++; + } + ASSERT(bad == 0); + ASSERT(s == 0); + finished(); +} diff --git a/tests/plat/bugs/bug-62-notvar_var_e.c b/tests/plat/bugs/bug-62-notvar_var_e.c index d3813bb91..cde84eed1 100644 --- a/tests/plat/bugs/bug-62-notvar_var_e.c +++ b/tests/plat/bugs/bug-62-notvar_var_e.c @@ -40,7 +40,7 @@ void c(int i, int tru, int fal) { ASSERT((i != i) == fal); } -/* Bypasses the CRT. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { a(); b(); diff --git a/tests/plat/build.lua b/tests/plat/build.lua index 0d3091559..26676b0b1 100644 --- a/tests/plat/build.lua +++ b/tests/plat/build.lua @@ -9,12 +9,14 @@ definerule("plat_testsuite", -- Remember this is executed from the caller's directory; local -- target names will resolve there. local testfiles = filenamesof( + -- added structcopy_e.c "tests/plat/*.c", "tests/plat/*.e", "tests/plat/*.p", "tests/plat/b/*.b", - "tests/plat/bugs/bug-22-inn_mod.mod", - "tests/plat/bugs/bug-62-notvar_var_e.c" + "tests/plat/bugs/*.c", + "tests/plat/bugs/*.mod", + "tests/plat/m2/*.mod" ) acklibrary { diff --git a/tests/plat/csa_e.c b/tests/plat/csa_e.c index 355b75ee7..470fbebc5 100644 --- a/tests/plat/csa_e.c +++ b/tests/plat/csa_e.c @@ -11,7 +11,7 @@ int csa(int i) } } -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT(csa(0) == 0); @@ -23,4 +23,4 @@ void _m_a_i_n(void) ASSERT(csa(6) == 0); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/csb_e.c b/tests/plat/csb_e.c index c86d31fa6..38ce05402 100644 --- a/tests/plat/csb_e.c +++ b/tests/plat/csb_e.c @@ -11,7 +11,7 @@ int csa(int i) } } -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. 
*/ void _m_a_i_n(void) { ASSERT(csa(0) == 0); @@ -23,4 +23,4 @@ void _m_a_i_n(void) ASSERT(csa(600) == 0); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/doublecmp_e.c b/tests/plat/doublecmp_e.c index f6c1582dc..b6fe5bbad 100644 --- a/tests/plat/doublecmp_e.c +++ b/tests/plat/doublecmp_e.c @@ -4,7 +4,7 @@ double one = 1.0; double zero = 0.0; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT(zero == zero); @@ -17,4 +17,4 @@ void _m_a_i_n(void) ASSERT(one >= one); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/dup_e.e b/tests/plat/dup_e.e new file mode 100644 index 000000000..649589d84 --- /dev/null +++ b/tests/plat/dup_e.e @@ -0,0 +1,139 @@ +# + mes 2, EM_WSIZE, EM_PSIZE + +/* + * Tests _dup_ and _dus_ by loading 20 bytes from _src_, then making + * and checking some duplicates. The compilers might never _dup_ or + * _dus_ with large sizes, so the compilers might work even if this + * test fails. You can cheat this test if _cms_ always pushes zero. + */ + + exa src + exa size +src + con 3593880729I4, 782166578I4, 4150666996I4, 2453272937I4, 3470523049I4 +size + con 20I2 + + exp $check + exp $_m_a_i_n + pro $_m_a_i_n, 0 + + /* Push 3 copies of src on stack. */ + lae src + loi 20 /* 1st copy */ + dup 20 /* 2nd copy */ + lae size + loi 2 + loc 2 + loc EM_WSIZE + cuu + dus EM_WSIZE /* 3rd copy */ + + cal $check + cal $finished + end /* $_m_a_i_n */ + + pro $check, 4 * EM_PSIZE + EM_WSIZE +#define p1 (-1 * EM_PSIZE) +#define p2 (-2 * EM_PSIZE) +#define p3 (-3 * EM_PSIZE) +#define p4 (-4 * EM_PSIZE) +#define i (p4 - EM_WSIZE) + + /* Set pointers to all 4 copies. */ + lae src + lal p4 + sti EM_PSIZE /* p4 = src */ + lal 0 + lal p3 + sti EM_PSIZE /* p3 = 3rd copy */ + lal 20 + lal p2 + sti EM_PSIZE /* p2 = 2nd copy */ + lal 40 + lal p1 + sti EM_PSIZE /* p1 = 1st copy */ + + /* Loop 20 times to verify each byte. 
*/ + loc 0 + stl i +4 + lal p4 + loi EM_PSIZE + loi 1 /* byte from src */ + lal p3 + loi EM_PSIZE + loi 1 /* byte from 3rd copy */ + cms EM_WSIZE + zeq *3 + loc (3 * 256) + lol i + adi EM_WSIZE /* 0x300 + i */ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +3 + lal p4 + loi EM_PSIZE + loi 1 /* byte from src */ + lal p2 + loi EM_PSIZE + loi 1 /* byte from 2nd copy */ + cms EM_WSIZE + zeq *2 + loc (2 * 256) + lol i + adi EM_WSIZE /* 0x200 + i */ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +2 + lal p4 + loi EM_PSIZE + loi 1 /* byte from src */ + lal p1 + loi EM_PSIZE + loi 1 /* byte from 1st copy */ + cms EM_WSIZE + zeq *1 + loc (1 * 256) + lol i + adi EM_WSIZE /* 0x100 + i */ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +1 + lal p4 + loi EM_PSIZE + adp 1 + lal p4 + sti EM_PSIZE /* increment p4 */ + lal p3 + loi EM_PSIZE + adp 1 + lal p3 + sti EM_PSIZE /* increment p3 */ + lal p2 + loi EM_PSIZE + adp 1 + lal p2 + sti EM_PSIZE /* increment p2 */ + lal p1 + loi EM_PSIZE + adp 1 + lal p1 + sti EM_PSIZE /* increment p1 */ + inl i + lol i + loc 20 + blt *4 /* loop 20 times */ + + ret 0 + end /* $check */ diff --git a/tests/plat/exg_e.e b/tests/plat/exg_e.e new file mode 100644 index 000000000..455256483 --- /dev/null +++ b/tests/plat/exg_e.e @@ -0,0 +1,83 @@ +# + mes 2, EM_WSIZE, EM_PSIZE + +/* + * Tests _exg_ by loading 40 bytes from _src_, then exchanging 20 and + * 20 bytes, and checking the result. The compilers might never _exg_ + * large sizes, so the compilers might work even if this test fails. + * You can cheat this test if _cms_ always pushes zero. 
+ */ + + exa src +src + con 1539465570I4, 1344465418I4, 1317578918I4, 1163467696I4, 2645261331I4 + con 3981585269I4, 1433968975I4, 4256886989I4, 4114909542I4, 1817334375I4 + + exp $check + exp $_m_a_i_n + pro $_m_a_i_n, 0 + + lae src + loi 40 + exg 20 + cal $check + cal $finished + end /* $_m_a_i_n */ + + pro $check, 2 * EM_PSIZE + EM_WSIZE +#define p1 (-1 * EM_PSIZE) +#define p2 (-2 * EM_PSIZE) +#define i (p2 - EM_WSIZE) + + lae src + lal p2 + sti EM_PSIZE /* p2 = src */ + lal 0 + adp 20 + lal p1 + sti EM_PSIZE /* p1 = exchanged copy + 20 */ + + /* Loop 40 times to verify each byte. */ + loc 0 + stl i +1 + lal p2 + loi EM_PSIZE + loi 1 /* byte from src */ + lal p1 + loi EM_PSIZE + loi 1 /* byte from exchanged copy */ + cms EM_WSIZE + zeq *2 + lol i + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +2 + lal p2 + loi EM_PSIZE + adp 1 + lal p2 + sti EM_PSIZE /* increment p2 */ + lal p1 + loi EM_PSIZE /* p1 */ + inl i + /* When i reaches 20, p1 would reach end of exchanged copy. */ + lol i + loc 20 + beq *3 + adp 1 /* p1 + 1 */ + bra *4 +3 + adp -39 /* p1 - 39, beginning of exchanged copy */ +4 + lal p1 + sti EM_PSIZE /* move p1 */ + lol i + loc 40 + blt *1 + + ret 0 + end /* $check */ \ No newline at end of file diff --git a/tests/plat/from_d_to_si_e.c b/tests/plat/from_d_to_si_e.c index 7f51e6c5b..bc06c755c 100644 --- a/tests/plat/from_d_to_si_e.c +++ b/tests/plat/from_d_to_si_e.c @@ -8,7 +8,7 @@ double minusone = -1.0; double big = (double)INT_MAX; double minusbig = (double)INT_MIN; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. 
*/ void _m_a_i_n(void) { ASSERT((int)zero == 0); @@ -18,4 +18,4 @@ void _m_a_i_n(void) ASSERT((int)minusbig == INT_MIN); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/from_d_to_ui_e.c b/tests/plat/from_d_to_ui_e.c index 811780b87..7d18ca9e5 100644 --- a/tests/plat/from_d_to_ui_e.c +++ b/tests/plat/from_d_to_ui_e.c @@ -6,7 +6,7 @@ double one = 1.0; double zero = 0.0; double big = (double)UINT_MAX; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT((unsigned int)zero == 0); @@ -14,4 +14,4 @@ void _m_a_i_n(void) ASSERT((unsigned int)big == UINT_MAX); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/from_si_to_d_e.c b/tests/plat/from_si_to_d_e.c index b6c7a25ba..172361dfa 100644 --- a/tests/plat/from_si_to_d_e.c +++ b/tests/plat/from_si_to_d_e.c @@ -8,7 +8,7 @@ int minusone = -1; int big = INT_MAX; int minusbig = INT_MIN; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT((double)zero == 0.0); @@ -18,4 +18,4 @@ void _m_a_i_n(void) /* ASSERT((double)minusbig == (double)INT_MIN); FIXME: fails for now */ finished(); -} \ No newline at end of file +} diff --git a/tests/plat/from_ui_to_d_e.c b/tests/plat/from_ui_to_d_e.c index b8e017c99..383d9afad 100644 --- a/tests/plat/from_ui_to_d_e.c +++ b/tests/plat/from_ui_to_d_e.c @@ -6,7 +6,7 @@ unsigned int one_u = 1; unsigned int zero_u = 0; unsigned int big_u = UINT_MAX; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. 
*/ void _m_a_i_n(void) { ASSERT((double)zero_u == 0.0); @@ -14,4 +14,4 @@ void _m_a_i_n(void) ASSERT((double)big_u == (double)UINT_MAX); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/inn_e.e b/tests/plat/inn_e.e index a5aee02f5..543623b3f 100644 --- a/tests/plat/inn_e.e +++ b/tests/plat/inn_e.e @@ -14,6 +14,9 @@ zeq *1 loc __LINE__ + loc EM_WSIZE + loc 4 + cuu cal $fail ass EM_WSIZE 1 @@ -31,6 +34,9 @@ zne *2 loc __LINE__ + loc EM_WSIZE + loc 4 + cuu cal $fail ass EM_WSIZE 2 @@ -49,6 +55,9 @@ zeq *3 loc __LINE__ + loc EM_WSIZE + loc 4 + cuu cal $fail ass EM_WSIZE 3 @@ -67,11 +76,12 @@ zne *4 loc __LINE__ + loc EM_WSIZE + loc 4 + cuu cal $fail ass EM_WSIZE 4 cal $finished - ret 0 - end diff --git a/tests/plat/intadd_e.c b/tests/plat/intadd_e.c index 8e4868a62..94549814c 100644 --- a/tests/plat/intadd_e.c +++ b/tests/plat/intadd_e.c @@ -6,7 +6,7 @@ int one = 1; int zero = 0; int minusone = -1; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT((two + one) == 3); @@ -28,4 +28,4 @@ void _m_a_i_n(void) ASSERT(((unsigned int)-1 + (unsigned int)two) == 1); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/intcmp_e.c b/tests/plat/intcmp_e.c index dd7f1da75..72cfc06b1 100644 --- a/tests/plat/intcmp_e.c +++ b/tests/plat/intcmp_e.c @@ -4,7 +4,7 @@ int one = 1; int zero = 0; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT(zero == zero); @@ -62,4 +62,4 @@ void _m_a_i_n(void) ASSERT((unsigned int)1 >= (unsigned int)one); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/intdiv_e.c b/tests/plat/intdiv_e.c index c90964ced..cab76cdad 100644 --- a/tests/plat/intdiv_e.c +++ b/tests/plat/intdiv_e.c @@ -6,7 +6,7 @@ int two = 2; int one = 1; int zero = 0; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. 
*/ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT((three / two) == 1); @@ -25,4 +25,4 @@ void _m_a_i_n(void) ASSERT((3 / -two) == -1); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/intrem_e.c b/tests/plat/intrem_e.c index 40f68d654..424152106 100644 --- a/tests/plat/intrem_e.c +++ b/tests/plat/intrem_e.c @@ -6,7 +6,7 @@ int two = 2; int one = 1; int zero = 0; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT((three % two) == 1); @@ -25,4 +25,4 @@ void _m_a_i_n(void) ASSERT((3 % -two) == 1); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/intshift_e.c b/tests/plat/intshift_e.c index 3cc6d52f9..08ef05ca1 100644 --- a/tests/plat/intshift_e.c +++ b/tests/plat/intshift_e.c @@ -6,7 +6,7 @@ int one = 1; int zero = 0; int minusone = -1; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT((one <>(unsigned int)1) == (UINT_MAX>>1)); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/intsub_e.c b/tests/plat/intsub_e.c index d8f67d3a3..b0cf08ae6 100644 --- a/tests/plat/intsub_e.c +++ b/tests/plat/intsub_e.c @@ -7,7 +7,7 @@ int one = 1; int zero = 0; int minusone = -1; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. 
*/ void _m_a_i_n(void) { ASSERT((two - one) == 1); @@ -29,4 +29,4 @@ void _m_a_i_n(void) ASSERT(((unsigned int)1 - (unsigned int)two) == UINT_MAX); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/lib/test.c b/tests/plat/lib/test.c index 426f9944a..6df3ee7d5 100644 --- a/tests/plat/lib/test.c +++ b/tests/plat/lib/test.c @@ -5,7 +5,7 @@ void finished(void) { static const char s[] = "@@FINISHED\n"; - write(1, s, sizeof(s)); + write(1, s, sizeof(s)-1); _exit(0); } @@ -16,7 +16,7 @@ void writehex(uint32_t code) do { - *--p = "0123456789abcdef"[code & 0xf]; + *--p = "0123456789abcdef"[(unsigned int)code & 0xf]; code >>= 4; } while (code > 0); diff --git a/tests/plat/m2/ConvTest_mod.mod b/tests/plat/m2/ConvTest_mod.mod new file mode 100644 index 000000000..9fa828af0 --- /dev/null +++ b/tests/plat/m2/ConvTest_mod.mod @@ -0,0 +1,36 @@ +MODULE ConvTest; +FROM Conversions IMPORT + ConvertOctal, ConvertHex, ConvertCardinal, ConvertInteger; +FROM Strings IMPORT CompareStr; +FROM Test IMPORT fail, finished; + +(* Asserts a = b, or fails with code. *) +PROCEDURE A(a, b: ARRAY OF CHAR; code: INTEGER); +BEGIN + IF (CompareStr(a, b) # 0) OR (CompareStr(a, "wrong string") = 0) THEN + fail(code) + END +END A; + +VAR + str: ARRAY [0..15] OF CHAR; +BEGIN + ConvertOctal( 9, 6, str); A(" 11", str, 1); + ConvertOctal( 59, 6, str); A(" 73", str, 2); + ConvertOctal(278, 6, str); A(" 426", str, 3); + + ConvertHex( 9, 6, str); A(" 9", str, 11H); + ConvertHex( 59, 6, str); A(" 3B", str, 12H); + ConvertHex(278, 6, str); A(" 116", str, 13H); + + ConvertCardinal( 9, 6, str); A(" 9", str, 21H); + ConvertCardinal( 59, 6, str); A(" 59", str, 22H); + ConvertCardinal(278, 6, str); A(" 278", str, 23H); + + ConvertInteger( 9, 6, str); A(" 9", str, 31H); + ConvertInteger( 59, 6, str); A(" 59", str, 32H); + ConvertInteger( 278, 6, str); A(" 278", str, 33H); + ConvertInteger(-424, 6, str); A(" -424", str, 34H); + + finished; +END ConvTest. 
diff --git a/tests/plat/m2/NestProc_mod.mod b/tests/plat/m2/NestProc_mod.mod new file mode 100644 index 000000000..d46731f55 --- /dev/null +++ b/tests/plat/m2/NestProc_mod.mod @@ -0,0 +1,132 @@ +(* + * Calls nested procedures. The compiler emits the EM instructions + * _lxl_ and _lxa_ to access the variables in the statically enclosing + * procedures. + * + * You can cheat this test if a = b is TRUE for any a, b. + *) +MODULE NestProc; +FROM Test IMPORT fail, finished; + +(* Asserts cond, or fails with code. *) +PROCEDURE A(cond: BOOLEAN; code: INTEGER); +BEGIN + IF NOT cond THEN fail(code) END +END A; + +TYPE + Set8 = SET OF [0..63]; + (* Box has fields of size 8, 4, and 1. *) + Box = RECORD + huge: Set8; + big: LONGINT; + small: CHAR; + tiny: CHAR; + END; + +PROCEDURE First(a, b: INTEGER; in: Box): Box; + VAR c, d: INTEGER; + out: Box; + + PROCEDURE Second(e: INTEGER); + VAR f: INTEGER; + + PROCEDURE Third(g: INTEGER); + VAR h: INTEGER; + + PROCEDURE CheckThird; + BEGIN + A(a = 1354, 31H); (* lxa 3 *) + A(b = 3385, 32H); + A(c = 14349, 33H); (* lxl 3 *) + A(d = 30989, 34H); + A(e = 28935, 35H); (* lxa 2 *) + A(f = 13366, 36H); (* lxl 2 *) + A(g = 7988, 37H); (* lxa 1 *) + A(h = 11711, 38H); (* lxl 1 *) + END CheckThird; + + PROCEDURE Fourth(i: INTEGER); + VAR j: INTEGER; + + PROCEDURE Fifth(k: INTEGER); + VAR l: INTEGER; + + PROCEDURE Sixth(): INTEGER; + BEGIN + A(e = 2, 61H); (* lxa 4 *) + A(f = 11703, 62H); (* lxl 4 *) + + b := 3385; (* lxa 5 *) + d := 30989; (* lxl 5 *) + e := 28935; (* lxl 4 *) + f := 13366; (* lxa 4 *) + CheckThird; + + (* lxa 5 *) + A(in.huge = Set8{11, 12, 40, 40, 43, 56}, 63H); + A(in.big = 2130020019D, 64H); + A(in.small = 300C, 65H); + A(in.tiny = 175C, 66H); + + (* lxl 5 *) + out.huge := Set8{8, 19, 36, 41, 47, 62}; + out.big := 385360915D; + out.small := 366C; + out.tiny := 131C; + + j := k; (* lxl 2, lxa 1 *) + l := i; (* lxl 1, lxa 2 *) + RETURN 5217; + END Sixth; + + PROCEDURE TwiceSixth(): INTEGER; + BEGIN + (* lxa and lxl must 
follow the static chain from Sixth to + * Fifth, not dynamic chain from Sixth to TwiceSixth. *) + RETURN 2 * Sixth(); + END TwiceSixth; + + BEGIN (* Fifth *) + A(TwiceSixth() = 10434, 51H); + A(k = 11567, 51H); + A(l = 32557, 52H); + END Fifth; + + BEGIN (* Fourth *) + Fifth(11567); (* k *) + A(i = 32557, 41H); + A(j = 11567, 42H); + END Fourth; + + BEGIN (* Third *) + h := 11711; + Fourth(32557); (* i *) + END Third; + + BEGIN (* Second *) + f := 11703; + Third(7988); (* g *) + END Second; + +BEGIN (* First *) + c := 14349; + d := 17850; + Second(2); (* e *) + RETURN out +END First; + +VAR + x: Box; +BEGIN + x.huge := Set8{11, 12, 40, 40, 43, 56}; + x.big := 2130020019D; + x.small := 300C; + x.tiny := 175C; + x := First(1354, 19516, x); (* a, b, in *) + A(x.huge = Set8{8, 19, 36, 41, 47, 62}, 71H); + A(x.big = 385360915D, 72H); + A(x.small = 366C, 73H); + A(x.tiny = 131C, 74H); + finished; +END NestProc. diff --git a/tests/plat/m2/OpenArray_mod.mod b/tests/plat/m2/OpenArray_mod.mod new file mode 100644 index 000000000..1aa219a55 --- /dev/null +++ b/tests/plat/m2/OpenArray_mod.mod @@ -0,0 +1,59 @@ +(* + * Passes an open array to a procedure. The back end must implement + * some EM instructions for accessing arrays. + *) +MODULE OpenArray; +FROM Test IMPORT fail, finished; + +(* Asserts condition or fails with code. *) +PROCEDURE A(cond: BOOLEAN; code: INTEGER); +BEGIN + IF NOT cond THEN fail(code) END +END A; + +(* Called as Modify(ary1, 1) or Modify(ary2, 2). *) +PROCEDURE Modify(VAR ary: ARRAY OF INTEGER; what: INTEGER); + VAR hi: INTEGER; +BEGIN + hi := what * 100H; + + (* Indices must be from 0 to HIGH(ary). *) + A((what = 1) = (HIGH(ary) = 3), hi + 1); + A((what = 2) = (HIGH(ary) = 9), hi + 2); + + (* ary[2] must equal ary1[3] or ary2[3]. *) + A((what = 1) = (ary[2] = 13), hi + 3); + A((what = 2) = (ary[2] = 37), hi + 4); + + (* Modify some values. 
*) + IF HIGH(ary) >= 3 THEN ary[3] := 20 END; + IF HIGH(ary) >= 6 THEN ary[6] := 40 END; + IF HIGH(ary) >= 9 THEN ary[9] := 12 END; +END Modify; + +VAR + ary1: ARRAY [1..4] OF INTEGER; + ary2: ARRAY [1..10] OF INTEGER; +BEGIN + (* Initialize the arrays. *) + ary1[1] := 6; ary1[2] := 9; ary1[3] := 13; ary1[4] := 49; + + ary2[1] := 56; ary2[2] := 79; ary2[3] := 37; ary2[4] := 0; + ary2[5] := 70; ary2[6] := 62; ary2[7] := 64; ary2[8] := 92; + ary2[9] := 29; ary2[10] := 90; + + (* Pass them as open arrays. *) + Modify(ary1, 1); + Modify(ary2, 2); + + (* Check that ary1[4], ary2[4, 7, 10] have been modified. *) + A(ary1[1] = 6, 301H); A(ary1[2] = 9, 301H); A(ary1[3] = 13, 303H); + A(ary1[4] = 20, 304H); + + A(ary2[1] = 56, 401H); A(ary2[2] = 79, 402H); A(ary2[3] = 37, 403H); + A(ary2[4] = 20, 404H); A(ary2[5] = 70, 406H); A(ary2[6] = 62, 406H); + A(ary2[7] = 40, 407H); A(ary2[8] = 92, 408H); A(ary2[9] = 29, 409H); + A(ary2[10] = 12, 40AH); + + finished; +END OpenArray. diff --git a/tests/plat/m2/SemaTest_mod.mod b/tests/plat/m2/SemaTest_mod.mod new file mode 100644 index 000000000..9ae395662 --- /dev/null +++ b/tests/plat/m2/SemaTest_mod.mod @@ -0,0 +1,157 @@ +(* + * Generates some integer sequences. Each generator is a process that + * yields integers to the main process. ACK switches processes by + * saving and restoring the stack. It uses _lor_ and _str_ to save + * and restore the local base and frame pointer. 
+ *) +MODULE SemaTest; +FROM Semaphores IMPORT Sema, NewSema, Down, Up, StartProcess; +FROM Storage IMPORT ALLOCATE; +FROM Test IMPORT fail, finished; + +TYPE + Generator = POINTER TO GeneratorRecord; + GeneratorRecord = RECORD + resume: Sema; (* up when resuming generator *) + yield: Sema; (* up when yielding value *) + value: INTEGER; + END; +VAR + curgen: Generator; (* current generator *) + startLock: Sema; (* down when booting generator *) + startProc: PROC; + startSelf: Generator; + +PROCEDURE BootGenerator; + VAR pr: PROC; self: Generator; +BEGIN + pr := startProc; + self := startSelf; + Up(startLock); + Down(self^.resume); (* wait for first Resume *) + pr(); +END BootGenerator; + +PROCEDURE StartGenerator(gen: Generator; pr: PROC); +BEGIN + gen^.resume := NewSema(0); + gen^.yield := NewSema(0); + Down(startLock); + startProc := pr; + startSelf := gen; + StartProcess(BootGenerator, 8192); +END StartGenerator; + +PROCEDURE Resume(gen: Generator): INTEGER; + VAR self: Generator; +BEGIN + self := curgen; + curgen := gen; + Up(gen^.resume); + Down(gen^.yield); (* wait for Yield *) + curgen := self; + RETURN gen^.value +END Resume; + +PROCEDURE Yield(i: INTEGER); + VAR self: Generator; +BEGIN + self := curgen; + self^.value := i; + Up(self^.yield); (* curgen becomes invalid *) + Down(self^.resume); (* wait for Resume *) +END Yield; + +PROCEDURE YieldHalfOf(i: INTEGER); +BEGIN + Yield(i DIV 2); +END YieldHalfOf; + +PROCEDURE Triangular; + (* Yields the triangular numbers, http://oeis.org/A000217 *) + VAR n: INTEGER; +BEGIN + n := 0; + LOOP + YieldHalfOf(n * (n + 1)); + INC(n); + END; +END Triangular; + +PROCEDURE Pentagonal; + (* Yields the pentagonal numbers, http://oeis.org/A000326 *) + VAR n: INTEGER; +BEGIN + n := 0; + LOOP + YieldHalfOf(n * (3 * n - 1)); + INC(n); + END; +END Pentagonal; + +PROCEDURE Odious; + (* Yields the odius numbers, http://oeis.org/A000069 *) + VAR b, i, n: INTEGER; +BEGIN + n := 1; + LOOP + (* b := count bits in n *) + b := 0; + i := 
n; + WHILE i # 0 DO + INC(b, i MOD 2); + i := i DIV 2; + END; + + IF (b MOD 2) = 1 THEN + Yield(n); + END; + INC(n); + END; +END Odious; + +TYPE + Triple = ARRAY[1..3] OF INTEGER; +PROCEDURE T(i1, i2, i3: INTEGER): Triple; + VAR t: Triple; +BEGIN + t[1] := i1; t[2] := i2; t[3] := i3; RETURN t +END T; + +CONST + two28 = 268435456D; (* 0x1000_0000 *) +VAR + a: ARRAY [0..9] OF Triple; + tri, pen, odi: Generator; + i, g1, g2, g3: INTEGER; +BEGIN + startLock := NewSema(1); + + ALLOCATE(tri, SIZE(GeneratorRecord)); + ALLOCATE(pen, SIZE(GeneratorRecord)); + ALLOCATE(odi, SIZE(GeneratorRecord)); + StartGenerator(tri, Triangular); + StartGenerator(pen, Pentagonal); + StartGenerator(odi, Odious); + + a[0] := T( 0, 0, 1); + a[1] := T( 1, 1, 2); + a[2] := T( 3, 5, 4); + a[3] := T( 6, 12, 7); + a[4] := T(10, 22, 8); + a[5] := T(15, 35, 11); + a[6] := T(21, 51, 13); + a[7] := T(28, 70, 14); + a[8] := T(36, 92, 16); + a[9] := T(45, 117, 19); + + FOR i := 0 TO INTEGER(9) DO + g1 := Resume(tri); + g2 := Resume(pen); + g3 := Resume(odi); + IF g1 # a[i][1] THEN fail(1D * two28 + LONG(a[i][1])) END; + IF g2 # a[i][2] THEN fail(2D * two28 + LONG(a[i][2])) END; + IF g3 # a[i][3] THEN fail(3D * two28 + LONG(a[i][3])) END; + END; + finished; +END SemaTest. diff --git a/tests/plat/m2/Set100_mod.mod b/tests/plat/m2/Set100_mod.mod new file mode 100644 index 000000000..3b200d318 --- /dev/null +++ b/tests/plat/m2/Set100_mod.mod @@ -0,0 +1,61 @@ +(* + * Operates on sets of 100 integers. The compiler emits, and the back + * end must implement, the EM instructions for large sets. + *) +MODULE Set100; +FROM Test IMPORT fail, finished; + +(* Asserts condition or fails with code. *) +PROCEDURE A(cond: BOOLEAN; code: INTEGER); +BEGIN + IF NOT cond THEN fail(code) END +END A; + +TYPE + Num = [1..100]; + NumSet = SET OF Num; +VAR + (* VAR, not CONST, so compiler can't do constant operations. 
*) + primes, teens, lowevens, eighties, nineties: NumSet; +CONST + (* These are the expected results of some set operations. *) + primeteen = NumSet{13, 17, 19}; + compeighties = NumSet{80..82, 84..88}; + teenxoreven = NumSet{2, 4, 6, 8, 10, 12, 13, 15, 17, 19, 20}; + eightiesnineties = NumSet{80..99}; + +(* Checks that some set is equal to the expected result. Also checks + * that the set is not equal to the other sets. *) +PROCEDURE Check(set: NumSet; what: INTEGER); + VAR hi: INTEGER; +BEGIN + hi := what * 100H; + + (* The compiler uses cms in EM to check set equality. *) + A((what = 1) = (set = primeteen), hi + 1); + A((what = 2) = (set = compeighties), hi + 2); + A((what = 3) = (set = teenxoreven), hi + 3); + A((what = 4) = (set = eightiesnineties), hi + 4); +END Check; + +PROCEDURE Range(min: Num; max: Num): NumSet; +BEGIN + (* The compiler calls LtoUset in lang/m2/libm2/LtoUset.e *) + RETURN NumSet{min..max} +END Range; + +BEGIN + primes := NumSet{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, + 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97}; + teens := NumSet{13, 14, 15, 16, 17, 18, 19}; + lowevens := NumSet{2, 4, 6, 8, 10, 12, 14, 16, 18, 20}; + eighties := Range(80, 89); + nineties := Range(90, 99); + + Check(primes * teens, 1); + Check(eighties - primes, 2); + Check(teens / lowevens, 3); + Check(eighties + nineties, 4); + + finished; +END Set100. diff --git a/tests/plat/m2/StringTest_mod.mod b/tests/plat/m2/StringTest_mod.mod new file mode 100644 index 000000000..41552aa7a --- /dev/null +++ b/tests/plat/m2/StringTest_mod.mod @@ -0,0 +1,55 @@ +MODULE StringTest; +FROM Strings IMPORT + Assign, Insert, Delete, Pos, Copy, Concat, Length, CompareStr; +FROM Test IMPORT fail, finished; + +(* Asserts condition or fails with code. 
*) +PROCEDURE A(cond: BOOLEAN; code: INTEGER); +BEGIN + IF NOT cond THEN fail(code) END +END A; + +VAR + small: ARRAY [0..3] OF CHAR; + big: ARRAY [0..99] OF CHAR; +BEGIN + (* CompareStr *) + A(CompareStr("ablaze", "ablaze") = 0, 1); + A(CompareStr("ablaze", "abloom") < 0, 2); + A(CompareStr("abloom", "ablaze") > 0, 3); + A(CompareStr("abloom", "abloom") = 0, 4); + + (* Assign, Insert, Delete *) + Assign("obsequiosity", small); + A(CompareStr("obsequiosity", small) > 0, 11H); + Assign("obsequiosity", big); + A(CompareStr("obsequiosity", big) = 0, 12H); + A(big[11] = 'y', 13H); + A(big[11] # 0C, 14H); + A(big[12] # 'y', 15H); + A(big[12] = 0C, 16H); + Insert(" omnihuman", big, 9); + A(CompareStr("obsequios omnihumanity", big) = 0, 17H); + Delete(big, 6, 15); + A(CompareStr("obsequy", big) = 0, 18H); + + (* Pos, Concat *) + Assign("Now is the time for all good men to come...", big); + A(Pos("w", big) = 2, 21H); + A(Pos("t", big) = 7, 22H); + A(Pos("ti", big) = 11, 23H); + A(Pos("men", big) = 29, 24H); + A(Pos("women", big) > 42, 25H); + Copy(big, 29, 2, small); + A(CompareStr("me", small) = 0, 26H); + + (* Concat, Length *) + Concat("pictorial", "ist", big); + A(CompareStr("pictorialist", big) = 0, 31H); + A(Length(big) = 12, 32H); + Concat("zit", "her", small); + A(CompareStr("zither", small) > 0, 33H); + A(Length(small) < 5, 34H); + + finished; +END StringTest. diff --git a/tests/plat/rck_e.e b/tests/plat/rck_e.e new file mode 100644 index 000000000..cd5c581df --- /dev/null +++ b/tests/plat/rck_e.e @@ -0,0 +1,186 @@ +# + mes 2, EM_WSIZE, EM_PSIZE + +/* + * Uses _rck_ for range checks. Catches the EM trap if a value is out + * of range, and continues with the next instruction after _rck_. + * + * Some back ends, like i80, ignore _rck_, so this test fails. 
+ */ + +testnr + con 1 ; test number +caught + con 0 ; number of caught traps + + inp $next + inp $catch + inp $never + exp $_m_a_i_n + pro $_m_a_i_n,0 + + lim ; load ignore mask + loc 2 + and EM_WSIZE ; check bit 1 << ERANGE + zeq *1 ; fail if ignoring ERANGE +.1 + rom 1I4 + lae .1 + loi 4 + cal $fail + asp 4 +1 + + cal $next ; increment testnr, catch next trap + loc 10125 +.2 + rom 4283, 13644 + lae .2 + rck EM_WSIZE ; testnr 2 in range + asp EM_WSIZE + + cal $next + loc 4282 + lae .2 + rck EM_WSIZE ; testnr 3 out of range + asp EM_WSIZE + + cal $next + loc 4283 + lae .2 + rck EM_WSIZE ; testnr 4 in range + asp EM_WSIZE + + cal $next + loc 13644 + lae .2 + rck EM_WSIZE ; testnr 5 in range + asp EM_WSIZE + + cal $next + loc 13655 + lae .2 + rck EM_WSIZE ; testnr 6 out of range + asp EM_WSIZE + + cal $next + loc -13015 +.7 + rom -31344, -1898 + lae .7 + rck EM_WSIZE ; testnr 7 in range + asp EM_WSIZE + + cal $next + loc 8580 +.8 + rom -26315, 4588 + lae .8 + rck EM_WSIZE ; testnr 8 out of range + asp EM_WSIZE + + ; The last test raised a trap, so now there is no trap handler. + lpi $never + sig ; push old trap handler + loc 0 + loc EM_WSIZE + loc EM_PSIZE + cuu ; push NULL pointer + cmp + zeq *17 ; fail unless old handler is NULL +.17 + rom 17I4 + lae .17 + loi 4 + cal $fail + asp 4 +17 + ; Change the trap handler from $never to $catch. + lpi $catch + sig + lpi $never + cmp + zeq *18 +.18 + rom 18I4 + lae .18 + loi 4 + cal $fail + asp 4 +18 + ; Begin ignoring range traps. + loc 2 ; 1 << ERANGE + sim + loc 18 + ste testnr + loc 8580 + lae .8 + rck EM_WSIZE ; testnr 18 out of range but ignored + + ; Fail if we caught the wrong number of traps. 
+ loe caught + loc 3 + beq *20 +.20 + rom 20I4 + lae .20 + loi 4 + cal $fail + asp 4 +20 + cal $finished + end + + pro $next,0 + ine testnr ; next test + lpi $catch + sig ; catch next EM trap (only one trap) + asp EM_PSIZE + ret 0 + end + + pro $catch,0 + ine caught ; count this trap + + lol 0 ; load trap number + loc 1 + beq *1 ; fail if trap != ERANGE +.101 + rom 257I4 + lae .101 + loi 4 + cal $fail + ; Wrong type of trap. _rtt_ might not work, so exit now. + cal $finished +1 + ; Fail if the wrong test raised this trap. + loe testnr + loc 3 + beq *2 + loe testnr + loc 6 + beq *2 + loe testnr + loc 8 + beq *2 + loc 256 + loe testnr + adi EM_WSIZE ; 0x100 + testnr + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +2 + rtt ; return from trap handler + end + + pro $never,0 +.200 + rom 200I4 + lae .200 + loi 4 + cal $fail + asp 4 + rtt + end diff --git a/tests/plat/rotate_e.e b/tests/plat/rotate_e.e new file mode 100644 index 000000000..0698c58a0 --- /dev/null +++ b/tests/plat/rotate_e.e @@ -0,0 +1,223 @@ +# + mes 2, EM_WSIZE, EM_PSIZE + +/* + * Tests _rol_ (rotate left) and _ror_ (rotate right). Several back + * ends provide _rol_ and _ror_, but as of year 2017, the compilers + * and optimizers had never emitted _rol_ or _ror_. + * + * By tradition, _rol_ and _ror_ can't rotate values shorter than the + * word size, or longer than 4 bytes. + * - If word size is 2, then try rotating 2-byte and 4-byte values. + * - If word size is 4, then try rotating 4-byte values. + * + * You can cheat this test if _cmu_ always pushes zero.
+ */ + +#if EM_WSIZE == 2 +#define LEN2 4 + exa table2 + exa left2 + exa right2 +table2 /* left, right */ + con 12715U2 /* 0, 0 */ + con 25430U2 /* 1, 15 */ + con 43825U2 /* 8, 8 */ + con 39125U2 /* 15, 1 */ +left2 + con 0I2, 1I2, 8I2, 15I2 +right2 + con 0I2, 15I2, 8I2, 1I2 +#endif + +#define LEN4 4 + exa table4 + exa left4 + exa right4 +table4 /* left, right */ + con 437223536U4 /* 0, 0 */ + con 874447072U4 /* 1, 31 */ + con 2154830351U4 /* 16, 16 */ + con 218611768U4 /* 31, 1 */ +left4 + con 0I2, 1I2, 16I2, 31I2 +right4 + con 0I2, 31I2, 16I2, 1I2 + + exa val4 + exa val4left7 + exa val4right11 +val4 + con 4283808839U4 +val4left7 + con 2866684927U4 +val4right11 + con 2298473143U4 + + exp $_m_a_i_n + pro $_m_a_i_n, EM_WSIZE +#define i -EM_WSIZE + +#if EM_WSIZE == 2 + /* + * Loop for LEN2 items in table2. + */ + loc 0 + stl i +1 + lae table2 + loi 2 /* value to rotate */ + lae left2 + lol i + loc 1 + sli EM_WSIZE + ads EM_WSIZE + loi 2 /* left distance */ + rol 2 /* rotate left */ + lae table2 + lol i + loc 1 + sli EM_WSIZE + ads EM_WSIZE + loi 2 /* expected result */ + cmu 2 + zeq *2 + loc __LINE__ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +2 + lae table2 + loi 2 /* value to rotate */ + lae right2 + lol i + loc 1 + sli EM_WSIZE + ads EM_WSIZE + loi 2 /* right distance */ + ror 2 /* rotate right */ + lae table2 + lol i + loc 1 + sli EM_WSIZE + ads EM_WSIZE + loi 2 /* expected result */ + cmu 2 + zeq *3 + loc __LINE__ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +3 + inl i /* loop LEN2 times */ + lol i + loc LEN2 + blt *1 +#endif /* EM_WSIZE == 2 */ + + /* + * Loop for LEN4 items in table4. 
+ */ + loc 0 + stl i +4 + lae table4 + loi 4 /* value to rotate */ + lae left4 + lol i + loc 1 + sli EM_WSIZE + ads EM_WSIZE + loi 2 /* left distance */ + loc 2 + loc EM_WSIZE + cii + rol 4 /* rotate left */ + lae table4 + lol i + loc 2 + sli EM_WSIZE + ads EM_WSIZE + loi 4 /* expected result */ + cmu 4 + zeq *5 + loc __LINE__ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +5 + lae table4 + loi 4 /* value to rotate */ + lae right4 + lol i + loc 1 + sli EM_WSIZE + ads EM_WSIZE + loi 2 /* right distance */ + loc 2 + loc EM_WSIZE + cii + ror 4 /* rotate right */ + lae table4 + lol i + loc 2 + sli EM_WSIZE + ads EM_WSIZE + loi 4 /* expected result */ + cmu 4 + zeq *6 + loc __LINE__ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +6 + inl i /* loop LEN4 times */ + lol i + loc LEN4 + blt *4 + + /* + * Rotate 4-byte values by a constant distance, because this uses + * different rules in PowerPC ncg. + */ + lae val4 + loi 4 + loc 7 + rol 4 /* rotate left by 7 bits */ + lae val4left7 + loi 4 + cmu 4 + zeq *7 + loc __LINE__ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +7 + lae val4 + loi 4 + loc 11 + ror 4 /* rotate right by 11 bits */ + lae val4right11 + loi 4 + cmu 4 + zeq *8 + loc __LINE__ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +8 + + cal $finished + end diff --git a/tests/plat/setjmp_c.c b/tests/plat/setjmp_c.c new file mode 100644 index 000000000..2a514a03f --- /dev/null +++ b/tests/plat/setjmp_c.c @@ -0,0 +1,58 @@ +#include <setjmp.h> +#include "test.h" + +/* + * Sets i = 2 * i for each i in nums, until i == 0, but stops if + * 2 * i >= 1000. + * + * Uses setjmp() and longjmp() in libc. For ACK's libc, the back end + * must provide EM's _gto_, and _gto_ must preserve the function + * return area.
+ */ +int nums1[] = { 79, 245, 164, 403, 0}; +const int expect1[] = {158, 490, 328, 806, 0}; +int nums2[] = {20, 221, 411, 643, 48, 272, 448, 0}; +const int expect2[] = {40, 442, 822, 643, 48, 272, 448, 0}; +int nums3[] = {371, 265, 500, 124, 117, 0}; +const int expect3[] = {742, 530, 500, 124, 117, 0}; +int docount = 0; + +int twice(int i, jmp_buf esc) { + if (i >= 500) + longjmp(esc, i); + return 2 * i; +} + +void donums(int *nums, jmp_buf esc) { + int *p; + + docount++; + for (p = nums; *p != 0; p++) { + *p = twice(*p, esc); + } +} + +int cknums(int *nums, const int *expect) { + jmp_buf env; + int ret; + + ret = setjmp(env); + if (ret == 0) + donums(nums, env); + for (;;) { + ASSERT(*nums == *expect); + if (*expect == 0) + break; + nums++; + expect++; + } + return ret; +} + +int main(void) { + ASSERT(cknums(nums1, expect1) == 0); + ASSERT(cknums(nums2, expect2) == 643); + ASSERT(cknums(nums3, expect3) == 500); + ASSERT(docount == 3); + finished(); +} diff --git a/tests/plat/structcopy_e.c b/tests/plat/structcopy_e.c new file mode 100644 index 000000000..74a9e2d30 --- /dev/null +++ b/tests/plat/structcopy_e.c @@ -0,0 +1,113 @@ +#include "test.h" + +/* ACK's C compiler uses EM's loi, sti, blm, or an inline loop to copy + * these structs. The compiler doesn't call memcpy() or other + * functions in libc, so this test passes without linking the CRT. 
+ */ + +struct c5 { /* not a whole number of words */ + char one[5]; +}; + +struct ii { /* two words */ + int one; + int two; +}; + +struct iii { /* three words */ + int one; + int two; + int three; +}; + +int equal5(char *a, char *b) { /* a, b must have 5 characters */ + int i; + + for (i = 0; i < 5; i++) + if (a[i] != b[i]) return 0; + return 1; +} + +struct c5 make_c5(char *str) { /* str must have 5 characters */ + struct c5 out; + int i; + + for (i = 0; i < 5; i++) + out.one[i] = str[i]; + return out; +} + +struct ii make_ii(int i, int j) { + struct ii out; + + out.one = i; + out.two = j; + return out; +} + +struct iii make_iii(struct ii in, int k) { + struct iii out; + + out.one = in.one; + out.two = in.two; + out.three = k; + return out; +} + +struct c5 rotate_left_c5(struct c5 in) { + int i; + char c = in.one[0]; + + /* Modifies our copy of _in_, not caller's copy. */ + for (i = 0; i < 4; i++) + in.one[i] = in.one[i + 1]; + in.one[4] = c; + return in; +} + +struct iii rotate_left_iii(struct iii in) { + int i = in.one; + + /* Modifies our copy of _in_, not caller's copy. */ + in.one = in.two; + in.two = in.three; + in.three = i; + return in; +} + +/* Bypasses the CRT, so there's no stdio. 
*/ +void _m_a_i_n(void) { + struct c5 earth, heart, dup_heart, rol_heart; + struct ii pair, dup_pair; + struct iii triple, dup_triple, rol_triple; + + earth = make_c5("earth"); + heart = make_c5("heart"); + dup_heart = heart; + rol_heart = rotate_left_c5(heart); + ASSERT(equal5(earth.one, "earth")); + ASSERT(equal5(heart.one, "heart")); + ASSERT(equal5(dup_heart.one, "heart")); + ASSERT(equal5(rol_heart.one, "earth")); + + pair = make_ii(29, 31); + dup_pair = pair; + triple = make_iii(pair, -9); + dup_triple = triple; + rol_triple = rotate_left_iii(triple); + ASSERT(pair.one == 29); + ASSERT(pair.two == 31); + ASSERT(dup_pair.one == 29); + ASSERT(dup_pair.two == 31); + ASSERT(triple.one == 29); + ASSERT(triple.two == 31); + ASSERT(triple.three == -9); + ASSERT(dup_triple.one == 29); + ASSERT(dup_triple.two == 31); + ASSERT(dup_triple.three == -9); + ASSERT(rol_triple.one == 31); + ASSERT(rol_triple.two == -9); + ASSERT(rol_triple.three == 29); + + finished(); +} diff --git a/util/ego/build.lua b/util/ego/build.lua index 864447550..78895f508 100644 --- a/util/ego/build.lua +++ b/util/ego/build.lua @@ -3,6 +3,7 @@ local function build_ego(name) name = name, srcs = { "./"..name.."/*.c" }, deps = { + "./"..name.."/*.h", "util/ego/share+lib", "modules/src/em_data+lib", "h+emheaders", diff --git a/util/ego/ca/ca.c b/util/ego/ca/ca.c index 095736665..1bf73d24a 100644 --- a/util/ego/ca/ca.c +++ b/util/ego/ca/ca.c @@ -72,6 +72,7 @@ proc_p* p_out; { /* register message without arguments */ oldline(l); + continue; } else { diff --git a/util/ego/cs/cs.c b/util/ego/cs/cs.c index dfcccbbf7..068ddc3dc 100644 --- a/util/ego/cs/cs.c +++ b/util/ego/cs/cs.c @@ -25,7 +25,7 @@ int Scs; /* Number of optimizations found. 
*/ -STATIC cs_clear() +STATIC void cs_clear() { clr_avails(); clr_entities(); @@ -74,9 +74,7 @@ STATIC void cs_optimize(void *vp) } } -main(argc, argv) - int argc; - char *argv[]; +int main(int argc, char *argv[]) { Scs = 0; go(argc, argv, no_action, cs_optimize, cs_machinit, no_action); diff --git a/util/ego/cs/cs.h b/util/ego/cs/cs.h index c749427a5..7a2ebde7b 100644 --- a/util/ego/cs/cs.h +++ b/util/ego/cs/cs.h @@ -88,12 +88,13 @@ struct occur { #define UNAIR_OP 6 #define BINAIR_OP 7 #define TERNAIR_OP 8 -#define KILL_ENTITY 9 -#define SIDE_EFFECTS 10 -#define FIDDLE_STACK 11 -#define IGNORE 12 -#define HOPELESS 13 -#define BBLOCK_END 14 +#define REMAINDER 9 +#define KILL_ENTITY 10 +#define SIDE_EFFECTS 11 +#define FIDDLE_STACK 12 +#define IGNORE 13 +#define HOPELESS 14 +#define BBLOCK_END 15 struct avail { avail_p av_before; /* Ptr to earlier discovered expressions. */ diff --git a/util/ego/cs/cs_aux.c b/util/ego/cs/cs_aux.c index 337deeda7..aeb582c9b 100644 --- a/util/ego/cs/cs_aux.c +++ b/util/ego/cs/cs_aux.c @@ -11,8 +11,7 @@ #include "cs.h" #include "cs_entity.h" -offset array_elemsize(vn) - valnum vn; +offset array_elemsize(valnum vn) { /* Vn is the valuenumber of an entity that points to * an array-descriptor. The third element of this descriptor holds @@ -36,14 +35,12 @@ offset array_elemsize(vn) return aoff(enp->en_ext->o_dblock->d_values, 2); } -occur_p occ_elem(i) - Lindex i; +occur_p occ_elem(Lindex i) { return (occur_p) Lelem(i); } -entity_p en_elem(i) - Lindex i; +entity_p en_elem(Lindex i) { return (entity_p) Lelem(i); } @@ -54,14 +51,14 @@ entity_p en_elem(i) STATIC valnum val_no; -valnum newvalnum() +valnum newvalnum(void) { /* Return a completely new value number. */ return ++val_no; } -start_valnum() +void start_valnum(void) { /* Restart value numbering. 
*/ diff --git a/util/ego/cs/cs_aux.h b/util/ego/cs/cs_aux.h index 11950540e..1ce9373a0 100644 --- a/util/ego/cs/cs_aux.h +++ b/util/ego/cs/cs_aux.h @@ -3,28 +3,28 @@ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". */ -extern offset array_elemsize(); /* (valnum vm) +extern offset array_elemsize(valnum vm); + /* * Returns the size of array-elements, * if vn is the valuenumber of the * address of an array-descriptor. */ -extern occur_p occ_elem(); /* (Lindex i) +extern occur_p occ_elem(Lindex i); /* * Returns a pointer to the occurrence * of which i is an index in a set. */ -extern entity_p en_elem(); /* (Lindex i) +extern entity_p en_elem(Lindex i); /* * Returns a pointer to the entity * of which i is an index in a set. */ -extern valnum newvalnum(); /* () +extern valnum newvalnum(void); /* * Returns a completely new * value number. */ -extern start_valnum(); /* () +extern void start_valnum(void); /* * Restart value numbering. */ - diff --git a/util/ego/cs/cs_avail.c b/util/ego/cs/cs_avail.c index 1f766a85c..b28cc496a 100644 --- a/util/ego/cs/cs_avail.c +++ b/util/ego/cs/cs_avail.c @@ -22,8 +22,7 @@ avail_p avails; /* The list of available expressions. */ -STATIC bool commutative(instr) - int instr; +STATIC bool commutative(int instr) { /* Is instr a commutative operator? */ @@ -37,9 +36,7 @@ STATIC bool commutative(instr) } } -STATIC bool same_avail(kind, avp1, avp2) - byte kind; - avail_p avp1, avp2; +STATIC bool same_avail(byte kind, avail_p avp1, avail_p avp2) { /* Two expressions are the same if they have the same operator, * the same size, and their operand(s) have the same value. 
@@ -57,6 +54,7 @@ STATIC bool same_avail(kind, avp1, avp2) case UNAIR_OP: return avp1->av_operand == avp2->av_operand; case BINAIR_OP: + case REMAINDER: if (commutative(avp1->av_instr & BMASK)) return avp1->av_oleft == avp2->av_oleft && avp1->av_oright == avp2->av_oright @@ -75,8 +73,7 @@ STATIC bool same_avail(kind, avp1, avp2) /* NOTREACHED */ } -STATIC void check_local(avp) - avail_p avp; +STATIC void check_local(avail_p avp) { /* Check if the local in which the result of avp was stored, * still holds this result. Update if not. @@ -89,9 +86,7 @@ STATIC void check_local(avp) } } -STATIC entity_p result_local(size, l) - offset size; - line_p l; +STATIC entity_p result_local(offset size, line_p l) { /* If the result of an expression of size bytes is stored into a * local for which a registermessage was generated, return a pointer @@ -114,9 +109,7 @@ STATIC entity_p result_local(size, l) return (entity_p) 0; } -STATIC copy_avail(kind, src, dst) - int kind; - avail_p src, dst; +STATIC void copy_avail(int kind, avail_p src, avail_p dst) { /* Copy some attributes from src to dst. */ @@ -132,6 +125,7 @@ STATIC copy_avail(kind, src, dst) dst->av_operand = src->av_operand; break; case BINAIR_OP: + case REMAINDER: dst->av_oleft = src->av_oleft; dst->av_oright = src->av_oright; break; @@ -143,10 +137,7 @@ STATIC copy_avail(kind, src, dst) } } -avail_p av_enter(avp, ocp, kind) - avail_p avp; - occur_p ocp; - int kind; +avail_p av_enter(avail_p avp, occur_p ocp, int kind) { /* Put the available expression avp in the list, * if it is not already there. @@ -171,7 +162,8 @@ avail_p av_enter(avp, ocp, kind) /* Remember local, if any, that holds result. */ if (avp->av_instr != (byte) INSTR(last)) { /* Only possible when instr is the implicit AAR in - * a LAR or SAR. + * a LAR or SAR, or the implicit DVI in an RMI, or + * DVU in RMU. 
*/ ravp->av_saveloc = (entity_p) 0; } else { @@ -186,7 +178,7 @@ avail_p av_enter(avp, ocp, kind) return ravp; } -clr_avails() +void clr_avails(void) { /* Throw away the information about the available expressions. */ diff --git a/util/ego/cs/cs_avail.h b/util/ego/cs/cs_avail.h index a0515353a..3364be2a2 100644 --- a/util/ego/cs/cs_avail.h +++ b/util/ego/cs/cs_avail.h @@ -5,7 +5,8 @@ */ extern avail_p avails; /* The set of available expressions. */ -extern avail_p av_enter(); /* (avail_p avp, occur_p ocp, byte kind) +extern avail_p av_enter(avail_p avp, occur_p ocp, byte kind); + /* * Puts the available expression in avp * in the list of available expressions, * if it is not already there. Add ocp to set of @@ -18,6 +19,7 @@ extern avail_p av_enter(); /* (avail_p avp, occur_p ocp, byte kind) * Returns a pointer into the list. */ -extern clr_avails(); /* Release all space occupied by the old list +extern void clr_avails(void); + /* Release all space occupied by the old list * of available expressions. */ diff --git a/util/ego/cs/cs_debug.c b/util/ego/cs/cs_debug.c index bf43d8c12..3d5509ddc 100644 --- a/util/ego/cs/cs_debug.c +++ b/util/ego/cs/cs_debug.c @@ -11,14 +11,14 @@ #include "cs.h" #include "cs_aux.h" #include "cs_avail.h" +#include "cs_debug.h" #include "cs_entity.h" #ifdef VERBOSE extern char em_mnem[]; /* The mnemonics of the EM instructions. */ -STATIC void showinstr(lnp) - line_p lnp; +STATIC void showinstr(line_p lnp) { /* Makes the instruction in `lnp' human readable. Only lines that * can occur in expressions that are going to be eliminated are @@ -49,8 +49,7 @@ STATIC void showinstr(lnp) fprintf(stderr,"\n"); } -SHOWOCCUR(ocp) - occur_p ocp; +void SHOWOCCUR(occur_p ocp) { /* Shows all instructions in an occurrence. */ @@ -69,8 +68,7 @@ SHOWOCCUR(ocp) #ifdef TRACE -SHOWAVAIL(avp) - avail_p avp; +void SHOWAVAIL(avail_p avp) { /* Shows an available expression. 
*/ showinstr(avp->av_found); @@ -79,7 +77,7 @@ SHOWAVAIL(avp) } -OUTAVAILS() +void OUTAVAILS(void) { register avail_p ravp; @@ -110,7 +108,7 @@ STATIC char *enkinds[] = { "ignore mask" }; -OUTENTITIES() +void OUTENTITIES(void) { register Lindex i; diff --git a/util/ego/cs/cs_debug.h b/util/ego/cs/cs_debug.h index e45287f9b..2d85ebfe8 100644 --- a/util/ego/cs/cs_debug.h +++ b/util/ego/cs/cs_debug.h @@ -5,7 +5,8 @@ */ #ifdef VERBOSE -extern SHOWOCCUR(); /* (occur_p ocp) +extern void SHOWOCCUR(occur_p ocp); + /* * Shows all lines in an occurrence. */ @@ -17,15 +18,18 @@ extern SHOWOCCUR(); /* (occur_p ocp) #ifdef TRACE -extern OUTAVAILS(); /* () +extern void OUTAVAILS(void); + /* * Prints all available expressions. */ -extern OUTENTITIES(); /* () +extern void OUTENTITIES(void); + /* * Prints all entities. */ -extern SHOWAVAIL(); /* (avail_p avp) +extern void SHOWAVAIL(avail_p avp); + /* * Shows an available expression. */ diff --git a/util/ego/cs/cs_elim.c b/util/ego/cs/cs_elim.c index 0a253830f..b83371416 100644 --- a/util/ego/cs/cs_elim.c +++ b/util/ego/cs/cs_elim.c @@ -20,8 +20,7 @@ #include "cs_partit.h" #include "cs_debug.h" -STATIC dlink(l1, l2) - line_p l1, l2; +STATIC void dlink(line_p l1, line_p l2) { /* Doubly link the lines in l1 and l2. */ @@ -31,11 +30,10 @@ STATIC dlink(l1, l2) l2->l_prev = l1; } -STATIC remove_lines(first, last) - line_p first, last; +STATIC void remove_lines(line_p first, line_p last) { /* Throw away the lines between and including first and last. - * Don't worry about any pointers; the (must) have been taken care of. + * Don't worry about any pointers; they (must) have been taken care of. */ register line_p lnp, next; @@ -46,8 +44,7 @@ STATIC remove_lines(first, last) } } -STATIC bool contained(ocp1, ocp2) - occur_p ocp1, ocp2; +STATIC bool contained(occur_p ocp1, occur_p ocp2) { /* Determine whether ocp1 is contained within ocp2. 
*/ @@ -61,9 +58,7 @@ STATIC bool contained(ocp1, ocp2) return FALSE; } -STATIC delete(ocp, start) - occur_p ocp; - avail_p start; +STATIC void delete(occur_p ocp, avail_p start) { /* Delete all occurrences that are contained within ocp. * They must have been entered in the list before start: @@ -90,10 +85,7 @@ STATIC delete(ocp, start) } } -STATIC complete_aar(lnp, instr, descr_vn) - line_p lnp; - int instr; - valnum descr_vn; +STATIC void complete_aar(line_p lnp, int instr, valnum descr_vn) { /* Lnp is an instruction that loads the address of an array-element. * Instr tells us what effect we should achieve; load (instr is op_lar) @@ -109,15 +101,50 @@ STATIC complete_aar(lnp, instr, descr_vn) dlink(lnp, lindir); } -STATIC replace(ocp, tmp, avp) - occur_p ocp; - offset tmp; - avail_p avp; +STATIC void complete_dv_as_rm(line_p lnp, avail_p avp, bool first) +{ + /* Complete a / b as a % b = a - b * (a / b). For the first + * occurrence, lnp must stack q, where q = a / b. We prepend a + * DUP to change postfix a b / into a b a b /, then append a + * MLI/MLU and SBI/SBU to make a b a b / * -. + * + * For later occurrences, lnp must stack a b q. We append the + * MLI/MLU and SBI/SBU. + */ + line_p dv, dup, ml, sb; + offset size; + bool s; + + size = avp->av_size; + s = (avp->av_instr == (byte) op_dvi); + assert(s || avp->av_instr == (byte) op_dvu); + if (first) { + /* Prepend our DUP to avp->av_found, to get before the + * DVI if lnp points to the LOL in DVI STL LOL. + */ + dup = int_line(2 * size); + dup->l_instr = op_dup; + dv = avp->av_found; + dlink(dv->l_prev, dup); + dlink(dup, dv); + } + ml = int_line(size); + sb = int_line(size); + ml->l_instr = (s ? op_mli : op_mlu); + sb->l_instr = (s ? op_sbi : op_sbu); + dlink(sb, lnp->l_next); + dlink(ml, sb); + dlink(lnp, ml); +} + +STATIC void replace(occur_p ocp, offset tmp, avail_p avp) { /* Replace the lines in the occurrence in ocp by a load of the * temporary with offset tmp.
*/ - register line_p lol, first, last; + avail_p ravp; + line_p lol, first, last; + int instr; assert(avp->av_size == ws || avp->av_size == 2*ws); @@ -130,22 +157,58 @@ STATIC replace(ocp, tmp, avp) if (first->l_prev == (line_p) 0) ocp->oc_belongs->b_start = lol; dlink(first->l_prev, lol); - if (avp->av_instr == (byte) op_aar) { - /* There may actually be a LAR or a SAR instruction; in that - * case we have to complete the array-instruction. - */ - register int instr = INSTR(last); + instr = INSTR(last); + switch (avp->av_instr & 0377) { + case op_aar: + /* There may actually be a LAR or a SAR + * instruction; in that case we have to + * complete the array-instruction. + */ + if (instr != op_aar) + complete_aar(lol, instr, avp->av_othird); + break; + case op_dvi: + if (instr == op_rmi) + complete_dv_as_rm(lol, avp, FALSE); + break; + case op_dvu: + if (instr == op_rmu) + complete_dv_as_rm(lol, avp, FALSE); + break; + } - if (instr != op_aar) complete_aar(lol, instr, avp->av_othird); + /* Some occurrence rocp of an expression before avp might have + * rocp->oc_lfirst == first. If so, then we must set + * rocp->oc_lfirst = lol before we throw away first. + * + * This is almost not possible, but it can happen in code with + * expr1 LOI expr2 STI expr2 LOI, where the STI causes both + * LOIs to have the same value number. Then the first LOI + * might come before the first expr2, so we might replace + * expr2 before we replace expr2 LOI. Then the occurrence of + * expr2 LOI must not point to the eliminated lines of expr2. + */ + for (ravp = avp->av_before; ravp != (avail_p) 0; + ravp = ravp->av_before) { + /* We only check LOI expressions. */ + if (ravp->av_instr == op_loi) { + occur_p rocp; + Lindex i; + + for (i = Lfirst(ravp->av_occurs); i != (Lindex) 0; + i = Lnext(i, ravp->av_occurs)) { + rocp = occ_elem(i); + if (rocp->oc_lfirst == first) + rocp->oc_lfirst = lol; + } + } } /* Throw away the by now useless lines. 
*/ remove_lines(first, last); } -STATIC append(avp, tmp) - avail_p avp; - offset tmp; +STATIC void append(avail_p avp, offset tmp) { /* Avp->av_found points to a line with an operator in it. This * routine emits a sequence of instructions that saves the result @@ -155,6 +218,7 @@ STATIC append(avp, tmp) * within a lar or sar, we must first generate the aar. */ register line_p stl, lol; + register int instr; assert(avp->av_size == ws || avp->av_size == 2*ws); @@ -167,19 +231,30 @@ STATIC append(avp, tmp) dlink(stl, lol); dlink(avp->av_found, stl); - if (avp->av_instr == (byte) op_aar) { - register int instr = INSTR(avp->av_found); - - if (instr != op_aar) { - complete_aar(lol, instr, avp->av_othird); - avp->av_found->l_instr = op_aar; - } + instr = INSTR(avp->av_found); + switch (avp->av_instr & 0377) { + case op_aar: + if (instr != op_aar) { + complete_aar(lol, instr, avp->av_othird); + avp->av_found->l_instr = op_aar; + } + break; + case op_dvi: + if (instr == op_rmi) { + complete_dv_as_rm(lol, avp, TRUE); + avp->av_found->l_instr = op_dvi; + } + break; + case op_dvu: + if (instr == op_rmu) { + complete_dv_as_rm(lol, avp, TRUE); + avp->av_found->l_instr = op_dvu; + } + break; } } -STATIC set_replace(avp, tmp) - avail_p avp; - offset tmp; +STATIC void set_replace(avail_p avp, offset tmp) { /* Avp->av_occurs is now a set of occurrences, each of which will be * replaced by a reference to a local. @@ -199,8 +274,7 @@ STATIC set_replace(avp, tmp) } } -STATIC int reg_score(enp) - entity_p enp; +STATIC int reg_score(entity_p enp) { /* Enp is a local that will go into a register. * We return its score upto now. 
@@ -209,10 +283,7 @@ STATIC int reg_score(enp) return regv_arg(enp->en_loc, 4); } -STATIC line_p gen_mesreg(off, avp, pp) - offset off; - avail_p avp; - proc_p pp; +STATIC line_p gen_mesreg(offset off, avail_p avp, proc_p pp) { /* Generate a register message for the local that will hold the * result of the expression in avp, at the appropriate place in @@ -226,9 +297,7 @@ STATIC line_p gen_mesreg(off, avp, pp) return reg; } -STATIC change_score(mes, score) - line_p mes; - int score; +STATIC void change_score(line_p mes, int score) { /* Change the score in the register message in mes to score. */ @@ -242,8 +311,7 @@ STATIC change_score(mes, score) ap->a_a.a_offset = score; } -eliminate(pp) - proc_p pp; +void eliminate(proc_p pp) { /* Eliminate costly common subexpressions within procedure pp. * We scan the available expressions in - with respect to time found - diff --git a/util/ego/cs/cs_elim.h b/util/ego/cs/cs_elim.h index 4c6a61669..9c7d86477 100644 --- a/util/ego/cs/cs_elim.h +++ b/util/ego/cs/cs_elim.h @@ -3,7 +3,8 @@ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". */ -extern eliminate(); /* (proc_p pp) +extern void eliminate(proc_p pp); + /* * Eliminate some of the recurrences of expressions * that were found by the valuenumbering * algorithm. diff --git a/util/ego/cs/cs_entity.c b/util/ego/cs/cs_entity.c index e4e49ff9a..a2cd5228d 100644 --- a/util/ego/cs/cs_entity.c +++ b/util/ego/cs/cs_entity.c @@ -18,8 +18,7 @@ lset entities; /* Our pseudo symbol-table. */ -entity_p find_entity(vn) - valnum vn; +entity_p find_entity(valnum vn) { /* Try to find the entity with valuenumber vn. 
*/ @@ -33,8 +32,7 @@ entity_p find_entity(vn) return (entity_p) 0; } -STATIC bool same_entity(enp1, enp2) - entity_p enp1, enp2; +STATIC bool same_entity(entity_p enp1, entity_p enp2) { if (enp1->en_kind != enp2->en_kind) return FALSE; if (enp1->en_size != enp2->en_size) return FALSE; @@ -69,8 +67,7 @@ STATIC bool same_entity(enp1, enp2) } } -STATIC copy_entity(src, dst) - entity_p src, dst; +STATIC void copy_entity(entity_p src, entity_p dst) { dst->en_static = src->en_static; dst->en_kind = src->en_kind; @@ -111,8 +108,7 @@ STATIC copy_entity(src, dst) } } -entity_p en_enter(enp) - register entity_p enp; +entity_p en_enter(entity_p enp) { /* Put the entity in enp in the entity set, if it is not already there. * Return pointer to stored entity. @@ -133,7 +129,7 @@ entity_p en_enter(enp) return new; } -clr_entities() +void clr_entities(void) { /* Throw away all pseudo-symboltable information. */ diff --git a/util/ego/cs/cs_entity.h b/util/ego/cs/cs_entity.h index c669efb58..0a222f96e 100644 --- a/util/ego/cs/cs_entity.h +++ b/util/ego/cs/cs_entity.h @@ -5,16 +5,19 @@ */ extern lset entities; /* The pseudo-symboltable. */ -extern entity_p find_entity(); /* (valnum vn) +extern entity_p find_entity(valnum vn); + /* * Tries to find an entity with value number vn. */ -extern entity_p en_enter(); /* (entity_p enp) +extern entity_p en_enter(entity_p enp); + /* * Enter the entity in enp in the set of * entities if it was not already there. */ -extern clr_entities(); /* () +extern void clr_entities(void); + /* * Release all space occupied by our * pseudo-symboltable. 
*/ diff --git a/util/ego/cs/cs_getent.c b/util/ego/cs/cs_getent.c index ef8694536..144750802 100644 --- a/util/ego/cs/cs_getent.c +++ b/util/ego/cs/cs_getent.c @@ -67,8 +67,7 @@ STATIC struct inf_entity { #define ENKIND(ip) ip->inf_used #define SIZEINF(ip) ip->inf_size -STATIC struct inf_entity *getinf(n) - int n; +STATIC struct inf_entity *getinf(int n) { struct inf_entity *ip; @@ -78,8 +77,7 @@ STATIC struct inf_entity *getinf(n) return (struct inf_entity *) 0; } -entity_p getentity(lnp, l_out) - line_p lnp, *l_out; +entity_p getentity(line_p lnp, line_p *l_out) { /* Build the entities where lnp refers to, and enter them. * If a token needs to be popped, the first line that pushed diff --git a/util/ego/cs/cs_getent.h b/util/ego/cs/cs_getent.h index e37e37404..f1c4e955d 100644 --- a/util/ego/cs/cs_getent.h +++ b/util/ego/cs/cs_getent.h @@ -3,7 +3,8 @@ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". */ -extern entity_p getentity(); /* (line_p lnp, *l_out) +extern entity_p getentity(line_p lnp, line_p *l_out); + /* * Extract the entity lnp refers and enter it * in the table of entities. The main entity * lnp refers to is returned; sometimes there diff --git a/util/ego/cs/cs_kill.c b/util/ego/cs/cs_kill.c index 520366f23..fc3144397 100644 --- a/util/ego/cs/cs_kill.c +++ b/util/ego/cs/cs_kill.c @@ -16,9 +16,9 @@ #include "cs_debug.h" #include "cs_avail.h" #include "cs_entity.h" +#include "cs_kill.h" -STATIC base_valno(enp) - entity_p enp; +STATIC valnum base_valno(entity_p enp) { /* Return the value number of the (base) address of an indirectly * accessed entity. @@ -37,8 +37,7 @@ STATIC base_valno(enp) /* NOTREACHED */ } -STATIC entity_p find_base(vn) - valnum vn; +STATIC entity_p find_base(valnum vn) { /* Vn is the valuenumber of the (base) address of an indirectly * accessed entity. 
Return the entity that holds this address @@ -79,8 +78,7 @@ STATIC entity_p find_base(vn) return (entity_p) 0; } -STATIC bool obj_overlap(op1, op2) - obj_p op1, op2; +STATIC bool obj_overlap(obj_p op1, obj_p op2) { /* Op1 and op2 point to two objects in the same datablock. * Obj_overlap returns whether these objects might overlap. @@ -97,8 +95,7 @@ STATIC bool obj_overlap(op1, op2) #define same_datablock(o1, o2) ((o1)->o_dblock == (o2)->o_dblock) -STATIC bool addr_local(enp) - entity_p enp; +STATIC bool addr_local(entity_p enp) { /* Is enp the address of a stack item. */ @@ -108,17 +105,14 @@ STATIC bool addr_local(enp) enp->en_kind == ENAARGBASE; } -STATIC bool addr_external(enp) - entity_p enp; +STATIC bool addr_external(entity_p enp) { /* Is enp the address of an external. */ return enp != (entity_p) 0 && enp->en_kind == ENAEXTERNAL; } -STATIC kill_external(obp, indir) - obj_p obp; - int indir; +STATIC void kill_external(obj_p obp, int indir) { /* A store is done via the object in obp. If this store is direct * we kill directly accessed entities in the same data block only @@ -164,8 +158,7 @@ STATIC kill_external(obp, indir) } } -STATIC bool loc_overlap(enp1, enp2) - entity_p enp1, enp2; +STATIC bool loc_overlap(entity_p enp1, entity_p enp2) { /* Enp1 and enp2 point to two locals. Loc_overlap returns whether * they overlap. @@ -184,9 +177,7 @@ STATIC bool loc_overlap(enp1, enp2) enp1->en_loc + enp1->en_size > enp2->en_loc; } -STATIC kill_local(enp, indir) - entity_p enp; - bool indir; +STATIC void kill_local(entity_p enp, bool indir) { /* This time a store is done into an ENLOCAL. */ @@ -234,7 +225,7 @@ STATIC kill_local(enp, indir) } } -STATIC void kill_sim() +STATIC void kill_sim(void) { /* A store is done into the ENIGNMASK. */ @@ -252,8 +243,7 @@ STATIC void kill_sim() } } -kill_direct(enp) - entity_p enp; +void kill_direct(entity_p enp) { /* A store will be done into enp. We must forget the values of all the * entities this one may overlap with. 
@@ -274,8 +264,7 @@ kill_direct(enp) } } -kill_indir(enp) - entity_p enp; +void kill_indir(entity_p enp) { /* An indirect store is done, in an ENINDIR, * an ENOFFSETTED or an ENARRELEM. @@ -306,7 +295,7 @@ kill_indir(enp) } } -kill_much() +extern void kill_much(void) { /* Kills all killable entities, * except the locals for which a registermessage was generated. @@ -324,8 +313,7 @@ kill_much() } } -STATIC bool bad_procflags(pp) - proc_p pp; +STATIC bool bad_procflags(proc_p pp) { /* Return whether the flags about the procedure in pp indicate * that we have little information about it. It might be that @@ -335,8 +323,7 @@ STATIC bool bad_procflags(pp) return !(pp->p_flags1 & PF_BODYSEEN) || (pp->p_flags1 & PF_CALUNKNOWN); } -STATIC kill_globset(s) - cset s; +STATIC void kill_globset(cset s) { /* S is a set of global variables that might be changed. * We act as if a direct store is done into each of them. @@ -349,8 +336,7 @@ STATIC kill_globset(s) } } -kill_call(pp) - proc_p pp; +void kill_call(proc_p pp) { /* Kill everything that might be destroyed by calling * the procedure in pp. @@ -367,7 +353,7 @@ kill_call(pp) } } -kill_all() +void kill_all(void) { /* Kills all entities. */ diff --git a/util/ego/cs/cs_kill.h b/util/ego/cs/cs_kill.h index 6fa6859b8..347e3eb16 100644 --- a/util/ego/cs/cs_kill.h +++ b/util/ego/cs/cs_kill.h @@ -3,27 +3,32 @@ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". */ -extern kill_call(); /* (proc_p pp) +extern void kill_call(proc_p pp); + /* * Kill all entities that might have an other value * after execution of the procedure in pp. */ -extern kill_much(); /* () +extern void kill_much(void); + /* * Kill all killable entities except those for which * a register message was generated. * Constants, addresses, etc are not killable. 
*/ -extern kill_indir(); /* (entity_p enp) +extern void kill_indir(entity_p enp); + /* * Kill all entities that might have an other value * after indirect assignment to the entity in enp. */ -extern kill_direct(); /* (entity_p enp) +extern void kill_direct(entity_p enp); + /* * Kill all entities that might have an other value * after direct assignment to the entity in enp. */ -extern kill_all(); /* () +extern void kill_all(void); + /* * Kill all entities. */ diff --git a/util/ego/cs/cs_partit.c b/util/ego/cs/cs_partit.c index 9a1bde042..b020ebcfa 100644 --- a/util/ego/cs/cs_partit.c +++ b/util/ego/cs/cs_partit.c @@ -125,8 +125,8 @@ STATIC struct { /* nop */ HOPELESS, XXX, XXX, XXX, XXX, /* rck */ BBLOCK_END, XXX, XXX, XXX, XXX, /* ret */ BBLOCK_END, XXX, XXX, XXX, XXX, -/* rmi */ BINAIR_OP, ARGW, ARGW, ARGW, ANY, -/* rmu */ BINAIR_OP, ARGW, ARGW, ARGW, ANY, +/* rmi */ REMAINDER, ARGW, ARGW, ARGW, ANY, +/* rmu */ REMAINDER, ARGW, ARGW, ARGW, ANY, /* rol */ BINAIR_OP, ARGW, WS, ARGW, ANY, /* ror */ BINAIR_OP, ARGW, WS, ARGW, ANY, /* rtt */ BBLOCK_END, XXX, XXX, XXX, XXX, @@ -178,8 +178,7 @@ STATIC struct { #define AVSIZE(l) (info[INSTR(l)].i_av) #define REGTYPE(n) (info[n].i_regtype) -int instrgroup(lnp) - line_p lnp; +int instrgroup(line_p lnp) { if (INSTR(lnp) == op_lor && SHORT(lnp) == 1) { /* We can't do anything with the stackpointer. */ @@ -192,8 +191,7 @@ int instrgroup(lnp) return GROUP(INSTR(lnp)); } -bool stack_group(instr) - int instr; +bool stack_group(int instr) { /* Is this an instruction that only does something to the top of * the stack? @@ -205,14 +203,14 @@ bool stack_group(instr) case UNAIR_OP: case BINAIR_OP: case TERNAIR_OP: + case REMAINDER: return TRUE; default: return FALSE; } } -STATIC offset argw(lnp) - line_p lnp; +STATIC offset argw(line_p lnp) { /* Some EM-instructions have their argument either on the same line, * or on top of the stack. 
We give up when the argument is on top of @@ -228,8 +226,7 @@ STATIC offset argw(lnp) } } -offset op11size(lnp) - line_p lnp; +offset op11size(line_p lnp) { /* Returns the size of the first argument of * the unary operator in lnp. @@ -248,8 +245,7 @@ offset op11size(lnp) /* NOTREACHED */ } -offset op12size(lnp) - line_p lnp; +offset op12size(line_p lnp) { /* Same for first of binary. */ @@ -264,8 +260,7 @@ offset op12size(lnp) /* NOTREACHED */ } -offset op22size(lnp) - line_p lnp; +offset op22size(line_p lnp) { switch (OP2SIZE(lnp)) { case ARGW: @@ -319,8 +314,7 @@ offset op33size(lnp) return ws; } -offset avsize(lnp) - line_p lnp; +offset avsize(line_p lnp) { /* Returns the size of the result of the instruction in lnp. * If the instruction is a conversion this size is given on the stack. @@ -359,8 +353,7 @@ offset avsize(lnp) /* NOTREACHED */ } -int regtype(instr) - byte instr; +int regtype(byte instr) { switch (REGTYPE(instr & BMASK)) { case ANY: diff --git a/util/ego/cs/cs_partit.h b/util/ego/cs/cs_partit.h index 27e7a00bc..ffcc321cb 100644 --- a/util/ego/cs/cs_partit.h +++ b/util/ego/cs/cs_partit.h @@ -7,53 +7,63 @@ * "manageable chunks. */ -extern int instrgroup(); /* (line_p lnp) +extern int instrgroup(line_p lnp); + /* * Return the group into which the instruction * in lnp belongs to. */ -extern bool stack_group(); /* (int instr) +extern bool stack_group(int instr); + /* * Return whether instr is an instruction that * only changes the state of the stack, i.e. * is a "true" operator. */ -extern offset op11size(); /* (line_p lnp) +extern offset op11size(line_p lnp); + /* * Return the size of the operand of the unary * operator in lnp. */ -extern offset op12size(); /* (line_p lnp) +extern offset op12size(line_p lnp); + /* * Return the size of the first operand of the * binary operator in lnp. */ -extern offset op22size(); /* (line_p lnp) +extern offset op22size(line_p lnp); + /* * Return the size of the second operand of the * binary operator in lnp. 
*/ -extern offset op13size(); /* (line_p lnp) +extern offset op13size(line_p lnp); + /* * Return the size of the first operand of the * ternary operator in lnp. */ -extern offset op23size(); /* (line_p lnp) +extern offset op23size(line_p lnp); + /* * Return the size of the second operand of the * ternary operator in lnp. */ -extern offset op33size(); /* (line_p lnp) +extern offset op33size(line_p lnp); + /* * Return the size of the third operand of the * ternary operator in lnp. */ -extern offset avsize(); /* (line_p lnp) +extern offset avsize(line_p lnp); + /* * Return the size of the result of the * operator in lnp. */ -extern int regtype(); /* (byte instr) +extern int regtype(byte instr); + /* * Return in what kind of machine-register * the result of instr should be stored: * pointer, float, or any. diff --git a/util/ego/cs/cs_profit.c b/util/ego/cs/cs_profit.c index 259a6114d..2efabcb03 100644 --- a/util/ego/cs/cs_profit.c +++ b/util/ego/cs/cs_profit.c @@ -14,6 +14,7 @@ #include "../share/cset.h" #include "../share/lset.h" #include "cs.h" +#include "cs_alloc.h" #include "cs_aux.h" #include "cs_debug.h" #include "cs_avail.h" @@ -25,10 +26,9 @@ STATIC cset forbidden; STATIC cset sli_counts; STATIC short LX_threshold; STATIC short AR_limit; +STATIC bool RM_to_DV; -STATIC get_instrs(f, s_p) - FILE *f; - cset *s_p; +STATIC void get_instrs(FILE *f, cset *s_p) { /* Read a set of integers from inputfile f into *s_p. * Such a set must be delimited by a negative number. @@ -42,9 +42,7 @@ STATIC get_instrs(f, s_p) } } -STATIC choose_cset(f, s_p, max) - FILE *f; - cset *s_p; +STATIC void choose_cset(FILE *f, cset *s_p, int max) { /* Read two compact sets of integers from inputfile f. * Choose the first if we optimize with respect to time, @@ -101,6 +99,12 @@ void cs_machinit(void *vp) fscanf(f, "%d", &space); AR_limit = space; + /* Read whether to convert a remainder RMI/RMU to a division + * DVI/DVU using the formula a % b = a - b * (a / b). 
+ */ + fscanf(f, "%d %d", &time, &space); + RM_to_DV = time_space_ratio >= 50 ? time : space; + /* Read for what counts we must not eliminate an SLI instruction * when it is part of an array-index computation. */ @@ -115,8 +119,27 @@ void cs_machinit(void *vp) choose_cset(f, &forbidden, sp_lmnem); } -STATIC bool sli_no_eliminate(lnp) - line_p lnp; +bool may_become_aar(avail_p avp) +{ + /* Check whether it is desirable to treat a LAR or SAR as an + * AAR LOI/STI. This depends on the size of the array-elements. + */ + offset sz; + + sz = array_elemsize(avp->av_othird); + if (sz == UNKNOWN_SIZE) + return FALSE; + if (time_space_ratio < 50) + return sz <= AR_limit; + return TRUE; +} + +bool may_become_dv(void) +{ + return RM_to_DV; +} + +STATIC bool sli_no_eliminate(line_p lnp) { /* Return whether the SLI-instruction in lnp is part of * an array-index computation, and should not be eliminated. @@ -130,8 +153,7 @@ STATIC bool sli_no_eliminate(lnp) ; } -STATIC bool gains(avp) - avail_p avp; +STATIC bool gains(avail_p avp) { /* Return whether we can gain something, when we eliminate * an expression such as in avp. We just glue together some @@ -161,12 +183,12 @@ STATIC bool gains(avp) return TRUE; } -STATIC bool okay_lines(avp, ocp) - avail_p avp; - occur_p ocp; +STATIC bool okay_lines(avail_p avp, occur_p ocp) { + /* Check whether all lines in this occurrence can in + * principle be eliminated; no stores, messages, calls etc. + */ register line_p lnp, next; - offset sz; for (lnp = ocp->oc_lfirst; lnp != (line_p) 0; lnp = next) { next = lnp != ocp->oc_llast ? lnp->l_next : (line_p) 0; @@ -179,18 +201,6 @@ STATIC bool okay_lines(avp, ocp) return FALSE; } } - /* All lines in this occurrence can in principle be eliminated; - * no stores, messages, calls etc. - * We now check whether it is desirable to treat a LAR or a SAR - * as an AAR LOI/STI. This depends on the size of the array-elements. 
- */ - if (INSTR(ocp->oc_llast) == op_lar || INSTR(ocp->oc_llast) == op_sar) { - sz = array_elemsize(avp->av_othird); - if (sz == UNKNOWN_SIZE) return FALSE; - if (avp->av_instr == (byte) op_aar && time_space_ratio < 50) { - return sz <= AR_limit; - } - } return TRUE; } diff --git a/util/ego/cs/cs_profit.h b/util/ego/cs/cs_profit.h index 7ec5e3c17..3d1972d24 100644 --- a/util/ego/cs/cs_profit.h +++ b/util/ego/cs/cs_profit.h @@ -7,6 +7,17 @@ void cs_machinit(void *vp); /* (FILE *f) * Read phase-specific information from f. */ +bool may_become_aar(avail_p avp); + /* + * Return whether a LAR/SAR may become + * an AAR LOI/STI. + */ + +bool may_become_dv(void); /* + * Return whether an RMI/RMU may become + * a DVI/DVU: a % b = a - (a / b * b). + */ + bool desirable(avail_p avp); /* * Return whether it is desirable to eliminate * the recurrences of the expression in avp. diff --git a/util/ego/cs/cs_stack.c b/util/ego/cs/cs_stack.c index 7927438a5..670955d1e 100644 --- a/util/ego/cs/cs_stack.c +++ b/util/ego/cs/cs_stack.c @@ -23,8 +23,7 @@ STATIC token_p free_token; #define Stack_empty() (free_token == &Stack[0]) #define Top (free_token - 1) -Push(tkp) - token_p tkp; +void Push(token_p tkp) { if (tkp->tk_size == UNKNOWN_SIZE) { Empty_stack(); /* The contents of the Stack is useless. */ @@ -39,10 +38,7 @@ Push(tkp) #define WORD_MULTIPLE(n) ((n / ws) * ws + ( n % ws ? ws : 0 )) -void -Pop(tkp, size) - token_p tkp; - offset size; +void Pop(token_p tkp, offset size) { /* Pop a token with given size from the valuenumber stack into tkp. */ @@ -85,8 +81,7 @@ Pop(tkp, size) } } -Dup(lnp) - line_p lnp; +void Dup(line_p lnp) { /* Duplicate top bytes on the Stack. 
*/ @@ -132,7 +127,7 @@ Dup(lnp) } } -clr_stack() +void clr_stack(void) { free_token = &Stack[0]; } diff --git a/util/ego/cs/cs_stack.h b/util/ego/cs/cs_stack.h index 64d59cf90..e5a79b858 100644 --- a/util/ego/cs/cs_stack.h +++ b/util/ego/cs/cs_stack.h @@ -3,21 +3,25 @@ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". */ -extern Push(); /* (token_p tkp) +extern void Push(token_p tkp); + /* * Push the token in tkp on the fake-stack. */ -extern Pop(); /* (token_p tkp; offset size) +extern void Pop(token_p tkp, offset size); + /* * Pop a token of size bytes from the fake-stack * into tkp. If such a token is not there * we put a dummy in tkp and adjust the fake-stack. */ -extern Dup(); /* (line_p lnp) +extern void Dup(line_p lnp); + /* * Reflect the changes made by the dup-instruction * in lnp to the EM-stack into the fake-stack. */ -extern clr_stack(); /* () +extern void clr_stack(void); + /* * Clear the fake-stack. */ diff --git a/util/ego/cs/cs_vnm.c b/util/ego/cs/cs_vnm.c index a4813411c..435dd4658 100644 --- a/util/ego/cs/cs_vnm.c +++ b/util/ego/cs/cs_vnm.c @@ -20,10 +20,9 @@ #include "cs_kill.h" #include "cs_partit.h" #include "cs_getent.h" +#include "cs_profit.h" -STATIC push_entity(enp, lfirst) - entity_p enp; - line_p lfirst; +STATIC void push_entity(entity_p enp, line_p lfirst) { /* Build token and Push it. 
*/ @@ -35,10 +34,8 @@ STATIC push_entity(enp, lfirst) Push(&tk); } -STATIC put_expensive_load(bp, lnp, lfirst, enp) - bblock_p bp; - line_p lnp, lfirst; - entity_p enp; +STATIC void put_expensive_load(bblock_p bp, line_p lnp, line_p lfirst, + entity_p enp) { struct avail av; occur_p ocp; @@ -52,16 +49,15 @@ STATIC put_expensive_load(bp, lnp, lfirst, enp) av_enter(&av, ocp, EXPENSIVE_LOAD); } -STATIC put_aar(bp, lnp, lfirst, enp) - bblock_p bp; - line_p lnp, lfirst; - entity_p enp; +STATIC void put_aar(bblock_p bp, line_p lnp, line_p lfirst, entity_p enp) { - /* Enp points to an ENARRELEM. We do as if its address was computed. */ - + /* Enter the implicit AAR in a LAR or SAR, where enp points to + * the ENARRELEM, and AAR computes its address. + */ struct avail av; occur_p ocp; + assert(INSTR(lnp) == op_lar || INSTR(lnp) == op_sar); assert(enp->en_kind == ENARRELEM); av.av_instr = op_aar; av.av_size = ps; @@ -69,14 +65,17 @@ STATIC put_aar(bp, lnp, lfirst, enp) av.av_osecond = enp->en_index; av.av_othird = enp->en_adesc; - ocp = newoccur(lfirst, lnp, bp); - - av_enter(&av, ocp, TERNAIR_OP); + /* Before we enter an available AAR, we must check whether we + * may convert this LAR/SAR to AAR LOI/STI. This is so we + * don't LOI/STI a large or unknown size. 
+ */ + if (may_become_aar(&av)) { + ocp = newoccur(lfirst, lnp, bp); + av_enter(&av, ocp, TERNAIR_OP); + } } -STATIC push_avail(avp, lfirst) - avail_p avp; - line_p lfirst; +STATIC void push_avail(avail_p avp, line_p lfirst) { struct token tk; @@ -86,10 +85,7 @@ STATIC push_avail(avp, lfirst) Push(&tk); } -STATIC push_unair_op(bp, lnp, tkp1) - bblock_p bp; - line_p lnp; - token_p tkp1; +STATIC void push_unair_op(bblock_p bp, line_p lnp, token_p tkp1) { struct avail av; occur_p ocp; @@ -103,10 +99,7 @@ STATIC push_unair_op(bp, lnp, tkp1) push_avail(av_enter(&av, ocp, UNAIR_OP), tkp1->tk_lfirst); } -STATIC push_binair_op(bp, lnp, tkp1, tkp2) - bblock_p bp; - line_p lnp; - token_p tkp1, tkp2; +STATIC void push_binair_op(bblock_p bp, line_p lnp, token_p tkp1, token_p tkp2) { struct avail av; occur_p ocp; @@ -121,10 +114,8 @@ STATIC push_binair_op(bp, lnp, tkp1, tkp2) push_avail(av_enter(&av, ocp, BINAIR_OP), tkp1->tk_lfirst); } -STATIC push_ternair_op(bp, lnp, tkp1, tkp2, tkp3) - bblock_p bp; - line_p lnp; - token_p tkp1, tkp2, tkp3; +STATIC void push_ternair_op(bblock_p bp, line_p lnp, token_p tkp1, + token_p tkp2, token_p tkp3) { struct avail av; occur_p ocp; @@ -140,8 +131,38 @@ STATIC push_ternair_op(bp, lnp, tkp1, tkp2, tkp3) push_avail(av_enter(&av, ocp, TERNAIR_OP), tkp1->tk_lfirst); } -STATIC fiddle_stack(lnp) - line_p lnp; +STATIC void push_remainder(bblock_p bp, line_p lnp, token_p tkp1, token_p tkp2) +{ + /* Enter the implicit division tkp1 / tkp2, + * then push the remainder tkp1 % tkp2. + */ + struct avail av; + occur_p ocp; + + assert(INSTR(lnp) == op_rmi || INSTR(lnp) == op_rmu); + av.av_size = avsize(lnp); + av.av_oleft = tkp1->tk_vn; + av.av_oright = tkp2->tk_vn; + + /* Check whether we may convert RMI/RMU to DVI/DVU. */ + if (may_become_dv()) { + /* The division is DVI in RMI, or DVU in RMU. */ + av.av_instr = (INSTR(lnp) == op_rmi ? op_dvi : op_dvu); + + /* In postfix, a b % becomes a b a b / * -. 
We must + * keep a and b on the stack, so the first instruction + * to eliminate is lnp, not tkp1->l_first. + */ + ocp = newoccur(lnp, lnp, bp); + av_enter(&av, ocp, BINAIR_OP); + } + + av.av_instr = INSTR(lnp); + ocp = newoccur(tkp1->tk_lfirst, lnp, bp); + push_avail(av_enter(&av, ocp, REMAINDER), tkp1->tk_lfirst); +} + +STATIC void fiddle_stack(line_p lnp) { /* The instruction in lnp does something to the valuenumber-stack. */ @@ -232,8 +253,7 @@ STATIC proc_p find_proc(vn) return (proc_p) 0; } -STATIC side_effects(lnp) - line_p lnp; +STATIC void side_effects(line_p lnp) { /* Lnp contains a cai or cal instruction. We try to find the callee * and see what side-effects it has. @@ -255,8 +275,7 @@ STATIC side_effects(lnp) } } -hopeless(instr) - int instr; +STATIC void hopeless(int instr) { /* The effect of `instr' is too difficult to * compute. We assume worst case behaviour. @@ -281,8 +300,7 @@ hopeless(instr) } } -vnm(bp) - bblock_p bp; +void vnm(bblock_p bp) { register line_p lnp; register entity_p rep; @@ -331,6 +349,11 @@ vnm(bp) Pop(&tk1, op13size(lnp)); push_ternair_op(bp, lnp, &tk1, &tk2, &tk3); break; + case REMAINDER: + Pop(&tk2, op22size(lnp)); + Pop(&tk1, op12size(lnp)); + push_remainder(bp, lnp, &tk1, &tk2); + break; case KILL_ENTITY: kill_direct(rep); break; diff --git a/util/ego/cs/cs_vnm.h b/util/ego/cs/cs_vnm.h index 0fbce5d72..0c86a77e8 100644 --- a/util/ego/cs/cs_vnm.h +++ b/util/ego/cs/cs_vnm.h @@ -3,7 +3,8 @@ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". */ -extern vnm(); /* (bblock_p bp) +extern void vnm(bblock_p bp); + /* * Performs the valuenumbering algorithm on the basic * block in bp. 
*/ diff --git a/util/ego/descr/em22.descr b/util/ego/descr/em22.descr index f995d631c..d9c39226b 100644 --- a/util/ego/descr/em22.descr +++ b/util/ego/descr/em22.descr @@ -78,6 +78,7 @@ cheap operations: -1 -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/em24.descr b/util/ego/descr/em24.descr index a95751170..cbe0ab5c3 100644 --- a/util/ego/descr/em24.descr +++ b/util/ego/descr/em24.descr @@ -78,6 +78,7 @@ cheap operations: -1 -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/em44.descr b/util/ego/descr/em44.descr index 117f26591..b6dbebba3 100644 --- a/util/ego/descr/em44.descr +++ b/util/ego/descr/em44.descr @@ -78,6 +78,7 @@ cheap operations: -1 -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/i386.descr b/util/ego/descr/i386.descr index 264151a60..d5a2014bf 100644 --- a/util/ego/descr/i386.descr +++ b/util/ego/descr/i386.descr @@ -93,6 +93,7 @@ cheap operations: op_cii op_cui op_ciu op_cuu -1 op_cii op_cui op_ciu op_cuu -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/i86.descr b/util/ego/descr/i86.descr index 8be3ec23e..9b27cf840 100644 --- a/util/ego/descr/i86.descr +++ b/util/ego/descr/i86.descr @@ -93,6 +93,7 @@ cheap operations: op_cii op_cui op_ciu op_cuu -1 op_cii op_cui op_ciu op_cuu -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/m68020.descr 
b/util/ego/descr/m68020.descr index 9d2f46b2b..f568e00e2 100644 --- a/util/ego/descr/m68020.descr +++ b/util/ego/descr/m68020.descr @@ -102,6 +102,7 @@ cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: 1 2 3 -1 1 2 3 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/m68k2.descr b/util/ego/descr/m68k2.descr index 58e433db8..6b144cba0 100644 --- a/util/ego/descr/m68k2.descr +++ b/util/ego/descr/m68k2.descr @@ -99,6 +99,7 @@ addressing modes: op_adp op_lof op_ldf op_loi op_dch op_lpb -1 cheap operations: -1 -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/m68k4.descr b/util/ego/descr/m68k4.descr index 8e1da4c5e..6b9d23dfa 100644 --- a/util/ego/descr/m68k4.descr +++ b/util/ego/descr/m68k4.descr @@ -102,6 +102,7 @@ cheap operations: op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/pdp.descr b/util/ego/descr/pdp.descr index e73b3aaf1..ec8f3abca 100644 --- a/util/ego/descr/pdp.descr +++ b/util/ego/descr/pdp.descr @@ -92,6 +92,7 @@ cheap operations: op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif -1 op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/powerpc.descr b/util/ego/descr/powerpc.descr index e59990ea1..cf613e96c 100644 --- a/util/ego/descr/powerpc.descr +++ b/util/ego/descr/powerpc.descr @@ -102,7 +102,7 
@@ register save costs: 17 -> (102,136) 18 -> (108,144) 19 -> (114,152) - 20 -> (120,160) + 20 -> (120,160) 21 -> (126,168) 22 -> (132,176) 23 -> (138,184) @@ -137,10 +137,11 @@ reduce sli if shift count larger than: 0 first time then space: addressing modes: op_ads op_adp op_lof op_ldf op_loi op_dch op_lpb -1 op_ads op_adp op_lof op_ldf op_loi op_dch op_lpb -1 -cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 +cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: yes yes do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/sparc.descr b/util/ego/descr/sparc.descr index 978c39ba3..79c33decb 100644 --- a/util/ego/descr/sparc.descr +++ b/util/ego/descr/sparc.descr @@ -100,6 +100,7 @@ cheap operations: op_cuu op_ciu op_cui op_cii -1 op_cuu op_ciu op_cui op_cii -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/vax4.descr b/util/ego/descr/vax4.descr index 5a39ea759..beaf0c427 100644 --- a/util/ego/descr/vax4.descr +++ b/util/ego/descr/vax4.descr @@ -113,6 +113,7 @@ cheap operations: op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif op_cmi op_cmu op_cmf op_cms op_cmp -1 lexical thresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: 1 2 3 -1 1 2 3 -1 forbidden operators: -1 -1 diff --git a/util/ego/share/aux.h b/util/ego/share/aux.h index 6a6770469..db2d3f8da 100644 --- a/util/ego/share/aux.h +++ b/util/ego/share/aux.h @@ -36,7 +36,7 @@ line_p reg_mes(offset tmp, short size, int typ, int score); bool dom(bblock_p b1, bblock_p b2); /* * See if b1 dominates b2. Note that a - * block always * dominates itself. + * block always dominates itself. 
*/ bblock_p common_dom(bblock_p a, bblock_p b); /* diff --git a/util/ego/share/build.lua b/util/ego/share/build.lua index ab1068d2c..5ca714897 100644 --- a/util/ego/share/build.lua +++ b/util/ego/share/build.lua @@ -48,6 +48,7 @@ clibrary { "./init_glob.c", }, deps = { + "./*.h", "+classdefs_h", "+pop_push_h", "h+emheaders", @@ -57,5 +58,3 @@ clibrary { ["+cflags"] = {"-DVERBOSE", "-DNOTCOMPACT"} } } - - diff --git a/util/ego/share/debug.c b/util/ego/share/debug.c index 81080f7cf..56514d149 100644 --- a/util/ego/share/debug.c +++ b/util/ego/share/debug.c @@ -45,7 +45,7 @@ void error(const char *s, ...) void OUTTRACE(const char *s, int n) { fprintf(stderr,"> "); - vfprintf(stderr,s,n); + fprintf(stderr,s,n); fprintf(stderr,"\n"); } #endif diff --git a/util/ego/share/files.c b/util/ego/share/files.c index e45f9b7fb..2dd10b25f 100644 --- a/util/ego/share/files.c +++ b/util/ego/share/files.c @@ -9,9 +9,11 @@ */ #include +#include "types.h" +#include "debug.h" #include "files.h" -struct files* findfiles(int argc, const char** argv) +struct files* findfiles(int argc, char * const *argv) { static struct files files; diff --git a/util/ego/share/files.h b/util/ego/share/files.h index 46b19917a..ab2076ea1 100644 --- a/util/ego/share/files.h +++ b/util/ego/share/files.h @@ -33,11 +33,11 @@ struct files /* The rest of the arguments. 
*/ - const char** argv; + char * const *argv; int argc; }; -struct files* findfiles(int argc, const char** argv); +struct files* findfiles(int argc, char * const *argv); FILE *openfile(const char *name, const char *mode); /* diff --git a/util/ego/share/get.c b/util/ego/share/get.c index 94c7aabe2..a433b0946 100644 --- a/util/ego/share/get.c +++ b/util/ego/share/get.c @@ -285,7 +285,7 @@ dblock_p getdtable(const char *dname) /* getbblocks */ -STATIC argstring(short length, argb_p abp) +STATIC void argstring(short length, argb_p abp) { while (length--) { diff --git a/util/ego/share/global.h b/util/ego/share/global.h index f97df2fa2..4121a5b85 100644 --- a/util/ego/share/global.h +++ b/util/ego/share/global.h @@ -40,13 +40,13 @@ extern int ws; /* word size */ #define UNKNOWN_SIZE (-1) -extern proc_p curproc; /* current procedure */ +extern proc_p curproc; /* current procedure */ -extern char *filename; /* name of current input file */ +extern char *filename; /* name of current input file */ extern lset mesregs; /* set of MES ms_reg pseudos */ -extern short time_space_ratio; /* 0 if optimizing for space only, +extern short time_space_ratio; /* 0 if optimizing for space only, * 100 if optimizing for time only, * else something 'in between'. */ diff --git a/util/ego/share/go.c b/util/ego/share/go.c index 9a2107d3d..0ccd3c6e9 100644 --- a/util/ego/share/go.c +++ b/util/ego/share/go.c @@ -42,7 +42,7 @@ STATIC void mach_init(char* machfile, void (*phase_machinit)(void *)) fclose(f); } -void go(int argc, const char** argv, +void go(int argc, char * const *argv, void (*initialize)(void *), void (*optimize)(void *), void (*phase_machinit)(void *), void (*proc_flag)(void *)) { diff --git a/util/ego/share/go.h b/util/ego/share/go.h index 3bb8c1f54..55f1b48e8 100644 --- a/util/ego/share/go.h +++ b/util/ego/share/go.h @@ -22,7 +22,7 @@ * and 'optimize' is called with the current procedure * as parameter. 
*/ -void go(int argc, const char** argv, +void go(int argc, char * const *argv, void (*initialize)(void *null), void (*optimize)(void *), /* (proc_p *p) */ void (*phase_machinit)(void *), /* (FILE *f) */ diff --git a/util/ego/share/types.h b/util/ego/share/types.h index cae4d6074..cabc5818d 100644 --- a/util/ego/share/types.h +++ b/util/ego/share/types.h @@ -46,7 +46,7 @@ typedef struct elemholder *lset; typedef struct bitvector *cset; typedef elem_p Lindex; typedef short Cindex; -typedef char *Lelem_t; +typedef void *Lelem_t; typedef short Celem_t; typedef union pext_t *pext_p; diff --git a/util/ego/sp/sp.c b/util/ego/sp/sp.c index 8538d3dfb..051281d7e 100644 --- a/util/ego/sp/sp.c +++ b/util/ego/sp/sp.c @@ -65,9 +65,8 @@ STATIC void sp_machinit(void *vp) } fscanf(f,"%d",&globl_sp_allowed); } -comb_asps(l1,l2,b) - line_p l1,l2; - bblock_p b; + +STATIC void comb_asps(line_p l1, line_p l2, bblock_p b) { assert(INSTR(l1) == op_asp); assert(INSTR(l2) == op_asp); @@ -78,11 +77,7 @@ comb_asps(l1,l2,b) rm_line(l1,b); } - - - -stack_pollution(b) - bblock_p b; +STATIC void stack_pollution(bblock_p b) { /* For every pair of successive ASP instructions in basic * block b, try to combine the two into one ASP. 
@@ -134,8 +129,7 @@ stack_pollution(b) } while (asp != (line_p) 0); } -STATIC bool block_save(b) - bblock_p b; +STATIC bool block_save(bblock_p b) { register line_p l; @@ -159,10 +153,7 @@ STATIC bool block_save(b) return stack_diff >= 0; } - - -STATIC mark_pred(b) - bblock_p b; +STATIC void mark_pred(bblock_p b) { Lindex i; bblock_p x; @@ -176,12 +167,7 @@ STATIC mark_pred(b) } } - - - - -STATIC mark_unsave_blocks(p) - proc_p p; +STATIC void mark_unsave_blocks(proc_p p) { register bblock_p b; @@ -193,8 +179,7 @@ STATIC mark_unsave_blocks(p) } } - -void sp_optimize(void *vp) +STATIC void sp_optimize(void *vp) { proc_p p = vp; register bblock_p b; @@ -206,21 +191,13 @@ void sp_optimize(void *vp) } } - - - -main(argc,argv) - int argc; - char *argv[]; +int main(int argc, char *argv[]) { go(argc,argv,no_action,sp_optimize,sp_machinit,no_action); report("stack adjustments deleted",Ssp); exit(0); } - - - /***** DEBUGGING: debug_stack_pollution(p) diff --git a/util/ego/ud/ud.c b/util/ego/ud/ud.c index c0fe613fd..087337144 100644 --- a/util/ego/ud/ud.c +++ b/util/ego/ud/ud.c @@ -269,13 +269,13 @@ pr_localtab() { short i; local_p lc; - printf("LOCAL-TABLE (%d)\n\n",nrlocals); + fprintf(stderr,"LOCAL-TABLE (%d)\n\n",nrlocals); for (i = 1; i <= nrlocals; i++) { lc = locals[i]; - printf("LOCAL %d\n",i); - printf(" offset= %ld\n",lc->lc_off); - printf(" size= %d\n",lc->lc_size); - printf(" flags= %d\n",lc->lc_flags); + fprintf(stderr,"LOCAL %d\n",i); + fprintf(stderr,"\toffset= %ld\n",lc->lc_off); + fprintf(stderr,"\tsize= %d\n",lc->lc_size); + fprintf(stderr,"\tflags= %d\n",lc->lc_flags); } } @@ -284,12 +284,13 @@ pr_globals() dblock_p d; obj_p obj; - printf("GLOBALS (%d)\n\n",nrglobals); - printf("ID GLOBNR\n"); + fprintf(stderr,"GLOBALS (%d)\n\n",nrglobals); + fprintf(stderr,"ID\tGLOBNR\n"); for (d = fdblock; d != (dblock_p) 0; d = d->d_next) { for (obj = d->d_objlist; obj != (obj_p) 0; obj = obj->o_next) { if (obj->o_globnr != 0) { - printf("%d %d\n", 
obj->o_id,obj->o_globnr); + fprintf(stderr,"%d\t%d\n", + obj->o_id,obj->o_globnr); } } } @@ -302,20 +303,20 @@ pr_defs() short i; line_p l; - printf("DEF TABLE\n\n"); + fprintf(stderr,"DEF TABLE\n\n"); for (i = 1; i <= nrexpldefs; i++) { l = defs[i]; - printf("%d %s ",EXPL_TO_DEFNR(i), + fprintf(stderr,"%d\t%s ",EXPL_TO_DEFNR(i), &em_mnem[(INSTR(l)-sp_fmnem)*4]); switch(TYPE(l)) { case OPSHORT: - printf("%d\n",SHORT(l)); + fprintf(stderr,"%d\n",SHORT(l)); break; case OPOFFSET: - printf("%ld\n",OFFSET(l)); + fprintf(stderr,"%ld\n",OFFSET(l)); break; case OPOBJECT: - printf("%d\n",OBJ(l)->o_id); + fprintf(stderr,"%d\n",OBJ(l)->o_id); break; default: assert(FALSE); @@ -331,13 +332,13 @@ pr_set(name,k,s,n) { short i; - printf("%s(%d) = {",name,k); + fprintf(stderr,"%s(%d) =\t{",name,k); for (i = 1; i <= n; i++) { if (Cis_elem(i,s)) { - printf("%d ",i); + fprintf(stderr,"%d ",i); } } - printf ("}\n"); + fprintf(stderr,"}\n"); } pr_blocks(p) @@ -347,7 +348,7 @@ pr_blocks(p) short n; for (b = p->p_start; b != 0; b = b->b_next) { - printf ("\n"); + fprintf(stderr,"\n"); n = b->b_id; pr_set("GEN",n,GEN(b),nrdefs); pr_set("KILL",n,KILL(b),nrdefs); @@ -361,10 +362,10 @@ pr_copies() { short i; - printf("\nCOPY TABLE\n\n"); + fprintf(stderr,"\nCOPY TABLE\n\n"); for (i = 1; i <= nrdefs; i++) { if (def_to_copynr[i] != 0) { - printf("%d %d\n",i,def_to_copynr[i]); + fprintf(stderr,"%d\t%d\n",i,def_to_copynr[i]); } } } @@ -376,7 +377,7 @@ pr_cblocks(p) short n; for (b = p->p_start; b != 0; b = b->b_next) { - printf ("\n"); + fprintf(stderr,"\n"); n = b->b_id; pr_set("CGEN",n,C_GEN(b),nrcopies); pr_set("CKILL",n,C_KILL(b),nrcopies); diff --git a/util/misc/convert.c b/util/misc/convert.c index ec38761fa..9bdc12011 100644 --- a/util/misc/convert.c +++ b/util/misc/convert.c @@ -16,8 +16,10 @@ static char rcsid[] = "$Id$"; linked. 
*/ +#include #include #include "system.h" +#include "print.h" #include "em_pseu.h" #include "em_mnem.h" #include "em_spec.h" @@ -30,8 +32,11 @@ char *filename; /* Name of input file */ int errors; /* Number of errors */ extern char *C_error; -main(argc,argv) - char **argv; +void error(const char *, ...); +void fatal(const char *, ...); + +int +main(int argc, char **argv) { struct e_instr buf; register struct e_instr *p = &buf; @@ -66,27 +71,32 @@ main(argc,argv) } C_close(); EM_close(); - exit(errors); + exit(errors ? 1 : 0); } /* VARARGS */ -error(s,a1,a2,a3,a4) - char *s; +void +error(const char *s, ...) { + va_list ap; + va_start(ap, s); fprint(STDERR, "%s, line %d: ", filename ? filename : "standard input", EM_lineno); - fprint(STDERR,s,a1,a2,a3,a4); + doprnt(STDERR, s, ap); fprint(STDERR, "\n"); errors++; + va_end(ap); } /* VARARGS */ -fatal(s,a1,a2,a3,a4) - char *s; +void +fatal(const char *s, ...) { + va_list ap; + va_start(ap, s); if (C_busy()) C_close(); - error(s,a1,a2,a3,a4); + error(s, ap); exit(1); }