From 88207db63853eef2c86ae9a63f7f6f58c7a5b89e Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 6 Dec 2017 17:09:12 -0500 Subject: [PATCH 01/55] Use <stdarg.h> in util/misc/convert.c I made a syntax error in some .e file, and em_encode dumped core because a 64-bit pointer didn't fit in a 32-bit int. Now use stdarg to pass pointers to error() and fatal(). Stop using the number of errors as the exit status. Many systems use only the low 8 bits of the exit status, so 256 errors would become 0. Also change modules/src/print to accept const char *fmt --- modules/src/print/doprnt.c | 2 +- modules/src/print/format.c | 2 +- modules/src/print/fprint.c | 2 +- modules/src/print/print.c | 2 +- modules/src/print/print.h | 10 +++++----- modules/src/print/sprint.c | 2 +- util/misc/convert.c | 28 +++++++++++++++++++--------- 7 files changed, 29 insertions(+), 19 deletions(-) diff --git a/modules/src/print/doprnt.c b/modules/src/print/doprnt.c index a77b7d2c1..1d888e570 100644 --- a/modules/src/print/doprnt.c +++ b/modules/src/print/doprnt.c @@ -16,7 +16,7 @@ %d = int $ */ void -doprnt(File *fp, char *fmt, va_list argp) +doprnt(File *fp, const char *fmt, va_list argp) { char buf[SSIZE]; diff --git a/modules/src/print/format.c b/modules/src/print/format.c index 2ad920bc8..e03717918 100644 --- a/modules/src/print/format.c +++ b/modules/src/print/format.c @@ -35,7 +35,7 @@ integral(int c) %d = int $ */ int -_format(char *buf, char *fmt, va_list argp) +_format(char *buf, const char *fmt, va_list argp) { register char *pf = fmt; register char *pb = buf; diff --git a/modules/src/print/fprint.c b/modules/src/print/fprint.c index c401858a9..6b5b8a389 100644 --- a/modules/src/print/fprint.c +++ b/modules/src/print/fprint.c @@ -17,7 +17,7 @@ $ */ /*VARARGS*/ void -fprint(File *fp, char *fmt, ...) +fprint(File *fp, const char *fmt, ...) 
{ va_list args; char buf[SSIZE]; diff --git a/modules/src/print/print.c b/modules/src/print/print.c index cd9346e98..2e1256a54 100644 --- a/modules/src/print/print.c +++ b/modules/src/print/print.c @@ -17,7 +17,7 @@ $ */ /*VARARGS*/ void -print(char *fmt, ...) +print(const char *fmt, ...) { va_list args; char buf[SSIZE]; diff --git a/modules/src/print/print.h b/modules/src/print/print.h index 56372376a..974e4bf1b 100644 --- a/modules/src/print/print.h +++ b/modules/src/print/print.h @@ -9,10 +9,10 @@ #include -void print(char *fmt, ...); -void fprint(File *f, char *fmt, ...); -void doprnt(File *f, char *fmt, va_list ap); -int _format(char *buf, char *fmt, va_list ap); -char *sprint(char *buf, char *fmt, ...); +void print(const char *fmt, ...); +void fprint(File *f, const char *fmt, ...); +void doprnt(File *f, const char *fmt, va_list ap); +int _format(char *buf, const char *fmt, va_list ap); +char *sprint(char *buf, const char *fmt, ...); #endif /* __PRINT_INCLUDED__ */ diff --git a/modules/src/print/sprint.c b/modules/src/print/sprint.c index d88b47e69..7c9dbf9b0 100644 --- a/modules/src/print/sprint.c +++ b/modules/src/print/sprint.c @@ -17,7 +17,7 @@ $ */ /*VARARGS*/ char * -sprint(char *buf, char *fmt, ...) +sprint(char *buf, const char *fmt, ...) { va_list args; diff --git a/util/misc/convert.c b/util/misc/convert.c index ec38761fa..9bdc12011 100644 --- a/util/misc/convert.c +++ b/util/misc/convert.c @@ -16,8 +16,10 @@ static char rcsid[] = "$Id$"; linked. 
*/
 
+#include <stdarg.h>
 #include
 #include "system.h"
+#include "print.h"
 #include "em_pseu.h"
 #include "em_mnem.h"
 #include "em_spec.h"
@@ -30,8 +32,11 @@
 char *filename;		/* Name of input file */
 int errors;		/* Number of errors */
 extern char *C_error;
-main(argc,argv)
-	char **argv;
+void error(const char *, ...);
+void fatal(const char *, ...);
+
+int
+main(int argc, char **argv)
 {
 	struct e_instr buf;
 	register struct e_instr *p = &buf;
@@ -66,27 +71,47 @@ main(argc,argv)
 	}
 	C_close();
 	EM_close();
-	exit(errors);
+	exit(errors ? 1 : 0);
 }
 
-/* VARARGS */
-error(s,a1,a2,a3,a4)
-	char *s;
+/*
+ * Print one message on STDERR, prefixed with the input file name and
+ * the current EM line number, and count it in `errors'.  Takes a
+ * va_list so that error() and fatal() can both forward their
+ * arguments; a va_list must not be passed through "...".
+ */
+static void
+verror(const char *s, va_list ap)
 {
 	fprint(STDERR,
 		"%s, line %d: ",
 		filename ? filename : "standard input",
 		EM_lineno);
-	fprint(STDERR,s,a1,a2,a3,a4);
+	doprnt(STDERR, s, ap);
 	fprint(STDERR, "\n");
 	errors++;
 }
 
 /* VARARGS */
-fatal(s,a1,a2,a3,a4)
-	char *s;
+void
+error(const char *s, ...)
+{
+	va_list ap;
+
+	va_start(ap, s);
+	verror(s, ap);
+	va_end(ap);
+}
+
+/* VARARGS */
+void
+fatal(const char *s, ...)
 {
+	va_list ap;
+
+	va_start(ap, s);
 	if (C_busy()) C_close();
-	error(s,a1,a2,a3,a4);
+	verror(s, ap);
+	va_end(ap);
 	exit(1);
 }
From 34cf0c8b63dc709254fbdc4523e5990a2c4714e6 Mon Sep 17 00:00:00 2001
From: George Koehler
Date: Wed, 6 Dec 2017 22:14:00 -0500
Subject: [PATCH 02/55] Kill registers a, de, when i80 ncg does Call libem.

I compiled tests/plat/lib/test.c with ack -mcpm, but i80 ncg did emit
wrong code in writehex(uint32_t) for

    "0123456789abcdef"[code & 0xf]

The code called '.and' to evaluate `code & 0xf`, then tried to call
'.cii' to narrow the result from 4 to 2 bytes, but it passed garbage
instead of 4 to '.cii'.  The rule for '.and' was

    pat and defined($1)
    kills ALL
    uses dereg={const2,$1}
    gen Call {label,".and"}

This failed to kill register de={const2,4}, so ncg pushed de,
expecting to push 4, but actually pushing garbage.

Fix such rules using `mvi a,...` or `lxi de,...` so ncg doesn't track
the token in the register.  This is like the i86 table.  A different
fix would use a dummy instruction `killreg a` or `killreg de` like the
m68020 table.
Also correct 1 to $1 when calling '.exg'. --- mach/i80/ncg/table | 122 +++++++++++++++++++++++---------------------- 1 file changed, 62 insertions(+), 60 deletions(-) diff --git a/mach/i80/ncg/table b/mach/i80/ncg/table index 575820c81..e6d7e02f6 100644 --- a/mach/i80/ncg/table +++ b/mach/i80/ncg/table @@ -385,8 +385,9 @@ gen dad de pat loi $1>=512 kills ALL -uses dereg={const2,$1} -gen Call {label,".loi"} +/* 'uses dereg={const2,$1}' fails to kill de. */ +gen lxi de,{const2,$1} + Call {label,".loi"} pat los $1==2 with dereg @@ -597,8 +598,8 @@ gen 1: pat sti kills ALL -uses dereg={const2,$1} -gen Call {label,".sti"} +gen lxi de,{const2,$1} + Call {label,".sti"} pat sts $1==2 with dereg @@ -702,23 +703,24 @@ gen Call {label,".mli4"} pat dvi $1==2 kills ALL -uses areg={const1,129} -gen Call {label,".dvi2"} yields de +/* 'uses areg={const1,129}' fails to kill a. */ +gen mvi a,{const1,129} + Call {label,".dvi2"} yields de pat dvi $1==4 kills ALL -uses areg={const1,129} -gen Call {label,".dvi4"} +gen mvi a,{const1,129} + Call {label,".dvi4"} pat rmi $1==2 kills ALL -uses areg={const1,128} -gen Call {label,".dvi2"} yields de +gen mvi a,{const1,128} + Call {label,".dvi2"} yields de pat rmi $1==4 kills ALL -uses areg={const1,128} -gen Call {label,".dvi4"} +gen mvi a,{const1,128} + Call {label,".dvi4"} pat ngi $1==2 with hl_or_de @@ -738,7 +740,7 @@ pat loc sli ($1 == 8) && ($2 == 2) with hl_or_de gen move %1.2, %1.1 mvi %1.2, {const1,0} yields %1 - + pat sli $1==2 kills ALL gen Call {label,".sli2"} yields de @@ -749,13 +751,13 @@ gen Call {label,".sli4"} pat sri $1==2 kills ALL -uses areg={const1,1} -gen Call {label,".sri2"} yields de +gen mvi a,{const1,1} + Call {label,".sri2"} yields de pat sri $1==4 kills ALL -uses areg={const1,1} -gen Call {label,".sri4"} +gen mvi a,{const1,1} + Call {label,".sri4"} /********************************************/ /* Group 4: Unsigned arithmetic */ @@ -775,23 +777,23 @@ gen Call {label,".mli4"} pat dvu $1==2 kills ALL -uses 
areg={const1,1} -gen Call {label,".dvi2"} yields de +gen mvi a,{const1,1} + Call {label,".dvi2"} yields de pat dvu $1==4 kills ALL -uses areg={const1,1} -gen Call {label,".dvi4"} +gen mvi a,{const1,1} + Call {label,".dvi4"} pat rmu $1==2 kills ALL -uses areg={const1,0} -gen Call {label,".dvi2"} yields de +gen mvi a,{const1,0} + Call {label,".dvi2"} yields de pat rmu $1==4 kills ALL -uses areg={const1,0} -gen Call {label,".dvi4"} +gen mvi a,{const1,0} + Call {label,".dvi4"} pat slu leaving sli $1 @@ -799,16 +801,16 @@ pat loc sru ($1 == 8) && ($2 == 2) with hl_or_de gen move %1.1, %1.2 mvi %1.1, {const1,0} yields %1 - + pat sru $1==2 kills ALL -uses areg={const1,0} -gen Call {label,".sri2"} yields de +gen mvi a,{const1,0} + Call {label,".sri2"} yields de pat sru $1==4 kills ALL -uses areg={const1,0} -gen Call {label,".sri4"} +gen mvi a,{const1,0} + Call {label,".sri4"} /********************************************/ @@ -1047,8 +1049,8 @@ with hlreg pat cii kills ALL -uses areg={const1,1} -gen Call {label,".cii"} +gen mvi a,{const1,1} + Call {label,".cii"} pat loc loc ciu leaving loc $1 loc $2 cuu pat loc loc cui leaving loc $1 loc $2 cuu @@ -1081,8 +1083,8 @@ with hl_or_de pat cuu kills ALL -uses areg={const1,0} -gen Call {label,".cii"} +gen mvi a,{const1,0} + Call {label,".cii"} pat cfi kills ALL @@ -1128,8 +1130,8 @@ gen mov a,%1.2 pat and defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".and"} +gen lxi de,{const2,$1} + Call {label,".and"} pat and !defined($1) with dereg @@ -1156,8 +1158,8 @@ gen mov a,%1.2 pat ior defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".ior"} +gen lxi de,{const2,$1} + Call {label,".ior"} pat ior !defined($1) with dereg @@ -1184,8 +1186,8 @@ gen mov a,%1.2 pat xor defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".xor"} +gen lxi de,{const2,$1} + Call {label,".xor"} pat xor !defined($1) with dereg @@ -1204,8 +1206,8 @@ gen mov a,%1.2 pat com defined($1) kills ALL -uses dereg={const2,$1} -gen Call 
{label,".com"} +gen lxi de,{const2,$1} + Call {label,".com"} pat com !defined($1) with dereg @@ -1269,8 +1271,8 @@ gen Call {label,".inn2"} yields de pat inn defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".inn"} yields de +gen lxi de,{const2,$1} + Call {label,".inn"} yields de pat inn !defined($1) with dereg @@ -1284,8 +1286,8 @@ gen Call {label,".set2"} yields de pat set defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".set"} +gen lxi de,{const2,$1} + Call {label,".set"} pat set !defined($1) with dereg @@ -1402,8 +1404,8 @@ pat cmi $1==2 leaving sbi 2 pat cmi $1==4 kills ALL -uses areg={const1,1} -gen Call {label,".cmi4"} yields de +gen mvi a,{const1,1} + Call {label,".cmi4"} yields de pat cmf $1==4 kills ALL @@ -1412,14 +1414,14 @@ gen Call {label,".cmf4"} pat cmf $1==8 kills ALL gen Call {label,".cmf8"} - + pat cmu $1==2 with hl_or_de hl_or_de uses areg gen mov a,%2.1 cmp %1.1 jz {label,2f} - jc {label,1f} + jc {label,1f} 0: lxi %2,{const2,1} jmp {label,3f} @@ -1436,15 +1438,15 @@ gen mov a,%2.1 pat cmu $1==4 kills ALL -uses areg={const1,0} -gen Call {label,".cmi4"} yields de +gen mvi a,{const1,0} + Call {label,".cmi4"} yields de pat cms $1==2 leaving cmi 2 pat cms defined($1) kills ALL -uses dereg={const2,$1} -gen Call {label,".cms"} yields de +gen lxi de,{const2,$1} + Call {label,".cms"} yields de pat cms !defined($1) with dereg @@ -1936,8 +1938,8 @@ gen dad sp pat blm kills ALL -uses dereg={const2,$1} -gen Call {label,".blm"} +gen lxi de,{const2,$1} + Call {label,".blm"} pat bls with dereg @@ -1962,8 +1964,8 @@ with src1or2 src1or2 yields %2 %1 %2 %1 pat dup kills ALL -uses dereg={const2,$1} -gen Call {label,".dup"} +gen lxi de,{const2,$1} + Call {label,".dup"} pat dus $1==2 with dereg @@ -1975,8 +1977,8 @@ with src1or2 src1or2 yields %1 %2 pat exg defined($1) kills ALL -uses dereg={const2,1} -gen Call {label,".exg"} +gen lxi de,{const2,$1} + Call {label,".exg"} pat fil uses hlreg={label,$1} From 
c95bcac91de056e6b804fabb22b15caec62fbee0 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 7 Dec 2017 15:39:41 -0500 Subject: [PATCH 03/55] Correct the stack pointer when i80 shrinks an integer. The code used `sphl` to set the stack pointer, but the correct value was in de, not hl. Fix by swapping the values of de and hl, so `sphl` is now correct. When we shrink an integer from 4 to 2 bytes, both registers de and hl point to copies of the result, but only one register preserves the stack below the result. This fixes writehex() in tests/plat/lib/test.c, when I compile it with ack -mcpm, so it preserves the pointer to "0123456789abcdef", so it writes hexadecimal digits and not garbage. This bug goes back to commit 157b243 of Mar 18, 1985, so the bug is 32 years old, and probably the oldest bug that I ever fixed. --- mach/i80/libem/cii.s | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mach/i80/libem/cii.s b/mach/i80/libem/cii.s index 7d091da5e..bf4e7efb8 100644 --- a/mach/i80/libem/cii.s +++ b/mach/i80/libem/cii.s @@ -65,19 +65,19 @@ jmp 3f ! done !if destination size < source size only: -shrink: mov l,c ! load source size in hl +shrink: mov l,b ! load destination size in hl mvi h,0 dad sp mov d,h - mov e,l ! de points just above source - mov l,b ! load destination size in hl + mov e,l ! de points just above lowest bytes of source + mov l,c ! load source size in hl mvi h,0 dad sp ! hl points just above "destination" 1: dcx d ! move upwards dcx h - mov a,m - stax d + ldax d + mov m,a dcr b jnz 1b sphl From a1d1f3869151c8022578d46e41969f95baec94a4 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 7 Dec 2017 17:16:21 -0500 Subject: [PATCH 04/55] Add test for EM rol, ror. Fix i80, i86, powerpc. EM instructions _rol_ and _ror_ do rotate an integer left or right. Our compilers and optimizers never emit _rol_ nor _ror_, but I might want to use them in the future. Add _rol_ and _ror_ to powerpc. 
Fix `rol 4` and `ror 4` in both i80 and i86, where the rules for `rol 4` and `ror 4` seem to have never been tested until now. --- mach/i80/libem/rol4.s | 4 +- mach/i80/libem/ror4.s | 4 +- mach/i86/ncg/table | 4 +- mach/powerpc/ncg/table | 37 +++++++ tests/plat/build.lua | 3 +- tests/plat/rotate_e.e | 219 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 264 insertions(+), 7 deletions(-) create mode 100644 tests/plat/rotate_e.e diff --git a/mach/i80/libem/rol4.s b/mach/i80/libem/rol4.s index e5bb1a83d..41219ea0c 100644 --- a/mach/i80/libem/rol4.s +++ b/mach/i80/libem/rol4.s @@ -25,8 +25,8 @@ mov e,a mov a,b - ral -1: mov a,l +1: ral + mov a,l ral mov l,a mov a,h diff --git a/mach/i80/libem/ror4.s b/mach/i80/libem/ror4.s index e77d8a74c..490c75abc 100644 --- a/mach/i80/libem/ror4.s +++ b/mach/i80/libem/ror4.s @@ -25,8 +25,8 @@ mov e,a mov a,l - rar -1: mov a,b +1: rar + mov a,b rar mov b,a mov a,c diff --git a/mach/i86/ncg/table b/mach/i86/ncg/table index ffbd7101e..ce2ac7b87 100644 --- a/mach/i86/ncg/table +++ b/mach/i86/ncg/table @@ -2292,7 +2292,7 @@ with CXREG REG REG rcl %3,{ANYCON,1} adc %2,{ANYCON,0} loop {label, 2b} - 1: + 1: yields %3 %2 pat loc ror $1==1 && $2==2 with REG @@ -2311,7 +2311,7 @@ with CXREG REG REG rcl %3,{ANYCON,1} adc %2,{ANYCON,0} loop {label, 2b} - 1: + 1: yields %3 %2 /******************************************************************* * Group 10 : Set Instructions * diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 62e8f62af..851578f64 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -307,7 +307,11 @@ INSTRUCTIONS rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro. extlwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. extrwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. + rotlwi GPR:wo, GPR:ro, CONST:ro. + rotrwi GPR:wo, GPR:ro, CONST:ro. srwi GPR:wo, GPR:ro, CONST:ro. + rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro. + rotlw GPR:wo, GPR:ro, GPR:ro. slw GPR:wo, GPR:ro, GPR:ro. subf GPR:wo, GPR:ro, GPR:ro. 
sraw GPR:wo, GPR:ro, GPR:ro cost(4, 2). @@ -1232,6 +1236,9 @@ PATTERNS subf %a, %a, %2 yields %a + +/* Bitwise logic */ + pat and $1==4 /* AND word */ with REG NOT_R uses reusing %1, REG @@ -1381,6 +1388,9 @@ PATTERNS loc $1 cal ".zer" + +/* Shifts and rotations */ + pat sli $1==4 /* Shift left (second << top) */ with CONST_STACK REG uses reusing %2, REG @@ -1417,6 +1427,33 @@ PATTERNS srw %a, %2, %1 yields %a + pat rol $1==4 /* Rotate left word */ + with CONST_STACK REG + uses reusing %2, REG + gen rotlwi %a, %2, {CONST, %1.val & 0x1F} + yields %a + with REG REG + uses reusing %2, REG + gen rotlw %a, %2, %1 + yields %a + + /* + * ror 4 -> ngi 4, rol 4 + * because to rotate right by n bits is to rotate left by + * (32 - n), which is to rotate left by -n. PowerPC rotlw + * handles -n as (-n & 0x1F). + */ + + pat ror $1==4 /* Rotate right word */ + with CONST_STACK REG + uses reusing %2, REG + gen rotrwi %a, %2, {CONST, %1.val & 0x1F} + yields %a + with /* anything */ + leaving + ngi 4 + rol 4 + /* Arrays */ diff --git a/tests/plat/build.lua b/tests/plat/build.lua index 0d3091559..cbd39468e 100644 --- a/tests/plat/build.lua +++ b/tests/plat/build.lua @@ -10,7 +10,8 @@ definerule("plat_testsuite", -- target names will resolve there. local testfiles = filenamesof( "tests/plat/*.c", - "tests/plat/*.e", + "tests/plat/inn_e.e", + "tests/plat/rotate_e.e", "tests/plat/*.p", "tests/plat/b/*.b", "tests/plat/bugs/bug-22-inn_mod.mod", diff --git a/tests/plat/rotate_e.e b/tests/plat/rotate_e.e new file mode 100644 index 000000000..a6f8f28dd --- /dev/null +++ b/tests/plat/rotate_e.e @@ -0,0 +1,219 @@ +# + mes 2, EM_WSIZE, EM_PSIZE + +/* + * Test _rol_ (rotate left) and _ror_ (rotate right). + * + * By tradition, _rol_ and _ror_ can't rotate values shorter than the + * word size, or longer than 4 bytes. + * - If word size is 2, then try rotating 2-byte and 4-byte values. + * - If word size is 4, then try rotating 4-byte values. 
+ */ + +#if EM_WSIZE == 2 +#define LEN2 4 + exa table2 + exa left2 + exa right2 +table2 /* left, right */ + con 12715U2 /* 0, 0 */ + con 25430U2 /* 1, 15 */ + con 43825U2 /* 8, 8 */ + con 39125U2 /* 15, 1 */ +left2 + con 0I2, 1I2, 8I2, 15I2 +right2 + con 0I2, 15I2, 8I2, 1I2 +#endif + +#define LEN4 4 + exa table4 + exa left4 + exa right4 +table4 /* left, right */ + con 437223536U4 /* 0, 0 */ + con 874447072U4 /* 1, 31 */ + con 2154830351U4 /* 16, 16 */ + con 218611768U4 /* 31, 1 */ +left4 + con 0I2, 1I2, 16I2, 31I2 +right4 + con 0I2, 31I2, 16I2, 1I2 + + exa val4 + exa val4left7 + exa val4right11 +val4 + con 4283808839U4 +val4left7 + con 2866684927U4 +val4right11 + con 2298473143U4 + + exp $_m_a_i_n + pro $_m_a_i_n, EM_WSIZE +#define i -EM_WSIZE + +#if EM_WSIZE == 2 + /* + * Loop for LEN2 items in table2. + */ + loc 0 + stl i +1 + lae table2 + loi 2 /* value to rotate */ + lae left2 + lol i + loc 1 + sli EM_WSIZE + ads EM_WSIZE + loi 2 /* left distance */ + rol 2 /* rotate left */ + lae table2 + lol i + loc 1 + sli EM_WSIZE + ads EM_WSIZE + loi 2 /* expected result */ + cmu 2 + zeq *2 + loc __LINE__ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +2 + lae table2 + loi 2 /* value to rotate */ + lae right2 + lol i + loc 1 + sli EM_WSIZE + ads EM_WSIZE + loi 2 /* right distance */ + ror 2 /* rotate right */ + lae table2 + lol i + loc 1 + sli EM_WSIZE + ads EM_WSIZE + loi 2 /* expected result */ + cmu 2 + zeq *3 + loc __LINE__ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +3 + inl i /* loop LEN2 times */ + lol i + loc LEN2 + blt *1 +#endif /* EM_WSIZE == 2 */ + + /* + * Loop for LEN4 items in table4. 
+ */ + loc 0 + stl i +4 + lae table4 + loi 4 /* value to rotate */ + lae left4 + lol i + loc 1 + sli EM_WSIZE + ads EM_WSIZE + loi 2 /* left distance */ + loc 2 + loc EM_WSIZE + cii + rol 4 /* rotate left */ + lae table4 + lol i + loc 2 + sli EM_WSIZE + ads EM_WSIZE + loi 4 /* expected result */ + cmu 4 + zeq *5 + loc __LINE__ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +5 + lae table4 + loi 4 /* value to rotate */ + lae right4 + lol i + loc 1 + sli EM_WSIZE + ads EM_WSIZE + loi 2 /* right distance */ + loc 2 + loc EM_WSIZE + cii + ror 4 /* rotate right */ + lae table4 + lol i + loc 2 + sli EM_WSIZE + ads EM_WSIZE + loi 4 /* expected result */ + cmu 4 + zeq *6 + loc __LINE__ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +6 + inl i /* loop LEN4 times */ + lol i + loc LEN4 + blt *4 + + /* + * Rotate 4-byte values by a constant distance, because this uses + * different rules in PowerPC ncg. + */ + lae val4 + loi 4 + loc 7 + rol 4 /* rotate left by 7 bits */ + lae val4left7 + loi 4 + cmu 4 + zeq *7 + loc __LINE__ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +7 + lae val4 + loi 4 + loc 11 + ror 4 /* rotate right by 11 bits */ + lae val4right11 + loi 4 + cmu 4 + zeq *8 + loc __LINE__ + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +8 + + cal $finished + end From 6b933db90b93822f2817a78dd017d909f2e100aa Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 7 Dec 2017 19:24:09 -0500 Subject: [PATCH 05/55] Split C from CONST. Rename token CONST to C. Define set CONST = C + CONST_STACK. The instructions with CONST operands can now accept CONST_STACK tokens; some cases of {CONST, %1.val} become %1. Also simplify two of _rlwinm_ into _slwi_ and _srwi_. 
--- mach/powerpc/ncg/table | 151 ++++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 77 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 851578f64..251c83e71 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -96,7 +96,7 @@ TOKENS /* Primitives */ - CONST = { INT val; } 4 val. + C /* constant */ = { INT val; } 4 val. LABEL = { ADDR adr; } 4 adr. LABEL_HI = { ADDR adr; } 4 "hi16[" adr "]". LABEL_HA = { ADDR adr; } 4 "ha16[" adr "]". @@ -112,13 +112,13 @@ TOKENS /* Constants on the stack */ - CONST_N8000 = { INT val; } 4. - CONST_N7FFF_N0001 = { INT val; } 4. - CONST_0000_7FFF = { INT val; } 4. - CONST_8000 = { INT val; } 4. - CONST_8001_FFFF = { INT val; } 4. - CONST_HZ = { INT val; } 4. - CONST_HL = { INT val; } 4. + CONST_N8000 = { INT val; } 4 val. + CONST_N7FFF_N0001 = { INT val; } 4 val. + CONST_0000_7FFF = { INT val; } 4 val. + CONST_8000 = { INT val; } 4 val. + CONST_8001_FFFF = { INT val; } 4 val. + CONST_HZ = { INT val; } 4 val. + CONST_HL = { INT val; } 4 val. /* Expression partial results */ @@ -183,6 +183,8 @@ SETS CONST_STACK = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF + CONST_HZ + CONST_HL. + CONST = C + CONST_STACK. + SUM_ALL = SUM_RC + SUM_RL + SUM_RR. SEX_ALL = SEX_B + SEX_H. @@ -309,6 +311,7 @@ INSTRUCTIONS extrwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. rotlwi GPR:wo, GPR:ro, CONST:ro. rotrwi GPR:wo, GPR:ro, CONST:ro. + slwi GPR:wo, GPR:ro, CONST:ro. srwi GPR:wo, GPR:ro, CONST:ro. rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro. rotlw GPR:wo, GPR:ro, GPR:ro. 
@@ -351,22 +354,22 @@ MOVES /* Constants */ - from CONST + CONST_STACK smalls(%val) to GPR + from CONST smalls(%val) to GPR gen COMMENT("move CONST->GPR smalls") - li %2, {CONST, %1.val} + li %2, %1 - from CONST + CONST_STACK lo(%val)==0 to GPR + from CONST lo(%val)==0 to GPR gen COMMENT("move CONST->GPR shifted") - lis %2, {CONST, hi(%1.val)} + lis %2, {C, hi(%1.val)} - from CONST + CONST_STACK to GPR + from CONST to GPR gen COMMENT("move CONST->GPR") - lis %2, {CONST, hi(%1.val)} - ori %2, %2, {CONST, lo(%1.val)} - /* Can't use addi %2, %2, {CONST, los(%1.val)} + lis %2, {C, hi(%1.val)} + ori %2, %2, {C, lo(%1.val)} + /* Can't use addi %2, %2, {C, los(%1.val)} * because %2 might be R0. */ from LABEL to GPR @@ -389,10 +392,10 @@ MOVES /* Register + something */ from SUM_RIS to GPR - gen addis %2, %1.reg, {CONST, %1.offhi} + gen addis %2, %1.reg, {C, %1.offhi} from SUM_RC to GPR - gen addi %2, %1.reg, {CONST, %1.off} + gen addi %2, %1.reg, {C, %1.off} from SUM_RL to GPR gen addi %2, %1.reg, {LABEL_LO, %1.adr} @@ -494,19 +497,19 @@ MOVES gen or %2, %1.reg1, %1.reg2 from OR_RIS to GPR - gen oris %2, %1.reg, {CONST, %1.valhi} + gen oris %2, %1.reg, {C, %1.valhi} from OR_RC to GPR - gen ori %2, %1.reg, {CONST, %1.val} + gen ori %2, %1.reg, {C, %1.val} from XOR_RR to GPR gen xor %2, %1.reg1, %1.reg2 from XOR_RIS to GPR - gen xoris %2, %1.reg, {CONST, %1.valhi} + gen xoris %2, %1.reg, {C, %1.valhi} from XOR_RC to GPR - gen xori %2, %1.reg, {CONST, %1.val} + gen xori %2, %1.reg, {C, %1.val} /* Conditions */ @@ -514,7 +517,7 @@ MOVES from COND_RC to GPR gen - cmpwi %1.reg, {CONST, %1.val} + cmpwi %1.reg, {C, %1.val} mfcr %2 from COND_RR to GPR @@ -524,7 +527,7 @@ MOVES from CONDL_RC to GPR gen - cmplwi %1.reg, {CONST, %1.val} + cmplwi %1.reg, {C, %1.val} mfcr %2 from CONDL_RR to GPR @@ -548,30 +551,30 @@ MOVES from XEQ to GPR gen - extrwi %2, %1.reg, {CONST, 1}, {CONST, 2} + extrwi %2, %1.reg, {C, 1}, {C, 2} from XNE to GPR gen - extrwi %2, %1.reg, {CONST, 1}, {CONST, 2} - 
xori %2, %2, {CONST, 1} + extrwi %2, %1.reg, {C, 1}, {C, 2} + xori %2, %2, {C, 1} from XGT to GPR gen - extrwi %2, %1.reg, {CONST, 1}, {CONST, 1} + extrwi %2, %1.reg, {C, 1}, {C, 1} from XGE to GPR gen - extrwi %2, %1.reg, {CONST, 1}, {CONST, 0} - xori %2, %2, {CONST, 1} + extrwi %2, %1.reg, {C, 1}, {C, 0} + xori %2, %2, {C, 1} from XLT to GPR gen - extrwi %2, %1.reg, {CONST, 1}, {CONST, 0} + extrwi %2, %1.reg, {C, 1}, {C, 0} from XLE to GPR gen - extrwi %2, %1.reg, {CONST, 1}, {CONST, 1} - xori %2, %2, {CONST, 1} + extrwi %2, %1.reg, {C, 1}, {C, 1} + xori %2, %2, {C, 1} /* GPR_EXPR exists solely to allow us to use regvar() (which can only be used in an expression) as a register constant. We can then use @@ -649,7 +652,7 @@ COERCIONS gen COMMENT("coerce STACK->REG") lwz %a, {IND_RC_W, sp, 0} - addi sp, sp, {CONST, 4} + addi sp, sp, {C, 4} yields %a from STACK @@ -657,7 +660,7 @@ COERCIONS gen COMMENT("coerce STACK->FREG") lfd %a, {IND_RC_D, sp, 0} - addi sp, sp, {CONST, 8} + addi sp, sp, {C, 8} yields %a from STACK @@ -665,7 +668,7 @@ COERCIONS gen COMMENT("coerce STACK->FSREG") lfs %a, {IND_RC_W, sp, 0} - addi sp, sp, {CONST, 4} + addi sp, sp, {C, 4} yields %a from ANY_BHW @@ -824,7 +827,7 @@ PATTERNS with STACK gen lwz {LOCAL, $1}, {IND_RC_W, sp, 0} - addi sp, sp, {CONST, 4} + addi sp, sp, {C, 4} pat stl inreg($1)==reg_float with exact FSREG+IND_ALL_W kills regvar_w($1, reg_float) @@ -832,7 +835,7 @@ PATTERNS with STACK gen lfs {LOCAL, $1}, {IND_RC_W, sp, 0} - addi sp, sp, {CONST, 4} + addi sp, sp, {C, 4} pat stl leaving lal $1 @@ -846,7 +849,7 @@ PATTERNS with STACK gen lfd {DLOCAL, $1}, {IND_RC_D, sp, 0} - addi sp, sp, {CONST, 8} + addi sp, sp, {C, 8} pat sdl leaving lal $1 @@ -1255,22 +1258,22 @@ PATTERNS with REG UCONST2 uses reusing %1, REG gen - andiX %a, %1, {CONST, %2.val} + andiX %a, %1, %2 yields %a with UCONST2 REG uses reusing %2, REG gen - andiX %a, %2, {CONST, %1.val} + andiX %a, %2, %1 yields %a with REG CONST_HZ uses reusing %1, REG gen - andisX 
%a, %1, {CONST, hi(%2.val)} + andisX %a, %1, {C, hi(%2.val)} yields %a with CONST_HZ REG uses reusing %2, REG gen - andisX %a, %2, {CONST, hi(%1.val)} + andisX %a, %2, {C, hi(%1.val)} yields %a pat and defined($1) /* AND set */ @@ -1394,43 +1397,37 @@ PATTERNS pat sli $1==4 /* Shift left (second << top) */ with CONST_STACK REG uses reusing %2, REG - gen - rlwinm %a, %2, {CONST, (%1.val & 0x1F)}, {CONST, 0}, {CONST, 31-(%1.val & 0x1F)} + gen slwi %a, %2, {C, %1.val & 0x1F} yields %a with REG REG uses reusing %2, REG - gen - slw %a, %2, %1 + gen slw %a, %2, %1 yields %a pat sri $1==4 /* Shift right signed (second >> top) */ with CONST_STACK REG uses reusing %2, REG - gen - srawi %a, %2, {CONST, %1.val & 0x1F} + gen srawi %a, %2, {C, %1.val & 0x1F} yields %a with REG REG uses reusing %2, REG - gen - sraw %a, %2, %1 + gen sraw %a, %2, %1 yields %a pat sru $1==4 /* Shift right unsigned (second >> top) */ with CONST_STACK REG uses reusing %2, REG - gen - rlwinm %a, %2, {CONST, 32-(%1.val & 0x1F)}, {CONST, (%1.val & 0x1F)}, {CONST, 31} + gen srwi %a, %2, {C, %1.val & 0x1F} yields %a with REG REG uses reusing %2, REG - gen - srw %a, %2, %1 + gen srw %a, %2, %1 yields %a pat rol $1==4 /* Rotate left word */ with CONST_STACK REG uses reusing %2, REG - gen rotlwi %a, %2, {CONST, %1.val & 0x1F} + gen rotlwi %a, %2, {C, %1.val & 0x1F} yields %a with REG REG uses reusing %2, REG @@ -1447,7 +1444,7 @@ PATTERNS pat ror $1==4 /* Rotate right word */ with CONST_STACK REG uses reusing %2, REG - gen rotrwi %a, %2, {CONST, %1.val & 0x1F} + gen rotrwi %a, %2, {C, %1.val & 0x1F} yields %a with /* anything */ leaving @@ -1761,11 +1758,11 @@ PATTERNS proc bxx example beq with REG CONST2 STACK gen - cmpwi %1, {CONST, %2.val} + cmpwi %1, %2 bxx[2] {LABEL, $1} with CONST2 REG STACK gen - cmpwi %2, {CONST, %1.val} + cmpwi %2, %1 bxx[1] {LABEL, $1} with REG REG STACK gen @@ -1783,11 +1780,11 @@ PATTERNS proc cmu4zxx example cmu zeq with REG CONST2 STACK gen - cmplwi %1, {CONST, %2.val} + cmplwi 
%1, %2 bxx[2] {LABEL, $2} with CONST2 REG STACK gen - cmplwi %2, {CONST, %1.val} + cmplwi %2, %1 bxx[1] {LABEL, $2} with REG REG STACK gen @@ -1816,29 +1813,29 @@ PATTERNS pat cmi $1==INT32 /* Signed tristate compare */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen rlwinm %a, %a, {CONST, 1}, {CONST, 31}, {CONST, 0} + gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0} yields %a with CONST2 REG uses reusing %2, REG={COND_RC, %2, %1.val} - gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + gen extlwi %a, %a, {C, 2}, {C, 0} yields %a with REG REG uses reusing %1, REG={COND_RR, %2, %1} - gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + gen extlwi %a, %a, {C, 2}, {C, 0} yields %a pat cmu $1==INT32 /* Unsigned tristate compare */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen rlwinm %a, %a, {CONST, 1}, {CONST, 31}, {CONST, 0} + gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0} yields %a with UCONST2 REG uses reusing %2, REG={CONDL_RC, %2, %1.val} - gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + gen extlwi %a, %a, {C, 2}, {C, 0} yields %a with REG REG uses reusing %1, REG={CONDL_RR, %2, %1} - gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + gen extlwi %a, %a, {C, 2}, {C, 0} yields %a pat cmp /* Compare pointers */ @@ -1921,7 +1918,7 @@ PATTERNS mtspr lr, r0 lwz r0, {IND_RC_W, fp, 0} /* Free our stack frame. */ - addi sp, fp, {CONST, 8} + addi sp, fp, {C, 8} mr fp, r0 blr. 
@@ -1949,10 +1946,10 @@ PATTERNS /* ( src%3 dst%2 len%1 -- ) */ uses reusing %1, REG, REG, REG gen - srwi %a, %1, {CONST, 2} + srwi %a, %1, {C, 2} mtspr ctr, %a - addi %b, %3, {CONST, 0-4} - addi %c, %2, {CONST, 0-4} + addi %b, %3, {C, 0-4} + addi %c, %2, {C, 0-4} 1: lwzu %a, {IND_RC_W, %b, 4} stwu %a, {IND_RC_W, %c, 4} bdnz {LABEL, "1b"} @@ -2084,7 +2081,7 @@ PATTERNS /* nop */ with STACK gen - addi sp, sp, {CONST, 4} + addi sp, sp, {C, 4} pat ass $1==4 /* Adjust stack by variable amount */ with CONST2 STACK @@ -2110,9 +2107,9 @@ PATTERNS with REG kills ALL gen - cmpwi %1, {CONST, rom($1, 1)} + cmpwi %1, {C, rom($1, 1)} blt {LABEL, ".trap_erange"} - cmpwi %1, {CONST, rom($1, 2)} + cmpwi %1, {C, rom($1, 2)} bgt {LABEL, ".trap_erange"} yields %1 @@ -2176,7 +2173,7 @@ PATTERNS pat cmf $1==INT32 /* Compare single */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + gen extlwi %a, %a, {C, 2}, {C, 0} yields %a pat cmf teq $1==4 /* Single second == top */ @@ -2334,7 +2331,7 @@ PATTERNS pat cmf $1==INT64 /* Compare double */ with FREG FREG uses REG={COND_FD, %2, %1} - gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + gen extlwi %a, %a, {C, 2}, {C, 0} yields %a pat cmf teq $1==8 /* Double second == top */ @@ -2402,7 +2399,7 @@ PATTERNS gen fctiwz %a, %1 stfdu %a, {IND_RC_D, sp, 0-8} - addi sp, sp, {CONST, 4} + addi sp, sp, {C, 4} /* Convert double to unsigned int */ pat loc loc cfu $1==8 && $2==4 From 48788287b80ad378bb638fd10f9ffcc68c256585 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 8 Dec 2017 17:19:26 -0500 Subject: [PATCH 06/55] Add more chances to put results in register variables. When a rule `uses REG ... yields %a`, the result %a is always a temporary, never a regvar. If the EM code uses _stl_ to put the result in a regvar, then ncg emits _mr_ to move %a to the regvar. There are two ways to put the result in the regvar without %a: 1. 
Yield a token, as in `yields {MUL_RR, %2, %1}`, so that _stl_ can move the token to the regvar without using %a. 2. Provide a pattern, like `sli stl`, that just puts the result in `{LOCAL, $2}` and not %a. Allow some tokens, like SUM_RIS and XEQ, onto the stack; and add tokens like MUL_RR, and patterns like `sli stl`. Delete patterns for `stl lol` and `sdl ldl` to avoid an extra temporary %a when the local is a regvar. Delete `lal sti lal loi` because it would emit wrong code. --- mach/powerpc/ncg/table | 476 +++++++++++++++++++---------------------- 1 file changed, 215 insertions(+), 261 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 251c83e71..5768c4382 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -122,13 +122,19 @@ TOKENS /* Expression partial results */ + SEX_B = { GPR reg; } 4. /* sign extension */ + SEX_H = { GPR reg; } 4. + SUM_RIS = { GPR reg; INT offhi; } 4. /* reg + (offhi << 16) */ SUM_RC = { GPR reg; INT off; } 4. /* reg + off */ SUM_RL = { GPR reg; ADDR adr; } 4. /* reg + lo16[adr] */ SUM_RR = { GPR reg1; GPR reg2; } 4. /* reg1 + reg2 */ - SEX_B = { GPR reg; } 4. - SEX_H = { GPR reg; } 4. + SUB_RR = { GPR reg1; GPR reg2; } 4. /* reg1 - reg2 */ + NEG_R = { GPR reg; } 4. /* -reg */ + MUL_RR = { GPR reg1; GPR reg2; } 4. /* reg1 * reg2 */ + DIV_RR = { GPR reg1; GPR reg2; } 4. /* reg1 / reg2 signed */ + DIV_RR_U = { GPR reg1; GPR reg2; } 4. /* reg1 / reg2 unsigned */ IND_RC_B = { GPR reg; INT off; } 4 off "(" reg ")". IND_RL_B = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")". @@ -146,15 +152,21 @@ TOKENS IND_RL_D = { GPR reg; ADDR adr; } 8 "lo16[" adr "](" reg ")". IND_RR_D = { GPR reg1; GPR reg2; } 8. - NOT_R = { GPR reg; } 4. - - AND_RR = { GPR reg1; GPR reg2; } 4. - OR_RR = { GPR reg1; GPR reg2; } 4. - OR_RIS = { GPR reg; INT valhi; } 4. - OR_RC = { GPR reg; INT val; } 4. - XOR_RR = { GPR reg1; GPR reg2; } 4. - XOR_RIS = { GPR reg; INT valhi; } 4. - XOR_RC = { GPR reg; INT val; } 4. 
+ NOT_R = { GPR reg; } 4. /* ~reg */ + AND_RIS = { GPR reg; INT valhi; } 4. + AND_RC = { GPR reg; INT val; } 4. + AND_RR = { GPR reg1; GPR reg2; } 4. + ANDC_RR = { GPR reg1; GPR reg2; } 4. /* reg1 & ~reg2 */ + OR_RIS = { GPR reg; INT valhi; } 4. + OR_RC = { GPR reg; INT val; } 4. + OR_RR = { GPR reg1; GPR reg2; } 4. + ORC_RR = { GPR reg1; GPR reg2; } 4. /* reg1 | ~reg2 */ + XOR_RIS = { GPR reg; INT valhi; } 4. + XOR_RC = { GPR reg; INT val; } 4. + XOR_RR = { GPR reg1; GPR reg2; } 4. + NAND_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 & reg2) */ + NOR_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 | reg2) */ + EQV_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 ^ reg2) */ COND_RC = { GPR reg; INT val; } 4. COND_RR = { GPR reg1; GPR reg2; } 4. @@ -185,13 +197,6 @@ SETS CONST = C + CONST_STACK. - SUM_ALL = SUM_RC + SUM_RL + SUM_RR. - - SEX_ALL = SEX_B + SEX_H. - - LOGICAL_ALL = NOT_R + AND_RR + OR_RR + OR_RC + XOR_RR + - XOR_RC. - IND_ALL_B = IND_RC_B + IND_RL_B + IND_RR_B. IND_ALL_H = IND_RC_H + IND_RL_H + IND_RR_H + IND_RC_H_S + IND_RL_H_S + IND_RR_H_S. @@ -203,8 +208,14 @@ SETS MEMORY = IND_ALL_BHW + IND_ALL_D. /* any stack token that we can easily move to GPR */ - ANY_BHW = REG + CONST_STACK + SEX_ALL + - SUM_ALL + IND_ALL_BHW + LOGICAL_ALL. + ANY_BHW = REG + CONST_STACK + SEX_B + SEX_H + + SUM_RIS + SUM_RC + SUM_RL + SUM_RR + + SUB_RR + NEG_R + MUL_RR + DIV_RR + DIV_RR_U + + IND_ALL_BHW + + NOT_R + AND_RIS + AND_RC + AND_RR + ANDC_RR + + OR_RIS + OR_RC + OR_RR + ORC_RR + + XOR_RIS + XOR_RC + XOR_RR + NAND_RR + NOR_RR + EQV_RR + + XEQ + XNE + XGT + XGE + XLT + XLE. INSTRUCTIONS @@ -274,7 +285,7 @@ INSTRUCTIONS fmuls FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5). fneg FREG+DLOCAL:wo, FREG:ro cost(4, 5). fneg FSREG+LOCAL:wo, FSREG:ro cost(4, 5). - frsp FSREG:wo, FREG:ro cost(4, 5). + frsp FSREG+LOCAL:wo, FREG:ro cost(4, 5). fsub FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5). fsubs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5). lbz GPR:wo, IND_RC_B+IND_RL_B:ro cost(4, 3). 
@@ -292,13 +303,13 @@ INSTRUCTIONS lwzu GPR:wo, IND_RC_W:rw cost(4, 3). lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). lwz GPR+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 3). + mfcr GPR:wo cost(4,2). + mfspr GPR:wo, SPR:ro cost(4, 3). + mtspr SPR:wo, GPR:ro cost(4, 2). + mullw GPR:wo, GPR:ro, GPR:ro cost(4, 4). nand GPR:wo, GPR:ro, GPR:ro. neg GPR:wo, GPR:ro. nor GPR:wo, GPR:ro, GPR:ro. - mfcr GPR:wo cost(4,2). - mullw GPR:wo, GPR:ro, GPR:ro cost(4, 4). - mfspr GPR:wo, SPR:ro cost(4, 3). - mtspr SPR:wo, GPR:ro cost(4, 2). or GPR:wo, GPR:ro, GPR:ro. mr GPR:wo, GPR:ro. orX "or." GPR:wo:cc, GPR:ro, GPR:ro. @@ -309,17 +320,17 @@ INSTRUCTIONS rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro. extlwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. extrwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. - rotlwi GPR:wo, GPR:ro, CONST:ro. - rotrwi GPR:wo, GPR:ro, CONST:ro. - slwi GPR:wo, GPR:ro, CONST:ro. - srwi GPR:wo, GPR:ro, CONST:ro. + rotlwi GPR+LOCAL:wo, GPR:ro, CONST:ro. + rotrwi GPR+LOCAL:wo, GPR:ro, CONST:ro. + slwi GPR+LOCAL:wo, GPR:ro, CONST:ro. + srwi GPR+LOCAL:wo, GPR:ro, CONST:ro. rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro. - rotlw GPR:wo, GPR:ro, GPR:ro. - slw GPR:wo, GPR:ro, GPR:ro. + rotlw GPR+LOCAL:wo, GPR:ro, GPR:ro. + slw GPR+LOCAL:wo, GPR:ro, GPR:ro. subf GPR:wo, GPR:ro, GPR:ro. - sraw GPR:wo, GPR:ro, GPR:ro cost(4, 2). - srawi GPR:wo, GPR:ro, CONST:ro cost(4, 2). - srw GPR:wo, GPR:ro, GPR:ro. + sraw GPR+LOCAL:wo, GPR:ro, GPR:ro cost(4, 2). + srawi GPR+LOCAL:wo, GPR:ro, CONST:ro cost(4, 2). + srw GPR+LOCAL:wo, GPR:ro, GPR:ro. stb GPR:ro, IND_RC_B+IND_RL_B:rw cost(4, 3). stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3). stfd FPR:ro, IND_RC_D+IND_RL_D:rw cost(4, 4). 
@@ -403,6 +414,24 @@ MOVES from SUM_RR to GPR gen add %2, %1.reg1, %1.reg2 +/* Other arithmetic */ + + from SUB_RR to GPR + /* reg1 - reg2 -> subtract reg2 from reg1 */ + gen subf %2, %1.reg2, %1.reg1 + + from NEG_R to GPR + gen neg %2, %1.reg + + from MUL_RR to GPR + gen mullw %2, %1.reg1, %1.reg2 + + from DIV_RR to GPR + gen divw %2, %1.reg1, %1.reg2 + + from DIV_RR_U to GPR + gen divwu %2, %1.reg1, %1.reg2 + /* Read byte */ from IND_RC_B+IND_RL_B to GPR @@ -490,11 +519,17 @@ MOVES from NOT_R to GPR gen nor %2, %1.reg, %1.reg + from AND_RIS to GPR + gen andisX %2, %1.reg, {C, %1.valhi} + + from AND_RC to GPR + gen andiX %2, %1.reg, {C, %1.val} + from AND_RR to GPR gen and %2, %1.reg1, %1.reg2 - from OR_RR to GPR - gen or %2, %1.reg1, %1.reg2 + from ANDC_RR to GPR + gen andc %2, %1.reg1, %1.reg2 from OR_RIS to GPR gen oris %2, %1.reg, {C, %1.valhi} @@ -502,8 +537,11 @@ MOVES from OR_RC to GPR gen ori %2, %1.reg, {C, %1.val} - from XOR_RR to GPR - gen xor %2, %1.reg1, %1.reg2 + from OR_RR to GPR + gen or %2, %1.reg1, %1.reg2 + + from ORC_RR to GPR + gen orc %2, %1.reg1, %1.reg2 from XOR_RIS to GPR gen xoris %2, %1.reg, {C, %1.valhi} @@ -511,6 +549,18 @@ MOVES from XOR_RC to GPR gen xori %2, %1.reg, {C, %1.val} + from XOR_RR to GPR + gen xor %2, %1.reg1, %1.reg2 + + from NAND_RR to GPR + gen nand %2, %1.reg1, %1.reg2 + + from NOR_RR to GPR + gen nor %2, %1.reg1, %1.reg2 + + from EQV_RR to GPR + gen eqv %2, %1.reg1, %1.reg2 + /* Conditions */ /* Compare values, then copy cr0 to GPR. 
*/ @@ -739,22 +789,6 @@ PATTERNS with REG REG yields %1 %2 - pat stl lol $1==$2 /* Store then load local */ - leaving - dup 4 - stl $1 - - pat sdl ldl $1==$2 /* Store then load double local */ - leaving - dup 8 - sdl $1 - - pat lal sti lal loi $1==$3 && $2==$4 /* Store then load local, of a different size */ - leaving - dup INT32 - lal $1 - sti $2 - pat ste loe $1==$2 /* Store then load external */ leaving dup 4 @@ -1166,11 +1200,9 @@ PATTERNS with REG CONST2 yields {SUM_RC, %1, %2.val} with CONST_HZ REG - uses reusing %2, REG={SUM_RIS, %2, his(%1.val)} - yields %a + yields {SUM_RIS, %2, his(%1.val)} with REG CONST_HZ - uses reusing %1, REG={SUM_RIS, %1, his(%2.val)} - yields %a + yields {SUM_RIS, %1, his(%2.val)} with CONST_STACK-CONST2-CONST_HZ REG uses reusing %2, REG={SUM_RIS, %2, his(%1.val)} yields {SUM_RC, %a, los(%1.val)} @@ -1181,100 +1213,63 @@ PATTERNS pat sbi $1==4 /* Subtract word (second - top) */ with REG REG uses reusing %2, REG - gen - subf %a, %1, %2 - yields %a + yields {SUB_RR, %2, %1} with CONST2_WHEN_NEG REG yields {SUM_RC, %2, 0-%1.val} with CONST_HZ REG - uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} - yields %a + yields {SUM_RIS, %2, his(0-%1.val)} with CONST_STACK-CONST2_WHEN_NEG-CONST_HZ REG uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} yields {SUM_RC, %a, los(0-%1.val)} pat ngi $1==4 /* Negate word */ with REG - uses reusing %1, REG - gen - neg %a, %1 - yields %a + yields {NEG_R, %1} pat mli $1==4 /* Multiply word (second * top) */ with REG REG - uses reusing %2, REG - gen - mullw %a, %2, %1 - yields %a + yields {MUL_RR, %2, %1} pat dvi $1==4 /* Divide word (second / top) */ with REG REG - uses reusing %2, REG - gen - divw %a, %2, %1 - yields %a + yields {DIV_RR, %2, %1} - pat dvu $1==4 /* Divide unsigned word (second / top) */ + pat dvu $1==4 /* Divide unsigned word (second / top) */ with REG REG - uses reusing %2, REG - gen - divwu %a, %2, %1 - yields %a + yields {DIV_RR_U, %2, %1} + + /* To calculate a remainder: a % b = a - 
(a / b * b) */ pat rmi $1==4 /* Remainder word (second % top) */ with REG REG - uses REG - gen - divw %a, %2, %1 - mullw %a, %a, %1 - subf %a, %a, %2 - yields %a + uses REG={DIV_RR, %2, %1}, REG + gen move {MUL_RR, %a, %1}, %b + yields {SUB_RR, %2, %b} - pat rmu $1==4 /* Remainder unsigned word (second % top) */ + pat rmu $1==4 /* Remainder unsigned word (second % top) */ with REG REG - uses REG - gen - divwu %a, %2, %1 - mullw %a, %a, %1 - subf %a, %a, %2 - yields %a + uses REG={DIV_RR_U, %2, %1}, REG + gen move {MUL_RR, %a, %1}, %b + yields {SUB_RR, %2, %b} /* Bitwise logic */ pat and $1==4 /* AND word */ with REG NOT_R - uses reusing %1, REG - gen - andc %a, %1, %2.reg - yields %a + yields {ANDC_RR, %1, %2.reg} with NOT_R REG - uses reusing %1, REG - gen - andc %a, %2, %1.reg - yields %a + yields {ANDC_RR, %2, %1.reg} with REG REG yields {AND_RR, %1, %2} with REG UCONST2 - uses reusing %1, REG - gen - andiX %a, %1, %2 - yields %a + yields {AND_RC, %1, %2.val} with UCONST2 REG - uses reusing %2, REG - gen - andiX %a, %2, %1 - yields %a + yields {AND_RC, %2, %1.val} with REG CONST_HZ - uses reusing %1, REG - gen - andisX %a, %1, {C, hi(%2.val)} - yields %a + yields {AND_RIS, %1, hi(%2.val)} with CONST_HZ REG - uses reusing %2, REG - gen - andisX %a, %2, {C, hi(%1.val)} - yields %a + yields {AND_RIS, %2, hi(%1.val)} pat and defined($1) /* AND set */ leaving @@ -1287,15 +1282,9 @@ PATTERNS pat ior $1==4 /* OR word */ with REG NOT_R - uses reusing %1, REG - gen - orc %a, %1, %2.reg - yields %a + yields {ORC_RR, %1, %2.reg} with NOT_R REG - uses reusing %2, REG - gen - orc %a, %2, %1.reg - yields %a + yields {ORC_RR, %2, %1.reg} with REG REG yields {OR_RR, %1, %2} with REG UCONST2 @@ -1303,11 +1292,9 @@ PATTERNS with UCONST2 REG yields {OR_RC, %2, %1.val} with REG CONST_HZ - uses reusing %1, REG={OR_RIS, %1, hi(%2.val)} - yields %a + yields {OR_RIS, %1, hi(%2.val)} with CONST_HZ REG - uses reusing %2, REG={OR_RIS, %2, hi(%1.val)} - yields %a + yields {OR_RIS, %2, 
hi(%1.val)} with REG CONST_STACK-UCONST2-CONST_HZ uses reusing %1, REG={OR_RIS, %1, hi(%2.val)} yields {OR_RC, %1, lo(%2.val)} @@ -1333,11 +1320,9 @@ PATTERNS with UCONST2 REG yields {XOR_RC, %2, %1.val} with REG CONST_HZ - uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)} - yields %a + yields {XOR_RIS, %1, hi(%2.val)} with CONST_HZ REG - uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)} - yields %a + yields {XOR_RIS, %2, hi(%1.val)} with REG CONST_STACK-UCONST2-CONST_HZ uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)} yields {XOR_RC, %1, lo(%2.val)} @@ -1355,21 +1340,12 @@ PATTERNS cal ".xor" pat com $1==INT32 /* NOT word */ - with AND_RR - uses REG - gen - nand %a, %1.reg1, %1.reg2 - yields %a - with OR_RR - uses REG - gen - nor %a, %1.reg1, %1.reg2 - yields %a - with XOR_RR - uses REG - gen - eqv %a, %1.reg1, %1.reg2 - yields %a + with exact AND_RR + yields {NAND_RR, %1.reg1, %1.reg2} + with exact OR_RR + yields {NOR_RR, %1.reg1, %1.reg2} + with exact XOR_RR + yields {EQV_RR, %1.reg1, %1.reg2} with REG yields {NOT_R, %1} @@ -1403,8 +1379,13 @@ PATTERNS uses reusing %2, REG gen slw %a, %2, %1 yields %a + pat sli stl $1==4 && inreg($2)==reg_any + with CONST_STACK REG + gen slwi {LOCAL, $2}, %2, {C, %1.val & 0x1F} + with REG REG + gen slw {LOCAL, $2}, %2, %1 - pat sri $1==4 /* Shift right signed (second >> top) */ + pat sri $1==4 /* Shift right signed (second >> top) */ with CONST_STACK REG uses reusing %2, REG gen srawi %a, %2, {C, %1.val & 0x1F} @@ -1413,8 +1394,13 @@ PATTERNS uses reusing %2, REG gen sraw %a, %2, %1 yields %a + pat sri stl $1==4 && inreg($2)==reg_any + with CONST_STACK REG + gen srawi {LOCAL, $2}, %2, {C, %1.val & 0x1F} + with REG REG + gen sraw {LOCAL, $2}, %2, %1 - pat sru $1==4 /* Shift right unsigned (second >> top) */ + pat sru $1==4 /* Shift right unsigned (second >> top) */ with CONST_STACK REG uses reusing %2, REG gen srwi %a, %2, {C, %1.val & 0x1F} @@ -1423,6 +1409,11 @@ PATTERNS uses reusing %2, REG gen srw %a, %2, %1 yields %a + pat sru stl 
$1==4 && inreg($2)==reg_any + with CONST_STACK REG + gen srwi {LOCAL, $2}, %2, {C, %1.val & 0x1F} + with REG REG + gen srw {LOCAL, $2}, %2, %1 pat rol $1==4 /* Rotate left word */ with CONST_STACK REG @@ -1433,6 +1424,11 @@ PATTERNS uses reusing %2, REG gen rotlw %a, %2, %1 yields %a + pat rol stl $1==4 && inreg($2)==reg_any + with CONST_STACK REG + gen rotlwi {LOCAL, $2}, %2, {C, %1.val & 0x1F} + with REG REG + gen rotlw {LOCAL, $2}, %2, %1 /* * ror 4 -> ngi 4, rol 4 @@ -1450,6 +1446,14 @@ PATTERNS leaving ngi 4 rol 4 + pat ror stl $1==4 && inreg($2)==reg_any + with CONST_STACK REG + gen rotrwi {LOCAL, $2}, %2, {C, %1.val & 0x1F} + with /* anything */ + leaving + ngi 4 + rol 4 + stl $2 /* Arrays */ @@ -1517,8 +1521,7 @@ PATTERNS gen test %1 mfcr %a - move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} pat tne /* top = (top != 0) */ with REG @@ -1526,8 +1529,7 @@ PATTERNS gen test %1 mfcr %a - move {XNE, %a}, %a - yields %a + yields {XNE, %a} pat tlt /* top = (top < 0) */ with REG @@ -1535,8 +1537,7 @@ PATTERNS gen test %1 mfcr %a - move {XLT, %a}, %a - yields %a + yields {XLT, %a} pat tle /* top = (top <= 0) */ with REG @@ -1544,8 +1545,7 @@ PATTERNS gen test %1 mfcr %a - move {XLE, %a}, %a - yields %a + yields {XLE, %a} pat tgt /* top = (top > 0) */ with REG @@ -1553,8 +1553,7 @@ PATTERNS gen test %1 mfcr %a - move {XGT, %a}, %a - yields %a + yields {XGT, %a} pat tge /* top = (top >= 0) */ with REG @@ -1562,176 +1561,139 @@ PATTERNS gen test %1 mfcr %a - move {XGE, %a}, %a - yields %a + yields {XGE, %a} pat cmi teq $1==4 /* Signed second == top */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} with CONST2 REG uses reusing %1, REG={COND_RC, %2, %1.val} - gen move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} with REG REG uses reusing %1, REG={COND_RR, %2, %1} - gen move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} pat cmi tne $1==4 /* Signed second != top */ with REG CONST2 uses reusing %1, 
REG={COND_RC, %1, %2.val} - gen move {XNE, %a}, %a - yields %a + yields {XNE, %a} with CONST2 REG uses reusing %1, REG={COND_RC, %2, %1.val} - gen move {XNE, %a}, %a - yields %a + yields {XNE, %a} with REG REG uses reusing %1, REG={COND_RR, %2, %1} - gen move {XNE, %a}, %a - yields %a + yields {XNE, %a} pat cmi tgt $1==4 /* Signed second > top */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen move {XLT, %a}, %a - yields %a + yields {XLT, %a} with CONST2 REG uses reusing %1, REG={COND_RC, %2, %1.val} - gen move {XGT, %a}, %a - yields %a + yields {XGT, %a} with REG REG uses reusing %1, REG={COND_RR, %2, %1} - gen move {XGT, %a}, %a - yields %a + yields {XGT, %a} pat cmi tge $1==4 /* Signed second >= top */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen move {XLE, %a}, %a - yields %a + yields {XLE, %a} with CONST2 REG uses reusing %1, REG={COND_RC, %2, %1.val} - gen move {XGE, %a}, %a - yields %a + yields {XGE, %a} with REG REG uses reusing %1, REG={COND_RR, %2, %1} - gen move {XGE, %a}, %a - yields %a + yields {XGE, %a} pat cmi tlt $1==4 /* Signed second < top */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen move {XGT, %a}, %a - yields %a + yields {XGT, %a} with CONST2 REG uses reusing %1, REG={COND_RC, %2, %1.val} - gen move {XLT, %a}, %a - yields %a + yields {XLT, %a} with REG REG uses reusing %1, REG={COND_RR, %2, %1} - gen move {XLT, %a}, %a - yields %a + yields {XLT, %a} pat cmi tle $1==4 /* Signed second <= top */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} - gen move {XGE, %a}, %a - yields %a + yields {XGE, %a} with CONST2 REG uses reusing %1, REG={COND_RC, %2, %1.val} - gen move {XLE, %a}, %a - yields %a + yields {XLE, %a} with REG REG uses reusing %1, REG={COND_RR, %2, %1} - gen move {XLE, %a}, %a - yields %a + yields {XLE, %a} pat cmu teq $1==4 /* Unsigned second == top */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} 
with UCONST2 REG uses reusing %1, REG={CONDL_RC, %2, %1.val} - gen move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} with REG REG uses reusing %1, REG={CONDL_RR, %2, %1} - gen move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} pat cmu tne $1==4 /* Unsigned second != top */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen move {XNE, %a}, %a - yields %a + yields {XNE, %a} with UCONST2 REG uses reusing %1, REG={CONDL_RC, %2, %1.val} - gen move {XNE, %a}, %a - yields %a + yields {XNE, %a} with REG REG uses reusing %1, REG={CONDL_RR, %2, %1} - gen move {XNE, %a}, %a - yields %a + yields {XNE, %a} pat cmu tgt $1==4 /* Unsigned second > top */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen move {XLT, %a}, %a - yields %a + yields {XLT, %a} with UCONST2 REG uses reusing %1, REG={CONDL_RC, %2, %1.val} - gen move {XGT, %a}, %a - yields %a + yields {XGT, %a} with REG REG uses reusing %1, REG={CONDL_RR, %2, %1} - gen move {XGT, %a}, %a - yields %a + yields {XGT, %a} pat cmu tge $1==4 /* Unsigned second >= top */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen move {XLE, %a}, %a - yields %a + yields {XLE, %a} with UCONST2 REG uses reusing %1, REG={CONDL_RC, %2, %1.val} - gen move {XGE, %a}, %a - yields %a + yields {XGE, %a} with REG REG uses reusing %1, REG={CONDL_RR, %2, %1} - gen move {XGE, %a}, %a - yields %a + yields {XGE, %a} pat cmu tlt $1==4 /* Unsigned second < top */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen move {XGT, %a}, %a - yields %a + yields {XGT, %a} with UCONST2 REG uses reusing %1, REG={CONDL_RC, %2, %1.val} - gen move {XLT, %a}, %a - yields %a + yields {XLT, %a} with REG REG uses reusing %1, REG={CONDL_RR, %2, %1} - gen move {XLT, %a}, %a - yields %a + yields {XLT, %a} pat cmu tle $1==4 /* Unsigned second <= top */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} - gen move {XGE, %a}, %a - yields %a + yields {XGE, %a} with UCONST2 REG uses reusing %1, REG={CONDL_RC, 
%2, %1.val} - gen move {XLE, %a}, %a - yields %a + yields {XLE, %a} with REG REG uses reusing %1, REG={CONDL_RR, %2, %1} - gen move {XLE, %a}, %a - yields %a + yields {XLE, %a} /* Simple branches */ @@ -2179,38 +2141,32 @@ PATTERNS pat cmf teq $1==4 /* Single second == top */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} pat cmf tne $1==4 /* Single second == top */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen move {XNE, %a}, %a - yields %a + yields {XNE, %a} pat cmf tgt $1==4 /* Single second > top */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen move {XGT, %a}, %a - yields %a + yields {XGT, %a} pat cmf tge $1==4 /* Single second >= top */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen move {XGE, %a}, %a - yields %a + yields {XGE, %a} pat cmf tlt $1==4 /* Single second < top */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen move {XLT, %a}, %a - yields %a + yields {XLT, %a} pat cmf tle $1==4 /* Single second <= top */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen move {XLE, %a}, %a - yields %a + yields {XLE, %a} proc cmf4zxx example cmf zeq with FSREG FSREG STACK @@ -2337,38 +2293,32 @@ PATTERNS pat cmf teq $1==8 /* Double second == top */ with FREG FREG uses REG={COND_FD, %2, %1} - gen move {XEQ, %a}, %a - yields %a + yields {XEQ, %a} pat cmf tne $1==8 /* Single second == top */ with FREG FREG uses REG={COND_FD, %2, %1} - gen move {XNE, %a}, %a - yields %a + yields {XNE, %a} pat cmf tgt $1==8 /* Double second > top */ with FREG FREG uses REG={COND_FD, %2, %1} - gen move {XGT, %a}, %a - yields %a + yields {XGT, %a} pat cmf tge $1==8 /* Double second >= top */ with FREG FREG uses REG={COND_FD, %2, %1} - gen move {XGE, %a}, %a - yields %a + yields {XGE, %a} pat cmf tlt $1==8 /* Double second < top */ with FREG FREG uses REG={COND_FD, %2, %1} - gen move {XLT, %a}, %a - yields %a + yields {XLT, %a} pat cmf tle $1==8 /* Double second <= top */ with FREG FREG uses REG={COND_FD, %2, %1} - gen move 
{XLE, %a}, %a - yields %a + yields {XLE, %a} proc cmf8zxx example cmf zeq with FREG FREG STACK @@ -2385,11 +2335,15 @@ PATTERNS pat cmf zlt $1==8 call cmf8zxx("blt") pat cmf zle $1==8 call cmf8zxx("ble") - pat loc loc cff $1==INT64 && $2==INT32 /* Convert double to single */ + /* Convert double to single */ + /* reg_float pattern must be first, or it goes unused! */ + pat loc loc cff stl $1==8 && $2==4 && inreg($4)==reg_float + with FREG + gen frsp {LOCAL, $4}, %1 + pat loc loc cff $1==8 && $2==4 with FREG uses reusing %1, FSREG - gen - frsp %a, %1 + gen frsp %a, %1 yields %a /* Convert double to signed int */ From 504d2aa34e606b6a51643b2fcdd49312d8ece1d0 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sat, 9 Dec 2017 17:21:06 -0500 Subject: [PATCH 07/55] Revise stack shuffles and integer conversions in PowerPC ncg. Allow asp 4, exg 4 to shuffle tokens without coercing them into registers; but comment why dup 4, dup 8 coerce tokens into registers. Allow dup, dus, exg with larger sizes; and add tests dup_e.e and exg_e.e to check that dup 20, dus, exg 20 work as well in powerpc as in i80 and i86. Then powerpc failed to compile loc 2 loc 4 cuu in dup_e.e. Revise the integer conversions, so powerpc can compile and pass the test. 
--- mach/powerpc/libem/build.lua | 2 +- mach/powerpc/libem/exg.s | 22 ++++++ mach/powerpc/ncg/table | 134 +++++++++++++++++++------------- tests/plat/build.lua | 2 + tests/plat/dup_e.e | 145 +++++++++++++++++++++++++++++++++++ tests/plat/exg_e.e | 86 +++++++++++++++++++++ 6 files changed, 338 insertions(+), 53 deletions(-) create mode 100644 mach/powerpc/libem/exg.s create mode 100644 tests/plat/dup_e.e create mode 100644 tests/plat/exg_e.e diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua index 16a03147e..cb5efd281 100644 --- a/mach/powerpc/libem/build.lua +++ b/mach/powerpc/libem/build.lua @@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do acklibrary { name = "lib_"..plat, srcs = { - "./*.s", -- rm ret.s + "./*.s", -- exg.s }, vars = { plat = plat }, deps = { diff --git a/mach/powerpc/libem/exg.s b/mach/powerpc/libem/exg.s new file mode 100644 index 000000000..eb631b697 --- /dev/null +++ b/mach/powerpc/libem/exg.s @@ -0,0 +1,22 @@ +.sect .text + +! Exchange top two values on stack. +! Stack: ( a b size -- b a ) + +.define .exg +.exg: + lwz r3, 0(sp) ! r3 = size + srwi r7, r3, 2 + mtspr ctr, r7 ! ctr = size / 4 + mr r4, sp ! r4 = pointer before value b + add r5, r4, r3 ! r5 = pointer before value a + + ! Loop to swap each pair of words. +1: lwzu r6, 4(r4) + lwzu r7, 4(r5) + stw r6, 0(r5) + stw r7, 0(r4) + bdnz 1b ! loop ctr times + + addi sp, sp, 4 ! drop size from stack + blr diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 5768c4382..90fd9448d 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -217,6 +217,9 @@ SETS XOR_RIS + XOR_RC + XOR_RR + NAND_RR + NOR_RR + EQV_RR + XEQ + XNE + XGT + XGE + XLT + XLE. + /* any register or token of each size */ + ANY4 = ANY_BHW + FSREG. + ANY8 = IND_ALL_D + FREG. 
INSTRUCTIONS @@ -756,7 +759,7 @@ COERCIONS PATTERNS -/* Intrinsics */ +/* Constants */ pat loc $1==(0-0x8000) /* Load constant */ yields {CONST_N8000, $1} @@ -773,22 +776,78 @@ PATTERNS pat loc yields {CONST_HL, $1} - pat dup $1==INT32 /* Duplicate word on top of stack */ - with REG - yields %1 %1 - with FSREG + +/* Stack shuffles */ + + /* The peephole optimizer does: loc $1 ass 4 -> asp $1 + * To optimize multiplication, it uses: dup 8 asp 4 + */ + + pat asp $1==4 /* Adjust stack by constant */ + with exact ANY4 + /* drop %1 */ + with STACK + gen addi sp, sp, {C, 4} + pat asp smalls($1) + with STACK + gen addi sp, sp, {C, $1} + pat asp lo($1)==0 + with STACK + gen addi sp, sp, {C, hi($1)} + pat asp + with STACK + gen + addis sp, sp, {C, his($1)} + addi sp, sp, {C, los($1)} + + pat ass $1==4 /* Adjust stack by variable */ + with REG STACK + gen add sp, sp, %1 + + /* To duplicate a token, we coerce the token into a register, + * then duplicate the register. This decreases code size. + */ + + pat dup $1==4 /* Duplicate word on top of stack */ + with REG+FSREG yields %1 %1 - pat dup $1==INT64 /* Duplicate double-word on top of stack */ - with REG REG + pat dup $1==8 /* Duplicate double-word */ + with REG+FSREG REG+FSREG yields %2 %1 %2 %1 with FREG yields %1 %1 - pat exg $1==INT32 /* Exchange top two words on stack */ - with REG REG + pat dup /* Duplicate other size */ + leaving + loc $1 + dus 4 + + pat dus $1==4 /* Duplicate variable size */ + with REG STACK + /* ( a size%1 -- a a ) */ + uses REG, REG + gen + srwi %a, %1, {C, 2} + mtspr ctr, %a + add %b, sp, %1 + 1: lwzu %a, {IND_RC_W, %b, 0-4} + stwu %a, {IND_RC_W, sp, 0-4} + bdnz {LABEL, "1b"} + + pat exg $1==4 /* Exchange top two words */ + with ANY4 ANY4 yields %1 %2 + pat exg defined($1) /* Exchange other size */ + leaving + loc $1 + cal ".exg" + + pat exg !defined($1) + leaving + cal ".exg" + pat ste loe $1==$2 /* Store then load external */ leaving dup 4 @@ -797,32 +856,30 @@ PATTERNS /* Type conversions */ - 
pat loc loc ciu /* signed X -> unsigned X */ + pat loc loc ciu /* signed -> unsigned */ leaving loc $1 loc $2 cuu - pat loc loc cuu $1==$2 /* unsigned X -> unsigned X */ + pat loc loc cui /* unsigned -> signed */ + leaving + loc $1 + loc $2 + cuu + + pat loc loc cuu $1<=4 && $2<=4 /* unsigned -> unsigned */ /* nop */ - pat loc loc cii $1==$2 /* signed X -> signed X */ - /* nop */ + pat loc loc cii $1<=4 && $2<=$1 + /* signed -> signed of smaller or same size, + * no sign extension */ - pat loc loc cui $1==$2 /* unsigned X -> signed X */ - /* nop */ - - pat loc loc cui $1==INT8 && $2==INT32 /* unsigned char -> signed int */ - /* nop */ - - pat loc loc cui $1==INT16 && $2==INT32 /* unsigned short -> signed int */ - /* nop */ - - pat loc loc cii $1==INT8 && $2==INT32 /* signed char -> signed int */ + pat loc loc cii $1==1 && $2<=4 /* sign-extend char */ with REG yields {SEX_B, %1} - pat loc loc cii $1==2 && $2==4 /* signed char -> signed short */ + pat loc loc cii $1==2 && $2<=4 /* sign-extend short */ with REG yields {SEX_H, %1} @@ -1362,7 +1419,7 @@ PATTERNS leaving loc 0 - pat zer defined($1) /* Create empty set */ + pat zer defined($1) /* Create empty set */ leaving loc $1 cal ".zer" @@ -2038,33 +2095,6 @@ PATTERNS gen move %1, sp - pat loc ass $1==4 && $2==4 /* Drop 4 bytes from stack */ - with exact REG - /* nop */ - with STACK - gen - addi sp, sp, {C, 4} - - pat ass $1==4 /* Adjust stack by variable amount */ - with CONST2 STACK - gen - move {SUM_RC, sp, %1.val}, sp - with CONST_HZ STACK - gen - move {SUM_RC, sp, his(%1.val)}, sp - with CONST_STACK-CONST2-CONST_HZ STACK - gen - move {SUM_RC, sp, his(%1.val)}, sp - move {SUM_RC, sp, los(%1.val)}, sp - with REG STACK - gen - move {SUM_RR, sp, %1}, sp - - pat asp /* Adjust stack by constant amount */ - leaving - loc $1 - ass 4 - pat lae rck $2==4 /* Range check */ with REG kills ALL diff --git a/tests/plat/build.lua b/tests/plat/build.lua index cbd39468e..609771ed1 100644 --- a/tests/plat/build.lua +++ 
b/tests/plat/build.lua @@ -10,6 +10,8 @@ definerule("plat_testsuite", -- target names will resolve there. local testfiles = filenamesof( "tests/plat/*.c", + "tests/plat/dup_e.e", + "tests/plat/exg_e.e", "tests/plat/inn_e.e", "tests/plat/rotate_e.e", "tests/plat/*.p", diff --git a/tests/plat/dup_e.e b/tests/plat/dup_e.e new file mode 100644 index 000000000..600161be4 --- /dev/null +++ b/tests/plat/dup_e.e @@ -0,0 +1,145 @@ +# + mes 2, EM_WSIZE, EM_PSIZE + +/* + * Test _dup_ and _dus_ by loading 20 bytes from _src_, then making + * and checking some duplicates. + */ + + exa src + exa size +src + con 3593880729I4, 782166578I4, 4150666996I4, 2453272937I4, 3470523049I4 +size + con 20I2 + + exp $check + exp $_m_a_i_n + pro $_m_a_i_n, 0 + + /* Push 3 copies of src on stack. */ + lae src + loi 20 /* 1st copy */ + dup 20 /* 2nd copy */ + lae size + loi 2 + loc 2 + loc EM_WSIZE + cuu + dus EM_WSIZE /* 3rd copy */ + + cal $check + cal $finished + end /* $_m_a_i_n */ + + pro $check, 4 * EM_PSIZE + 2 * EM_WSIZE +#define p1 (-1 * EM_PSIZE) +#define p2 (-2 * EM_PSIZE) +#define p3 (-3 * EM_PSIZE) +#define p4 (-4 * EM_PSIZE) +#define b (p4 - 1 * EM_WSIZE) +#define i (p4 - 2 * EM_WSIZE) + + /* Set pointers to all 4 copies. */ + lae src + lal p4 + sti EM_PSIZE /* p4 = src */ + lal 0 + lal p3 + sti EM_PSIZE /* p3 = 3rd copy */ + lal 20 + lal p2 + sti EM_PSIZE /* p2 = 2nd copy */ + lal 40 + lal p1 + sti EM_PSIZE /* p1 = 1st copy */ + + /* Loop 20 times to verify each byte. 
*/ + loc 0 + stl i +4 + lal p4 + loi EM_PSIZE + loi 1 + loc 1 + loc EM_WSIZE + cii + stl b /* b = byte from src */ + lol b + lal p3 + loi EM_PSIZE + loi 1 /* byte from 3rd copy */ + loc 1 + loc EM_WSIZE + cii + beq *3 + loc (3 * 256) + lol i + adi EM_WSIZE + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +3 + lol b + lal p2 + loi EM_PSIZE + loi 1 /* byte from 2nd copy */ + loc 1 + loc EM_WSIZE + cii + beq *2 + loc (2 * 256) + lol i + adi EM_WSIZE + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +2 + lol b + lal p1 + loi EM_PSIZE + loi 1 /* byte from 1st copy */ + loc 1 + loc EM_WSIZE + cii + beq *1 + loc (1 * 256) + lol i + adi EM_WSIZE + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +1 + lal p4 + loi EM_PSIZE + adp 1 + lal p4 + sti EM_PSIZE /* increment p4 */ + lal p3 + loi EM_PSIZE + adp 1 + lal p3 + sti EM_PSIZE /* increment p3 */ + lal p2 + loi EM_PSIZE + adp 1 + lal p2 + sti EM_PSIZE /* increment p2 */ + lal p1 + loi EM_PSIZE + adp 1 + lal p1 + sti EM_PSIZE /* increment p1 */ + inl i + lol i + loc 20 + blt *4 /* loop 20 times */ + + ret 0 + end /* $check */ diff --git a/tests/plat/exg_e.e b/tests/plat/exg_e.e new file mode 100644 index 000000000..3a1f06d3b --- /dev/null +++ b/tests/plat/exg_e.e @@ -0,0 +1,86 @@ +# + mes 2, EM_WSIZE, EM_PSIZE + +/* + * Test _exg_ by loading 40 bytes from _src_, then exchanging 20 and + * 20 bytes, and checking the result. + */ + + exa src +src + con 1539465570I4, 1344465418I4, 1317578918I4, 1163467696I4, 2645261331I4 + con 3981585269I4, 1433968975I4, 4256886989I4, 4114909542I4, 1817334375I4 + + exp $check + exp $_m_a_i_n + pro $_m_a_i_n, 0 + + lae src + loi 40 + exg 20 + cal $check + cal $finished + end /* $_m_a_i_n */ + + pro $check, 2 * EM_PSIZE + EM_WSIZE +#define p1 (-1 * EM_PSIZE) +#define p2 (-2 * EM_PSIZE) +#define i (p2 - EM_WSIZE) + + lae src + lal p2 + sti EM_PSIZE /* p2 = src */ + lal 0 + adp 20 + lal p1 + sti EM_PSIZE /* p1 = exchanged copy + 20 */ + + /* Loop 40 times to verify each byte. 
*/ + loc 0 + stl i +1 + lal p2 + loi EM_PSIZE + loi 1 /* byte from src */ + loc 1 + loc EM_WSIZE + cii + lal p1 + loi EM_PSIZE + loi 1 /* byte from exchanged copy */ + loc 1 + loc EM_WSIZE + cii + beq *2 + lol i + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +2 + lal p2 + loi EM_PSIZE + adp 1 + lal p2 + sti EM_PSIZE /* increment p2 */ + lal p1 + loi EM_PSIZE /* p1 */ + inl i + /* When i reaches 20, p1 would reach end of exchanged copy. */ + lol i + loc 20 + beq *3 + adp 1 /* p1 + 1 */ + bra *4 +3 + adp -39 /* p1 - 39, beginning of exchanged copy */ +4 + lal p1 + sti EM_PSIZE /* move p1 */ + lol i + loc 40 + blt *1 + + ret 0 + end /* $check */ \ No newline at end of file From 11a54e0a7c0e7f4947986fee96d3465ac68e7ab0 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 10 Dec 2017 14:01:14 -0500 Subject: [PATCH 08/55] These instructions write to the CR. --- mach/powerpc/ncg/table | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 90fd9448d..cc5d8a0c0 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -262,13 +262,13 @@ INSTRUCTIONS bclr CONST:ro, CONST:ro, CONST:ro. blr. bl LABEL:ro. - cmp CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc. + cmp CR:wo, CONST:ro, GPR:ro, GPR:ro kills :cc. cmpw GPR:ro, GPR:ro kills :cc. - cmpi CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc. + cmpi CR:wo, CONST:ro, GPR:ro, CONST:ro kills :cc. cmpwi GPR:ro, CONST:ro kills :cc. - cmpl CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc. + cmpl CR:wo, CONST:ro, GPR:ro, GPR:ro kills :cc. cmplw GPR:ro, GPR:ro kills :cc. - cmpli CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc. + cmpli CR:wo, CONST:ro, GPR:ro, CONST:ro kills :cc. cmplwi GPR:ro, CONST:ro kills :cc. divw GPR:wo, GPR:ro, GPR:ro cost(4, 23). divwu GPR:wo, GPR:ro, GPR:ro cost(4, 23). 
From 5ba83100d654303f448bc0041044cbd9dfa1d74c Mon Sep 17 00:00:00 2001 From: George Koehler Date: Tue, 12 Dec 2017 13:36:43 -0500 Subject: [PATCH 09/55] Delete rules for sti 8 with REG IND_RC_D, with REG IND_RR_D. Prefer the rule with REG FREG, by coercing IND_RC_D or IND_RR_D to FREG. This rule looks better to ncg. When ncg chose between coercion to REG IND_RC_D or coercion to REG FREG, it chose REG FREG. It only chose REG IND_RC_D if the stack had exact REG IND_RC_D. --- mach/powerpc/ncg/table | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index cc5d8a0c0..f5b3817cc 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -1174,26 +1174,6 @@ PATTERNS gen move %2, {IND_RC_W, %1, 0} move %3, {IND_RC_W, %1, 4} - /* - * Next 2 patterns exist because there is no coercion - * from IND_ALL_D to REG REG. - */ - with REG IND_RC_D - kills MEMORY - uses REG={SUM_RC, %2.reg, %2.off}, REG, REG - gen - move {IND_RC_W, %a, 0}, %b - move {IND_RC_W, %a, 4}, %c - move %b, {IND_RC_W, %1, 0} - move %c, {IND_RC_W, %1, 4} - with REG IND_RR_D - kills MEMORY - uses REG={SUM_RR, %2.reg1, %2.reg2}, REG, REG - gen - move {IND_RC_W, %a, 0}, %b - move {IND_RC_W, %a, 4}, %c - move %b, {IND_RC_W, %1, 0} - move %c, {IND_RC_W, %1, 4} pat sti /* Store arbitrary size */ leaving From b0d75fed37c49f61992310c0e62525d6305cbe13 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 14 Dec 2017 16:26:19 -0500 Subject: [PATCH 10/55] Rename ANY_BHW to INT_W; add FLOAT_W, FLOAT_D. INT_W, the integer set, continues to exclude FSREG, because we can't easily move FSREG to GPR. ANY4 becomes INT_W+FLOAT_W and ANY8 becomes FLOAT_D. 
--- mach/powerpc/ncg/table | 49 +++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index f5b3817cc..ae1620cfd 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -65,7 +65,7 @@ REGISTERS f0 : FPR. f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13 - : FPR, FREG. + : FPR, FREG. f14, f15, f16, f17, f18, f19, f20, f21, f22, f23, f24, f25, f26, f27, f28, f29, f30, f31 @@ -202,24 +202,23 @@ SETS IND_RC_H_S + IND_RL_H_S + IND_RR_H_S. IND_ALL_W = IND_RC_W + IND_RL_W + IND_RR_W. IND_ALL_D = IND_RC_D + IND_RL_D + IND_RR_D. - IND_ALL_BHW = IND_ALL_B + IND_ALL_H + IND_ALL_W. /* anything killed by sti (store indirect) */ - MEMORY = IND_ALL_BHW + IND_ALL_D. + MEMORY = IND_ALL_B + IND_ALL_H + IND_ALL_W + IND_ALL_D. - /* any stack token that we can easily move to GPR */ - ANY_BHW = REG + CONST_STACK + SEX_B + SEX_H + + /* any integer from stack that we can easily move to GPR */ + INT_W = REG + CONST_STACK + SEX_B + SEX_H + SUM_RIS + SUM_RC + SUM_RL + SUM_RR + SUB_RR + NEG_R + MUL_RR + DIV_RR + DIV_RR_U + - IND_ALL_BHW + + IND_ALL_B + IND_ALL_H + IND_ALL_W + NOT_R + AND_RIS + AND_RC + AND_RR + ANDC_RR + OR_RIS + OR_RC + OR_RR + ORC_RR + XOR_RIS + XOR_RC + XOR_RR + NAND_RR + NOR_RR + EQV_RR + XEQ + XNE + XGT + XGE + XLT + XLE. - /* any register or token of each size */ - ANY4 = ANY_BHW + FSREG. - ANY8 = IND_ALL_D + FREG. + FLOAT_D = FREG + IND_ALL_D. + FLOAT_W = FSREG + IND_ALL_W. + INSTRUCTIONS @@ -634,13 +633,13 @@ MOVES our moves to GPR to set register variables. We define no moves to LOCAL, so we avoid confusion between GPR and FSREG in LOCAL. 
*/ - from ANY_BHW to GPR_EXPR + from INT_W to GPR_EXPR gen move %1, %2.reg - from FPR+IND_ALL_D to FPR_EXPR + from FLOAT_D to FPR_EXPR gen move %1, %2.reg - from FSREG+IND_ALL_W to FSREG_EXPR + from FLOAT_W to FSREG_EXPR gen move %1, %2.reg @@ -664,15 +663,15 @@ STACKINGRULES COMMENT("stack REG") stwu %1, {IND_RC_W, sp, 0-4} - from ANY_BHW-REG to STACK + from INT_W-REG to STACK gen - COMMENT("stack ANY_BHW-REG") + COMMENT("stack INT_W-REG") move %1, RSCRATCH stwu RSCRATCH, {IND_RC_W, sp, 0-4} - from IND_ALL_D to STACK + from FLOAT_D-FREG to STACK gen - COMMENT("stack IND_ALL_D") + COMMENT("stack FLOAT_D-FREG") move %1, FSCRATCH stfdu FSCRATCH, {IND_RC_D, sp, 0-8} @@ -724,10 +723,10 @@ COERCIONS addi sp, sp, {C, 4} yields %a - from ANY_BHW + from INT_W uses REG gen - COMMENT("coerce ANY_BHW->REG") + COMMENT("coerce INT_W->REG") move %1, %a yields %a @@ -741,17 +740,17 @@ COERCIONS * but %1.off+4 might overflow a signed 16-bit integer. */ - from FREG+IND_ALL_D + from FLOAT_D uses FREG gen - COMMENT("coerce FREG+IND_ALL_D->FREG") + COMMENT("coerce FLOAT_D->FREG") move %1, %a yields %a - from FSREG+IND_ALL_W + from FLOAT_W uses FSREG gen - COMMENT("coerce FSREG+IND_ALL_W->FREG") + COMMENT("coerce FLOAT_W->FREG") move %1, %a yields %a @@ -784,7 +783,7 @@ PATTERNS */ pat asp $1==4 /* Adjust stack by constant */ - with exact ANY4 + with exact INT_W+FLOAT_W /* drop %1 */ with STACK gen addi sp, sp, {C, 4} @@ -836,7 +835,7 @@ PATTERNS bdnz {LABEL, "1b"} pat exg $1==4 /* Exchange top two words */ - with ANY4 ANY4 + with INT_W+FLOAT_W INT_W+FLOAT_W yields %1 %2 pat exg defined($1) /* Exchange other size */ @@ -911,7 +910,7 @@ PATTERNS /* Store word to local */ pat stl inreg($1)==reg_any - with exact ANY_BHW + with exact INT_W /* ncg fails to infer that regvar($1) is dead! 
*/ kills regvar($1) gen move %1, {GPR_EXPR, regvar($1)} From d8fa9d1b2aa5e2387b0e12dc6babf4efa4344c54 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 17 Dec 2017 12:45:27 -0500 Subject: [PATCH 11/55] In coercions, try to reuse a register with the same token. This reduces code size. --- mach/powerpc/ncg/table | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index ae1620cfd..3611a1809 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -723,11 +723,13 @@ COERCIONS addi sp, sp, {C, 4} yields %a + /* "uses REG=%1" may find and reuse a register containing the + * same token. For contrast, "uses REG gen move %1, %a" would + * pick a different register before doing the move. + */ + from INT_W - uses REG - gen - COMMENT("coerce INT_W->REG") - move %1, %a + uses REG=%1 yields %a /* @@ -741,17 +743,11 @@ COERCIONS */ from FLOAT_D - uses FREG - gen - COMMENT("coerce FLOAT_D->FREG") - move %1, %a + uses FREG=%1 yields %a from FLOAT_W - uses FSREG - gen - COMMENT("coerce FLOAT_W->FREG") - move %1, %a + uses FSREG=%1 yields %a From 5e99baabdfd899a950bc184f745f3ed874744120 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 18 Dec 2017 12:36:10 -0500 Subject: [PATCH 12/55] Rename two tokens. CONST_HZ was not hertz (Hz). --- mach/powerpc/ncg/table | 43 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 3611a1809..86da2a895 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -117,8 +117,8 @@ TOKENS CONST_0000_7FFF = { INT val; } 4 val. CONST_8000 = { INT val; } 4 val. CONST_8001_FFFF = { INT val; } 4 val. - CONST_HZ = { INT val; } 4 val. - CONST_HL = { INT val; } 4 val. + CONST_HI_ZR = { INT val; } 4 val. + CONST_HI_LO = { INT val; } 4 val. 
/* Expression partial results */ @@ -193,7 +193,8 @@ SETS UCONST2 = CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF. /* any constant on stack */ CONST_STACK = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF + - CONST_8000 + CONST_8001_FFFF + CONST_HZ + CONST_HL. + CONST_8000 + CONST_8001_FFFF + + CONST_HI_ZR + CONST_HI_LO. CONST = C + CONST_STACK. @@ -767,9 +768,9 @@ PATTERNS pat loc $1>=0x8001 && $1<=0xFFFF yields {CONST_8001_FFFF, $1} pat loc lo($1)==0 - yields {CONST_HZ, $1} + yields {CONST_HI_ZR, $1} pat loc - yields {CONST_HL, $1} + yields {CONST_HI_LO, $1} /* Stack shuffles */ @@ -1231,14 +1232,14 @@ PATTERNS yields {SUM_RC, %2, %1.val} with REG CONST2 yields {SUM_RC, %1, %2.val} - with CONST_HZ REG + with CONST_HI_ZR REG yields {SUM_RIS, %2, his(%1.val)} - with REG CONST_HZ + with REG CONST_HI_ZR yields {SUM_RIS, %1, his(%2.val)} - with CONST_STACK-CONST2-CONST_HZ REG + with CONST_STACK-CONST2-CONST_HI_ZR REG uses reusing %2, REG={SUM_RIS, %2, his(%1.val)} yields {SUM_RC, %a, los(%1.val)} - with REG CONST_STACK-CONST2-CONST_HZ + with REG CONST_STACK-CONST2-CONST_HI_ZR uses reusing %1, REG={SUM_RIS, %1, his(%2.val)} yields {SUM_RC, %a, los(%2.val)} @@ -1248,9 +1249,9 @@ PATTERNS yields {SUB_RR, %2, %1} with CONST2_WHEN_NEG REG yields {SUM_RC, %2, 0-%1.val} - with CONST_HZ REG + with CONST_HI_ZR REG yields {SUM_RIS, %2, his(0-%1.val)} - with CONST_STACK-CONST2_WHEN_NEG-CONST_HZ REG + with CONST_STACK-CONST2_WHEN_NEG-CONST_HI_ZR REG uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} yields {SUM_RC, %a, los(0-%1.val)} @@ -1298,9 +1299,9 @@ PATTERNS yields {AND_RC, %1, %2.val} with UCONST2 REG yields {AND_RC, %2, %1.val} - with REG CONST_HZ + with REG CONST_HI_ZR yields {AND_RIS, %1, hi(%2.val)} - with CONST_HZ REG + with CONST_HI_ZR REG yields {AND_RIS, %2, hi(%1.val)} pat and defined($1) /* AND set */ @@ -1323,14 +1324,14 @@ PATTERNS yields {OR_RC, %1, %2.val} with UCONST2 REG yields {OR_RC, %2, %1.val} - with REG CONST_HZ + with REG CONST_HI_ZR yields {OR_RIS, 
%1, hi(%2.val)} - with CONST_HZ REG + with CONST_HI_ZR REG yields {OR_RIS, %2, hi(%1.val)} - with REG CONST_STACK-UCONST2-CONST_HZ + with REG CONST_STACK-UCONST2-CONST_HI_ZR uses reusing %1, REG={OR_RIS, %1, hi(%2.val)} yields {OR_RC, %1, lo(%2.val)} - with CONST_STACK-UCONST2-CONST_HZ REG + with CONST_STACK-UCONST2-CONST_HI_ZR REG uses reusing %2, REG={OR_RIS, %2, hi(%1.val)} yields {OR_RC, %2, lo(%1.val)} @@ -1351,14 +1352,14 @@ PATTERNS yields {XOR_RC, %1, %2.val} with UCONST2 REG yields {XOR_RC, %2, %1.val} - with REG CONST_HZ + with REG CONST_HI_ZR yields {XOR_RIS, %1, hi(%2.val)} - with CONST_HZ REG + with CONST_HI_ZR REG yields {XOR_RIS, %2, hi(%1.val)} - with REG CONST_STACK-UCONST2-CONST_HZ + with REG CONST_STACK-UCONST2-CONST_HI_ZR uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)} yields {XOR_RC, %1, lo(%2.val)} - with CONST_STACK-UCONST2-CONST_HZ REG + with CONST_STACK-UCONST2-CONST_HI_ZR REG uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)} yields {XOR_RC, %2, lo(%1.val)} From 24abaf6a25fb8449a918ad8caa626da835e95521 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 18 Dec 2017 20:39:56 -0500 Subject: [PATCH 13/55] Enable conditional expressions in splitting coercions. ncgg has parsed the optional conditional expression (optexpr) of each splitting coercion since commit 72b83cc in 1985; but for almost 33 years, ncg has ignored the expression in c2_expr. Few tables had conditional coercions (I only found them in arm and m68020), and no tables had conditional splitting coercions, so this only becomes a problem now as I try to add a conditional splitting coercion to powerpc. 
--- mach/proto/ncg/subr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mach/proto/ncg/subr.c b/mach/proto/ncg/subr.c index 0feb54f30..0dc045973 100644 --- a/mach/proto/ncg/subr.c +++ b/mach/proto/ncg/subr.c @@ -518,7 +518,7 @@ int split(token_p tp, int *ip, int ply, int toplevel) { int tpl; for (cp=c2coercs;cp->c2_texpno>=0; cp++) { - if (!match(tp,&machsets[cp->c2_texpno],0)) + if (!match(tp,&machsets[cp->c2_texpno],cp->c2_expr)) continue; ok=1; for (i=0; ok && i<cp->c2_nsplit;i++) { From ad47fa5fe302c0ff217ce846363708e7cbf024a4 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 18 Dec 2017 20:59:04 -0500 Subject: [PATCH 14/55] Add splitting coercions for IND_ALL_D. Delete my wrong comment (from commits cfbc537, a8f62f4, 5432bd0) which claimed that such coercions are not possible. --- mach/powerpc/ncg/table | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 86da2a895..28ef4b40e 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -52,11 +52,9 @@ REGISTERS * r13, r14, ..., r31: GPR, REG regvar(reg_any). */ - r0, sp, fp : GPR. - r3 : GPR, REG, REG3. - - r4, r5, r6, r7, r8, r9, r10, r11, r12 - : GPR, REG. + r0, sp, fp, r12 : GPR. + r3 : GPR, REG, REG3. + r4, r5, r6, r7, r8, r9, r10, r11 : GPR, REG. r13, r14, r15, r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31 @@ -88,6 +86,10 @@ REGISTERS lr, ctr : SPR. cr0 : CR. + /* The stacking rules and the splitting coercions can't + * allocate registers. We use r12 in the splitting coercions, + * and these scratch registers in the stacking rules. 
+ */ #define RSCRATCH r0 #define FSCRATCH f0 @@ -697,7 +699,6 @@ STACKINGRULES gen bug {LABEL, "STACKING DLOCAL"} - COERCIONS from STACK @@ -733,16 +734,6 @@ COERCIONS uses REG=%1 yields %a - /* - * There is no coercion from IND_ALL_D to REG REG, because - * coercions can't allocate registers for intermediate values. - * - * A coercion to split IND_RC_D into two IND_RC_W, without - * allocating an intermediate register, would yield - * {IND_RC_W, %1.val, %1.off+4} - * but %1.off+4 might overflow a signed 16-bit integer. - */ - from FLOAT_D uses FREG=%1 yields %a @@ -751,6 +742,23 @@ COERCIONS uses FSREG=%1 yields %a + /* Splitting coercions can't allocate registers. + * PowerPC can't add r0 + constant. Use r12. + */ + + from IND_RC_D %off<=0x7FFA + yields + {IND_RC_W, %1.reg, %1.off+4} + {IND_RC_W, %1.reg, %1.off} + + from IND_RC_D + /* Don't move to %1.reg; it might be a regvar. */ + gen move {SUM_RC, %1.reg, %1.off}, r12 + yields {IND_RC_W, r12, 4} {IND_RC_W, r12, 0} + + from IND_RR_D + gen move {SUM_RR, %1.reg1, %1.reg2}, r12 + yields {IND_RC_W, r12, 4} {IND_RC_W, r12, 0} PATTERNS From a4e6595032838a7260425f35f0dd045a1126a9af Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 18 Dec 2017 21:17:42 -0500 Subject: [PATCH 15/55] Remove '\0' from output. Fix a compiler warning. Don't output '\0' in "@@FINISHED\0". Cast code to unsigned int. This helps platforms with 16-bit int, by doing only the low 16 bits of the bitwise-and. It also removes the "(warning) conversion of long to pointer loses accuracy". 
--- tests/plat/lib/test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/plat/lib/test.c b/tests/plat/lib/test.c index 426f9944a..6df3ee7d5 100644 --- a/tests/plat/lib/test.c +++ b/tests/plat/lib/test.c @@ -5,7 +5,7 @@ void finished(void) { static const char s[] = "@@FINISHED\n"; - write(1, s, sizeof(s)); + write(1, s, sizeof(s)-1); _exit(0); } @@ -16,7 +16,7 @@ void writehex(uint32_t code) do { - *--p = "0123456789abcdef"[code & 0xf]; + *--p = "0123456789abcdef"[(unsigned int)code & 0xf]; code >>= 4; } while (code > 0); From a5e8dc8a069d373936732c09c58abb5f20840f53 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 18 Dec 2017 21:52:13 -0500 Subject: [PATCH 16/55] Simplify code by using cms EM_WSIZE to compare bytes. This should work because the C compiler does it. --- tests/plat/dup_e.e | 44 ++++++++++++++++++-------------------------- tests/plat/exg_e.e | 9 ++------- 2 files changed, 20 insertions(+), 33 deletions(-) diff --git a/tests/plat/dup_e.e b/tests/plat/dup_e.e index 600161be4..c0e0001b3 100644 --- a/tests/plat/dup_e.e +++ b/tests/plat/dup_e.e @@ -32,13 +32,12 @@ size cal $finished end /* $_m_a_i_n */ - pro $check, 4 * EM_PSIZE + 2 * EM_WSIZE + pro $check, 4 * EM_PSIZE + EM_WSIZE #define p1 (-1 * EM_PSIZE) #define p2 (-2 * EM_PSIZE) #define p3 (-3 * EM_PSIZE) #define p4 (-4 * EM_PSIZE) -#define b (p4 - 1 * EM_WSIZE) -#define i (p4 - 2 * EM_WSIZE) +#define i (p4 - EM_WSIZE) /* Set pointers to all 4 copies. 
*/ lae src @@ -60,56 +59,49 @@ size 4 lal p4 loi EM_PSIZE - loi 1 - loc 1 - loc EM_WSIZE - cii - stl b /* b = byte from src */ - lol b + loi 1 /* byte from src */ lal p3 loi EM_PSIZE loi 1 /* byte from 3rd copy */ - loc 1 - loc EM_WSIZE - cii - beq *3 + cms EM_WSIZE + zeq *3 loc (3 * 256) lol i - adi EM_WSIZE + adi EM_WSIZE /* 0x300 + i */ loc EM_WSIZE loc 4 cuu cal $fail asp 4 3 - lol b + lal p4 + loi EM_PSIZE + loi 1 /* byte from src */ lal p2 loi EM_PSIZE loi 1 /* byte from 2nd copy */ - loc 1 - loc EM_WSIZE - cii - beq *2 + cms EM_WSIZE + zeq *2 loc (2 * 256) lol i - adi EM_WSIZE + adi EM_WSIZE /* 0x200 + i */ loc EM_WSIZE loc 4 cuu cal $fail asp 4 2 - lol b + lal p4 + loi EM_PSIZE + loi 1 /* byte from src */ lal p1 loi EM_PSIZE loi 1 /* byte from 1st copy */ - loc 1 - loc EM_WSIZE - cii - beq *1 + cms EM_WSIZE + zeq *1 loc (1 * 256) lol i - adi EM_WSIZE + adi EM_WSIZE /* 0x100 + i */ loc EM_WSIZE loc 4 cuu diff --git a/tests/plat/exg_e.e b/tests/plat/exg_e.e index 3a1f06d3b..617f07104 100644 --- a/tests/plat/exg_e.e +++ b/tests/plat/exg_e.e @@ -42,16 +42,11 @@ src lal p2 loi EM_PSIZE loi 1 /* byte from src */ - loc 1 - loc EM_WSIZE - cii lal p1 loi EM_PSIZE loi 1 /* byte from exchanged copy */ - loc 1 - loc EM_WSIZE - cii - beq *2 + cms EM_WSIZE + zeq *2 lol i loc EM_WSIZE loc 4 From aa9418c029c36c952c78e5243043e13001d01c14 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 18 Dec 2017 21:58:57 -0500 Subject: [PATCH 17/55] Pass 4 bytes to fail(uint32_t) This would become necessary if something failed on a platform with 16-bit int (EM_WSIZE == 2). Remove unreachable `ret 0`. If reached, it wouldn't work to return from _m_a_i_n. 
--- tests/plat/inn_e.e | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/plat/inn_e.e b/tests/plat/inn_e.e index a5aee02f5..543623b3f 100644 --- a/tests/plat/inn_e.e +++ b/tests/plat/inn_e.e @@ -14,6 +14,9 @@ zeq *1 loc __LINE__ + loc EM_WSIZE + loc 4 + cuu cal $fail ass EM_WSIZE 1 @@ -31,6 +34,9 @@ zne *2 loc __LINE__ + loc EM_WSIZE + loc 4 + cuu cal $fail ass EM_WSIZE 2 @@ -49,6 +55,9 @@ zeq *3 loc __LINE__ + loc EM_WSIZE + loc 4 + cuu cal $fail ass EM_WSIZE 3 @@ -67,11 +76,12 @@ zne *4 loc __LINE__ + loc EM_WSIZE + loc 4 + cuu cal $fail ass EM_WSIZE 4 cal $finished - ret 0 - end From 787fdeaaa9330e267dd49aa26818a4e1bd02eded Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 21 Dec 2017 17:44:03 -0500 Subject: [PATCH 18/55] Add some tests for Modula-2. --- tests/plat/build.lua | 7 +- tests/plat/m2/ConvTest_mod.mod | 36 +++++++++ tests/plat/m2/NestProc_mod.mod | 132 +++++++++++++++++++++++++++++++ tests/plat/m2/OpenArray_mod.mod | 59 ++++++++++++++ tests/plat/m2/Set100_mod.mod | 61 ++++++++++++++ tests/plat/m2/StringTest_mod.mod | 55 +++++++++++++ 6 files changed, 349 insertions(+), 1 deletion(-) create mode 100644 tests/plat/m2/ConvTest_mod.mod create mode 100644 tests/plat/m2/NestProc_mod.mod create mode 100644 tests/plat/m2/OpenArray_mod.mod create mode 100644 tests/plat/m2/Set100_mod.mod create mode 100644 tests/plat/m2/StringTest_mod.mod diff --git a/tests/plat/build.lua b/tests/plat/build.lua index 609771ed1..666af7d95 100644 --- a/tests/plat/build.lua +++ b/tests/plat/build.lua @@ -17,7 +17,12 @@ definerule("plat_testsuite", "tests/plat/*.p", "tests/plat/b/*.b", "tests/plat/bugs/bug-22-inn_mod.mod", - "tests/plat/bugs/bug-62-notvar_var_e.c" + "tests/plat/bugs/bug-62-notvar_var_e.c", + "tests/plat/m2/ConvTest_mod.mod", + "tests/plat/m2/NestProc_mod.mod", + "tests/plat/m2/OpenArray_mod.mod", + "tests/plat/m2/Set100_mod.mod", + "tests/plat/m2/StringTest_mod.mod" ) acklibrary { diff --git 
a/tests/plat/m2/ConvTest_mod.mod b/tests/plat/m2/ConvTest_mod.mod new file mode 100644 index 000000000..9fa828af0 --- /dev/null +++ b/tests/plat/m2/ConvTest_mod.mod @@ -0,0 +1,36 @@ +MODULE ConvTest; +FROM Conversions IMPORT + ConvertOctal, ConvertHex, ConvertCardinal, ConvertInteger; +FROM Strings IMPORT CompareStr; +FROM Test IMPORT fail, finished; + +(* Asserts a = b, or fails with code. *) +PROCEDURE A(a, b: ARRAY OF CHAR; code: INTEGER); +BEGIN + IF (CompareStr(a, b) # 0) OR (CompareStr(a, "wrong string") = 0) THEN + fail(code) + END +END A; + +VAR + str: ARRAY [0..15] OF CHAR; +BEGIN + ConvertOctal( 9, 6, str); A(" 11", str, 1); + ConvertOctal( 59, 6, str); A(" 73", str, 2); + ConvertOctal(278, 6, str); A(" 426", str, 3); + + ConvertHex( 9, 6, str); A(" 9", str, 11H); + ConvertHex( 59, 6, str); A(" 3B", str, 12H); + ConvertHex(278, 6, str); A(" 116", str, 13H); + + ConvertCardinal( 9, 6, str); A(" 9", str, 21H); + ConvertCardinal( 59, 6, str); A(" 59", str, 22H); + ConvertCardinal(278, 6, str); A(" 278", str, 23H); + + ConvertInteger( 9, 6, str); A(" 9", str, 31H); + ConvertInteger( 59, 6, str); A(" 59", str, 32H); + ConvertInteger( 278, 6, str); A(" 278", str, 33H); + ConvertInteger(-424, 6, str); A(" -424", str, 34H); + + finished; +END ConvTest. diff --git a/tests/plat/m2/NestProc_mod.mod b/tests/plat/m2/NestProc_mod.mod new file mode 100644 index 000000000..d46731f55 --- /dev/null +++ b/tests/plat/m2/NestProc_mod.mod @@ -0,0 +1,132 @@ +(* + * Calls nested procedures. The compiler emits the EM instructions + * _lxl_ and _lxa_ to access the variables in the statically enclosing + * procedures. + * + * You can cheat this test if a = b is TRUE for any a, b. + *) +MODULE NestProc; +FROM Test IMPORT fail, finished; + +(* Asserts cond, or fails with code. *) +PROCEDURE A(cond: BOOLEAN; code: INTEGER); +BEGIN + IF NOT cond THEN fail(code) END +END A; + +TYPE + Set8 = SET OF [0..63]; + (* Box has fields of size 8, 4, and 1. 
*) + Box = RECORD + huge: Set8; + big: LONGINT; + small: CHAR; + tiny: CHAR; + END; + +PROCEDURE First(a, b: INTEGER; in: Box): Box; + VAR c, d: INTEGER; + out: Box; + + PROCEDURE Second(e: INTEGER); + VAR f: INTEGER; + + PROCEDURE Third(g: INTEGER); + VAR h: INTEGER; + + PROCEDURE CheckThird; + BEGIN + A(a = 1354, 31H); (* lxa 3 *) + A(b = 3385, 32H); + A(c = 14349, 33H); (* lxl 3 *) + A(d = 30989, 34H); + A(e = 28935, 35H); (* lxa 2 *) + A(f = 13366, 36H); (* lxl 2 *) + A(g = 7988, 37H); (* lxa 1 *) + A(h = 11711, 38H); (* lxl 1 *) + END CheckThird; + + PROCEDURE Fourth(i: INTEGER); + VAR j: INTEGER; + + PROCEDURE Fifth(k: INTEGER); + VAR l: INTEGER; + + PROCEDURE Sixth(): INTEGER; + BEGIN + A(e = 2, 61H); (* lxa 4 *) + A(f = 11703, 62H); (* lxl 4 *) + + b := 3385; (* lxa 5 *) + d := 30989; (* lxl 5 *) + e := 28935; (* lxa 4 *) + f := 13366; (* lxl 4 *) + CheckThird; + + (* lxa 5 *) + A(in.huge = Set8{11, 12, 40, 40, 43, 56}, 63H); + A(in.big = 2130020019D, 64H); + A(in.small = 300C, 65H); + A(in.tiny = 175C, 66H); + + (* lxl 5 *) + out.huge := Set8{8, 19, 36, 41, 47, 62}; + out.big := 385360915D; + out.small := 366C; + out.tiny := 131C; + + j := k; (* lxl 2, lxa 1 *) + l := i; (* lxl 1, lxa 2 *) + RETURN 5217; + END Sixth; + + PROCEDURE TwiceSixth(): INTEGER; + BEGIN + (* lxa and lxl must follow the static chain from Sixth to + * Fifth, not dynamic chain from Sixth to TwiceSixth. 
*) + RETURN 2 * Sixth(); + END TwiceSixth; + + BEGIN (* Fifth *) + A(TwiceSixth() = 10434, 51H); + A(k = 11567, 51H); + A(l = 32557, 52H); + END Fifth; + + BEGIN (* Fourth *) + Fifth(11567); (* k *) + A(i = 32557, 41H); + A(j = 11567, 42H); + END Fourth; + + BEGIN (* Third *) + h := 11711; + Fourth(32557); (* i *) + END Third; + + BEGIN (* Second *) + f := 11703; + Third(7988); (* g *) + END Second; + +BEGIN (* First *) + c := 14349; + d := 17850; + Second(2); (* e *) + RETURN out +END First; + +VAR + x: Box; +BEGIN + x.huge := Set8{11, 12, 40, 40, 43, 56}; + x.big := 2130020019D; + x.small := 300C; + x.tiny := 175C; + x := First(1354, 19516, x); (* a, b, in *) + A(x.huge = Set8{8, 19, 36, 41, 47, 62}, 71H); + A(x.big = 385360915D, 72H); + A(x.small = 366C, 73H); + A(x.tiny = 131C, 74H); + finished; +END NestProc. diff --git a/tests/plat/m2/OpenArray_mod.mod b/tests/plat/m2/OpenArray_mod.mod new file mode 100644 index 000000000..1aa219a55 --- /dev/null +++ b/tests/plat/m2/OpenArray_mod.mod @@ -0,0 +1,59 @@ +(* + * Passes an open array to a procedure. The back end must implement + * some EM instructions for accessing arrays. + *) +MODULE OpenArray; +FROM Test IMPORT fail, finished; + +(* Asserts condition or fails with code. *) +PROCEDURE A(cond: BOOLEAN; code: INTEGER); +BEGIN + IF NOT cond THEN fail(code) END +END A; + +(* Called as Modify(ary1, 1) or Modify(ary2, 2). *) +PROCEDURE Modify(VAR ary: ARRAY OF INTEGER; what: INTEGER); + VAR hi: INTEGER; +BEGIN + hi := what * 100H; + + (* Indices must be from 0 to HIGH(ary). *) + A((what = 1) = (HIGH(ary) = 3), hi + 1); + A((what = 2) = (HIGH(ary) = 9), hi + 2); + + (* ary[2] must equal ary1[3] or ary2[3]. *) + A((what = 1) = (ary[2] = 13), hi + 3); + A((what = 2) = (ary[2] = 37), hi + 4); + + (* Modify some values. 
*) + IF HIGH(ary) >= 3 THEN ary[3] := 20 END; + IF HIGH(ary) >= 6 THEN ary[6] := 40 END; + IF HIGH(ary) >= 9 THEN ary[9] := 12 END; +END Modify; + +VAR + ary1: ARRAY [1..4] OF INTEGER; + ary2: ARRAY [1..10] OF INTEGER; +BEGIN + (* Initialize the arrays. *) + ary1[1] := 6; ary1[2] := 9; ary1[3] := 13; ary1[4] := 49; + + ary2[1] := 56; ary2[2] := 79; ary2[3] := 37; ary2[4] := 0; + ary2[5] := 70; ary2[6] := 62; ary2[7] := 64; ary2[8] := 92; + ary2[9] := 29; ary2[10] := 90; + + (* Pass them as open arrays. *) + Modify(ary1, 1); + Modify(ary2, 2); + + (* Check that ary1[4], ary2[4, 7, 10] have been modified. *) + A(ary1[1] = 6, 301H); A(ary1[2] = 9, 301H); A(ary1[3] = 13, 303H); + A(ary1[4] = 20, 304H); + + A(ary2[1] = 56, 401H); A(ary2[2] = 79, 402H); A(ary2[3] = 37, 403H); + A(ary2[4] = 20, 404H); A(ary2[5] = 70, 406H); A(ary2[6] = 62, 406H); + A(ary2[7] = 40, 407H); A(ary2[8] = 92, 408H); A(ary2[9] = 29, 409H); + A(ary2[10] = 12, 40AH); + + finished; +END OpenArray. diff --git a/tests/plat/m2/Set100_mod.mod b/tests/plat/m2/Set100_mod.mod new file mode 100644 index 000000000..3b200d318 --- /dev/null +++ b/tests/plat/m2/Set100_mod.mod @@ -0,0 +1,61 @@ +(* + * Operates on sets of 100 integers. The compiler emits, and the back + * end must implement, the EM instructions for large sets. + *) +MODULE Set100; +FROM Test IMPORT fail, finished; + +(* Asserts condition or fails with code. *) +PROCEDURE A(cond: BOOLEAN; code: INTEGER); +BEGIN + IF NOT cond THEN fail(code) END +END A; + +TYPE + Num = [1..100]; + NumSet = SET OF Num; +VAR + (* VAR, not CONST, so compiler can't do constant operations. *) + primes, teens, lowevens, eighties, nineties: NumSet; +CONST + (* These are the expected results of some set operations. *) + primeteen = NumSet{13, 17, 19}; + compeighties = NumSet{80..82, 84..88}; + teenxoreven = NumSet{2, 4, 6, 8, 10, 12, 13, 15, 17, 19, 20}; + eightiesnineties = NumSet{80..99}; + +(* Checks that some set is equal to the expected result. 
Also checks + * that the set is not equal to the other sets. *) +PROCEDURE Check(set: NumSet; what: INTEGER); + VAR hi: INTEGER; +BEGIN + hi := what * 100H; + + (* The compiler uses cms in EM to check set equality. *) + A((what = 1) = (set = primeteen), hi + 1); + A((what = 2) = (set = compeighties), hi + 2); + A((what = 3) = (set = teenxoreven), hi + 3); + A((what = 4) = (set = eightiesnineties), hi + 4); +END Check; + +PROCEDURE Range(min: Num; max: Num): NumSet; +BEGIN + (* The compiler calls LtoUset in lang/m2/libm2/LtoUset.e *) + RETURN NumSet{min..max} +END Range; + +BEGIN + primes := NumSet{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, + 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97}; + teens := NumSet{13, 14, 15, 16, 17, 18, 19}; + lowevens := NumSet{2, 4, 6, 8, 10, 12, 14, 16, 18, 20}; + eighties := Range(80, 89); + nineties := Range(90, 99); + + Check(primes * teens, 1); + Check(eighties - primes, 2); + Check(teens / lowevens, 3); + Check(eighties + nineties, 4); + + finished; +END Set100. diff --git a/tests/plat/m2/StringTest_mod.mod b/tests/plat/m2/StringTest_mod.mod new file mode 100644 index 000000000..41552aa7a --- /dev/null +++ b/tests/plat/m2/StringTest_mod.mod @@ -0,0 +1,55 @@ +MODULE StringTest; +FROM Strings IMPORT + Assign, Insert, Delete, Pos, Copy, Concat, Length, CompareStr; +FROM Test IMPORT fail, finished; + +(* Asserts condition or fails with code. 
*) +PROCEDURE A(cond: BOOLEAN; code: INTEGER); +BEGIN + IF NOT cond THEN fail(code) END +END A; + +VAR + small: ARRAY [0..3] OF CHAR; + big: ARRAY [0..99] OF CHAR; +BEGIN + (* CompareStr *) + A(CompareStr("ablaze", "ablaze") = 0, 1); + A(CompareStr("ablaze", "abloom") < 0, 2); + A(CompareStr("abloom", "ablaze") > 0, 3); + A(CompareStr("abloom", "abloom") = 0, 4); + + (* Assign, Insert, Delete *) + Assign("obsequiosity", small); + A(CompareStr("obsequiosity", small) > 0, 11H); + Assign("obsequiosity", big); + A(CompareStr("obsequiosity", big) = 0, 12H); + A(big[11] = 'y', 13H); + A(big[11] # 0C, 14H); + A(big[12] # 'y', 15H); + A(big[12] = 0C, 16H); + Insert(" omnihuman", big, 9); + A(CompareStr("obsequios omnihumanity", big) = 0, 17H); + Delete(big, 6, 15); + A(CompareStr("obsequy", big) = 0, 18H); + + (* Pos, Copy *) + Assign("Now is the time for all good men to come...", big); + A(Pos("w", big) = 2, 21H); + A(Pos("t", big) = 7, 22H); + A(Pos("ti", big) = 11, 23H); + A(Pos("men", big) = 29, 24H); + A(Pos("women", big) > 42, 25H); + Copy(big, 29, 2, small); + A(CompareStr("me", small) = 0, 26H); + + (* Concat, Length *) + Concat("pictorial", "ist", big); + A(CompareStr("pictorialist", big) = 0, 31H); + A(Length(big) = 12, 32H); + Concat("zit", "her", small); + A(CompareStr("zither", small) > 0, 33H); + A(Length(small) < 5, 34H); + + finished; +END StringTest. From 4bb31c296dbd9f176a84918e5508d22505519569 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 21 Dec 2017 18:19:26 -0500 Subject: [PATCH 19/55] Revise the comments in the EM tests. You can cheat these tests if _cms_ and _cmu_ always push zero. 
--- tests/plat/dup_e.e | 6 ++++-- tests/plat/exg_e.e | 6 ++++-- tests/plat/rotate_e.e | 6 +++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/plat/dup_e.e b/tests/plat/dup_e.e index c0e0001b3..649589d84 100644 --- a/tests/plat/dup_e.e +++ b/tests/plat/dup_e.e @@ -2,8 +2,10 @@ mes 2, EM_WSIZE, EM_PSIZE /* - * Test _dup_ and _dus_ by loading 20 bytes from _src_, then making - * and checking some duplicates. + * Tests _dup_ and _dus_ by loading 20 bytes from _src_, then making + * and checking some duplicates. The compilers might never _dup_ or + * _dus_ with large sizes, so the compilers might work even if this + * test fails. You can cheat this test if _cms_ always pushes zero. */ exa src diff --git a/tests/plat/exg_e.e b/tests/plat/exg_e.e index 617f07104..455256483 100644 --- a/tests/plat/exg_e.e +++ b/tests/plat/exg_e.e @@ -2,8 +2,10 @@ mes 2, EM_WSIZE, EM_PSIZE /* - * Test _exg_ by loading 40 bytes from _src_, then exchanging 20 and - * 20 bytes, and checking the result. + * Tests _exg_ by loading 40 bytes from _src_, then exchanging 20 and + * 20 bytes, and checking the result. The compilers might never _exg_ + * large sizes, so the compilers might work even if this test fails. + * You can cheat this test if _cms_ always pushes zero. */ exa src diff --git a/tests/plat/rotate_e.e b/tests/plat/rotate_e.e index a6f8f28dd..0698c58a0 100644 --- a/tests/plat/rotate_e.e +++ b/tests/plat/rotate_e.e @@ -2,12 +2,16 @@ mes 2, EM_WSIZE, EM_PSIZE /* - * Test _rol_ (rotate left) and _ror_ (rotate right). + * Tests _rol_ (rotate left) and _ror_ (rotate right). Several back + * ends provide _rol_ and _ror_, but as of year 2017, the compilers + * and optimizers had never emitted _rol_ or _ror_. * * By tradition, _rol_ and _ror_ can't rotate values shorter than the * word size, or longer than 4 bytes. * - If word size is 2, then try rotating 2-byte and 4-byte values. * - If word size is 4, then try rotating 4-byte values. 
+ * + * You can cheat this test if _cmu_ always pushes zero. */ #if EM_WSIZE == 2 From 2eeee36f7870679eed0fa68b53bc57902b4dc63a Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 22 Dec 2017 17:04:16 -0500 Subject: [PATCH 20/55] Add FRAME_V tokens for local variables. When storing to a local, stop killing the tokens of other locals, unless they might overlap with the stored local. This helps some procedures that juggle locals when the locals aren't in registers. Also use FRAME_V tokens for locals in statically enclosing procedures. Rewrite _lxa_ as _lxl_, to skip the `addi ?,?,8` if we can add 8 to the next constant. The PowerPC code from _lxl_ is now sometimes better, sometimes worse than before. The i386 table provided the idea to use %size to find overlapping locals. --- mach/powerpc/ncg/table | 329 ++++++++++++++++++++++++++++++----------- 1 file changed, 243 insertions(+), 86 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 28ef4b40e..3f4794fc8 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -9,7 +9,6 @@ INT64 = 8 FP_OFFSET = 0 /* Offset of saved FP relative to our FP */ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ -SL_OFFSET = 8 /* Offset of static link */ #define COMMENT(n) /* comment {LABEL, n} */ @@ -19,6 +18,12 @@ SL_OFFSET = 8 /* Offset of static link */ #define smalls(n) sfit(n, 16) #define smallu(n) ufit(n, 16) +/* Finds FRAME_V tokens that overlap myoff, mysize. */ +#define fover(myoff, mysize) (%off+%size>(myoff) && %off<((myoff)+(mysize))) + +/* Checks if we can use {LXFRAME, x}. */ +#define nicelx(x) ((x)>=1 && (x)<=0x8000) + #define lo(n) ((n) & 0xFFFF) #define hi(n) (((n)>>16) & 0xFFFF) @@ -138,6 +143,8 @@ TOKENS DIV_RR = { GPR reg1; GPR reg2; } 4. /* reg1 / reg2 signed */ DIV_RR_U = { GPR reg1; GPR reg2; } 4. /* reg1 / reg2 unsigned */ +/* Indirect loads and stores */ + IND_RC_B = { GPR reg; INT off; } 4 off "(" reg ")". 
IND_RL_B = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")". IND_RR_B = { GPR reg1; GPR reg2; } 4. @@ -154,6 +161,23 @@ TOKENS IND_RL_D = { GPR reg; ADDR adr; } 8 "lo16[" adr "](" reg ")". IND_RR_D = { GPR reg1; GPR reg2; } 8. +/* Local variables in frame */ + + FRAME_B = { INT level; GPR reg; INT off; INT size; } + 4 off "(" reg ")". + FRAME_H = { INT level; GPR reg; INT off; INT size; } + 4 off "(" reg ")". + FRAME_H_S = { INT level; GPR reg; INT off; INT size; } + 4 off "(" reg ")". + FRAME_W = { INT level; GPR reg; INT off; INT size; } + 4 off "(" reg ")". + FRAME_D = { INT level; GPR reg; INT off; INT size; } + 8 off "(" reg ")". + + LXFRAME = { INT level; } 4. + +/* Bitwise logic */ + NOT_R = { GPR reg; } 4. /* ~reg */ AND_RIS = { GPR reg; INT valhi; } 4. AND_RC = { GPR reg; INT val; } 4. @@ -170,6 +194,8 @@ TOKENS NOR_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 | reg2) */ EQV_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 ^ reg2) */ +/* Comparisons */ + COND_RC = { GPR reg; INT val; } 4. COND_RR = { GPR reg1; GPR reg2; } 4. CONDL_RC = { GPR reg; INT val; } 4. @@ -200,27 +226,37 @@ SETS CONST = C + CONST_STACK. - IND_ALL_B = IND_RC_B + IND_RL_B + IND_RR_B. - IND_ALL_H = IND_RC_H + IND_RL_H + IND_RR_H + - IND_RC_H_S + IND_RL_H_S + IND_RR_H_S. - IND_ALL_W = IND_RC_W + IND_RL_W + IND_RR_W. - IND_ALL_D = IND_RC_D + IND_RL_D + IND_RR_D. + SET_RC_B = IND_RC_B + IND_RL_B + FRAME_B. + SET_RC_H = IND_RC_H + IND_RL_H + FRAME_H. + SET_RC_H_S = IND_RC_H_S + IND_RL_H_S + FRAME_H_S. + SET_RC_W = IND_RC_W + IND_RL_W + FRAME_W. + SET_RC_D = IND_RC_D + IND_RL_D + FRAME_D. + + IND_ALL_B = IND_RC_B + IND_RL_B + IND_RR_B. + IND_ALL_H = IND_RC_H + IND_RL_H + IND_RR_H + + IND_RC_H_S + IND_RL_H_S + IND_RR_H_S. + IND_ALL_W = IND_RC_W + IND_RL_W + IND_RR_W. + IND_ALL_D = IND_RC_D + IND_RL_D + IND_RR_D. + IND_V = IND_ALL_B + IND_ALL_H + IND_ALL_W + IND_ALL_D. + + FRAME_V = FRAME_B + FRAME_H + FRAME_H_S + FRAME_W + FRAME_D. 
/* anything killed by sti (store indirect) */ - MEMORY = IND_ALL_B + IND_ALL_H + IND_ALL_W + IND_ALL_D. + MEMORY = IND_V + FRAME_V. /* any integer from stack that we can easily move to GPR */ INT_W = REG + CONST_STACK + SEX_B + SEX_H + SUM_RIS + SUM_RC + SUM_RL + SUM_RR + SUB_RR + NEG_R + MUL_RR + DIV_RR + DIV_RR_U + IND_ALL_B + IND_ALL_H + IND_ALL_W + + FRAME_B + FRAME_H + FRAME_H_S + FRAME_W + NOT_R + AND_RIS + AND_RC + AND_RR + ANDC_RR + OR_RIS + OR_RC + OR_RR + ORC_RR + XOR_RIS + XOR_RC + XOR_RR + NAND_RR + NOR_RR + EQV_RR + XEQ + XNE + XGT + XGE + XLT + XLE. - FLOAT_D = FREG + IND_ALL_D. - FLOAT_W = FSREG + IND_ALL_W. + FLOAT_D = FREG + IND_ALL_D + FRAME_D. + FLOAT_W = FSREG + IND_ALL_W + FRAME_W. INSTRUCTIONS @@ -293,21 +329,21 @@ INSTRUCTIONS frsp FSREG+LOCAL:wo, FREG:ro cost(4, 5). fsub FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5). fsubs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5). - lbz GPR:wo, IND_RC_B+IND_RL_B:ro cost(4, 3). + lbz GPR:wo, SET_RC_B:ro cost(4, 3). lbzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lfd FPR+DLOCAL:wo, IND_RC_D+IND_RL_D:ro cost(4, 5). + lfd FPR+DLOCAL:wo, SET_RC_D:ro cost(4, 5). lfdu FPR:wo, IND_RC_D:ro cost(4, 5). lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5). - lfs FSREG+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 4). + lfs FSREG+LOCAL:wo, SET_RC_W:ro cost(4, 4). lfsu FSREG:wo, IND_RC_W:rw cost(4, 4). lfsx FSREG:wo, GPR:ro, GPR:ro cost(4, 4). - lha GPR:wo, IND_RC_H_S+IND_RL_H_S:ro cost(4, 3). + lha GPR:wo, SET_RC_H_S:ro cost(4, 3). lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lhz GPR:wo, IND_RC_H+IND_RL_H:ro cost(4, 3). + lhz GPR:wo, SET_RC_H:ro cost(4, 3). lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). lwzu GPR:wo, IND_RC_W:rw cost(4, 3). lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lwz GPR+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 3). + lwz GPR+LOCAL:wo, SET_RC_W:ro cost(4, 3). mfcr GPR:wo cost(4,2). mfspr GPR:wo, SPR:ro cost(4, 3). mtspr SPR:wo, GPR:ro cost(4, 2). @@ -336,17 +372,17 @@ INSTRUCTIONS sraw GPR+LOCAL:wo, GPR:ro, GPR:ro cost(4, 2). 
srawi GPR+LOCAL:wo, GPR:ro, CONST:ro cost(4, 2). srw GPR+LOCAL:wo, GPR:ro, GPR:ro. - stb GPR:ro, IND_RC_B+IND_RL_B:rw cost(4, 3). + stb GPR:ro, SET_RC_B:rw cost(4, 3). stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3). - stfd FPR:ro, IND_RC_D+IND_RL_D:rw cost(4, 4). + stfd FPR:ro, SET_RC_D:rw cost(4, 4). stfdu FPR:ro, IND_RC_D:rw cost(4, 4). stfdx FPR:ro, GPR:ro, GPR:ro cost(4, 4). - stfs FSREG:ro, IND_RC_W+IND_RL_W:rw cost(4, 3). + stfs FSREG:ro, SET_RC_W:rw cost(4, 3). stfsu FSREG:ro, IND_RC_W:rw cost(4, 3). stfsx FSREG:ro, GPR:ro, GPR:ro cost(4, 3). - sth GPR:ro, IND_RC_H+IND_RL_H:rw cost(4, 3). + sth GPR:ro, SET_RC_H:rw cost(4, 3). sthx GPR:ro, GPR:ro, GPR:ro cost(4, 3). - stw GPR:ro, IND_RC_W+IND_RL_W:rw cost(4, 3). + stw GPR:ro, SET_RC_W:rw cost(4, 3). stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3). stwu GPR:ro, IND_RC_W:rw cost(4, 3). xor GPR:wo, GPR:ro, GPR:ro. @@ -439,7 +475,7 @@ MOVES /* Read byte */ - from IND_RC_B+IND_RL_B to GPR + from SET_RC_B to GPR gen lbz %2, %1 from IND_RR_B to GPR @@ -447,7 +483,7 @@ MOVES /* Write byte */ - from GPR to IND_RC_B+IND_RL_B + from GPR to SET_RC_B gen stb %1, %2 from GPR to IND_RR_B @@ -455,13 +491,13 @@ MOVES /* Read halfword (short) */ - from IND_RC_H+IND_RL_H to GPR + from SET_RC_H to GPR gen lhz %2, %1 from IND_RR_H to GPR gen lhzx %2, %1.reg1, %1.reg2 - from IND_RC_H_S+IND_RL_H_S to GPR + from SET_RC_H_S to GPR gen lha %2, %1 from IND_RR_H_S to GPR @@ -469,7 +505,7 @@ MOVES /* Write halfword */ - from GPR to IND_RC_H+IND_RL_H + from GPR to SET_RC_H gen sth %1, %2 from GPR to IND_RR_H @@ -477,13 +513,13 @@ MOVES /* Read word */ - from IND_RC_W+IND_RL_W to GPR + from SET_RC_W to GPR gen lwz %2, %1 from IND_RR_W to GPR gen lwzx %2, %1.reg1, %1.reg2 - from IND_RC_W+IND_RL_W to FSREG + from SET_RC_W to FSREG gen lfs %2, %1 from IND_RR_W to FSREG @@ -491,13 +527,13 @@ MOVES /* Write word */ - from GPR to IND_RC_W+IND_RL_W + from GPR to SET_RC_W gen stw %1, %2 from GPR to IND_RR_W gen stwx %1, %2.reg1, %2.reg2 - from FSREG to 
IND_RC_W+IND_RL_W + from FSREG to SET_RC_W gen stfs %1, %2 from FSREG to IND_RR_W @@ -505,7 +541,7 @@ MOVES /* Read double */ - from IND_RC_D+IND_RL_D to FPR + from SET_RC_D to FPR gen lfd %2, %1 from IND_RR_D to FPR @@ -513,12 +549,41 @@ MOVES /* Write double */ - from FPR to IND_RC_D+IND_RL_D + from FPR to SET_RC_D gen stfd %1, %2 from FPR to IND_RR_D gen stfdx %1, %2.reg1, %2.reg2 +/* LXFRAME is a lexical frame from the static chain. We define a move + so "uses REG={LXFRAME, $1}" may find a register with the same + frame, and not repeat the move. This move can't search for a REG + with {LXFRAME, $1-1}, but must always start from fp. The static + chain, if it exists, is the argument at fp + EM_BSIZE. */ + + from LXFRAME %level==1 to REG + gen lwz %2, {IND_RC_W, fp, EM_BSIZE} + from LXFRAME %level==2 to REG + gen lwz %2, {IND_RC_W, fp, EM_BSIZE} + /* PowerPC can't add r0 + EM_BSIZE, + * so %2 must not be r0. */ + lwz %2, {IND_RC_W, %2, EM_BSIZE} + from LXFRAME %level==3 to REG + gen lwz %2, {IND_RC_W, fp, EM_BSIZE} + lwz %2, {IND_RC_W, %2, EM_BSIZE} + lwz %2, {IND_RC_W, %2, EM_BSIZE} + from LXFRAME %level==4 to REG + gen lwz %2, {IND_RC_W, fp, EM_BSIZE} + lwz %2, {IND_RC_W, %2, EM_BSIZE} + lwz %2, {IND_RC_W, %2, EM_BSIZE} + lwz %2, {IND_RC_W, %2, EM_BSIZE} + from LXFRAME to REG /* assuming %level in 2 to 0x8000 */ + gen li %2, {C, %1.level-1} + mtspr ctr, %2 + lwz %2, {IND_RC_W, fp, EM_BSIZE} + 1: lwz %2, {IND_RC_W, %2, EM_BSIZE} + bdnz {LABEL, "1b"} + /* Logicals */ from NOT_R to GPR @@ -661,6 +726,11 @@ TESTS STACKINGRULES + /* We don't allow GPR-REG on the stack. The intent is to ban + * r0 from the stack, but this also bans fp from the stack. + * This is odd because most other tables for ncg allow the + * frame pointer on the stack. 
+ */ from REG to STACK gen COMMENT("stack REG") @@ -760,6 +830,11 @@ COERCIONS gen move {SUM_RR, %1.reg1, %1.reg2}, r12 yields {IND_RC_W, r12, 4} {IND_RC_W, r12, 0} + from FRAME_D %off<=0x7FFA + yields + {FRAME_W, %1.level, %1.reg, %1.off+4, 4} + {FRAME_W, %1.level, %1.reg, %1.off, 4} + PATTERNS @@ -897,24 +972,47 @@ PATTERNS uses REG={SUM_RIS, fp, his($1)} yields {SUM_RC, %a, los($1)} + pat lal loi smalls($1) && $2==1 /* Load byte from local */ + yields {FRAME_B, 0, fp, $1, 1} + + /* Load half-word from local and sign-extend */ + pat lal loi loc loc cii smalls($1) && $2==2 && $3==2 && $4==4 + yields {FRAME_H_S, 0, fp, $1, 1} + + pat lal loi smalls($1) && $2==2 /* Load half-word from local */ + yields {FRAME_H, 0, fp, $1, 1} + /* Load word from local */ pat lol inreg($1)==reg_any || inreg($1)==reg_float yields {LOCAL, $1} + pat lol smalls($1) + yields {FRAME_W, 0, fp, $1, 4} pat lol leaving lal $1 loi 4 - /* Load double-word from local */ - pat ldl inreg($1)==reg_float + pat ldl inreg($1)==reg_float /* Load double-word from local */ yields {DLOCAL, $1} + pat ldl smalls($1) && smalls($1+4) + /* smalls($1+4) implies FRAME_D %off<=0x7FFA */ + yields {FRAME_D, 0, fp, $1, 8} pat ldl leaving lal $1 loi 8 - /* Store word to local */ - pat stl inreg($1)==reg_any + pat lal sti smalls($1) && $2==1 /* Store byte to local */ + with REG + kills IND_V, FRAME_V %level==0 && fover($1, 1) + gen move %1, {FRAME_B, 0, fp, $1, 1} + + pat lal sti smalls($1) && $2==2 /* Store half-word to local */ + with REG + kills IND_V, FRAME_V %level==0 && fover($1, 2) + gen move %1, {FRAME_H, 0, fp, $1, 2} + + pat stl inreg($1)==reg_any /* Store word to local */ with exact INT_W /* ncg fails to infer that regvar($1) is dead!
*/ kills regvar($1) @@ -924,58 +1022,69 @@ PATTERNS lwz {LOCAL, $1}, {IND_RC_W, sp, 0} addi sp, sp, {C, 4} pat stl inreg($1)==reg_float - with exact FSREG+IND_ALL_W + with exact FLOAT_W kills regvar_w($1, reg_float) gen move %1, {FSREG_EXPR, regvar_w($1, reg_float)} with STACK gen lfs {LOCAL, $1}, {IND_RC_W, sp, 0} addi sp, sp, {C, 4} + pat stl smalls($1) + with REG+FSREG + kills IND_V, FRAME_V %level==0 && fover($1, 4) + gen move %1, {FRAME_W, 0, fp, $1, 4} pat stl leaving lal $1 sti 4 - /* Store double-word to local */ - pat sdl inreg($1)==reg_float - with exact FREG+IND_ALL_D + pat sdl inreg($1)==reg_float /* Store double-word to local */ + with exact FLOAT_D kills regvar_d($1, reg_float) gen move %1, {FPR_EXPR, regvar_d($1, reg_float)} with STACK gen lfd {DLOCAL, $1}, {IND_RC_D, sp, 0} addi sp, sp, {C, 8} + pat sdl smalls($1) && smalls($1+4) + with REG REG + kills IND_V, FRAME_V %level==0 && fover($1, 8) + gen + move %1, {FRAME_W, 0, fp, $1, 4} + move %2, {FRAME_W, 0, fp, $1+4, 4} + with FREG + kills IND_V, FRAME_V %level==0 && fover($1, 4) + gen move %1, {FRAME_D, 0, fp, $1, 8} pat sdl leaving lal $1 sti 8 - /* Load indirect from local */ - pat lil inreg($1)==reg_any + pat lil inreg($1)==reg_any /* Load indirect from local */ yields {IND_RC_W, regvar($1), 0} pat lil leaving lol $1 loi 4 - pat sil /* Save to indirected local */ + pat sil /* Store indirect to local */ leaving lol $1 sti 4 - pat zrl /* Zero local */ + pat zrl /* Zero local */ leaving loc 0 stl $1 - pat inl /* Increment local */ + pat inl /* Increment local */ leaving lol $1 loc 1 adi 4 stl $1 - pat del /* Decrement local */ + pat del /* Decrement local */ leaving lol $1 loc 1 @@ -983,9 +1092,87 @@ PATTERNS stl $1 +/* Local variables of procedures on static chain */ + + /* lxa (lexical argument base) -> lxl (lexical local base) */ + pat lxa adp nicelx($1) + leaving lxl $1 adp $2+EM_BSIZE + pat lxa lof nicelx($1) + leaving lxl $1 lof $2+EM_BSIZE + pat lxa ldf nicelx($1) + leaving lxl $1 ldf 
$2+EM_BSIZE + pat lxa stf nicelx($1) + leaving lxl $1 stf $2+EM_BSIZE + pat lxa sdf nicelx($1) + leaving lxl $1 stf $2+EM_BSIZE + pat lxa $1==0 || nicelx($1) + leaving lxl $1 adp EM_BSIZE + + /* Load locals in statically enclosing procedures */ + pat lxl adp loi nicelx($1) && smalls($2) && $3==1 + uses REG={LXFRAME, $1} + yields {FRAME_B, $1, %a, $2, 1} + pat lxl adp loi loc loc cii nicelx($1) && smalls($2) && + $3==2 && $4==2 && $5==4 + uses REG={LXFRAME, $1} + yields {FRAME_H_S, $1, %a, $2, 2} + pat lxl adp loi nicelx($1) && smalls($2) && $3==2 + uses REG={LXFRAME, $1} + yields {FRAME_H, $1, %a, $2, 2} + pat lxl lof nicelx($1) && smalls($2) + uses REG={LXFRAME, $1} + yields {FRAME_W, $1, %a, $2, 4} + pat lxl ldf nicelx($1) && smalls($2) && smalls($2+4) + uses REG={LXFRAME, $1} + /* smalls($2+4) implies FRAME_D %off<=0x7FFA */ + yields {FRAME_D, $1, %a, $2, 8} + + /* Store locals in statically enclosing procedures */ + pat lxl adp sti nicelx($1) && smalls($2) && $3==1 + with REG + kills IND_V, FRAME_V %level==$1 && fover($2, 1) + uses REG={LXFRAME, $1} + gen move %1, {FRAME_B, $1, %a, $2, 1} + pat lxl adp sti nicelx($1) && smalls($2) && $3==2 + with REG + kills IND_V, FRAME_V %level==$1 && fover($2, 2) + uses REG={LXFRAME, $1} + gen move %1, {FRAME_H, $1, %a, $2, 2} + pat lxl stf nicelx($1) && smalls($2) + with REG+FSREG + kills IND_V, FRAME_V %level==$1 && fover($2, 4) + uses REG={LXFRAME, $1} + gen move %1, {FRAME_W, $1, %a, $2, 4} + pat lxl sdf nicelx($1) && smalls($2) && smalls($2+4) + with REG REG + kills IND_V, FRAME_V %level==$1 && fover($2, 8) + uses REG={LXFRAME, $1} + gen + move %1, {FRAME_W, $1, %a, $2, 4} + move %2, {FRAME_W, $1, %a, $2+4, 4} + with FREG + kills IND_V, FRAME_V %level==$1 && fover($2, 8) + uses REG={LXFRAME, $1} + gen move %1, {FRAME_D, $1, %a, $2, 8} + + /* Programs use "lxl cal" to pass the static chain and call a + * nested procedure. This must push a token LXFRAME or the + * register fp to the real stack.
*/ + + /* Local base of procedure on static chain */ + pat lxl nicelx($1) + uses REG={LXFRAME, $1} + yields %a /* Can't yield LXFRAME. */ + + pat lxl $1==0 /* Our local base */ + with STACK + gen stwu fp, {IND_RC_W, sp, 0-4} + /* Can't yield fp. */ + + /* Global variables */ - pat lpi /* Load address of external function */ + pat lpi /* Load address of function */ leaving lae $1 @@ -2008,30 +2195,11 @@ PATTERNS leaving ret 0 - /* - * Lexical local base: lxl 0 yields our fp, lxl n yields the - * fp of the nth statically enclosing procedure. + /* Our caller's local base, "lxl 0 dch", appears in + * lang/cem/libcc.ansi/setjmp/setjmp.e, lang/m2/libm2/par_misc.e */ - pat lxl $1==0 - leaving - lor 0 - pat lxl $1==1 - yields {IND_RC_W, fp, SL_OFFSET} - pat lxl $1==2 - uses REG={IND_RC_W, fp, SL_OFFSET} - yields {IND_RC_W, %a, SL_OFFSET} - pat lxl $1==3 - uses REG={IND_RC_W, fp, SL_OFFSET}, reusing %a, REG - gen move {IND_RC_W, %a, SL_OFFSET}, %b - yields {IND_RC_W, %b, SL_OFFSET} - pat lxl $1>=4 && $1<=0x8000 - uses REG={IND_RC_W, fp, SL_OFFSET}, - REG={CONST_0000_7FFF, $1-1} - gen - mtspr ctr, %b - 1: lwz %a, {IND_RC_W, %a, SL_OFFSET} - bdnz {LABEL, "1b"} - yields %a + pat lxl dch $1==0 + yields {IND_RC_W, fp, FP_OFFSET} pat dch /* Dynamic chain: LB -> caller's LB */ with REG @@ -2041,11 +2209,6 @@ PATTERNS leaving adp EM_BSIZE - pat lxa /* Lexical argument base */ - leaving - lxl $1 - lpb - pat gto /* longjmp */ with STACK uses REG @@ -2058,26 +2221,20 @@ PATTERNS bctr. pat lor $1==0 /* Load local base */ - uses REG - gen - move fp, %a - yields %a + leaving lxl 0 pat lor $1==1 /* Load stack pointer */ - uses REG - gen - move sp, %a - yields %a + with STACK + uses REG=sp + yields %a /* Can't yield sp. 
*/ pat str $1==0 /* Store local base */ with REG - gen - move %1, fp + gen move %1, fp pat str $1==1 /* Store stack pointer */ with REG - gen - move %1, sp + gen move %1, sp pat lae rck $2==4 /* Range check */ with REG From 5867ca2f2c88e74f5a92210dc784ddef8a072512 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 22 Dec 2017 19:57:42 -0500 Subject: [PATCH 21/55] Remove two obsolete patterns. These patterns seem to have no effect on the generated code. --- mach/powerpc/ncg/table | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 3f4794fc8..129a09a41 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -927,11 +927,6 @@ PATTERNS leaving cal ".exg" - pat ste loe $1==$2 /* Store then load external */ - leaving - dup 4 - ste $1 - /* Type conversions */ @@ -1060,9 +1055,7 @@ PATTERNS lal $1 sti 8 - pat lil inreg($1)==reg_any /* Load indirect from local */ - yields {IND_RC_W, regvar($1), 0} - pat lil + pat lil /* Load indirect from local */ leaving lol $1 loi 4 From f96f918a2952c4b5928d6c5f1920985f87e0def7 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 22 Dec 2017 20:37:39 -0500 Subject: [PATCH 22/55] Generate shorter code for ret 4 and ret 8. 
--- mach/powerpc/ncg/table | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 129a09a41..04619aa71 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -2089,7 +2089,7 @@ PATTERNS pat lfr $1==INT32 /* Load function result, word */ yields r3 - pat lfr $1==INT64 /* Load function result, double-word */ + pat lfr $1==INT64 /* Load function result, double-word */ yields r4 r3 pat ret $1==0 /* Return from procedure */ @@ -2108,11 +2108,22 @@ PATTERNS pat ret $1==4 /* Return from procedure, word */ with REG3 leaving ret 0 + with STACK + gen lwz r3, {IND_RC_W, sp, 0} + leaving ret 0 pat ret $1==8 /* Return from proc, double-word */ - with REG3 REG + with REG3 INT_W gen move %2, r4 leaving ret 0 + with REG3 STACK + gen lwz r4, {IND_RC_W, sp, 0} + leaving ret 0 + with STACK + gen + lwz r3, {IND_RC_W, sp, 0} + lwz r4, {IND_RC_W, sp, 4} + leaving ret 0 /* * These rules for blm/bls are wrong if length is zero. From c964eeddba8458ac5795f3f0cc013e1f94b6e99a Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 22 Dec 2017 21:18:58 -0500 Subject: [PATCH 23/55] Remove INT32 and such. Adjust indentation. I understand `loi 4` more easily than `loi INT32`, because `loi 4` appears in .e files. So remove INT8, INT16, INT32, INT64. Add a comment to explain r3 during unconditional jumps. 
--- mach/powerpc/ncg/table | 223 ++++++++++++++++------------------------- 1 file changed, 89 insertions(+), 134 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 04619aa71..e566c51b9 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -2,18 +2,12 @@ EM_WSIZE = 4 EM_PSIZE = 4 EM_BSIZE = 8 /* two words saved in call frame */ -INT8 = 1 /* Size of values */ -INT16 = 2 -INT32 = 4 -INT64 = 8 - FP_OFFSET = 0 /* Offset of saved FP relative to our FP */ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ #define COMMENT(n) /* comment {LABEL, n} */ - -#define nicesize(x) ((x)==INT8 || (x)==INT16 || (x)==INT32 || (x)==INT64) +#define nicesize(x) ((x)==1 || (x)==2 || (x)==4 || (x)==8) #define smalls(n) sfit(n, 16) #define smallu(n) ufit(n, 16) @@ -1176,35 +1170,35 @@ PATTERNS pat loe /* Load word external */ leaving lae $1 - loi INT32 + loi 4 pat ste /* Store word external */ leaving lae $1 - sti INT32 + sti 4 pat lde /* Load double-word external */ leaving lae $1 - loi INT64 + loi 8 pat sde /* Store double-word external */ leaving lae $1 - sti INT64 + sti 8 - pat zre /* Zero external */ + pat zre /* Zero external */ leaving loc 0 ste $1 - pat ine /* Increment external */ + pat ine /* Increment external */ leaving loe $1 inc ste $1 - pat dee /* Decrement external */ + pat dee /* Decrement external */ leaving loe $1 dec @@ -1216,27 +1210,27 @@ PATTERNS pat lof /* Load word offsetted */ leaving adp $1 - loi INT32 + loi 4 pat ldf /* Load double-word offsetted */ leaving adp $1 - loi INT64 + loi 8 pat stf /* Store word offsetted */ leaving adp $1 - sti INT32 + sti 4 pat sdf /* Store double-word offsetted */ leaving adp $1 - sti INT64 + sti 8 /* Loads and stores */ - pat loi $1==INT8 /* Load byte indirect */ + pat loi $1==1 /* Load byte indirect */ with REG yields {IND_RC_B, %1, 0} with exact SUM_RC @@ -1246,8 +1240,8 @@ PATTERNS with exact SUM_RR yields {IND_RR_B, %1.reg1, %1.reg2} - pat loi loc loc cii $1==INT16 && $2==INT16 && 
$3==INT32 - /* Load half-word indirect and sign extend */ + /* Load half-word indirect and sign-extend */ + pat loi loc loc cii $1==2 && $2==2 && $3==4 with REG yields {IND_RC_H_S, %1, 0} with exact SUM_RC @@ -1257,7 +1251,7 @@ PATTERNS with exact SUM_RR yields {IND_RR_H_S, %1.reg1, %1.reg2} - pat loi $1==INT16 /* Load half-word indirect */ + pat loi $1==2 /* Load half-word indirect */ with REG yields {IND_RC_H, %1, 0} with exact SUM_RC @@ -1267,7 +1261,7 @@ PATTERNS with exact SUM_RR yields {IND_RR_H, %1.reg1, %1.reg2} - pat loi $1==INT32 /* Load word indirect */ + pat loi $1==4 /* Load word indirect */ with REG yields {IND_RC_W, %1, 0} with exact SUM_RC @@ -1277,7 +1271,7 @@ PATTERNS with exact SUM_RR yields {IND_RR_W, %1.reg1, %1.reg2} - pat loi $1==INT64 /* Load double-word indirect */ + pat loi $1==8 /* Load double-word indirect */ with REG yields {IND_RC_D, %1, 0} with exact SUM_RC @@ -1295,10 +1289,9 @@ PATTERNS pat los $1==4 /* Load arbitrary size */ with REG3 STACK kills ALL - gen - bl {LABEL, ".los4"} + gen bl {LABEL, ".los4"} - pat sti $1==INT8 /* Store byte indirect */ + pat sti $1==1 /* Store byte indirect */ with REG REG kills MEMORY gen move %2, {IND_RC_B, %1, 0} @@ -1312,7 +1305,7 @@ PATTERNS kills MEMORY gen move %2, {IND_RR_B, %1.reg1, %1.reg2} - pat sti $1==INT16 /* Store half-word indirect */ + pat sti $1==2 /* Store half-word indirect */ with REG REG kills MEMORY gen move %2, {IND_RC_H, %1, 0} @@ -1326,7 +1319,7 @@ PATTERNS kills MEMORY gen move %2, {IND_RR_H, %1.reg1, %1.reg2} - pat sti $1==INT32 /* Store word indirect */ + pat sti $1==4 /* Store word indirect */ with REG REG+FSREG kills MEMORY gen move %2, {IND_RC_W, %1, 0} @@ -1340,7 +1333,7 @@ PATTERNS kills MEMORY gen move %2, {IND_RR_W, %1.reg1, %1.reg2} - pat sti $1==INT64 /* Store double-word indirect */ + pat sti $1==8 /* Store double-word indirect */ with REG FREG kills MEMORY gen move %2, {IND_RC_D, %1, 0} @@ -1367,8 +1360,7 @@ PATTERNS pat sts $1==4 /* Store arbitrary size */ with 
REG3 STACK kills ALL - gen - bl {LABEL, ".sts4"} + gen bl {LABEL, ".sts4"} /* Arithmetic wrappers */ @@ -1560,7 +1552,7 @@ PATTERNS leaving cal ".xor" - pat com $1==INT32 /* NOT word */ + pat com $1==4 /* NOT word */ with exact AND_RR yields {NAND_RR, %1.reg1, %1.reg2} with exact OR_RR @@ -1680,8 +1672,7 @@ PATTERNS /* Arrays */ pat aar $1==4 /* Address of array element */ - leaving - cal ".aar4" + leaving cal ".aar4" pat lar $1==4 /* Load from array */ with STACK @@ -1993,7 +1984,7 @@ PATTERNS * puts gt in the sign bit, to reverse the comparison. */ - pat cmi $1==INT32 /* Signed tristate compare */ + pat cmi $1==4 /* Signed tristate compare */ with REG CONST2 uses reusing %1, REG={COND_RC, %1, %2.val} gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0} @@ -2007,7 +1998,7 @@ PATTERNS gen extlwi %a, %a, {C, 2}, {C, 0} yields %a - pat cmu $1==INT32 /* Unsigned tristate compare */ + pat cmu $1==4 /* Unsigned tristate compare */ with REG UCONST2 uses reusing %1, REG={CONDL_RC, %1, %2.val} gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0} @@ -2023,11 +2014,11 @@ PATTERNS pat cmp /* Compare pointers */ leaving - cmu INT32 + cmu 4 - pat cms $1==INT32 /* Compare blocks (word sized) */ + pat cms $1==4 /* Compare blocks (word sized) */ leaving - cmi INT32 + cmi 4 pat cms defined($1) leaving @@ -2041,34 +2032,32 @@ PATTERNS /* Other branching and labelling */ + /* During an unconditional jump, if the top element on the + * stack has 4 bytes, then we hold it in register r3. 
+ */ pat lab topeltsize($1)==4 && !fallthrough($1) kills ALL - gen - labeldef $1 - yields r3 + gen labeldef $1 + yields r3 pat lab topeltsize($1)==4 && fallthrough($1) with REG3 STACK - kills ALL - gen - labeldef $1 - yields r3 + kills ALL + gen labeldef $1 + yields r3 - pat lab topeltsize($1)!=4 + pat lab topeltsize($1)!=4 /* Label without r3 */ with STACK - kills ALL - gen - labeldef $1 + kills ALL + gen labeldef $1 - pat bra topeltsize($1)==4 /* Unconditional jump with TOS GPRister */ + pat bra topeltsize($1)==4 /* Branch with r3 */ with REG3 STACK - gen - b {LABEL, $1} + gen b {LABEL, $1} - pat bra topeltsize($1)!=4 /* Unconditional jump without TOS GPRister */ + pat bra topeltsize($1)!=4 /* Branch without r3 */ with STACK - gen - b {LABEL, $1} + gen b {LABEL, $1} /* Miscellaneous */ @@ -2076,8 +2065,7 @@ PATTERNS pat cal /* Call procedure */ with STACK kills ALL - gen - bl {LABEL, $1} + gen bl {LABEL, $1} pat cai /* Call procedure indirect */ with REG STACK @@ -2086,10 +2074,10 @@ PATTERNS mtspr ctr, %1 bctrl. 
- pat lfr $1==INT32 /* Load function result, word */ + pat lfr $1==4 /* Load function result, word */ yields r3 - pat lfr $1==INT64 /* Load function result, double-word */ + pat lfr $1==8 /* Load function result, double-word */ yields r4 r3 pat ret $1==0 /* Return from procedure */ @@ -2151,14 +2139,12 @@ PATTERNS pat csa /* Array-lookup switch */ with STACK kills ALL - gen - b {LABEL, ".csa"} + gen b {LABEL, ".csa"} pat csb /* Table-lookup switch */ with STACK kills ALL - gen - b {LABEL, ".csb"} + gen b {LABEL, ".csb"} /* EM specials */ @@ -2174,30 +2160,24 @@ PATTERNS ste "hol0" pat lni /* Increment line number */ - leaving - ine "hol0" + leaving ine "hol0" pat lim /* Load EM trap ignore mask */ - leaving - lde ".ignmask" + leaving lde ".ignmask" pat sim /* Store EM trap ignore mask */ - leaving - ste ".ignmask" + leaving ste ".ignmask" pat trp /* Raise EM trap */ with REG3 kills ALL - gen - bl {LABEL, ".trap"} + gen bl {LABEL, ".trap"} pat sig /* Set trap handler */ - leaving - ste ".trppc" + leaving ste ".trppc" pat rtt /* Return from trap */ - leaving - ret 0 + leaving ret 0 /* Our caller's local base, "lxl 0 dch", appears in * lang/cem/libcc.ansi/setjmp/setjmp.e, lang/m2/libm2/par_misc.e @@ -2210,8 +2190,7 @@ PATTERNS yields {IND_RC_W, %1, FP_OFFSET} pat lpb /* LB -> argument base */ - leaving - adp EM_BSIZE + leaving adp EM_BSIZE pat gto /* longjmp */ with STACK @@ -2253,15 +2232,14 @@ PATTERNS /* Single-precision floating-point */ - pat zrf $1==INT32 /* Push zero */ + pat zrf $1==4 /* Push zero */ leaving loe ".fs_00000000" pat adf $1==4 /* Add single */ with FSREG FSREG uses reusing %1, FSREG - gen - fadds %a, %2, %1 + gen fadds %a, %2, %1 yields %a pat adf stl $1==4 && inreg($2)==reg_float with FSREG FSREG @@ -2270,8 +2248,7 @@ PATTERNS pat sbf $1==4 /* Subtract single */ with FSREG FSREG uses reusing %1, FSREG - gen - fsubs %a, %2, %1 + gen fsubs %a, %2, %1 yields %a pat sbf stl $1==4 && inreg($2)==reg_float with FSREG FSREG @@ -2280,34 +2257,31 @@ 
PATTERNS pat mlf $1==4 /* Multiply single */ with FSREG FSREG uses reusing %1, FSREG - gen - fmuls %a, %2, %1 + gen fmuls %a, %2, %1 yields %a pat mlf stl $1==4 && inreg($2)==reg_float with FSREG FSREG gen fmuls {LOCAL, $2}, %2, %1 - pat dvf $1==INT32 /* Divide single */ + pat dvf $1==4 /* Divide single */ with FSREG FSREG uses reusing %1, FSREG - gen - fdivs %a, %2, %1 + gen fdivs %a, %2, %1 yields %a pat dvf stl $1==4 && inreg($2)==reg_float with FSREG FSREG gen fdivs {LOCAL, $2}, %2, %1 - pat ngf $1==INT32 /* Negate single */ + pat ngf $1==4 /* Negate single */ with FSREG uses reusing %1, FSREG - gen - fneg %a, %1 + gen fneg %a, %1 yields %a pat ngf stl $1==4 && inreg($2)==reg_float with FSREG gen fneg {LOCAL, $2}, %1 - pat cmf $1==INT32 /* Compare single */ + pat cmf $1==4 /* Compare single */ with FSREG FSREG uses REG={COND_FS, %2, %1} gen extlwi %a, %a, {C, 2}, {C, 0} @@ -2358,12 +2332,11 @@ PATTERNS pat cmf zlt $1==4 call cmf4zxx("blt") pat cmf zle $1==4 call cmf4zxx("ble") - pat loc loc cff $1==INT32 && $2==INT64 /* Convert single to double */ + pat loc loc cff $1==4 && $2==8 /* Convert single to double */ with FSREG yields %1.1 - /* Convert single to signed int */ - pat loc loc cfi $1==4 && $2==4 + pat loc loc cfi $1==4 && $2==4 /* Single to signed int */ leaving loc 4 loc 8 @@ -2372,8 +2345,7 @@ PATTERNS loc 4 cfi - /* Convert single to unsigned int */ - pat loc loc cfu $1==4 && $2==4 + pat loc loc cfu $1==4 && $2==4 /* Single to unsigned int */ leaving loc 4 loc 8 @@ -2382,8 +2354,7 @@ PATTERNS loc 4 cfu - /* Convert signed int to single */ - pat loc loc cif $1==4 && $2==4 + pat loc loc cif $1==4 && $2==4 /* Signed int to single */ leaving loc 4 loc 8 @@ -2392,8 +2363,7 @@ PATTERNS loc 4 cff - /* Convert unsigned int to single */ - pat loc loc cuf $1==4 && $2==4 + pat loc loc cuf $1==4 && $2==4 /* Unsigned int to single */ leaving loc 4 loc 8 @@ -2405,15 +2375,13 @@ PATTERNS /* Double-precision floating-point */ - pat zrf $1==INT64 /* Push zero */ - 
leaving - lde ".fd_00000000" + pat zrf $1==8 /* Push zero */ + leaving lde ".fd_00000000" pat adf $1==8 /* Add double */ with FREG FREG uses reusing %1, FREG - gen - fadd %a, %2, %1 + gen fadd %a, %2, %1 yields %a pat adf sdl $1==8 && inreg($2)==reg_float with FREG FREG @@ -2422,8 +2390,7 @@ PATTERNS pat sbf $1==8 /* Subtract double */ with FREG FREG uses reusing %1, FREG - gen - fsub %a, %2, %1 + gen fsub %a, %2, %1 yields %a pat sbf sdl $1==8 && inreg($2)==reg_float with FREG FREG @@ -2432,8 +2399,7 @@ PATTERNS pat mlf $1==8 /* Multiply double */ with FREG FREG uses reusing %1, FREG - gen - fmul %a, %2, %1 + gen fmul %a, %2, %1 yields %a pat mlf sdl $1==8 && inreg($2)==reg_float with FREG FREG @@ -2442,8 +2408,7 @@ PATTERNS pat dvf $1==8 /* Divide double */ with FREG FREG uses reusing %1, FREG - gen - fdiv %a, %2, %1 + gen fdiv %a, %2, %1 yields %a pat dvf sdl $1==8 && inreg($2)==reg_float with FREG FREG @@ -2452,14 +2417,13 @@ PATTERNS pat ngf $1==8 /* Negate double */ with FREG uses reusing %1, FREG - gen - fneg %a, %1 + gen fneg %a, %1 yields %a pat ngf sdl $1==8 && inreg($2)==reg_float with FREG gen fneg {DLOCAL, $2}, %1 - pat cmf $1==INT64 /* Compare double */ + pat cmf $1==8 /* Compare double */ with FREG FREG uses REG={COND_FD, %2, %1} gen extlwi %a, %a, {C, 2}, {C, 0} @@ -2521,8 +2485,7 @@ PATTERNS gen frsp %a, %1 yields %a - /* Convert double to signed int */ - pat loc loc cfi $1==8 && $2==4 + pat loc loc cfi $1==8 && $2==4 /* Double to signed int */ with FREG STACK uses reusing %1, FREG gen @@ -2530,26 +2493,18 @@ PATTERNS stfdu %a, {IND_RC_D, sp, 0-8} addi sp, sp, {C, 4} - /* Convert double to unsigned int */ - pat loc loc cfu $1==8 && $2==4 - leaving - cal ".cfu8" + pat loc loc cfu $1==8 && $2==4 /* Double to unsigned int */ + leaving cal ".cfu8" - /* Convert signed int to double */ - pat loc loc cif $1==4 && $2==8 - leaving - cal ".cif8" + pat loc loc cif $1==4 && $2==8 /* Signed int to double */ + leaving cal ".cif8" - /* Convert unsigned int to 
double */ - pat loc loc cuf $1==4 && $2==8 - leaving - cal ".cuf8" + pat loc loc cuf $1==4 && $2==8 /* Unsigned int to double */ + leaving cal ".cuf8" pat fef $1==8 /* Split fraction, exponent */ - leaving - cal ".fef8" + leaving cal ".fef8" /* Multiply two doubles, then split fraction, integer */ pat fif $1==8 - leaving - cal ".fif8" + leaving cal ".fif8" From 5f2a7b260fd8af26294062f69599db4d0d698293 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 22 Dec 2017 22:32:16 -0500 Subject: [PATCH 24/55] Optimize `mr. X, X` after some instructions. For example, when ncg emits slw r9,r8,r5 mr. r9,r9 then top simplifies the code to slw. r9,r8,r5 --- mach/powerpc/ncg/table | 11 ++++------- mach/powerpc/top/table | 27 +++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index e566c51b9..e66ae855f 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -348,7 +348,7 @@ INSTRUCTIONS or GPR:wo, GPR:ro, GPR:ro. mr GPR:wo, GPR:ro. orX "or." GPR:wo:cc, GPR:ro, GPR:ro. - orX_readonly "or." GPR:ro:cc, GPR:ro, GPR:ro. + mrX_readonly "mr." GPR:ro:cc, GPR:ro. orc GPR:wo, GPR:ro, GPR:ro. ori GPR:wo, GPR:ro, CONST+LABEL_LO:ro. oris GPR:wo, GPR:ro, CONST:ro. @@ -707,15 +707,12 @@ MOVES TESTS - /* Given orX %1, %1, %1, ncgg says, "Instruction destroys %1, - * not allowed here". We use orX_readonly to trick ncgg. - * - * Using "or." and not "mr." because mach/powerpc/top/table - * was optimizing "or." and not "mr.". + /* Given "mrX %1, %1", ncgg would say, "Instruction destroys + * %1, not allowed here". We use mrX_readonly to trick ncgg. 
*/ to test GPR gen - orX_readonly %1, %1, %1 + mrX_readonly %1, %1 STACKINGRULES diff --git a/mach/powerpc/top/table b/mach/powerpc/top/table index fdec03b2e..b3f5b3a31 100644 --- a/mach/powerpc/top/table +++ b/mach/powerpc/top/table @@ -1,5 +1,5 @@ -/* PowerPC desciptor table for ACK target optimizer */ +/* PowerPC table for ACK target optimizer */ MAXOP 3; LABEL_STARTER '.'; @@ -16,10 +16,33 @@ X, Y, Z { TRUE }; addi RNZ, RNZ, 0 -> ; addis RNZ, RNZ, 0 -> ; +or X, Y, Y -> mr X, Y ; +or. X, Y, Y -> mr. X, Y ; + mr X, X -> ; fmr X, X -> ; -or X, Y, Z : or. X, X, X -> or. X, Y, Z ; +add X, Y, Z : mr. X, X -> add. X, Y, Z ; +and X, Y, Z : mr. X, X -> and. X, Y, Z ; +andc X, Y, Z : mr. X, X -> andc. X, Y, Z ; +divw X, Y, Z : mr. X, X -> divw. X, Y, Z ; +divwu X, Y, Z : mr. X, X -> divwu. X, Y, Z ; +extsb X, Y, Z : mr. X, X -> extsb. X, Y, Z ; +extsh X, Y, Z : mr. X, X -> extsh. X, Y, Z ; +eqv X, Y, Z : mr. X, X -> eqv. X, Y, Z ; +mullw X, Y, Z : mr. X, X -> mullw. X, Y, Z ; +nand X, Y, Z : mr. X, X -> nand. X, Y, Z ; +nor X, Y, Z : mr. X, X -> nor. X, Y, Z ; +or X, Y, Z : mr. X, X -> or. X, Y, Z ; +orc X, Y, Z : mr. X, X -> orc. X, Y, Z ; +slw X, Y, Z : mr. X, X -> slw. X, Y, Z ; +slwi X, Y, Z : mr. X, X -> slwi. X, Y, Z ; +subf X, Y, Z : mr. X, X -> subf. X, Y, Z ; +sraw X, Y, Z : mr. X, X -> sraw. X, Y, Z ; +srawi X, Y, Z : mr. X, X -> srawi. X, Y, Z ; +srw X, Y, Z : mr. X, X -> srw. X, Y, Z ; +srwi X, Y, Z : mr. X, X -> srwi. X, Y, Z ; +xor X, Y, Z : mr. X, X -> xor. X, Y, Z ; b X : labdef X -> labdef X ; From 26de4c1ab18df825607d7882924f57c673af46cf Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 24 Dec 2017 22:37:52 -0500 Subject: [PATCH 25/55] Add test for EM _rck_. Fix traps in PowerPC ncg. The new test rck_e.e segfaults on PowerPC unless I make some changes. The inline code for _rck_ was wrong because it didn't allow the trap handler to return. _sig_ forgot to push the old trap handler. 
Move plat/linuxppc/libsys/trap.s to mach/powerpc/libem/trp.s and rewrite it with simplified/extended mnemonics. Remove .trap alias for .trp procedure. Add a missing `mtspr lr, r0` so we can return from the trap handler. Call write() and _exit() so trp.s works with both linuxppc and osxppc. Before, Mac OS X was wrongly using the trap.s for Linux. In powerpc/libem, simplify .aar4; teach .csa and .csb to raise the trap if the default target is zero. C programs don't need these changes. You may relink your C programs with the changed .csa and .csb, but C code doesn't raise the trap. Modula-2 code can raise traps, so you may want to relink your Modula-2 programs with the changed libem, but you might keep your old .o files from Modula-2. You may need to recompile your Pascal programs (delete old .o files from Pascal) because the Pascal compiler might use _rck_. --- mach/powerpc/libem/aar4.s | 12 +-- mach/powerpc/libem/build.lua | 3 +- mach/powerpc/libem/csa.s | 23 ++-- mach/powerpc/libem/csb.s | 25 ++--- mach/powerpc/libem/rck.s | 4 + mach/powerpc/libem/trp.s | 56 ++++++++++ mach/powerpc/ncg/table | 23 ++-- plat/linuxppc/libsys/build.lua | 1 - plat/linuxppc/libsys/trap.s | 112 -------------------- plat/osxppc/libsys/build.lua | 1 - tests/plat/build.lua | 1 + tests/plat/rck_e.e | 186 +++++++++++++++++++++++++++++++++ 12 files changed, 285 insertions(+), 162 deletions(-) create mode 100644 mach/powerpc/libem/trp.s delete mode 100644 plat/linuxppc/libsys/trap.s create mode 100644 tests/plat/rck_e.e diff --git a/mach/powerpc/libem/aar4.s b/mach/powerpc/libem/aar4.s index fc8620d02..08390b081 100644 --- a/mach/powerpc/libem/aar4.s +++ b/mach/powerpc/libem/aar4.s @@ -8,21 +8,17 @@ .define .aar4 .aar4: - lis r0, hi16[.trap_earray] - ori r0, r0, lo16[.trap_earray] - mtspr ctr, r0 ! load CTR with trap address - lwz r4, 0(sp) ! r4 = address of descriptor lwz r5, 4(sp) ! r5 = index lwz r6, 8(sp) ! r6 = address of array lwz r0, 0(r4) subf. r5, r0, r5 ! 
subtract lower bound from index - bltctr ! check lower bound + blt .trap_earray ! check lower bound lwz r0, 4(r4) cmplw r5, r0 - bgtctr ! check upper bound + bgt .trap_earray ! check upper bound lwz r3, 8(r4) ! r3 = size of element mullw r5, r5, r3 ! scale index by size @@ -30,3 +26,7 @@ stw r6, 8(sp) ! push address of element addi sp, sp, 8 blr + +.trap_earray: + li r3, 0 ! EARRAY = 0 in h/em_abs.h + b .trp diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua index cb5efd281..ac84e3b0f 100644 --- a/mach/powerpc/libem/build.lua +++ b/mach/powerpc/libem/build.lua @@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do acklibrary { name = "lib_"..plat, srcs = { - "./*.s", -- exg.s + "./*.s", -- trp.s }, vars = { plat = plat }, deps = { @@ -15,4 +15,3 @@ for _, plat in ipairs(vars.plats) do } } end - diff --git a/mach/powerpc/libem/csa.s b/mach/powerpc/libem/csa.s index 3898241c4..86d792554 100644 --- a/mach/powerpc/libem/csa.s +++ b/mach/powerpc/libem/csa.s @@ -13,22 +13,21 @@ lwz r4, 4(sp) addi sp, sp, 8 - lwz r5, 0(r3) ! load default - mtspr ctr, r5 - - lwz r5, 4(r3) ! fetch lower bound - subf. r4, r5, r4 ! adjust value - bltctr ! jump to default if out of range + lwz r5, 0(r3) ! r5 = default target - lwz r5, 8(r3) ! fetch range - cmplw r4, r5 - bgtctr ! jump to default if out of range + lwz r6, 4(r3) ! fetch lower bound + subf. r4, r6, r4 ! adjust value + blt 1f ! jump to default if out of range + + lwz r6, 8(r3) ! fetch range + cmplw r4, r6 + bgt 1f ! jump to default if out of range addi r3, r3, 12 ! skip header slwi r4, r4, 2 ! scale value (<<2) - lwzx r5, r3, r4 ! load target - mtspr ctr, r5 + lwzx r5, r3, r4 ! r5 = new target - or. r5, r5, r5 ! test it +1: mtspr ctr, r5 + mr. r5, r5 ! test it bnectr ! jump to target if non-zero b .trap_ecase ! 
otherwise trap diff --git a/mach/powerpc/libem/csb.s b/mach/powerpc/libem/csb.s index 571bfc210..92c6d096d 100644 --- a/mach/powerpc/libem/csb.s +++ b/mach/powerpc/libem/csb.s @@ -13,23 +13,20 @@ lwz r4, 4(sp) addi sp, sp, 8 - lwz r5, 0(r3) ! load default - mtspr ctr, r5 + lwz r5, 0(r3) ! r5 = default target lwz r6, 4(r3) ! fetch count - -1: - or. r6, r6, r6 ! test count - beqctr ! exit if zero - addi r6, r6, -1 ! otherwise decrement - - lwzu r7, 8(r3) ! fetch target index, increment pointer + mr. r6, r6 ! skip loop if count is zero + beq 3f ! (needed by Modula-2 "CASE i OF END") + mtspr ctr, r6 +1: lwzu r7, 8(r3) ! fetch target index, increment pointer cmpw r4, r7 ! compare with value - bne 1b ! if not equal, go again + beq 2f + bdnz 1b ! if not equal, go again + b 3f - lwz r7, 4(r3) ! fetch target address - mtspr ctr, r7 - - or. r7, r7, r7 ! test it +2: lwz r5, 4(r3) ! r5 = new target +3: mtspr ctr, r5 + mr. r5, r5 ! test target bnectr ! jump to target if non-zero b .trap_ecase ! otherwise trap diff --git a/mach/powerpc/libem/rck.s b/mach/powerpc/libem/rck.s index 9008be610..f1cf7f848 100644 --- a/mach/powerpc/libem/rck.s +++ b/mach/powerpc/libem/rck.s @@ -18,3 +18,7 @@ bgt .trap_erange blr + +.trap_erange: + li r3, 1 ! ERANGE = 1 in h/em_abs.h + b .trp diff --git a/mach/powerpc/libem/trp.s b/mach/powerpc/libem/trp.s new file mode 100644 index 000000000..b07afb929 --- /dev/null +++ b/mach/powerpc/libem/trp.s @@ -0,0 +1,56 @@ +.sect .text + +.define .trap_ecase +.trap_ecase: + li r3, 20 ! ECASE = 20 in h/em_abs.h + ! FALLTHROUGH to .trp + +! Raises an EM trap. +! Expects r3 = trap number. + +.define .trp +.trp: + cmplwi r3, 15 ! traps > 15 can't be ignored + bgt 1f + + lis r4, ha16[.ignmask] + lwz r4, lo16[.ignmask](r4) ! load ignore mask + srw r4, r4, r3 + andi. r4, r4, 1 + bnelr ! return if ignoring trap + +1: lis r4, ha16[.trppc] + lwz r5, lo16[.trppc](r4) ! r5 = user trap routine + mr. r5, r5 + beq 2f ! 
if no user trap routine, bail out + + mtspr ctr, r5 + mfspr r6, lr + li r0, 0 + stwu r3, -8(sp) ! push trap number + stw r0, lo16[.trppc](r4) ! reset trap routine + stw r6, 4(sp) ! save old lr + bctrl ! call trap routine + + lwz r0, 4(sp) + mtspr lr, r0 + addi sp, sp, 8 ! retract over stack usage + blr + +2: ! No trap handler. Write error message, exit. + li r3, 2 + stwu r3, -12(sp) + lis r4, ha16[message] + addi r4, r4, lo16[message] + li r5, 6 + stw r4, 4(sp) + stw r5, 8(sp) + bl _write ! write(2, message, 6) + + li r3, 1 + stw r3, 0(sp) + bl __exit ! _exit(1) + +.sect .rom +message: + .ascii "TRAP!\n" diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index e66ae855f..10ffadb16 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -2168,10 +2168,13 @@ PATTERNS pat trp /* Raise EM trap */ with REG3 kills ALL - gen bl {LABEL, ".trap"} + gen bl {LABEL, ".trp"} - pat sig /* Set trap handler */ - leaving ste ".trppc" + pat sig /* Set trap handler, yield old */ + leaving + loe ".trppc" + exg 4 + ste ".trppc" pat rtt /* Return from trap */ leaving ret 0 @@ -2216,22 +2219,14 @@ PATTERNS with REG gen move %1, sp - pat lae rck $2==4 /* Range check */ - with REG - kills ALL - gen - cmpwi %1, {C, rom($1, 1)} - blt {LABEL, ".trap_erange"} - cmpwi %1, {C, rom($1, 2)} - bgt {LABEL, ".trap_erange"} - yields %1 + pat rck $1==4 /* Range check */ + leaving cal ".rck" /* Single-precision floating-point */ pat zrf $1==4 /* Push zero */ - leaving - loe ".fs_00000000" + leaving loe ".fs_00000000" pat adf $1==4 /* Add single */ with FSREG FSREG diff --git a/plat/linuxppc/libsys/build.lua b/plat/linuxppc/libsys/build.lua index f7b16b378..696c62d42 100644 --- a/plat/linuxppc/libsys/build.lua +++ b/plat/linuxppc/libsys/build.lua @@ -4,7 +4,6 @@ acklibrary { "./_syscall.s", "./sigaction.s", "./signal.c", - "./trap.s", "plat/linux/libsys/_exit.c", "plat/linux/libsys/_hol0.s", "plat/linux/libsys/close.c", diff --git a/plat/linuxppc/libsys/trap.s 
b/plat/linuxppc/libsys/trap.s deleted file mode 100644 index 93c5189a4..000000000 --- a/plat/linuxppc/libsys/trap.s +++ /dev/null @@ -1,112 +0,0 @@ -# -! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/_syscall.s,v $ -! $State: Exp $ -! $Revision: 1.1 $ - -! Declare segments (the order is important). - -.sect .text -.sect .rom -.sect .data -.sect .bss - -.sect .text - -#define IFFALSE 4 -#define IFTRUE 12 -#define ALWAYS 20 - -#define LT 0 -#define GT 1 -#define EQ 2 -#define OV 3 - -EARRAY = 0 -ERANGE = 1 -ESET = 2 -EIOVFL = 3 -EFOVFL = 4 -EFUNFL = 5 -EIDIVZ = 6 -EFDIVZ = 7 -EIUND = 8 -EFUND = 9 -ECONV = 10 -ESTACK = 16 -EHEAP = 17 -EILLINS = 18 -EODDZ = 19 -ECASE = 20 -EMEMFLT = 21 -EBADPTR = 22 -EBADPC = 23 -EBADLAE = 24 -EBADMON = 25 -EBADLIN = 26 -EBADGTO = 27 -EUNIMPL = 63 ! unimplemented em-instruction called - -! EM trap handling. - -.define .trap_ecase -.trap_ecase: - addi r3, r0, ECASE - b .trap - -.define .trap_earray -.trap_earray: - addi r3, r0, EARRAY - b .trap - -.define .trap_erange -.trap_erange: - addi r3, r0, ERANGE - b .trap - -.define .trp -.define .trap -.trp: -.trap: - cmpi cr0, 0, r3, 15 ! traps >15 can't be ignored - bc IFTRUE, LT, 1f - - addi r4, r0, 1 - rlwnm r4, r4, r3, 0, 31 ! calculate trap bit - li32 r5, .ignmask - lwz r5, 0(r5) ! load ignore mask - and. r4, r4, r5 ! compare - bclr IFFALSE, EQ, 0 ! return if non-zero - -1: - li32 r4, .trppc - lwz r5, 0(r4) ! load user trap routine - or. r5, r5, r5 ! test - bc IFTRUE, EQ, fatal ! if no user trap routine, bail out - - addi r0, r0, 0 - stw r0, 0(r4) ! reset trap routine - - mfspr r0, lr - stwu r0, -4(sp) ! save old lr - - stwu r3, -4(sp) - mtspr ctr, r5 - bcctrl ALWAYS, 0, 0 ! call trap routine - - lwz r0, 4(sp) ! load old lr again - addi sp, sp, 8 ! retract over stack usage - bclr ALWAYS, 0, 0 ! return - -fatal: - addi r3, r0, 1 - li32 r4, message - addi r5, r0, 6 - addi r0, r0, 4 ! write() - sc 0 - - addi r0, r0, 1 ! 
exit() - sc 0 - -.sect .rom -message: - .ascii "TRAP!\n" diff --git a/plat/osxppc/libsys/build.lua b/plat/osxppc/libsys/build.lua index 072730b7a..49fc0c934 100644 --- a/plat/osxppc/libsys/build.lua +++ b/plat/osxppc/libsys/build.lua @@ -19,7 +19,6 @@ acklibrary { "./sigaction.s", "./stat.s", "./write.s", - "plat/linuxppc/libsys/trap.s", "plat/osx/libsys/brk.c", "plat/osx/libsys/creat.c", "plat/osx/libsys/isatty.c", diff --git a/tests/plat/build.lua b/tests/plat/build.lua index 666af7d95..fdac9bae3 100644 --- a/tests/plat/build.lua +++ b/tests/plat/build.lua @@ -13,6 +13,7 @@ definerule("plat_testsuite", "tests/plat/dup_e.e", "tests/plat/exg_e.e", "tests/plat/inn_e.e", + "tests/plat/rck_e.e", "tests/plat/rotate_e.e", "tests/plat/*.p", "tests/plat/b/*.b", diff --git a/tests/plat/rck_e.e b/tests/plat/rck_e.e new file mode 100644 index 000000000..cd5c581df --- /dev/null +++ b/tests/plat/rck_e.e @@ -0,0 +1,186 @@ +# + mes 2, EM_WSIZE, EM_PSIZE + +/* + * Uses _rck_ for range checks. Catches the EM trap if a value is out + * of range, and continues with the next instruction after _rck_. + * + * Some back ends, like i80, ignore _rck_, so this test fails. 
+ */ + +testnr + con 1 ; test number +caught + con 0 ; number of caught traps + + inp $next + inp $catch + inp $never + exp $_m_a_i_n + pro $_m_a_i_n,0 + + lim ; load ignore mask + loc 2 + and EM_WSIZE ; check bit 1 << ERANGE + zeq *1 ; fail if ignoring ERANGE +.1 + rom 1I4 + lae .1 + loi 4 + cal $fail + asp 4 +1 + + cal $next ; increment testnr, catch next trap + loc 10125 +.2 + rom 4283, 13644 + lae .2 + rck EM_WSIZE ; testnr 2 in range + asp EM_WSIZE + + cal $next + loc 4282 + lae .2 + rck EM_WSIZE ; testnr 3 out of range + asp EM_WSIZE + + cal $next + loc 4283 + lae .2 + rck EM_WSIZE ; testnr 4 in range + asp EM_WSIZE + + cal $next + loc 13644 + lae .2 + rck EM_WSIZE ; testnr 5 in range + asp EM_WSIZE + + cal $next + loc 13655 + lae .2 + rck EM_WSIZE ; testnr 6 out of range + asp EM_WSIZE + + cal $next + loc -13015 +.7 + rom -31344, -1898 + lae .7 + rck EM_WSIZE ; testnr 7 in range + asp EM_WSIZE + + cal $next + loc 8580 +.8 + rom -26315, 4588 + lae .8 + rck EM_WSIZE ; testnr 8 out of range + asp EM_WSIZE + + ; The last test raised a trap, so now there is no trap handler. + lpi $never + sig ; push old trap handler + loc 0 + loc EM_WSIZE + loc EM_PSIZE + cuu ; push NULL pointer + cmp + zeq *17 ; fail unless old handler is NULL +.17 + rom 17I4 + lae .17 + loi 4 + cal $fail + asp 4 +17 + ; Change the trap handler from $never to $catch. + lpi $catch + sig + lpi $never + cmp + zeq *18 +.18 + rom 18I4 + lae .18 + loi 4 + cal $fail + asp 4 +18 + ; Begin ignoring range traps. + loc 2 ; 1 << ERANGE + sim + loc 18 + ste testnr + loc 8580 + lae .8 + rck EM_WSIZE ; testnr 18 out of range but ignored + + ; Fail if we caught the wrong number of traps. 
+ loe caught + loc 3 + beq *20 +.20 + rom 20I4 + lae .20 + loi 4 + cal $fail + asp 4 +20 + cal $finished + end + + pro $next,0 + ine testnr ; next test + lpi $catch + sig ; catch next EM trap (only one trap) + asp EM_PSIZE + ret 0 + end + + pro $catch,0 + ine caught ; count this trap + + lol 0 ; load trap number + loc 1 + beq *1 ; fail if trap != ERANGE +.101 + rom 257I4 + lae .101 + loi 4 + cal $fail + ; Wrong type of trap. _rtt_ might not work, so exit now. + cal $finished +1 + ; Fail if the wrong test raised this trap. + loe testnr + loc 3 + beq *2 + loe testnr + loc 6 + beq *2 + loe testnr + loc 8 + beq *2 + loc 256 + loe testnr + adi EM_WSIZE ; 0x100 + testnr + loc EM_WSIZE + loc 4 + cuu + cal $fail + asp 4 +2 + rtt ; return from trap handler + end + + pro $never,0 +.200 + rom 200I4 + lae .200 + loi 4 + cal $fail + asp 4 + rtt + end From d6938108a6fda7cb9ef47de7b20587769503add1 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 3 Jan 2018 14:51:14 -0500 Subject: [PATCH 26/55] Add tests for C and Modula-2 Semaphores. Fix PowerPC ncg so setjmp() returns the correct value. I got unlucky when ncg picked r3 for "uses REG"; this destroyed the return value in r3 and caused the new test to fail. --- mach/powerpc/ncg/table | 23 +++-- tests/plat/build.lua | 1 + tests/plat/m2/SemaTest_mod.mod | 157 +++++++++++++++++++++++++++++++++ tests/plat/setjmp_c.c | 58 ++++++++++++ 4 files changed, 230 insertions(+), 9 deletions(-) create mode 100644 tests/plat/m2/SemaTest_mod.mod create mode 100644 tests/plat/setjmp_c.c diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 10ffadb16..a35ace230 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -85,9 +85,8 @@ REGISTERS lr, ctr : SPR. cr0 : CR. - /* The stacking rules and the splitting coercions can't - * allocate registers. We use r12 in the splitting coercions, - * and these scratch registers in the stacking rules. + /* The stacking rules can't allocate registers. 
We use these + * scratch registers to stack tokens. */ #define RSCRATCH r0 #define FSCRATCH f0 @@ -2192,15 +2191,21 @@ PATTERNS pat lpb /* LB -> argument base */ leaving adp EM_BSIZE + /* "gto" must preserve the function result for "lfr", so + * longjmp() can pass the return value to setjmp(). + * - See lang/cem/libcc.ansi/setjmp/setjmp.e + * + * Must preserve r3 and r4, so no "uses REG". + * PowerPC can't add r0 + constant. Use r12. + */ pat gto /* longjmp */ with STACK - uses REG gen - move {LABEL, $1}, %a - move {IND_RC_W, %a, 8}, fp - move {IND_RC_W, %a, 4}, sp - move {IND_RC_W, %a, 0}, %a - mtspr ctr, %a + move {LABEL, $1}, r12 + move {IND_RC_W, r12, 8}, fp + move {IND_RC_W, r12, 4}, sp + move {IND_RC_W, r12, 0}, r12 + mtspr ctr, r12 bctr. pat lor $1==0 /* Load local base */ diff --git a/tests/plat/build.lua b/tests/plat/build.lua index fdac9bae3..42ca441d0 100644 --- a/tests/plat/build.lua +++ b/tests/plat/build.lua @@ -22,6 +22,7 @@ definerule("plat_testsuite", "tests/plat/m2/ConvTest_mod.mod", "tests/plat/m2/NestProc_mod.mod", "tests/plat/m2/OpenArray_mod.mod", + "tests/plat/m2/SemaTest_mod.mod", "tests/plat/m2/Set100_mod.mod", "tests/plat/m2/StringTest_mod.mod" ) diff --git a/tests/plat/m2/SemaTest_mod.mod b/tests/plat/m2/SemaTest_mod.mod new file mode 100644 index 000000000..9ae395662 --- /dev/null +++ b/tests/plat/m2/SemaTest_mod.mod @@ -0,0 +1,157 @@ +(* + * Generates some integer sequences. Each generator is a process that + * yields integers to the main process. ACK switches processes by + * saving and restoring the stack. It uses _lor_ and _str_ to save + * and restore the local base and frame pointer. 
+ *) +MODULE SemaTest; +FROM Semaphores IMPORT Sema, NewSema, Down, Up, StartProcess; +FROM Storage IMPORT ALLOCATE; +FROM Test IMPORT fail, finished; + +TYPE + Generator = POINTER TO GeneratorRecord; + GeneratorRecord = RECORD + resume: Sema; (* up when resuming generator *) + yield: Sema; (* up when yielding value *) + value: INTEGER; + END; +VAR + curgen: Generator; (* current generator *) + startLock: Sema; (* down when booting generator *) + startProc: PROC; + startSelf: Generator; + +PROCEDURE BootGenerator; + VAR pr: PROC; self: Generator; +BEGIN + pr := startProc; + self := startSelf; + Up(startLock); + Down(self^.resume); (* wait for first Resume *) + pr(); +END BootGenerator; + +PROCEDURE StartGenerator(gen: Generator; pr: PROC); +BEGIN + gen^.resume := NewSema(0); + gen^.yield := NewSema(0); + Down(startLock); + startProc := pr; + startSelf := gen; + StartProcess(BootGenerator, 8192); +END StartGenerator; + +PROCEDURE Resume(gen: Generator): INTEGER; + VAR self: Generator; +BEGIN + self := curgen; + curgen := gen; + Up(gen^.resume); + Down(gen^.yield); (* wait for Yield *) + curgen := self; + RETURN gen^.value +END Resume; + +PROCEDURE Yield(i: INTEGER); + VAR self: Generator; +BEGIN + self := curgen; + self^.value := i; + Up(self^.yield); (* curgen becomes invalid *) + Down(self^.resume); (* wait for Resume *) +END Yield; + +PROCEDURE YieldHalfOf(i: INTEGER); +BEGIN + Yield(i DIV 2); +END YieldHalfOf; + +PROCEDURE Triangular; + (* Yields the triangular numbers, http://oeis.org/A000217 *) + VAR n: INTEGER; +BEGIN + n := 0; + LOOP + YieldHalfOf(n * (n + 1)); + INC(n); + END; +END Triangular; + +PROCEDURE Pentagonal; + (* Yields the pentagonal numbers, http://oeis.org/A000326 *) + VAR n: INTEGER; +BEGIN + n := 0; + LOOP + YieldHalfOf(n * (3 * n - 1)); + INC(n); + END; +END Pentagonal; + +PROCEDURE Odious; + (* Yields the odious numbers, http://oeis.org/A000069 *) + VAR b, i, n: INTEGER; +BEGIN + n := 1; + LOOP + (* b := count bits in n *) + b := 0; + i := 
n; + WHILE i # 0 DO + INC(b, i MOD 2); + i := i DIV 2; + END; + + IF (b MOD 2) = 1 THEN + Yield(n); + END; + INC(n); + END; +END Odious; + +TYPE + Triple = ARRAY[1..3] OF INTEGER; +PROCEDURE T(i1, i2, i3: INTEGER): Triple; + VAR t: Triple; +BEGIN + t[1] := i1; t[2] := i2; t[3] := i3; RETURN t +END T; + +CONST + two28 = 268435456D; (* 0x1000_0000 *) +VAR + a: ARRAY [0..9] OF Triple; + tri, pen, odi: Generator; + i, g1, g2, g3: INTEGER; +BEGIN + startLock := NewSema(1); + + ALLOCATE(tri, SIZE(GeneratorRecord)); + ALLOCATE(pen, SIZE(GeneratorRecord)); + ALLOCATE(odi, SIZE(GeneratorRecord)); + StartGenerator(tri, Triangular); + StartGenerator(pen, Pentagonal); + StartGenerator(odi, Odious); + + a[0] := T( 0, 0, 1); + a[1] := T( 1, 1, 2); + a[2] := T( 3, 5, 4); + a[3] := T( 6, 12, 7); + a[4] := T(10, 22, 8); + a[5] := T(15, 35, 11); + a[6] := T(21, 51, 13); + a[7] := T(28, 70, 14); + a[8] := T(36, 92, 16); + a[9] := T(45, 117, 19); + + FOR i := 0 TO INTEGER(9) DO + g1 := Resume(tri); + g2 := Resume(pen); + g3 := Resume(odi); + IF g1 # a[i][1] THEN fail(1D * two28 + LONG(a[i][1])) END; + IF g2 # a[i][2] THEN fail(2D * two28 + LONG(a[i][2])) END; + IF g3 # a[i][3] THEN fail(3D * two28 + LONG(a[i][3])) END; + END; + finished; +END SemaTest. diff --git a/tests/plat/setjmp_c.c b/tests/plat/setjmp_c.c new file mode 100644 index 000000000..2a514a03f --- /dev/null +++ b/tests/plat/setjmp_c.c @@ -0,0 +1,58 @@ +#include <setjmp.h> +#include "test.h" + +/* + * Sets i = 2 * i for each i in nums, until i == 0, but stops if + * 2 * i >= 1000. + * + * Uses setjmp() and longjmp() in libc. For ACK's libc, the back end + * must provide EM's _gto_, and _gto_ must preserve the function + * return area. 
+ */ +int nums1[] = { 79, 245, 164, 403, 0}; +const int expect1[] = {158, 490, 328, 806, 0}; +int nums2[] = {20, 221, 411, 643, 48, 272, 448, 0}; +const int expect2[] = {40, 442, 822, 643, 48, 272, 448, 0}; +int nums3[] = {371, 265, 500, 124, 117, 0}; +const int expect3[] = {742, 530, 500, 124, 117, 0}; +int docount = 0; + +int twice(int i, jmp_buf esc) { + if (i >= 500) + longjmp(esc, i); + return 2 * i; +} + +void donums(int *nums, jmp_buf esc) { + int *p; + + docount++; + for (p = nums; *p != 0; p++) { + *p = twice(*p, esc); + } +} + +int cknums(int *nums, const int *expect) { + jmp_buf env; + int ret; + + ret = setjmp(env); + if (ret == 0) + donums(nums, env); + for (;;) { + ASSERT(*nums == *expect); + if (*expect == 0) + break; + nums++; + expect++; + } + return ret; +} + +int main(void) { + ASSERT(cknums(nums1, expect1) == 0); + ASSERT(cknums(nums2, expect2) == 643); + ASSERT(cknums(nums3, expect3) == 500); + ASSERT(docount == 3); + finished(); +} From 720af48d8ad469934dbc19d92b77f83f61aeed82 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 4 Jan 2018 20:40:35 -0500 Subject: [PATCH 27/55] Fix lim. Improve lxl, lxa, lor, str, procs with no locals. _lim_ must use _loe_ (load word external), not _lde_ (load double-word external). The new patterns for _lxl_, _lxa_, _lor_, _str_ emit shorter code in some cases. The change from GPR_EXPR to REG_EXPR allows moving LXFRAME to a register variable. Add more "reusing" clauses. We have enough registers that ncg almost never reuses a register, but sometimes it can reuse r3. In mach.c, emit one fewer instruction in procedures with no locals. 
--- mach/powerpc/ncg/mach.c | 15 +++- mach/powerpc/ncg/table | 183 +++++++++++++++++++++++++--------------- 2 files changed, 128 insertions(+), 70 deletions(-) diff --git a/mach/powerpc/ncg/mach.c b/mach/powerpc/ncg/mach.c index b67903b0a..a31879de9 100644 --- a/mach/powerpc/ncg/mach.c +++ b/mach/powerpc/ncg/mach.c @@ -55,10 +55,17 @@ static void emit_prolog(void) { fprintf(codefile, "mfspr r0, lr\n"); - fprintf(codefile, "addi sp, sp, %ld\n", -framesize - 8); - fprintf(codefile, "stw fp, %ld(sp)\n", framesize); - fprintf(codefile, "stw r0, %ld(sp)\n", framesize + 4); - fprintf(codefile, "addi fp, sp, %ld\n", framesize); + if (framesize) { + fprintf(codefile, "addi sp, sp, %ld\n", -framesize - 8); + fprintf(codefile, "stw fp, %ld(sp)\n", framesize); + fprintf(codefile, "stw r0, %ld(sp)\n", framesize + 4); + fprintf(codefile, "addi fp, sp, %ld\n", framesize); + } else { + /* optimize for framesize == 0 */ + fprintf(codefile, "stwu fp, -8(sp)\n"); + fprintf(codefile, "stw r0, 4(sp)\n"); + fprintf(codefile, "mr fp, sp\n"); + } } void diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index a35ace230..367942408 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -106,8 +106,8 @@ TOKENS /* Allows us to use regvar() to refer to registers */ - GPR_EXPR = { GPR reg; } 4 reg. - FPR_EXPR = { FPR reg; } 8 reg. + REG_EXPR = { REG reg; } 4 reg. + FREG_EXPR = { FREG reg; } 8 reg. FSREG_EXPR = { FSREG reg; } 4 reg. /* Constants on the stack */ @@ -689,15 +689,16 @@ MOVES extrwi %2, %1.reg, {C, 1}, {C, 1} xori %2, %2, {C, 1} -/* GPR_EXPR exists solely to allow us to use regvar() (which can only +/* REG_EXPR exists solely to allow us to use regvar() (which can only be used in an expression) as a register constant. We can then use - our moves to GPR to set register variables. We define no moves to - LOCAL, so we avoid confusion between GPR and FSREG in LOCAL. */ + our moves to GPR or REG to set register variables. 
This is easier + than defining moves to LOCAL, and avoids confusion between GPR and + FSREG in LOCAL. */ - from INT_W to GPR_EXPR + from INT_W + LXFRAME to REG_EXPR gen move %1, %2.reg - from FLOAT_D to FPR_EXPR + from FLOAT_D to FREG_EXPR gen move %1, %2.reg from FLOAT_W to FSREG_EXPR @@ -788,18 +789,21 @@ COERCIONS /* "uses REG=%1" may find and reuse a register containing the * same token. For contrast, "uses REG gen move %1, %a" would * pick a different register before doing the move. - */ + * + * "reusing %1" helps when coercing an INT_W token like + * {SUM_RC, r3, 0-4} to REG3, by not stacking the token. + */ from INT_W - uses REG=%1 + uses reusing %1, REG=%1 yields %a from FLOAT_D - uses FREG=%1 + uses reusing %1, FREG=%1 yields %a from FLOAT_W - uses FSREG=%1 + uses reusing %1, FSREG=%1 yields %a /* Splitting coercions can't allocate registers. @@ -1001,7 +1005,7 @@ PATTERNS with exact INT_W /* ncg fails to infer that regvar($1) is dead! */ kills regvar($1) - gen move %1, {GPR_EXPR, regvar($1)} + gen move %1, {REG_EXPR, regvar($1)} with STACK gen lwz {LOCAL, $1}, {IND_RC_W, sp, 0} @@ -1026,7 +1030,7 @@ PATTERNS pat sdl inreg($1)==reg_float /* Store double-word to local */ with exact FLOAT_D kills regvar_d($1, reg_float) - gen move %1, {FPR_EXPR, regvar_d($1, reg_float)} + gen move %1, {FREG_EXPR, regvar_d($1, reg_float)} with STACK gen lfd {DLOCAL, $1}, {IND_RC_D, sp, 0} @@ -1088,7 +1092,7 @@ PATTERNS leaving lxl $1 stf $2+EM_BSIZE pat lxa sdf nicelx($1) leaving lxl $1 stf $2+EM_BSIZE - pat lxa $1==0 || nicelx($1) + pat lxa nicelx($1) leaving lxl $1 adp EM_BSIZE /* Load locals in statically enclosing procedures */ @@ -1146,11 +1150,21 @@ PATTERNS pat lxl nicelx($1) uses REG={LXFRAME, $1} yields %a /* Can't yield LXFRAME. 
*/ + pat lxl stl nicelx($1) && inreg($2)==reg_any + kills regvar($2) + gen move {LXFRAME, $1}, {REG_EXPR, regvar($2)} - pat lxl $1==0 /* Our local base */ + pat lxl cal $1==0 /* Pass our local base to procedure */ with STACK gen stwu fp, {IND_RC_W, sp, 0-4} - /* Can't yield fp. */ + leaving cal $2 + + pat lxl $1==0 /* Our local base */ + uses REG=fp + yields %a /* Can't yield fp. */ + + pat lxa $1==0 /* Our argument base */ + yields {SUM_RC, fp, EM_BSIZE} /* Global variables */ @@ -1421,7 +1435,7 @@ PATTERNS pat sbi $1==4 /* Subtract word (second - top) */ with REG REG - uses reusing %2, REG + uses reusing %1, reusing %2, REG yields {SUB_RR, %2, %1} with CONST2_WHEN_NEG REG yields {SUM_RC, %2, 0-%1.val} @@ -1585,7 +1599,7 @@ PATTERNS gen slwi %a, %2, {C, %1.val & 0x1F} yields %a with REG REG - uses reusing %2, REG + uses reusing %1, reusing %2, REG gen slw %a, %2, %1 yields %a pat sli stl $1==4 && inreg($2)==reg_any @@ -1600,7 +1614,7 @@ PATTERNS gen srawi %a, %2, {C, %1.val & 0x1F} yields %a with REG REG - uses reusing %2, REG + uses reusing %1, reusing %2, REG gen sraw %a, %2, %1 yields %a pat sri stl $1==4 && inreg($2)==reg_any @@ -1615,7 +1629,7 @@ PATTERNS gen srwi %a, %2, {C, %1.val & 0x1F} yields %a with REG REG - uses reusing %2, REG + uses reusing %1, reusing %2, REG gen srw %a, %2, %1 yields %a pat sru stl $1==4 && inreg($2)==reg_any @@ -1630,7 +1644,7 @@ PATTERNS gen rotlwi %a, %2, {C, %1.val & 0x1F} yields %a with REG REG - uses reusing %2, REG + uses reusing %1, reusing %2, REG gen rotlw %a, %2, %1 yields %a pat rol stl $1==4 && inreg($2)==reg_any @@ -1776,10 +1790,10 @@ PATTERNS uses reusing %1, REG={COND_RC, %1, %2.val} yields {XEQ, %a} with CONST2 REG - uses reusing %1, REG={COND_RC, %2, %1.val} + uses reusing %2, REG={COND_RC, %2, %1.val} yields {XEQ, %a} with REG REG - uses reusing %1, REG={COND_RR, %2, %1} + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} yields {XEQ, %a} pat cmi tne $1==4 /* Signed second != top */ @@ -1787,10 +1801,10 @@ 
PATTERNS uses reusing %1, REG={COND_RC, %1, %2.val} yields {XNE, %a} with CONST2 REG - uses reusing %1, REG={COND_RC, %2, %1.val} + uses reusing %2, REG={COND_RC, %2, %1.val} yields {XNE, %a} with REG REG - uses reusing %1, REG={COND_RR, %2, %1} + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} yields {XNE, %a} pat cmi tgt $1==4 /* Signed second > top */ @@ -1798,10 +1812,10 @@ PATTERNS uses reusing %1, REG={COND_RC, %1, %2.val} yields {XLT, %a} with CONST2 REG - uses reusing %1, REG={COND_RC, %2, %1.val} + uses reusing %2, REG={COND_RC, %2, %1.val} yields {XGT, %a} with REG REG - uses reusing %1, REG={COND_RR, %2, %1} + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} yields {XGT, %a} pat cmi tge $1==4 /* Signed second >= top */ @@ -1809,10 +1823,10 @@ PATTERNS uses reusing %1, REG={COND_RC, %1, %2.val} yields {XLE, %a} with CONST2 REG - uses reusing %1, REG={COND_RC, %2, %1.val} + uses reusing %2, REG={COND_RC, %2, %1.val} yields {XGE, %a} with REG REG - uses reusing %1, REG={COND_RR, %2, %1} + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} yields {XGE, %a} pat cmi tlt $1==4 /* Signed second < top */ @@ -1820,10 +1834,10 @@ PATTERNS uses reusing %1, REG={COND_RC, %1, %2.val} yields {XGT, %a} with CONST2 REG - uses reusing %1, REG={COND_RC, %2, %1.val} + uses reusing %2, REG={COND_RC, %2, %1.val} yields {XLT, %a} with REG REG - uses reusing %1, REG={COND_RR, %2, %1} + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} yields {XLT, %a} pat cmi tle $1==4 /* Signed second <= top */ @@ -1831,10 +1845,10 @@ PATTERNS uses reusing %1, REG={COND_RC, %1, %2.val} yields {XGE, %a} with CONST2 REG - uses reusing %1, REG={COND_RC, %2, %1.val} + uses reusing %2, REG={COND_RC, %2, %1.val} yields {XLE, %a} with REG REG - uses reusing %1, REG={COND_RR, %2, %1} + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} yields {XLE, %a} pat cmu teq $1==4 /* Unsigned second == top */ @@ -1842,10 +1856,10 @@ PATTERNS uses reusing %1, REG={CONDL_RC, %1, %2.val} yields {XEQ, %a} with 
UCONST2 REG - uses reusing %1, REG={CONDL_RC, %2, %1.val} + uses reusing %2, REG={CONDL_RC, %2, %1.val} yields {XEQ, %a} with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} yields {XEQ, %a} pat cmu tne $1==4 /* Unsigned second != top */ @@ -1853,10 +1867,10 @@ PATTERNS uses reusing %1, REG={CONDL_RC, %1, %2.val} yields {XNE, %a} with UCONST2 REG - uses reusing %1, REG={CONDL_RC, %2, %1.val} + uses reusing %2, REG={CONDL_RC, %2, %1.val} yields {XNE, %a} with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} yields {XNE, %a} pat cmu tgt $1==4 /* Unsigned second > top */ @@ -1864,10 +1878,10 @@ PATTERNS uses reusing %1, REG={CONDL_RC, %1, %2.val} yields {XLT, %a} with UCONST2 REG - uses reusing %1, REG={CONDL_RC, %2, %1.val} + uses reusing %2, REG={CONDL_RC, %2, %1.val} yields {XGT, %a} with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} yields {XGT, %a} pat cmu tge $1==4 /* Unsigned second >= top */ @@ -1875,10 +1889,10 @@ PATTERNS uses reusing %1, REG={CONDL_RC, %1, %2.val} yields {XLE, %a} with UCONST2 REG - uses reusing %1, REG={CONDL_RC, %2, %1.val} + uses reusing %2, REG={CONDL_RC, %2, %1.val} yields {XGE, %a} with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} yields {XGE, %a} pat cmu tlt $1==4 /* Unsigned second < top */ @@ -1886,10 +1900,10 @@ PATTERNS uses reusing %1, REG={CONDL_RC, %1, %2.val} yields {XGT, %a} with UCONST2 REG - uses reusing %1, REG={CONDL_RC, %2, %1.val} + uses reusing %2, REG={CONDL_RC, %2, %1.val} yields {XLT, %a} with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} yields {XLT, %a} pat cmu tle $1==4 /* Unsigned second <= top */ @@ -1897,10 +1911,10 @@ PATTERNS uses reusing %1, REG={CONDL_RC, %1, %2.val} yields {XGE, %a} with UCONST2 REG - uses reusing %1, 
REG={CONDL_RC, %2, %1.val} + uses reusing %2, REG={CONDL_RC, %2, %1.val} yields {XLE, %a} with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} yields {XLE, %a} @@ -1990,7 +2004,7 @@ PATTERNS gen extlwi %a, %a, {C, 2}, {C, 0} yields %a with REG REG - uses reusing %1, REG={COND_RR, %2, %1} + uses reusing %1, reusing %2, REG={COND_RR, %2, %1} gen extlwi %a, %a, {C, 2}, {C, 0} yields %a @@ -2004,7 +2018,7 @@ PATTERNS gen extlwi %a, %a, {C, 2}, {C, 0} yields %a with REG REG - uses reusing %1, REG={CONDL_RR, %2, %1} + uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1} gen extlwi %a, %a, {C, 2}, {C, 0} yields %a @@ -2159,25 +2173,28 @@ PATTERNS leaving ine "hol0" pat lim /* Load EM trap ignore mask */ - leaving lde ".ignmask" + leaving loe ".ignmask" pat sim /* Store EM trap ignore mask */ leaving ste ".ignmask" - pat trp /* Raise EM trap */ - with REG3 - kills ALL - gen bl {LABEL, ".trp"} - pat sig /* Set trap handler, yield old */ leaving loe ".trppc" exg 4 ste ".trppc" + pat trp /* Raise EM trap */ + with REG3 + kills ALL + gen bl {LABEL, ".trp"} + pat rtt /* Return from trap */ leaving ret 0 + pat rck $1==4 /* Range check */ + leaving cal ".rck" + /* Our caller's local base, "lxl 0 dch", appears in * lang/cem/libcc.ansi/setjmp/setjmp.e, lang/m2/libm2/par_misc.e */ @@ -2216,16 +2233,50 @@ PATTERNS uses REG=sp yields %a /* Can't yield sp. */ + /* Next few patterns for "lor 1" appear in + * lang/m2/libm2/par_misc.e + */ + pat lor lor $1==1 && $2==1 /* Load sp twice */ + with STACK + gen stwu sp, {IND_RC_W, sp, 0-4} + leaving lor 1 + + pat lor adp $1==1 && smalls($2) /* sp + constant */ + with STACK + uses REG + gen addi %a, sp, {C, $2} + yields %a + + /* Subtract stack pointer by doing %1 - (sp - 4) + * because sp - 4 would point to %1. 
+ */ + pat lor sbs loc adu $1==1 && $2==4 && $4==4 + with REG STACK + uses reusing %1, REG + gen subf %a, sp, %1 + yields %a + leaving loc $3+4 adu 4 + pat lor sbs $1==1 && $2==4 + with REG STACK + uses reusing %1, REG + gen subf %a, sp, %1 + yields {SUM_RC, %a, 4} + pat str $1==0 /* Store local base */ - with REG + with INT_W gen move %1, fp + with STACK + gen + lwz fp, {IND_RC_W, sp, 0} + addi sp, sp, {C, 4} pat str $1==1 /* Store stack pointer */ - with REG + with INT_W + kills ALL gen move %1, sp - - pat rck $1==4 /* Range check */ - leaving cal ".rck" + with STACK + kills ALL + gen lwz sp, {IND_RC_W, sp, 0} /* Single-precision floating-point */ @@ -2235,7 +2286,7 @@ PATTERNS pat adf $1==4 /* Add single */ with FSREG FSREG - uses reusing %1, FSREG + uses reusing %1, reusing %2, FSREG gen fadds %a, %2, %1 yields %a pat adf stl $1==4 && inreg($2)==reg_float @@ -2244,7 +2295,7 @@ PATTERNS pat sbf $1==4 /* Subtract single */ with FSREG FSREG - uses reusing %1, FSREG + uses reusing %1, reusing %2, FSREG gen fsubs %a, %2, %1 yields %a pat sbf stl $1==4 && inreg($2)==reg_float @@ -2253,7 +2304,7 @@ PATTERNS pat mlf $1==4 /* Multiply single */ with FSREG FSREG - uses reusing %1, FSREG + uses reusing %1, reusing %2, FSREG gen fmuls %a, %2, %1 yields %a pat mlf stl $1==4 && inreg($2)==reg_float @@ -2262,7 +2313,7 @@ PATTERNS pat dvf $1==4 /* Divide single */ with FSREG FSREG - uses reusing %1, FSREG + uses reusing %1, reusing %2, FSREG gen fdivs %a, %2, %1 yields %a pat dvf stl $1==4 && inreg($2)==reg_float @@ -2377,7 +2428,7 @@ PATTERNS pat adf $1==8 /* Add double */ with FREG FREG - uses reusing %1, FREG + uses reusing %1, reusing %2, FREG gen fadd %a, %2, %1 yields %a pat adf sdl $1==8 && inreg($2)==reg_float @@ -2386,7 +2437,7 @@ PATTERNS pat sbf $1==8 /* Subtract double */ with FREG FREG - uses reusing %1, FREG + uses reusing %1, reusing %2, FREG gen fsub %a, %2, %1 yields %a pat sbf sdl $1==8 && inreg($2)==reg_float @@ -2395,7 +2446,7 @@ PATTERNS pat mlf $1==8 /* 
Multiply double */ with FREG FREG - uses reusing %1, FREG + uses reusing %1, reusing %2, FREG gen fmul %a, %2, %1 yields %a pat mlf sdl $1==8 && inreg($2)==reg_float @@ -2404,7 +2455,7 @@ PATTERNS pat dvf $1==8 /* Divide double */ with FREG FREG - uses reusing %1, FREG + uses reusing %1, reusing %2, FREG gen fdiv %a, %2, %1 yields %a pat dvf sdl $1==8 && inreg($2)==reg_float From b90c97b00bf4bef2bd51403e9bf2b4795247fd9a Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 5 Jan 2018 17:55:50 -0500 Subject: [PATCH 28/55] Teach top to merge or delete "addi sp, sp, X". This reduces code size, because ncg emits too many "addi sp, sp, X" instructions when unstacking things. Now top lowers "addi sp, sp, X" by lifting other instructions. This sometimes creates chances to merge or delete _addi_ instructions. If no such chance is found, the _addi_ remains uselessly lowered. Edit ncg/table to remove something that top now does. Edit ncg/mach.c to remove some spaces after commas. This removes a whitespace difference between *.s and *.so files, because top removes the space. 
--- mach/powerpc/ncg/mach.c | 6 +- mach/powerpc/ncg/table | 20 +++---- mach/powerpc/top/table | 130 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 142 insertions(+), 14 deletions(-) diff --git a/mach/powerpc/ncg/mach.c b/mach/powerpc/ncg/mach.c index a31879de9..06e39709f 100644 --- a/mach/powerpc/ncg/mach.c +++ b/mach/powerpc/ncg/mach.c @@ -203,7 +203,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf) for (reg = 31; reg >= 0; reg--) { if (savedf[reg] != LONG_MIN) { offset -= 8; - fprintf(codefile, "%s f%d, %ld(fp)\n", + fprintf(codefile, "%s f%d,%ld(fp)\n", opf, reg, offset); } } @@ -220,7 +220,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf) while (reg > 0 && savedi[reg - 1] != LONG_MIN) reg--; offset -= (32 - reg) * 4; - fprintf(codefile, "%s r%d, %ld(fp)\n", opm, reg, offset); + fprintf(codefile, "%s r%d,%ld(fp)\n", opm, reg, offset); } else reg = 32; @@ -228,7 +228,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf) for (reg--; reg >= 0; reg--) { if (savedi[reg] != LONG_MIN) { offset -= 4; - fprintf(codefile, "%s r%d, %ld(fp)\n", + fprintf(codefile, "%s r%d,%ld(fp)\n", ops, reg, offset); } } diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 367942408..df06a5d49 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -334,9 +334,9 @@ INSTRUCTIONS lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3). lhz GPR:wo, SET_RC_H:ro cost(4, 3). lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). + lwz GPR+LOCAL:wo, SET_RC_W:ro cost(4, 3). lwzu GPR:wo, IND_RC_W:rw cost(4, 3). lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lwz GPR+LOCAL:wo, SET_RC_W:ro cost(4, 3). mfcr GPR:wo cost(4,2). mfspr GPR:wo, SPR:ro cost(4, 3). mtspr SPR:wo, GPR:ro cost(4, 2). @@ -361,7 +361,6 @@ INSTRUCTIONS rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro. rotlw GPR+LOCAL:wo, GPR:ro, GPR:ro. slw GPR+LOCAL:wo, GPR:ro, GPR:ro. - subf GPR:wo, GPR:ro, GPR:ro. sraw GPR+LOCAL:wo, GPR:ro, GPR:ro cost(4, 2). 
srawi GPR+LOCAL:wo, GPR:ro, CONST:ro cost(4, 2). srw GPR+LOCAL:wo, GPR:ro, GPR:ro. @@ -378,6 +377,7 @@ INSTRUCTIONS stw GPR:ro, SET_RC_W:rw cost(4, 3). stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3). stwu GPR:ro, IND_RC_W:rw cost(4, 3). + subf GPR:wo, GPR:ro, GPR:ro. xor GPR:wo, GPR:ro, GPR:ro. xori GPR:wo, GPR:ro, CONST:ro. xoris GPR:wo, GPR:ro, CONST:ro. @@ -762,6 +762,10 @@ STACKINGRULES COERCIONS + /* The unstacking coercions emit many "addi sp, sp, X" + * instructions; the target optimizer (top) will merge them. + */ + from STACK uses REG gen @@ -2103,12 +2107,13 @@ PATTERNS mr fp, r0 blr. + /* If "ret" coerces STACK to REG3, then top will delete the + * extra "addi sp, sp, 4". + */ + pat ret $1==4 /* Return from procedure, word */ with REG3 leaving ret 0 - with STACK - gen lwz r3, {IND_RC_W, sp, 0} - leaving ret 0 pat ret $1==8 /* Return from proc, double-word */ with REG3 INT_W @@ -2117,11 +2122,6 @@ PATTERNS with REG3 STACK gen lwz r4, {IND_RC_W, sp, 0} leaving ret 0 - with STACK - gen - lwz r3, {IND_RC_W, sp, 0} - lwz r4, {IND_RC_W, sp, 4} - leaving ret 0 /* * These rules for blm/bls are wrong if length is zero. diff --git a/mach/powerpc/top/table b/mach/powerpc/top/table index b3f5b3a31..cbc16c277 100644 --- a/mach/powerpc/top/table +++ b/mach/powerpc/top/table @@ -1,11 +1,12 @@ /* PowerPC table for ACK target optimizer */ -MAXOP 3; +MAXOP 5; LABEL_STARTER '.'; %%; +L1, L2, L3, L4, L5 { not_using_sp(VAL) }; RNZ { strcmp(VAL, "r0") }; /* not r0 */ X, Y, Z { TRUE }; @@ -16,6 +17,47 @@ X, Y, Z { TRUE }; addi RNZ, RNZ, 0 -> ; addis RNZ, RNZ, 0 -> ; +addi RNZ, RNZ, X : addi RNZ, RNZ, Y { plus(X, Y, Z) } + -> addi RNZ, RNZ, Z ; + +/* Lower "addi sp, sp, X" by lifting other instructions, looking for + * chances to merge or delete _addi_ instructions, and assuming that + * the code generator uses "sp" not "r1". 
+ */ +addi sp, sp, X : ANY L1 { lift(ANY) } + -> ANY L1 : addi sp, sp, X ; +addi sp, sp, X : ANY L1, L2 { lift(ANY) } + -> ANY L1, L2 : addi sp, sp, X ; +addi sp, sp, X : ANY L1, L2, L3 { lift(ANY) } + -> ANY L1, L2, L3 : addi sp, sp, X ; +addi sp, sp, X : ANY L1, L2, L3, L4 { lift(ANY) } + -> ANY L1, L2, L3, L4 : addi sp, sp, X ; +addi sp, sp, X : ANY L1, L2, L3, L4, L5 { lift(ANY) } + -> ANY L1, L2, L3, L4, L5 : addi sp, sp, X ; +addi sp, sp, X : lmw Y, L1 { Y[0]=='r' && atoi(Y+1)>1 } + -> lmw Y, L1 : addi sp, sp, X ; + +/* Merge _addi_ when popping from the stack. */ +addi sp, sp, X : lwz L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> lwz L1, Z(sp) : addi sp, sp, X ; +addi sp, sp, X : lfs L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> lfs L1, Z(sp) : addi sp, sp, X ; +addi sp, sp, X : lfd L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> lfd L1, Z(sp) : addi sp, sp, X ; + +/* Lower or delete _addi_ when pushing to the stack. */ +addi sp, sp, X : stwu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> stw L1, Z(sp) : addi sp, sp, Z ; +addi sp, sp, X : stfsu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> stfs L1, Z(sp) : addi sp, sp, Z ; +addi sp, sp, X : stfdu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> stfd L1, Z(sp) : addi sp, sp, Z ; +addi sp, sp, 4 : stfdu L1, -8(sp) -> stfdu L1, -4(sp) ; + +/* Delete _addi_ when setting the stack pointer. */ +addi sp, sp, X : addi sp, L1, Y -> addi sp, L1, Y ; +addi sp, sp, X : lwz sp, L1 -> lwz sp, L1 ; + or X, Y, Y -> mr X, Y ; or. X, Y, Y -> mr. X, Y ; @@ -50,3 +92,89 @@ b X : labdef X -> labdef X ; /* LT=0, GT=1, EQ=2, OV=3 */ %%; + +/* Is it a word character? 0-9A-Za-z_ */ +static int isword(char c) { + return + (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || (c == '_'); +} + +/* Does operand _s_ not use the stack pointer? 
*/ +int not_using_sp(const char *s) { + int boundary; + + boundary = 1; + while (*s) { + if (boundary && + ((s[0]=='s' && s[1]=='p') || (s[0]=='r' && s[1]=='1')) && + !isword(s[2])) + return 0; + boundary = !isword(*s); + s++; + } + return 1; +} + + +/* Instructions to lift(), sorted in strcmp() order. These are from + * ../ncg/table, minus branch instructions. + */ +const char *liftables[] = { + "add", "add.", "addi", + "and", "andc", "andi.", "andis.", + "cmp", "cmpi", "cmpl", "cmpli", + "cmplw", "cmplwi", "cmpw", "cmpwi", + "divw", "divwu", "eqv", "extlwi", "extrwi", "extsb", "extsh", + "fadd", "fadds", "fcmpo", "fctiwz", "fdiv", "fdivs", + "fmr", "fmul", "fmuls", "fneg", "frsp", "fsub", "fsubs", + "lbz", "lbzx", + "lfd", "lfdu", "lfdx", "lfs", "lfsu", "lfsx", + "lha", "lhax", "lhz", "lhzx", + "li", "lis", "lwz", "lwzu", "lwzx", + "mfcr", "mfspr", "mr", "mr.", "mtspr", "mullw", + "nand", "neg", "nor", "or", "or.", "ori", "oris", + "rlwinm", "rlwnm", "rotlwi", "rotrwi", + "slw", "slwi", "sraw", "srawi", "srw", "srwi", + "stb", "stbx", + "stfd", "stfdu", "stfdx", "stfs", "stfsu", "stfsx", + "sth", "sthx", "stw", "stwu", "stwx", + "subf", "xor", "xori", "xoris", +}; + +static int liftcmp(const void *a, const void *b) { + return strcmp(*(const char **)a, *(const char **)b); +} + +/* May we lift instruction _s_ above "addi SP, SP, X"? */ +int lift(const char *s) { + return bsearch(&s, liftables, + sizeof(liftables) / sizeof(liftables[0]), + sizeof(liftables[0]), liftcmp) != 0; +} + + +/* Does it fit a signed 16-bit integer? */ +static int fits16(long l) { + return l >= -32768 && l <= 32767; +} + +/* Tries sum = a + b with signed 16-bit integers. 
*/ +int plus(const char *a, const char *b, const char *sum) +{ + long la, lb, lsum; + char *end; + + la = strtol(a, &end, 10); + if (*a == '\0' || *end != '\0' || !fits16(la)) + return 0; + lb = strtol(b, &end, 10); + if (*b == '\0' || *end != '\0' || !fits16(lb)) + return 0; + + lsum = la + lb; + if (!fits16(lsum)) + return 0; + snprintf(sum, 7, "%ld", lsum); + return 1; +} From 64b50b3a45476976b9dc6ad9b91c56f50e2958b7 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 7 Jan 2018 16:03:55 -0500 Subject: [PATCH 29/55] Shrink .cfu8 With my PowerBook G4, a program that converts values from 1.0 to 4000000.0 runs in about 0.32s with the old .cfu8 and 0.29s with this shrunken .cfu8 Leave a comment about other ways to implement .cfu8 --- mach/powerpc/libem/build.lua | 2 +- mach/powerpc/libem/cfu8.s | 66 ++++++++++++++++++-------------- mach/powerpc/libem/fd_80000000.s | 10 ----- mach/powerpc/libem/fd_FFFFFFFF.s | 10 ----- 4 files changed, 39 insertions(+), 49 deletions(-) delete mode 100644 mach/powerpc/libem/fd_80000000.s delete mode 100644 mach/powerpc/libem/fd_FFFFFFFF.s diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua index ac84e3b0f..7a0726b80 100644 --- a/mach/powerpc/libem/build.lua +++ b/mach/powerpc/libem/build.lua @@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do acklibrary { name = "lib_"..plat, srcs = { - "./*.s", -- trp.s + "./*.s", -- cfu8.s }, vars = { plat = plat }, deps = { diff --git a/mach/powerpc/libem/cfu8.s b/mach/powerpc/libem/cfu8.s index 915f84dd2..fd69ff521 100644 --- a/mach/powerpc/libem/cfu8.s +++ b/mach/powerpc/libem/cfu8.s @@ -1,3 +1,5 @@ +.sect .text; .sect .rom; .sect .data; .sect .bss + .sect .text ! Converts a 64-bit double into a 32-bit unsigned integer. @@ -6,32 +8,40 @@ .define .cfu8 .cfu8: - lis r3, ha16[.fd_00000000] - lfd f0, lo16[.fd_00000000](r3) ! f0 = 0.0 - - lfd f1, 0(sp) ! value to be converted - - lis r3, ha16[.fd_FFFFFFFF] - lfd f3, lo16[.fd_FFFFFFFF](r3) ! 
f3 = 0xFFFFFFFF - - lis r3, ha16[.fd_80000000] - lfd f4, lo16[.fd_80000000](r3) ! f4 = 0x80000000 - - fsel f2, f1, f1, f0 - fsub f5, f3, f1 - fsel f2, f5, f2, f3 - fsub f5, f2, f4 - fcmpu cr0, f2, f4 - fsel f2, f5, f5, f2 - fctiwz f2, f2 - - stfd f2, 0(sp) - addi sp, sp, 4 - - bltlr - - lwz r3, 0(sp) - xoris r3, r3, 0x8000 - stw r3, 0(sp) - + lfd f1, 0(sp) ! f1 = value to convert + lis r3, ha16[.fs_80000000] + lfs f2, lo16[.fs_80000000](r3) ! f2 = 2**31 + fsub f1, f1, f2 + fctiwz f1, f1 ! convert value - 2**31 + stfd f1, 0(sp) + lwz r3, 4(sp) + xoris r3, r3, 0x8000 ! add 2**31 + stw r3, 4(sp) + addi sp, sp, 4 blr + +.sect .rom +.fs_80000000: + !float 2.147483648e+9 sz 4 + .data1 0117,00,00,00 + +! Freescale and IBM provide an example using fsel to select value or +! value - 2**31 for fctiwz. The following code adapts Freescale's +! _Programming Environments Manual for 32-Bit Implementations of the +! PowerPC Architecture_, section C.3.2, pdf page 557. +! +! Given f2 = value clamped from 0 to 2**32 - 1, f4 = 2**31, then +! fsub f5, f2, f4 +! fcmpu cr2, f2, f4 +! fsel f2, f5, f5, f2 +! fctiwz f2, f2 +! stfdu f2, 0(sp) +! lwz r3, 4(sp) +! blt cr2, 1f +! xoris r3, r3, 0x8000 +! 1: yields r3 = the converted value. +! +! Debian's clang 3.5.0-10 and gcc 4.9.2-10 don't clamp the value +! before conversion. They avoid fsel and put the conditional branch +! before fctwiz. PowerPC 601 lacks fsel (but kernel might trap and +! emulate fsel). PowerPC 603, 604, G3, G4, G5 have fsel. diff --git a/mach/powerpc/libem/fd_80000000.s b/mach/powerpc/libem/fd_80000000.s deleted file mode 100644 index 5c153bba8..000000000 --- a/mach/powerpc/libem/fd_80000000.s +++ /dev/null @@ -1,10 +0,0 @@ -.sect .text; .sect .rom; .sect .data; .sect .bss - -.sect .rom - -! Contains a handy double-precision 0x80000000. 
- -.define .fd_80000000 -.fd_80000000: - !float 2.147483648e+9 sz 8 - .data1 0101,0340,00,00,00,00,00,00 diff --git a/mach/powerpc/libem/fd_FFFFFFFF.s b/mach/powerpc/libem/fd_FFFFFFFF.s deleted file mode 100644 index 88cf04bd9..000000000 --- a/mach/powerpc/libem/fd_FFFFFFFF.s +++ /dev/null @@ -1,10 +0,0 @@ -.sect .text; .sect .rom; .sect .data; .sect .bss - -.sect .rom - -! Contains a handy double-precision 0xFFFFFFFF. - -.define .fd_FFFFFFFF -.fd_FFFFFFFF: - !float 4.294967295e+9 sz 8 - .data1 0101,0357,0377,0377,0377,0340,00,00 From de2c7c3f253787e98b389305f3f3e9cdf27ce03d Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 8 Jan 2018 22:26:24 -0500 Subject: [PATCH 30/55] Hide some i386 stuff from linux68k, linuxppc. Rename plat/linux/libsys/errno.s to plat/linux386/libsys/trapno.s and stop building it for linux68k and linuxppc. It defines symbols for mach/i386/libem. In syscalls.h, the numbers after 165 are only for i386, so hide them from 68k, ppc. These numbers are unused, because the system calls now in libsys use the lower numbers. Also teach the build system that libsys depends on the internal headers in plat/linux/libsys/*.h --- plat/linux/libsys/syscalls.h | 7 +++++++ plat/linux386/libsys/build.lua | 1 + plat/{linux/libsys/errno.s => linux386/libsys/trapno.s} | 0 plat/linux68k/libsys/build.lua | 1 + plat/linuxppc/libsys/build.lua | 2 +- plat/osx386/libsys/build.lua | 2 +- 6 files changed, 11 insertions(+), 2 deletions(-) rename plat/{linux/libsys/errno.s => linux386/libsys/trapno.s} (100%) diff --git a/plat/linux/libsys/syscalls.h b/plat/linux/libsys/syscalls.h index 19d5543c6..8bddcc0ee 100644 --- a/plat/linux/libsys/syscalls.h +++ b/plat/linux/libsys/syscalls.h @@ -174,6 +174,12 @@ #define __NR_mremap 163 #define __NR_setresuid 164 #define __NR_getresuid 165 + +/* + * i386, m68020, powerpc use different numbers after 165. + * This file only has the numbers for i386. 
+ */ +#if defined(__i386) #define __NR_vm86 166 #define __NR_query_module 167 #define __NR_poll 168 @@ -324,5 +330,6 @@ #define concat(x, y) x##y #define MAPPED_SYSCALL(p, n) .define concat(p,n); concat(p,n): xor eax, eax; movb al, concat(__NR_,n); jmp __mapped_syscall +#endif /* __i386 */ #endif diff --git a/plat/linux386/libsys/build.lua b/plat/linux386/libsys/build.lua index a4d2d7447..7de7b4061 100644 --- a/plat/linux386/libsys/build.lua +++ b/plat/linux386/libsys/build.lua @@ -6,6 +6,7 @@ acklibrary { "plat/linux/libsys/*.s", }, deps = { + "plat/linux/libsys/*.h", "lang/cem/libcc.ansi/headers+headers", "plat/linux386/include+headers", }, diff --git a/plat/linux/libsys/errno.s b/plat/linux386/libsys/trapno.s similarity index 100% rename from plat/linux/libsys/errno.s rename to plat/linux386/libsys/trapno.s diff --git a/plat/linux68k/libsys/build.lua b/plat/linux68k/libsys/build.lua index ded71cdd1..c17436517 100644 --- a/plat/linux68k/libsys/build.lua +++ b/plat/linux68k/libsys/build.lua @@ -6,6 +6,7 @@ acklibrary { "plat/linux/libsys/*.s", }, deps = { + "plat/linux/libsys/*.h", "lang/cem/libcc.ansi/headers+headers", "plat/linux68k/include+headers", }, diff --git a/plat/linuxppc/libsys/build.lua b/plat/linuxppc/libsys/build.lua index 696c62d42..f58df16ea 100644 --- a/plat/linuxppc/libsys/build.lua +++ b/plat/linuxppc/libsys/build.lua @@ -8,7 +8,6 @@ acklibrary { "plat/linux/libsys/_hol0.s", "plat/linux/libsys/close.c", "plat/linux/libsys/creat.c", - "plat/linux/libsys/errno.s", "plat/linux/libsys/execve.c", "plat/linux/libsys/getpid.c", "plat/linux/libsys/gettimeofday.c", @@ -25,6 +24,7 @@ acklibrary { "plat/linux/libsys/write.c", }, deps = { + "plat/linux/libsys/*.h", "lang/cem/libcc.ansi/headers+headers", "plat/linuxppc/include+headers", }, diff --git a/plat/osx386/libsys/build.lua b/plat/osx386/libsys/build.lua index 23e491f7a..6a5b0e58c 100644 --- a/plat/osx386/libsys/build.lua +++ b/plat/osx386/libsys/build.lua @@ -19,7 +19,7 @@ acklibrary { 
"./sigaction.s", "./stat.s", "./write.s", - "plat/linux/libsys/errno.s", + "plat/linux386/libsys/trapno.s", "plat/osx/libsys/brk.c", "plat/osx/libsys/creat.c", "plat/osx/libsys/isatty.c", From 2b09d3756c0fec8643e476f5ea7c564a9a930255 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Tue, 9 Jan 2018 00:39:03 -0500 Subject: [PATCH 31/55] These are EM trap numbers. Remove .sect; absolute symbols don't go in a section. --- plat/linux386/libsys/trapno.s | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/plat/linux386/libsys/trapno.s b/plat/linux386/libsys/trapno.s index 550fd6d7c..4996de338 100644 --- a/plat/linux386/libsys/trapno.s +++ b/plat/linux386/libsys/trapno.s @@ -1,21 +1,7 @@ -# -! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/errno.s,v $ -! $State: Exp $ -! $Revision: 1.1 $ - -! Declare segments (the order is important). - -.sect .text -.sect .rom -.sect .data -.sect .bss - #define D(e) .define e; e -.sect .data - -! Define various ACK error numbers. Note that these are *not* ANSI C -! errnos, and are used for different purposes. +! Define various EM trap numbers needed by mach/i386/libem. +! Note that these are *not* ANSI C errnos. D(ERANGE) = 1 D(ESET) = 2 @@ -25,4 +11,3 @@ D(EILLINS) = 18 D(EODDZ) = 19 D(ECASE) = 20 D(EBADMON) = 25 - From 103d44c27c06f07bda935f0caa4e03114cd102b1 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 11 Jan 2018 17:59:02 -0500 Subject: [PATCH 32/55] Rewrite sigaction() to prevent another race. A signal handler might call sigaction(). We must block all signals, not only our signal, to prevent a race between us and the next signal handler. Use /* comments */ because cpp might expand macros in ! comments though such expansion is probably harmless. The bridge is now shorter by 2 instructions. 
--- plat/linuxppc/libsys/sigaction.s | 284 ++++++++++++++++++------------- 1 file changed, 161 insertions(+), 123 deletions(-) diff --git a/plat/linuxppc/libsys/sigaction.s b/plat/linuxppc/libsys/sigaction.s index 0509c8e72..1b1cea24a 100644 --- a/plat/linuxppc/libsys/sigaction.s +++ b/plat/linuxppc/libsys/sigaction.s @@ -1,156 +1,194 @@ #define __NR_sigaction 67 -#define SIG_BLOCK 0 +#define __NR_sigprocmask 126 #define SIG_SETMASK 2 -#define MAXSIG 32 -/* offsets into our stack frame */ -#define mynew 16 /* new sigaction */ -#define mynset 32 /* new signal set */ -#define myoset 36 /* old signal set */ -#define mysave 40 -#define mysize 56 +/* offsets into struct sigaction */ +#define sa_handler 0 /* in union with sa_sigaction */ +#define sa_mask 4 +#define sa_flags 8 +#define sa_restorer 12 + +/* offsets from stack pointer */ +#define mynewact 16 /* struct sigaction */ +#define myoldact 32 +#define newmask 64 /* signal set */ +#define oldmask 68 +#define oldhandler 72 +#define myret 76 +#define savelr 80 +#define signum 84 /* first argument */ +#define newact 88 +#define oldact 92 .sect .text; .sect .rodata; .sect .data; .sect .bss /* * Linux calls signal handlers with arguments in registers, but the * ACK expects arguments on the stack. This sigaction() uses a - * "bridge" to move the arguments. + * "bridge" to move the arguments, but + * + * - If the caller passes a bad pointer, this sigaction() causes + * SIGBUS or SIGSEGV instead of setting errno = EFAULT. + * + * - This sigaction() only works with signals 1 to 31, not with + * real-time signals 32 to 64. + * + * - This sigaction() is not safe for multiple threads. + * + * int sigaction(int signum, const struct sigaction *newact, + * struct sigaction *oldact); */ .sect .text .define _sigaction _sigaction: mflr r0 - subi r1, r1, mysize - stw r31, mysave+8(r1) - stw r30, mysave+4(r1) - stw r29, mysave(r1) - stw r0, mysave+12(r1) - li r3, 0 - stw r3, mynset(r1) ! mynset = 0 - lwz r29, mysize(r1) ! 
r29 = signal number - lwz r30, mysize+4(r1) ! r30 = new action - lwz r31, mysize+8(r1) ! r31 = old action + li r3, __NR_sigprocmask + stwu r3, -signum(sp) /* keep 0(sp) = __NR_sigprocmask */ + stw r0, savelr(sp) + + /* Copy newact to stack (before blocking SIGBUS, SIGSEGV). */ + lwz r3, newact(sp) + mr. r3, r3 + beq 1f /* skip if newact == NULL */ + lwz r4, sa_handler(r3) + lwz r5, sa_mask(r3) + lwz r6, sa_flags(r3) + lwz r7, sa_restorer(r3) + stw r4, mynewact+sa_handler(sp) + stw r5, mynewact+sa_mask(sp) + stw r6, mynewact+sa_flags(sp) + stw r7, mynewact+sa_restorer(sp) + /* - * If the new action is non-NULL, the signal number is in - * range 1 to MAXSIG, and the new handler is not SIG_DFL 0 - * or SIG_IGN 1, then we interpose our bridge. + * Block all signals to prevent a race. After we set sharray, + * we must call the kernel's sigaction before the next signal + * handler runs. This prevents two problems: + * + * - The bridge might call the new handler while the kernel + * uses the mask and flags of the old handler. + * + * - The signal handler might call sigaction() and destroy + * sharray. We must block all signals because any signal + * handler might call sigaction() for our signal. */ - cmpwi cr0, r30, 0 - subi r7, r29, 1 ! r7 = index in handlers - cmplwi cr7, r7, MAXSIG ! unsigned comparison - beq cr0, kernel - bge cr7, kernel - lwz r3, 0(r30) ! r3 = new handler - clrrwi. r3, r3, 1 - beq cr0, kernel - /* - * Block the signal while we build the bridge. Prevents a - * race if a signal arrives after we change the bridge but - * before we change the action in the kernel. - */ - li r4, 1 - slw r4, r4, r7 - stw r4, mynset(r1) ! mynmask = 1 << (signal - 1) - li r3, SIG_BLOCK - la r4, mynset(r1) - la r5, myoset(r1) - stw r3, 0(r1) - stw r4, 4(r1) - stw r5, 8(r1) - bl _sigprocmask - /* - * Point our bridge to the new signal handler. Then copy the - * new sigaction but point it to our bridge. 
- */ - lis r6, hi16[handlers] - ori r6, r6, lo16[handlers] - subi r7, r29, 1 - slwi r7, r7, 2 - lwz r3, 0(r30) ! r3 = new handler - stwx r3, r6, r7 ! put it in array of handlers - lis r3, hi16[bridge] - ori r3, r3, lo16[bridge] - lwz r4, 4(r30) - lwz r5, 8(r30) - lwz r6, 12(r30) - stw r3, mynew(r1) ! sa_handler or sa_sigaction - stw r4, mynew+4(r1) ! sa_mask - stw r5, mynew+8(r1) ! sa_flags - stw r6, mynew+12(r1) ! sa_restorer - la r30, mynew(r1) -kernel: - li r3, __NR_sigaction - stw r3, 0(r1) - stw r29, 4(r1) - stw r30, 8(r1) - stw r31, 12(r1) +1: li r4, SIG_SETMASK + li r5, -1 /* mask signals 1 to 32 */ + stw r5, newmask(sp) + la r5, newmask(sp) + la r6, oldmask(sp) + stw r4, 4(sp) /* kept 0(sp) = __NR_sigprocmask */ + stw r5, 8(sp) + stw r6, 12(sp) bl __syscall + /* - * If we blocked the signal, then restore the old signal mask. + * If the signal number is in range 1 to 31, and the new + * handler is not SIG_DFL 0 or SIG_IGN 1, then we interpose + * our bridge. */ - lwz r3, mynset(r1) - cmpwi cr0, r3, 0 - beq cr0, fixold - li r3, SIG_SETMASK - la r4, myoset(r1) - li r5, 0 - stw r3, 0(r1) - stw r4, 4(r1) - stw r5, 8(r1) - bl _sigprocmask - /* - * If the old sigaction is non-NULL and points to our bridge, - * then point it to the signal handler. - */ -fixold: - cmpwi cr0, r31, 0 - beq cr0, leave - lis r3, hi16[bridge] - ori r3, r3, lo16[bridge] - lwz r4, 0(r31) - cmpw cr0, r3, r4 - bne cr0, leave - lis r6, hi16[handlers] - ori r6, r6, lo16[handlers] - subi r7, r29, 1 - slwi r7, r7, 2 - lwzx r3, r6, r7 ! get it from array of handlers - stw r3, 0(r31) ! 
put it in old sigaction -leave: - lwz r0, mysave+12(r1) - lwz r29, mysave(r1) - lwz r30, mysave+4(r1) - lwz r31, mysave+8(r1) - addi r1, r1, mysize + lwz r4, signum(sp) /* keep r4 = signum */ + addi r5, r4, -1 + cmplwi r5, 30 + bgt 2f /* skip if out of range */ + + slwi r5, r5, 2 /* r5 = sharray index */ + lis r6, ha16[sharray] + la r6, lo16[sharray](r6) /* r6 = sharray */ + lwzx r0, r6, r5 + stw r0, oldhandler(sp) /* remember old handler */ + lwz r0, newact(sp) + mr. r0, r0 + beq 2f /* skip if newact == NULL */ + + lwz r3, mynewact+sa_handler(sp) + cmplwi r3, 2 /* r3 = new handler */ + blt 2f /* skip if SIG_DFL or SIG_IGN */ + + stwx r3, r6, r5 /* put new handler in sharray */ + lis r3, ha16[sigbridge] + la r3, lo16[sigbridge](r3) + stw r3, mynewact+sa_handler(sp) + + /* Call the kernel's sigaction. */ + /* sigaction(signum, &mynewact or NULL, &myoldact or NULL) */ +2: li r3, __NR_sigaction + lwz r0, newact(sp) + mr. r0, r0 + beq 3f + la r5, mynewact(sp) + b 4f +3: li r5, 0 +4: lwz r0, oldact(sp) + mr. r0, r0 + beq 5f + la r6, myoldact(sp) + b 6f +5: li r6, 0 +6: stw r3, 0(sp) + stw r4, 4(sp) /* kept r4 = signum */ + stw r5, 8(sp) + stw r6, 12(sp) + bl __syscall + stw r3, myret(sp) + + /* Unblock signals by restoring old signal mask. */ + li r3, __NR_sigprocmask + li r4, SIG_SETMASK + la r5, oldmask(sp) + li r6, 0 + stw r3, 0(sp) + stw r4, 4(sp) + stw r5, 8(sp) + stw r6, 12(sp) + bl __syscall + + /* Copy oldact from stack (after unblocking BUS, SEGV). */ + lwz r3, oldact(sp) + mr. r3, r3 + beq 8f /* skip if oldact == NULL */ + lwz r4, myoldact+sa_handler(sp) + lis r5, ha16[sigbridge] + la r5, lo16[sigbridge](r5) + cmpw r4, r5 + bne 7f + lwz r4, oldhandler(sp) +7: lwz r5, myoldact+sa_mask(sp) + lwz r6, myoldact+sa_flags(sp) + lwz r7, myoldact+sa_restorer(sp) + stw r4, sa_handler(r3) + stw r5, sa_mask(r3) + stw r6, sa_flags(r3) + stw r7, sa_restorer(r3) + +8: lwz r0, savelr(sp) + lwz r3, myret(sp) + addi sp, sp, signum mtlr r0 - blr ! 
return from sigaction + blr /* - * Linux calls bridge(signum) or bridge(signum, info, context) with - * arguments in registers r3, r4, r5. + * Linux calls sigbridge(signum) or sigbridge(signum, info, context) + * with arguments in registers r3, r4, r5. */ -bridge: +sigbridge: mflr r0 - subi r1, r1, 16 + stwu r3, -16(sp) /* signal number */ + stw r4, 4(sp) /* info */ + stw r5, 8(sp) /* context */ stw r0, 12(r1) - stw r3, 0(r1) ! signal number - stw r4, 4(r1) ! info - stw r5, 8(r1) ! context - lis r6, hi16[handlers] - ori r6, r6, lo16[handlers] - subi r7, r3, 1 - slwi r7, r7, 2 + lis r6, ha16[sharray - 4] + la r6, lo16[sharray - 4](r6) + slwi r7, r3, 2 lwzx r6, r6, r7 mtctr r6 - bctrl ! call our signal handler + bctrl /* call our signal handler */ - lwz r0, 12(r1) + lwz r0, 12(sp) addi r1, r1, 16 mtlr r0 - blr ! return from bridge + blr /* sigreturn(2) */ .sect .bss -handlers: - .space 4 * MAXSIG ! array of signal handlers +sharray: + .space 4 * 31 /* handlers for signals 1 to 31 */ From f1304e1a3c2a6d32bf156dfbb8541d317bfd1e9a Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 11 Jan 2018 20:04:27 -0500 Subject: [PATCH 33/55] Use extended mnemonics and ha16/lo16. Remove wrong comment: that's a right shift, not a left shift. --- plat/linuxppc/boot.s | 2 +- plat/linuxppc/libsys/_syscall.s | 37 +++++++++++++-------------------- plat/osxppc/boot.s | 2 +- plat/osxppc/libsys/set_errno.s | 8 +++---- 4 files changed, 20 insertions(+), 29 deletions(-) diff --git a/plat/linuxppc/boot.s b/plat/linuxppc/boot.s index 2da5dd556..33b2abd61 100644 --- a/plat/linuxppc/boot.s +++ b/plat/linuxppc/boot.s @@ -32,7 +32,7 @@ begtext: lwz r3, 0(sp) ! r3 = argc addi r4, sp, 4 ! r4 = argv - rlwinm r5, r3, 32-2, 2, 31 ! shift left 2 bits + srwi r5, r3, 2 add r5, r5, r4 addi r5, r5, 8 ! 
r5 = env diff --git a/plat/linuxppc/libsys/_syscall.s b/plat/linuxppc/libsys/_syscall.s index c7e818830..f60423bea 100644 --- a/plat/linuxppc/libsys/_syscall.s +++ b/plat/linuxppc/libsys/_syscall.s @@ -12,17 +12,8 @@ .sect .text -EINVAL = 22 +#define EINVAL 22 -#define IFFALSE 4 -#define IFTRUE 12 -#define ALWAYS 20 - -#define LT 0 -#define GT 1 -#define EQ 2 -#define OV 3 - ! Perform a Linux system call. .define __syscall @@ -32,21 +23,21 @@ __syscall: lwz r4, 8(sp) lwz r5, 12(sp) sc 0 - bclr IFFALSE, OV, 0 - + bnslr + ! On error, r3 contains the errno. ! It just so happens that errnos 1-34 are the same in Linux as in ACK. - cmpi cr0, 0, r3, 1 - bc IFTRUE, LT, 2f - cmpi cr0, 0, r3, 34 - bc IFTRUE, GT, 2f - + cmpwi r3, 1 + blt 2f + cmpwi r3, 34 + bgt 2f + 3: - li32 r4, _errno - stw r3, 0(r4) - addi r3, r0, -1 - bclr ALWAYS, 0, 0 - + lis r4, ha16[_errno] + stw r3, lo16[_errno](r4) + li r3, -1 + blr + 2: - addi r3, r0, EINVAL + li r3, EINVAL b 3b diff --git a/plat/osxppc/boot.s b/plat/osxppc/boot.s index e96198eb4..1517ee377 100644 --- a/plat/osxppc/boot.s +++ b/plat/osxppc/boot.s @@ -29,7 +29,7 @@ begtext: lwz r3, 0(sp) ! r3 = argc addi r4, sp, 4 ! r4 = argv - rlwinm r5, r3, 32-2, 2, 31 ! shift left 2 bits + srwi r5, r3, 2 add r5, r5, r4 addi r5, r5, 8 ! r5 = env diff --git a/plat/osxppc/libsys/set_errno.s b/plat/osxppc/libsys/set_errno.s index e406865a6..beb124a7c 100644 --- a/plat/osxppc/libsys/set_errno.s +++ b/plat/osxppc/libsys/set_errno.s @@ -1,7 +1,7 @@ .sect .text .define .set_errno .set_errno: - li32 r10, _errno - stw r3, 0(r10) ! set errno - addi r3, r0, -1 ! return -1 - bclr 20, 0, 0 + lis r4, ha16[_errno] + stw r3, lo16[_errno](r4) ! set errno + li r3, -1 ! return -1 + blr From 66f93f08c5a0a3af85c1ce5f5278c06882403c37 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 22 Jan 2018 14:04:15 -0500 Subject: [PATCH 34/55] Add fef 4, fif 4. Improve fef 8, fif 8. Other float changes. When I wrote fef 8, I forgot to test denormalized numbers. Oops. 
Now fix two of my mistakes: - When checking for zero, `extrwi r6, r3, 22, 12` needs to be `extrwi r6, r3, 20, 12`. There are only 20 bits to extract. - After the multiplication by 2**64, I forgot to put the fraction in [0.5, 1) or (-1, -0.5] by setting IEEE exponent = 1022. Teach fif 8 about signed zero and NaN. In ncg/table, change cmf so NaN is not equal to any value, and comment why ordered comparisons don't work with NaN. Also add cost for fctiwz, remove extra `uses REG`. Edit comment in cfu8.s because the conditional branch might be before or after fctiwz. --- mach/powerpc/libem/build.lua | 2 +- mach/powerpc/libem/cfu8.s | 8 +++-- mach/powerpc/libem/fef4.s | 48 +++++++++++++++++++++++++++ mach/powerpc/libem/fef8.s | 46 +++++++++++++------------- mach/powerpc/libem/fif4.s | 64 ++++++++++++++++++++++++++++++++++++ mach/powerpc/libem/fif8.s | 45 ++++++++++++++++--------- mach/powerpc/ncg/table | 30 +++++++++++++---- 7 files changed, 194 insertions(+), 49 deletions(-) create mode 100644 mach/powerpc/libem/fef4.s create mode 100644 mach/powerpc/libem/fif4.s diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua index 7a0726b80..2709a4770 100644 --- a/mach/powerpc/libem/build.lua +++ b/mach/powerpc/libem/build.lua @@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do acklibrary { name = "lib_"..plat, srcs = { - "./*.s", -- cfu8.s + "./*.s", -- fif4.s }, vars = { plat = plat }, deps = { diff --git a/mach/powerpc/libem/cfu8.s b/mach/powerpc/libem/cfu8.s index fd69ff521..710d2a65c 100644 --- a/mach/powerpc/libem/cfu8.s +++ b/mach/powerpc/libem/cfu8.s @@ -42,6 +42,8 @@ ! 1: yields r3 = the converted value. ! ! Debian's clang 3.5.0-10 and gcc 4.9.2-10 don't clamp the value -! before conversion. They avoid fsel and put the conditional branch -! before fctwiz. PowerPC 601 lacks fsel (but kernel might trap and -! emulate fsel). PowerPC 603, 604, G3, G4, G5 have fsel. +! before conversion. They avoid fsel and use the conditional branch +! 
to pick between 2 fctwiz instructions. +! +! PowerPC 601 lacks fsel (but kernel might trap and emulate fsel). +! PowerPC 603, 604, G3, G4, G5 have fsel. diff --git a/mach/powerpc/libem/fef4.s b/mach/powerpc/libem/fef4.s new file mode 100644 index 000000000..a338ed0a9 --- /dev/null +++ b/mach/powerpc/libem/fef4.s @@ -0,0 +1,48 @@ +.sect .text + +! Split a single-precision float into fraction and exponent, like +! frexpf(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp +! +! Stack: ( single -- fraction exponent ) + +.define .fef4 +.fef4: + lwz r3, 0(sp) ! r3 = word of float bits + + ! IEEE single = sign * 1.fraction * 2**(exponent - 127) + ! sign exponent fraction + ! 0 1..8 9..31 + ! + ! IEEE exponent = 126 in [0.5, 1) or (-1, -0.5]. + + extrwi. r6, r3, 8, 1 ! r6 = IEEE exponent + beq 3f ! jump if zero or denormalized + cmpwi r6, 255 + addi r5, r6, -126 ! r5 = our exponent + beq 2f ! jump if infinity or NaN + ! fall through if normalized + + ! Put fraction in [0.5, 1) or (-1, -0.5]. +1: li r6, 126 + insrwi r3, r6, 8, 1 ! IEEE exponent = 126 + ! fall through + +2: stw r3, 0(sp) ! push fraction + stwu r5, -4(sp) ! push exponent + blr + + ! Got denormalized number or zero, probably zero. + ! If zero, then exponent must also be zero. +3: extrwi. r6, r3, 23, 9 ! r6 = fraction + bne 4f ! jump if not zero + li r5, 0 ! exponent = 0 + b 2b + + ! Got denormalized number = 0.fraction * 2**-126 +4: cntlzw r5, r6 + addi r5, r5, -8 + slw r6, r6, r5 ! shift left to make 1.fraction + insrwi r3, r6, 23, 9 ! set new fraction + li r6, -126 + 1 + subf r5, r5, r6 ! r5 = our exponent + b 1b diff --git a/mach/powerpc/libem/fef8.s b/mach/powerpc/libem/fef8.s index 26a962d8b..aff5ea3b6 100644 --- a/mach/powerpc/libem/fef8.s +++ b/mach/powerpc/libem/fef8.s @@ -3,7 +3,7 @@ .sect .text ! Split a double-precision float into fraction and exponent, like -! frexp(3) in C. +! frexp(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp ! ! 
Stack: ( double -- fraction exponent ) @@ -12,42 +12,41 @@ lwz r3, 0(sp) ! r3 = high word (bits 0..31) lwz r4, 4(sp) ! r4 = low word (bits 32..63) - ! IEEE double-precision format: + ! IEEE double = sign * 1.fraction * 2**(exponent - 1023) ! sign exponent fraction ! 0 1..11 12..63 ! - ! To get fraction in [0.5, 1) or (-1, -0.5], we subtract 1022 - ! from the IEEE exponent. + ! IEEE exponent = 1022 in [0.5, 1) or (-1, -0.5]. extrwi. r6, r3, 11, 1 ! r6 = IEEE exponent - addi r5, r6, -1022 ! r5 = our exponent - beq 2f ! jump if zero or denormalized + beq 3f ! jump if zero or denormalized cmpwi r6, 2047 - beq 1f ! jump if infinity or NaN + addi r5, r6, -1022 ! r5 = our exponent + beq 2f ! jump if infinity or NaN ! fall through if normalized - ! Put fraction in [0.5, 1) or (-1, -0.5] by setting its - ! IEEE exponent to 1022. - rlwinm r3, r3, 0, 12, 0 ! clear old exponent - oris r3, r3, 1022 << 4 ! set new exponent + ! Put fraction in [0.5, 1) or (-1, -0.5]. +1: li r6, 1022 + insrwi r3, r6, 11, 1 ! IEEE exponent = 1022 ! fall through -1: stw r3, 0(sp) +2: stw r3, 0(sp) stw r4, 4(sp) ! push fraction stwu r5, -4(sp) ! push exponent blr -2: ! Got denormalized number or zero, probably zero. - extrwi r6, r3, 22, 12 + ! Got denormalized number or zero, probably zero. + ! If zero, then exponent must also be zero. +3: extrwi r6, r3, 20, 12 or. r6, r6, r4 ! r6 = high|low fraction - bne 3f ! jump if not zero + bne 4f ! jump if not zero li r5, 0 ! exponent = 0 - b 1b + b 2b -3: ! Got denormalized number, not zero. - lfd f0, 0(sp) - lis r6, ha16[_2_64] - lfd f1, lo16[_2_64](r6) + ! Got denormalized number = 0.fraction * 2**-1022 +4: lfd f0, 0(sp) + lis r6, ha16[.fs_2_64] + lfs f1, lo16[.fs_2_64](r6) fmul f0, f0, f1 ! multiply it by 2**64 stfd f0, 0(sp) lwz r3, 0(sp) @@ -57,7 +56,6 @@ b 1b .sect .rom -_2_64: - ! 
(double) 2**64 - .data4 0x43f00000 - .data4 0x00000000 +.fs_2_64: + !float 1.84467440737095516e+19 sz 4 + .data1 0137,0200,00,00 diff --git a/mach/powerpc/libem/fif4.s b/mach/powerpc/libem/fif4.s new file mode 100644 index 000000000..fc29b178c --- /dev/null +++ b/mach/powerpc/libem/fif4.s @@ -0,0 +1,64 @@ +.sect .text + +! Multiplies two single-precision floats, then splits the product into +! fraction and integer, both as floats, like modff(3) in C, +! http://en.cppreference.com/w/c/numeric/math/modf +! +! Stack: ( a b -- fraction integer ) + +.define .fif4 +.fif4: + lfs f1, 4(sp) + lfs f2, 0(sp) + fmuls f1, f1, f2 ! f1 = a * b + stfs f1, 0(sp) + lwz r3, 0(sp) ! r3 = word of float bits + + ! IEEE single = sign * 1.fraction * 2**(exponent - 127) + ! sign exponent fraction + ! 0 1..8 9..31 + ! + ! Subtract 127 from the IEEE exponent. If the result is from + ! 0 to 23, then the IEEE fraction has that many integer bits. + + extrwi r5, r3, 8, 1 ! r5 = IEEE exponent + addic. r5, r5, -127 ! r5 = nr of integer bits + blt 3f ! branch if no integer + cmpwi r5, 24 + bge 4f ! branch if no fraction + ! fall through if integer with fraction + + ! f1 has r5 = 0 to 23 integer bits in the IEEE fraction. + ! There are 23 - r5 fraction bits. + li r6, 23 + subf r6, r5, r6 + srw r3, r3, r6 + slw r3, r3, r6 ! clear fraction in word + ! fall through + +1: stw r3, 0(sp) + lfs f2, 0(sp) ! integer = high word, low word + fsubs f1, f1, f2 ! fraction = value - integer +2: stfs f1, 4(sp) ! push fraction + stfs f2, 0(sp) ! push integer + blr + + ! f1 is a fraction without integer (or zero). + ! Then integer is zero with same sign. +3: extlwi r3, r3, 1, 0 ! extract sign bit + stfs f1, 4(sp) ! push fraction + stw r3, 0(sp) ! push integer = zero with sign + blr + + ! f1 is an integer without fraction (or infinity or NaN). + ! Unless NaN, then fraction is zero with same sign. +4: fcmpu cr0, f1, f1 + bun cr0, 5f + extlwi r3, r3, 1, 0 ! extract sign bit + stw r3, 4(sp) ! 
push fraction = zero with sign + stfs f1, 0(sp) ! push integer + blr + + ! f1 is NaN, so both fraction and integer are NaN. +5: fmr f2, f1 + b 2b diff --git a/mach/powerpc/libem/fif8.s b/mach/powerpc/libem/fif8.s index bce4f8d24..f93a39ac2 100644 --- a/mach/powerpc/libem/fif8.s +++ b/mach/powerpc/libem/fif8.s @@ -1,7 +1,8 @@ .sect .text ! Multiplies two double-precision floats, then splits the product into -! fraction and integer, like modf(3) in C. On entry: +! fraction and integer, both as floats, like modf(3) in C, +! http://en.cppreference.com/w/c/numeric/math/modf ! ! Stack: ( a b -- fraction integer ) @@ -14,20 +15,18 @@ lwz r3, 0(sp) ! r3 = high word lwz r4, 4(sp) ! r4 = low word - ! IEEE double-precision format: + ! IEEE double = sign * 1.fraction * 2**(exponent - 1023) ! sign exponent fraction ! 0 1..11 12..63 ! ! Subtract 1023 from the IEEE exponent. If the result is from ! 0 to 51, then the IEEE fraction has that many integer bits. - ! (IEEE has an implicit 1 before its fraction. If the IEEE - ! fraction has 0 integer bits, we still have an integer.) extrwi r5, r3, 11, 1 ! r5 = IEEE exponent addic. r5, r5, -1023 ! r5 = nr of integer bits - blt 4f ! branch if no integer + blt 3f ! branch if no integer cmpwi r5, 52 - bge 5f ! branch if no fraction + bge 4f ! branch if no fraction cmpwi r5, 21 bge 6f ! branch if large integer ! fall through if small integer @@ -44,22 +43,38 @@ 1: stw r3, 0(sp) stw r4, 4(sp) lfd f2, 0(sp) ! integer = high word, low word -2: fsub f1, f1, f2 ! fraction = value - integer -3: stfd f1, 8(sp) ! push fraction + fsub f1, f1, f2 ! fraction = value - integer +2: stfd f1, 8(sp) ! push fraction stfd f2, 0(sp) ! push integer blr -4: ! f1 is a fraction without integer. - fsub f2, f1, f1 ! integer = zero - b 3b + ! f1 is a fraction without integer (or zero). + ! Then integer is zero with same sign. +3: extlwi r3, r3, 1, 0 ! extract sign bit + li r4, 0 + stfd f1, 8(sp) ! push fraction + stw r4, 4(sp) + stw r3, 0(sp) ! 
push integer = zero with sign + blr -5: ! f1 is an integer without fraction (or infinity or NaN). - fmr f2, f1 ! integer = f1 + ! f1 is an integer without fraction (or infinity or NaN). + ! Unless NaN, then fraction is zero with same sign. +4: fcmpu cr0, f1, f1 ! integer = f1 + bun cr0, 5f + extlwi r3, r3, 1, 0 ! extract sign bit + li r4, 0 + stw r4, 12(sp) + stw r3, 8(sp) ! push fraction = zero with sign + stfd f1, 0(sp) ! push integer + blr + + ! f1 is NaN, so both fraction and integer are NaN. +5: fmr f2, f1 b 2b -6: ! f1 has r5 = 21 to 51 to integer bits. + ! f1 has r5 = 21 to 51 to integer bits. ! Low word has 52 - r5 fraction bits. - li r6, 52 +6: li r6, 52 subf r6, r5, r6 srw r4, r4, r6 slw r4, r4, r6 ! clear fraction in low word diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index df06a5d49..1ea0b60ec 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -310,7 +310,7 @@ INSTRUCTIONS fadds FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5). fcmpo CR:wo, FREG:ro, FREG:ro cost(4, 5). fcmpo CR:wo, FSREG:ro, FSREG:ro cost(4, 5). - fctiwz FREG:wo, FREG:ro. + fctiwz FREG:wo, FREG:ro cost(4, 5). fdiv FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 35). fdivs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 21). fmr FPR:wo, FPR:ro cost(4, 5). @@ -2329,10 +2329,20 @@ PATTERNS with FSREG gen fneg {LOCAL, $2}, %1 + /* When a or b is NaN, then a < b, a <= b, a > b, a >= b + * should all be false. We can't make them false, because + * - EM's _cmf_ is only for ordered comparisons. + * - The peephole optimizer assumes (a < b) == !(a >= b). + * + * We do make a == b false and a != b true, by checking the + * eq (equal) bit or un (unordered) bit in cr0. + */ + pat cmf $1==4 /* Compare single */ with FSREG FSREG uses REG={COND_FS, %2, %1} - gen extlwi %a, %a, {C, 2}, {C, 0} + /* Extract lt, gt, un; put lt in sign bit. 
*/ + gen andisX %a, %a, {C, 0xd000} yields %a pat cmf teq $1==4 /* Single second == top */ @@ -2367,7 +2377,6 @@ PATTERNS proc cmf4zxx example cmf zeq with FSREG FSREG STACK - uses REG gen fcmpo cr0, %2, %1 bxx* {LABEL, $2} @@ -2420,6 +2429,13 @@ PATTERNS loc 4 cff + pat fef $1==4 /* Split fraction, exponent */ + leaving cal ".fef4" + + /* Multiply two singles, then split fraction, integer */ + pat fif $1==4 + leaving cal ".fif4" + /* Double-precision floating-point */ @@ -2471,10 +2487,13 @@ PATTERNS with FREG gen fneg {DLOCAL, $2}, %1 + /* To compare NaN, see comment above pat cmf $1==4 */ + pat cmf $1==8 /* Compare double */ with FREG FREG uses REG={COND_FD, %2, %1} - gen extlwi %a, %a, {C, 2}, {C, 0} + /* Extract lt, gt, un; put lt in sign bit. */ + gen andisX %a, %a, {C, 0xd000} yields %a pat cmf teq $1==8 /* Double second == top */ @@ -2482,7 +2501,7 @@ PATTERNS uses REG={COND_FD, %2, %1} yields {XEQ, %a} - pat cmf tne $1==8 /* Single second == top */ + pat cmf tne $1==8 /* Double second == top */ with FREG FREG uses REG={COND_FD, %2, %1} yields {XNE, %a} @@ -2509,7 +2528,6 @@ PATTERNS proc cmf8zxx example cmf zeq with FREG FREG STACK - uses REG gen fcmpo cr0, %2, %1 bxx* {LABEL, $2} From c6ceaac1afd70ea418c64be83de2784d3a1b488b Mon Sep 17 00:00:00 2001 From: George Koehler Date: Tue, 23 Jan 2018 13:55:39 -0500 Subject: [PATCH 35/55] Make osx386 and osxppc use _hol0.s like the other platforms. Because I'm lazy, I didn't make another copy of _hol0.s; I am building plat/linux/libsys/_hol0.s for OS X. --- plat/osx386/boot.s | 2 -- plat/osx386/libsys/build.lua | 1 + plat/osxppc/boot.s | 2 -- plat/osxppc/libsys/build.lua | 1 + 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/plat/osx386/boot.s b/plat/osx386/boot.s index 932a716e9..c10045dd6 100644 --- a/plat/osx386/boot.s +++ b/plat/osx386/boot.s @@ -58,8 +58,6 @@ begdata: .sect .bss begbss: -.define hol0 -.comm hol0, 8 ! line number and filename (for debugging) .define _errno .comm _errno, 4 ! 
Posix errno storage diff --git a/plat/osx386/libsys/build.lua b/plat/osx386/libsys/build.lua index 6a5b0e58c..3c2e96c3a 100644 --- a/plat/osx386/libsys/build.lua +++ b/plat/osx386/libsys/build.lua @@ -19,6 +19,7 @@ acklibrary { "./sigaction.s", "./stat.s", "./write.s", + "plat/linux/libsys/_hol0.s", "plat/linux386/libsys/trapno.s", "plat/osx/libsys/brk.c", "plat/osx/libsys/creat.c", diff --git a/plat/osxppc/boot.s b/plat/osxppc/boot.s index 1517ee377..8b1b7ab75 100644 --- a/plat/osxppc/boot.s +++ b/plat/osxppc/boot.s @@ -49,8 +49,6 @@ begdata: .sect .bss begbss: -.define hol0 -.comm hol0, 8 ! line number and filename (for debugging) .define _errno .comm _errno, 4 ! Posix errno storage diff --git a/plat/osxppc/libsys/build.lua b/plat/osxppc/libsys/build.lua index 49fc0c934..cff10f29b 100644 --- a/plat/osxppc/libsys/build.lua +++ b/plat/osxppc/libsys/build.lua @@ -19,6 +19,7 @@ acklibrary { "./sigaction.s", "./stat.s", "./write.s", + "plat/linux/libsys/_hol0.s", "plat/osx/libsys/brk.c", "plat/osx/libsys/creat.c", "plat/osx/libsys/isatty.c", From e3672bd66e4242ac5592295576717d568d819c4e Mon Sep 17 00:00:00 2001 From: George Koehler Date: Tue, 23 Jan 2018 18:18:40 -0500 Subject: [PATCH 36/55] Allow sp and fp on the fake stack. This simplifies parts of the PowerPC table and causes ncg to better decide whether to push sp or fp to the real stack, or coerce it to REG3, or coerce it to REG-REG3, or move it to a regvar. These better decisions remove extra _mr_ instructions. The idea comes from mach/powerpc/arm/table, where SP has a property STACKPOINTER and LB has LOCALBASE. I don't need two properties, so I make one property SPFP for both registers. 
--- mach/powerpc/ncg/table | 49 ++++++++++++------------------------------ 1 file changed, 14 insertions(+), 35 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 1ea0b60ec..a5ec75fc0 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -31,6 +31,7 @@ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ PROPERTIES GPR /* general-purpose register */ + SPFP /* sp or fp */ REG /* allocatable GPR */ REG3 /* coercion to r3 */ @@ -51,7 +52,8 @@ REGISTERS * r13, r14, ..., r31: GPR, REG regvar(reg_any). */ - r0, sp, fp, r12 : GPR. + r0, r12 : GPR. + sp, fp : GPR, SPFP. r3 : GPR, REG, REG3. r4, r5, r6, r7, r8, r9, r10, r11 : GPR, REG. @@ -238,7 +240,7 @@ SETS MEMORY = IND_V + FRAME_V. /* any integer from stack that we can easily move to GPR */ - INT_W = REG + CONST_STACK + SEX_B + SEX_H + + INT_W = SPFP + REG + CONST_STACK + SEX_B + SEX_H + SUM_RIS + SUM_RC + SUM_RL + SUM_RR + SUB_RR + NEG_R + MUL_RR + DIV_RR + DIV_RR_U + IND_ALL_B + IND_ALL_H + IND_ALL_W + @@ -717,19 +719,14 @@ TESTS STACKINGRULES - /* We don't allow GPR-REG on the stack. The intent is to ban - * r0 from the stack, but this also bans fp from the stack. - * This is odd because most other tables for ncg allow the - * frame pointer on the stack. - */ - from REG to STACK + from SPFP+REG to STACK gen - COMMENT("stack REG") + COMMENT("stack SPFP+REG") stwu %1, {IND_RC_W, sp, 0-4} - from INT_W-REG to STACK + from INT_W-SPFP-REG to STACK gen - COMMENT("stack INT_W-REG") + COMMENT("stack INT_W-SPFP-REG") move %1, RSCRATCH stwu RSCRATCH, {IND_RC_W, sp, 0-4} @@ -1146,26 +1143,15 @@ PATTERNS uses REG={LXFRAME, $1} gen move %1, {FRAME_D, $1, %a, $2, 8} - /* Programs use "lxl cal" to pass the static chain and call a - * nested procedure. This must push a token LXFRAME or the - * register fp to the real stack. 
*/ - - /* Local base of procedure on static chain */ - pat lxl nicelx($1) + pat lxl nicelx($1) /* Local base on static chain */ uses REG={LXFRAME, $1} yields %a /* Can't yield LXFRAME. */ pat lxl stl nicelx($1) && inreg($2)==reg_any kills regvar($2) gen move {LXFRAME, $1}, {REG_EXPR, regvar($2)} - pat lxl cal $1==0 /* Pass our local base to procedure */ - with STACK - gen stwu fp, {IND_RC_W, sp, 0-4} - leaving cal $2 - pat lxl $1==0 /* Our local base */ - uses REG=fp - yields %a /* Can't yield fp. */ + yields fp pat lxa $1==0 /* Our argument base */ yields {SUM_RC, fp, EM_BSIZE} @@ -2134,7 +2120,8 @@ PATTERNS bls pat bls /* Block move variable length */ - with REG REG REG + with REG SPFP+REG SPFP+REG + /* allows sp as %2, %3 */ /* ( src%3 dst%2 len%1 -- ) */ uses reusing %1, REG, REG, REG gen @@ -2230,22 +2217,14 @@ PATTERNS pat lor $1==1 /* Load stack pointer */ with STACK - uses REG=sp - yields %a /* Can't yield sp. */ + yields sp /* Next few patterns for "lor 1" appear in * lang/m2/libm2/par_misc.e */ - pat lor lor $1==1 && $2==1 /* Load sp twice */ - with STACK - gen stwu sp, {IND_RC_W, sp, 0-4} - leaving lor 1 - pat lor adp $1==1 && smalls($2) /* sp + constant */ with STACK - uses REG - gen addi %a, sp, {C, $2} - yields %a + yields {SUM_RC, sp, $2} /* Subtract stack pointer by doing %1 - (sp - 4) * because sp - 4 would point to %1. From e83aaca3ec61f214bb48b6e3122c59bac934b0bf Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 24 Jan 2018 15:17:32 -0500 Subject: [PATCH 37/55] Add some comments before I forget how this stuff works. 
--- mach/powerpc/libem/inn.s | 3 +++ mach/powerpc/libem/rck.s | 3 +++ mach/powerpc/libem/set.s | 3 +++ mach/powerpc/ncg/table | 48 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 56 insertions(+), 1 deletion(-) diff --git a/mach/powerpc/libem/inn.s b/mach/powerpc/libem/inn.s index 8925e776e..32275c117 100644 --- a/mach/powerpc/libem/inn.s +++ b/mach/powerpc/libem/inn.s @@ -5,6 +5,9 @@ /* Tests a bit in a bitset on the stack. * * Stack: ( bitset bitnum setsize -- bool ) + * + * Some back ends push false if bitnum is too large. We don't because + * the compilers tend to pass a small enough bitnum. */ .define .inn diff --git a/mach/powerpc/libem/rck.s b/mach/powerpc/libem/rck.s index f1cf7f848..1d07d5711 100644 --- a/mach/powerpc/libem/rck.s +++ b/mach/powerpc/libem/rck.s @@ -2,6 +2,9 @@ ! Bounds check. Traps if the value is out of range. ! Stack: ( value descriptor -- value ) +! +! This ".rck" only works with 4-byte integers. The name is ".rck" and +! not ".rck4" because many back ends only do rck with the word size. .define .rck .rck: diff --git a/mach/powerpc/libem/set.s b/mach/powerpc/libem/set.s index 3c4a9e579..8faf84a09 100644 --- a/mach/powerpc/libem/set.s +++ b/mach/powerpc/libem/set.s @@ -2,6 +2,9 @@ ! Create singleton set. ! Stack: ( bitnumber size -- set ) +! +! Some back ends trap ESET if bitnumber is out of range. We don't +! because the compilers tend to pass a valid bitnumber. .define .set .set: diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index a5ec75fc0..efdb681db 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -1,3 +1,32 @@ +/* + * PowerPC table for ncg + * + * David Given created this table. + * George Koehler made many changes in years 2016 to 2018. + * + * This back end provides 4-byte integers, 4-byte floats, and 8-byte + * floats. It should provide enough of EM for the ACK's compilers. + * - It doesn't provide "mon" (monitor call) nor "lor 2", "str 2" + * (heap pointer). 
Programs should call procedures in libsys to + * make system calls or allocate heap memory. + * - It generates only a few EM traps: + * - EARRAY from aar, lar, sar + * - ERANGE from rck + * - ECASE from csa, csb + * - It uses floating-point registers to move 8-byte values that + * aren't floats. This might cause extra FPU context switches in + * programs that don't use floating point. + * + * The EM stack is less than optimal for PowerPC, and incompatible + * with the calling conventions of other compilers (like gcc). + * - EM and ncg use the stack to pass parameters to procedures. For + * PowerPC, this is probably slower than passing them in registers. + * - This back end misaligns some 8-byte floats, because EM's stack + * has only 4-byte alignment. (This kind of misalignment also + * happened in IBM's AIX and Apple's Mac OS, where data structures + * had 8-byte floats with only 4-byte alignment.) + */ + EM_WSIZE = 4 EM_PSIZE = 4 EM_BSIZE = 8 /* two words saved in call frame */ @@ -46,6 +75,15 @@ PROPERTIES REGISTERS /* + * We use r1 as stack pointer and r2 as frame pointer. + * Our assembler has aliases sp -> r1 and fp -> r2. + * + * We preserve r13 to r31 and f14 to f31 across function + * calls to mimic other compilers (like gcc). See + * - http://refspecs.linuxbase.org/elf/elfspec_ppc.pdf + * - https://github.com/ryanarn/powerabi -> chap3-elf32abi.sgml + * - Apple's "32-bit PowerPC Function Calling Conventions" + * * When ncg allocates regvars, it seems to start with the last * register in the first class. To encourage ncg to allocate * them from r31 down, we list them in one class as @@ -85,7 +123,7 @@ REGISTERS : FSREG regvar(reg_float). lr, ctr : SPR. - cr0 : CR. + cr0 : CR. /* We use cr0, ignore cr1 to cr7. */ /* The stacking rules can't allocate registers. We use these * scratch registers to stack tokens. 
@@ -1405,6 +1443,10 @@ PATTERNS /* Word arithmetic */ + /* Like most back ends, this one doesn't trap EIOVFL, so it + * ignores overflow in signed integers. + */ + pat adi $1==4 /* Add word (second + top) */ with REG REG yields {SUM_RR, %1, %2} @@ -1468,6 +1510,10 @@ PATTERNS /* Bitwise logic */ + /* This back end doesn't know how to combine shifts and + * bitwise ops to emit rlwinm, rlwnm, or rlwimi instructions. + */ + pat and $1==4 /* AND word */ with REG NOT_R yields {ANDC_RR, %1, %2.reg} From 7c9c4f82fdda6dc02ce0ff24da958060d6a8f75a Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 26 Jan 2018 20:08:03 -0500 Subject: [PATCH 38/55] Get `ack -mosxppc -g` to partly work with gdb. Copy and adapt code from mach/{i386,m68020}/ncg/mach.c to pass the debugging stabs from EM to assembly. The next tools (as, led, cv) already know how to put the stabs in the Mach-o executable. Modify the function prolog/prologue so gdb uses fp, not sp, for N_LSYM and N_PSYM stabs. Simplify prolog() by reducing differences between stabs and no stabs, and zero and nonzero framesize. For files without stabs, the new prolog has the same number of instructions and memory accesses as the old prolog, and seems to run at about the same speed on my PowerPC Mac. This is enough to see some info for global and local variables in gdb for Mac OS X. I still can't get a backtrace; gdb gets confused because EM and ncg don't link 0(sp) to the previous stack frame. I don't expect `ack -mlinuxppc -g` to work with gdb for Linux, because we prepend underscores to the symbol table, which is correct for Mach-o but wrong for ELF. 
--- mach/powerpc/ncg/mach.c | 88 ++++++++++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 22 deletions(-) diff --git a/mach/powerpc/ncg/mach.c b/mach/powerpc/ncg/mach.c index 06e39709f..1a1d98d6c 100644 --- a/mach/powerpc/ncg/mach.c +++ b/mach/powerpc/ncg/mach.c @@ -10,8 +10,13 @@ #include #include +#include +static int writing_stabs = 0; + +#ifdef REGVARS static long framesize; +#endif void con_part(int sz, word w) @@ -51,39 +56,42 @@ con_mult(word sz) #define FL_MSB_AT_LOW_ADDRESS 1 #include -static void -emit_prolog(void) -{ - fprintf(codefile, "mfspr r0, lr\n"); - if (framesize) { - fprintf(codefile, "addi sp, sp, %ld\n", -framesize - 8); - fprintf(codefile, "stw fp, %ld(sp)\n", framesize); - fprintf(codefile, "stw r0, %ld(sp)\n", framesize + 4); - fprintf(codefile, "addi fp, sp, %ld\n", framesize); - } else { - /* optimize for framesize == 0 */ - fprintf(codefile, "stwu fp, -8(sp)\n"); - fprintf(codefile, "stw r0, 4(sp)\n"); - fprintf(codefile, "mr fp, sp\n"); - } -} - void prolog(full nlocals) { - framesize = nlocals; + /* + * For N_LSYM and N_PSYM stabs, we want gdb to use fp, not sp. + * The trick is to use "stwu sp, _(sp)" then "addi fp, sp, 0" + * before we save lr with "stw r0, _(sp)". + * + * Tried with Apple's gdb-696. Refer to + * - gdb-696/src/gdb/rs6000-tdep.c, skip_prologue(), line 1101 + * - gdb-696/src/gdb/macosx/ppc-macosx-frameinfo.c, + * ppc_parse_instructions(), line 717 + * https://opensource.apple.com/release/developer-tools-25.html + */ + fprintf(codefile, "mfspr r0, lr\n"); + if (writing_stabs) { + fprintf(codefile, "stwu sp, -8(sp)\n"); /* for gdb */ + fprintf(codefile, "stw fp, 0(sp)\n"); + } else + fprintf(codefile, "stwu fp, -8(sp)\n"); + fprintf(codefile, "addi fp, sp, 0\n"); /* for gdb */ + fprintf(codefile, "stw r0, 4(sp)\n"); #ifdef REGVARS - /* f_regsave() will call emit_prolog() */ + framesize = nlocals; + /* regsave() increases framesize; f_regsave() adjusts sp. 
*/ #else - emit_prolog(); + if (nlocals) + fprintf(codefile, "addi sp, sp, %ld\n", -nlocals); #endif } void mes(word type) { - int argt ; + int argt, a1, a2 ; switch ( (int)type ) { case ms_ext : @@ -98,6 +106,41 @@ mes(word type) break ; } } + case ms_stb: + argt = getarg(str_ptyp | cst_ptyp); + if (argt == sp_cstx) + fputs(".symb \"\", ", codefile); + else { + fprintf(codefile, ".symb \"%s\", ", str); + argt = getarg(cst_ptyp); + } + a1 = argval; + argt = getarg(cst_ptyp); + a2 = argval; + argt = getarg(cst_ptyp|nof_ptyp|sof_ptyp|ilb_ptyp|pro_ptyp); + if (a1 == N_PSYM) { + /* Change offset from AB into offset from + the frame pointer. + */ + argval += 8; + } + fprintf(codefile, "%s, 0x%x, %d\n", strarg(argt), a1, a2); + argt = getarg(end_ptyp); + break; + case ms_std: + writing_stabs = 1; /* set by first "mes 13,...,100,0" */ + argt = getarg(str_ptyp | cst_ptyp); + if (argt == sp_cstx) + str[0] = '\0'; + else { + argt = getarg(cst_ptyp); + } + swtxt(); + fprintf(codefile, ".symd \"%s\", 0x%x,", str, (int) argval); + argt = getarg(cst_ptyp); + fprintf(codefile, "%d\n", (int) argval); + argt = getarg(end_ptyp); + break; default : while ( getarg(any_ptyp) != sp_cend ) ; break ; @@ -239,7 +282,8 @@ f_regsave(void) { int reg; - emit_prolog(); + if (framesize) + fprintf(codefile, "addi sp, sp, %ld\n", -framesize); saveloadregs("stw", "stmw", "stfd"); /* From 3dae9e49ccd6b753f412995d873a6760c38fea1a Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sat, 27 Jan 2018 15:33:43 -0500 Subject: [PATCH 39/55] Use subfic (val - reg) and mulli (reg * val). In the instruction list, put /* kills xer */ for sraw, srawi, subfic; and correct the (now unused) "add." and "lfdu". Change MACHOPT_F from -m3 to -m2. This changes the code for 15 * i from slwi r3,r4,4 subfic r5,r4,0 add r3,r3,r5 to mulli r3,r4,15 If the sequence "slwi subfic add" takes 3 cycles and 12 bytes, and mulli takes 3 cycles and 4 bytes, then mulli is better. 
--- mach/powerpc/ncg/table | 30 ++++++++++++++++++++++++------ plat/linuxppc/descr | 2 +- plat/osxppc/descr | 2 +- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index efdb681db..82cada71a 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -170,8 +170,10 @@ TOKENS SUM_RL = { GPR reg; ADDR adr; } 4. /* reg + lo16[adr] */ SUM_RR = { GPR reg1; GPR reg2; } 4. /* reg1 + reg2 */ + SUB_CR = { INT val; GPR reg; } 4. /* val - reg */ SUB_RR = { GPR reg1; GPR reg2; } 4. /* reg1 - reg2 */ NEG_R = { GPR reg; } 4. /* -reg */ + MUL_RC = { GPR reg; INT val; } 4. /* reg * val */ MUL_RR = { GPR reg1; GPR reg2; } 4. /* reg1 * reg2 */ DIV_RR = { GPR reg1; GPR reg2; } 4. /* reg1 / reg2 signed */ DIV_RR_U = { GPR reg1; GPR reg2; } 4. /* reg1 / reg2 unsigned */ @@ -280,9 +282,10 @@ SETS /* any integer from stack that we can easily move to GPR */ INT_W = SPFP + REG + CONST_STACK + SEX_B + SEX_H + SUM_RIS + SUM_RC + SUM_RL + SUM_RR + - SUB_RR + NEG_R + MUL_RR + DIV_RR + DIV_RR_U + + SUB_CR + SUB_RR + NEG_R + + MUL_RC + MUL_RR + DIV_RR + DIV_RR_U + IND_ALL_B + IND_ALL_H + IND_ALL_W + - FRAME_B + FRAME_H + FRAME_H_S + FRAME_W + + FRAME_B + FRAME_H + FRAME_H_S + FRAME_W + NOT_R + AND_RIS + AND_RC + AND_RR + ANDC_RR + OR_RIS + OR_RC + OR_RR + ORC_RR + XOR_RIS + XOR_RC + XOR_RR + NAND_RR + NOR_RR + EQV_RR + @@ -307,7 +310,7 @@ INSTRUCTIONS cost(4, 1) /* space, time */ add GPR:wo, GPR:ro, GPR:ro. - addX "add." GPR:wo, GPR:ro, GPR:ro. + addX "add." GPR:wo:cc, GPR:ro, GPR:ro. addi GPR:wo, GPR:ro, CONST+LABEL_LO:ro. li GPR:wo, CONST:ro. addis GPR:wo, GPR:ro, CONST+LABEL_HI+LABEL_HA:ro. @@ -365,7 +368,7 @@ INSTRUCTIONS lbz GPR:wo, SET_RC_B:ro cost(4, 3). lbzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). lfd FPR+DLOCAL:wo, SET_RC_D:ro cost(4, 5). - lfdu FPR:wo, IND_RC_D:ro cost(4, 5). + lfdu FPR:wo, IND_RC_D:rw cost(4, 5). lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5). lfs FSREG+LOCAL:wo, SET_RC_W:ro cost(4, 4). 
lfsu FSREG:wo, IND_RC_W:rw cost(4, 4). @@ -380,6 +383,7 @@ INSTRUCTIONS mfcr GPR:wo cost(4,2). mfspr GPR:wo, SPR:ro cost(4, 3). mtspr SPR:wo, GPR:ro cost(4, 2). + mulli GPR:wo, GPR:ro, CONST:ro cost(4, 3). mullw GPR:wo, GPR:ro, GPR:ro cost(4, 4). nand GPR:wo, GPR:ro, GPR:ro. neg GPR:wo, GPR:ro. @@ -401,8 +405,8 @@ INSTRUCTIONS rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro. rotlw GPR+LOCAL:wo, GPR:ro, GPR:ro. slw GPR+LOCAL:wo, GPR:ro, GPR:ro. - sraw GPR+LOCAL:wo, GPR:ro, GPR:ro cost(4, 2). - srawi GPR+LOCAL:wo, GPR:ro, CONST:ro cost(4, 2). + sraw GPR+LOCAL:wo, GPR:ro, GPR:ro /* kills xer */ cost(4, 2). + srawi GPR+LOCAL:wo, GPR:ro, CONST:ro /* kills xer */ cost(4, 2). srw GPR+LOCAL:wo, GPR:ro, GPR:ro. stb GPR:ro, SET_RC_B:rw cost(4, 3). stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3). @@ -418,6 +422,7 @@ INSTRUCTIONS stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3). stwu GPR:ro, IND_RC_W:rw cost(4, 3). subf GPR:wo, GPR:ro, GPR:ro. + subfic GPR:wo, GPR:ro, CONST:ro /* kills xer */. xor GPR:wo, GPR:ro, GPR:ro. xori GPR:wo, GPR:ro, CONST:ro. xoris GPR:wo, GPR:ro, CONST:ro. 
@@ -490,6 +495,10 @@ MOVES /* Other arithmetic */ + from SUB_CR to GPR + /* val - reg -> subtract reg from val */ + gen subfic %2, %1.reg, {C, %1.val} + from SUB_RR to GPR /* reg1 - reg2 -> subtract reg2 from reg1 */ gen subf %2, %1.reg2, %1.reg1 @@ -497,6 +506,9 @@ MOVES from NEG_R to GPR gen neg %2, %1.reg + from MUL_RC to GPR + gen mulli %2, %1.reg, {C, %1.val} + from MUL_RR to GPR gen mullw %2, %1.reg1, %1.reg2 @@ -1471,6 +1483,8 @@ PATTERNS yields {SUB_RR, %2, %1} with CONST2_WHEN_NEG REG yields {SUM_RC, %2, 0-%1.val} + with REG CONST2 + yields {SUB_CR, %2.val, %1} with CONST_HI_ZR REG yields {SUM_RIS, %2, his(0-%1.val)} with CONST_STACK-CONST2_WHEN_NEG-CONST_HI_ZR REG @@ -1482,6 +1496,10 @@ PATTERNS yields {NEG_R, %1} pat mli $1==4 /* Multiply word (second * top) */ + with CONST2 REG + yields {MUL_RC, %2, %1.val} + with REG CONST2 + yields {MUL_RC, %1, %2.val} with REG REG yields {MUL_RR, %2, %1} diff --git a/plat/linuxppc/descr b/plat/linuxppc/descr index 1bbb9fbd9..7f6f8fc02 100644 --- a/plat/linuxppc/descr +++ b/plat/linuxppc/descr @@ -19,7 +19,7 @@ var PLATFORM=linuxppc var PLATFORMDIR={EM}/share/ack/{PLATFORM} var CPP_F=-D__unix var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x10000054 -var MACHOPT_F=-m3 +var MACHOPT_F=-m2 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr # Override the setting in fe so that files compiled for linuxppc can see diff --git a/plat/osxppc/descr b/plat/osxppc/descr index 5f416c44c..072a79dbc 100644 --- a/plat/osxppc/descr +++ b/plat/osxppc/descr @@ -19,7 +19,7 @@ var PLATFORM=osxppc var PLATFORMDIR={EM}/share/ack/{PLATFORM} var CPP_F=-D__unix var ALIGN=-a0:4 -a1:4 -a2:4096 -a3:4 -b0:0x129c -var MACHOPT_F=-m3 +var MACHOPT_F=-m2 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr # Override the setting in fe so that files compiled for osxppc can see From cdde55535ef4417b0e08eeb6248e0b635fa0968a Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sat, 27 Jan 2018 16:35:48 -0500 Subject: [PATCH 40/55] For osxppc, change size 8 to 
alignment 4. You may need to delete and recompile some .o files! This changes the alignment of 8-byte values in C structs to match what Apple's gcc does. See Apple's "32-bit PowerPC Function Calling Conventions" at https://developer.apple.com /library/content/documentation/DeveloperTools/Conceptual/LowLevelABI /100-32-bit_PowerPC_Function_Calling_Conventions/32bitPowerPC.html --- plat/osxppc/descr | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/plat/osxppc/descr b/plat/osxppc/descr index 072a79dbc..77fc45260 100644 --- a/plat/osxppc/descr +++ b/plat/osxppc/descr @@ -10,10 +10,11 @@ var l={w} var la={w} var f={w} var fa={w} +# Size 8 has alignment 4 in Mac OS, 8 in Linux. var d=8 -var da={d} +var da=4 var x=8 -var xa={x} +var xa=4 var ARCH=powerpc var PLATFORM=osxppc var PLATFORMDIR={EM}/share/ack/{PLATFORM} From b38fcdded3d6a258560f0ad4bbaf093b8a78a749 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sat, 27 Jan 2018 20:09:16 -0500 Subject: [PATCH 41/55] Add tests for clearing BSS, copying C structs. The new tests are bss_e.c, structcopy_e.c. We do clear the BSS before calling _m_a_i_n, so fix the comments in the other tests. 
--- tests/plat/_dummy_e.c | 2 +- tests/plat/bss_e.c | 27 ++++++ tests/plat/bugs/bug-62-notvar_var_e.c | 2 +- tests/plat/build.lua | 18 ++-- tests/plat/csa_e.c | 4 +- tests/plat/csb_e.c | 4 +- tests/plat/doublecmp_e.c | 4 +- tests/plat/from_d_to_si_e.c | 4 +- tests/plat/from_d_to_ui_e.c | 4 +- tests/plat/from_si_to_d_e.c | 4 +- tests/plat/from_ui_to_d_e.c | 4 +- tests/plat/intadd_e.c | 4 +- tests/plat/intcmp_e.c | 4 +- tests/plat/intdiv_e.c | 4 +- tests/plat/intrem_e.c | 4 +- tests/plat/intshift_e.c | 4 +- tests/plat/intsub_e.c | 4 +- tests/plat/structcopy_e.c | 113 ++++++++++++++++++++++++++ 18 files changed, 173 insertions(+), 41 deletions(-) create mode 100644 tests/plat/bss_e.c create mode 100644 tests/plat/structcopy_e.c diff --git a/tests/plat/_dummy_e.c b/tests/plat/_dummy_e.c index 48104b5aa..39262eaaa 100644 --- a/tests/plat/_dummy_e.c +++ b/tests/plat/_dummy_e.c @@ -1,6 +1,6 @@ #include "test.h" -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT(0 == 0); diff --git a/tests/plat/bss_e.c b/tests/plat/bss_e.c new file mode 100644 index 000000000..547e7f7f1 --- /dev/null +++ b/tests/plat/bss_e.c @@ -0,0 +1,27 @@ +#include "test.h" + +/* + * EM puts these variables in BSS. Their initial values must be zero. + * Some platforms, like Linux, clear the BSS before they run the + * program. For other platforms, like pc86, we clear the BSS in + * boot.s before we call _m_a_i_n. + */ +char c; +int array[9000]; +short s; + +/* Bypasses the CRT, so there's no stdio. 
*/ +void _m_a_i_n(void) +{ + int bad, i; + + ASSERT(c == 0); + bad = 0; + for (i = 0; i < sizeof(array) / sizeof(array[0]); i++) { + if(array[i]) + bad++; + } + ASSERT(bad == 0); + ASSERT(s == 0); + finished(); +} diff --git a/tests/plat/bugs/bug-62-notvar_var_e.c b/tests/plat/bugs/bug-62-notvar_var_e.c index d3813bb91..cde84eed1 100644 --- a/tests/plat/bugs/bug-62-notvar_var_e.c +++ b/tests/plat/bugs/bug-62-notvar_var_e.c @@ -40,7 +40,7 @@ void c(int i, int tru, int fal) { ASSERT((i != i) == fal); } -/* Bypasses the CRT. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { a(); b(); diff --git a/tests/plat/build.lua b/tests/plat/build.lua index 42ca441d0..26676b0b1 100644 --- a/tests/plat/build.lua +++ b/tests/plat/build.lua @@ -9,22 +9,14 @@ definerule("plat_testsuite", -- Remember this is executed from the caller's directory; local -- target names will resolve there. local testfiles = filenamesof( + -- added structcopy_e.c "tests/plat/*.c", - "tests/plat/dup_e.e", - "tests/plat/exg_e.e", - "tests/plat/inn_e.e", - "tests/plat/rck_e.e", - "tests/plat/rotate_e.e", + "tests/plat/*.e", "tests/plat/*.p", "tests/plat/b/*.b", - "tests/plat/bugs/bug-22-inn_mod.mod", - "tests/plat/bugs/bug-62-notvar_var_e.c", - "tests/plat/m2/ConvTest_mod.mod", - "tests/plat/m2/NestProc_mod.mod", - "tests/plat/m2/OpenArray_mod.mod", - "tests/plat/m2/SemaTest_mod.mod", - "tests/plat/m2/Set100_mod.mod", - "tests/plat/m2/StringTest_mod.mod" + "tests/plat/bugs/*.c", + "tests/plat/bugs/*.mod", + "tests/plat/m2/*.mod" ) acklibrary { diff --git a/tests/plat/csa_e.c b/tests/plat/csa_e.c index 355b75ee7..470fbebc5 100644 --- a/tests/plat/csa_e.c +++ b/tests/plat/csa_e.c @@ -11,7 +11,7 @@ int csa(int i) } } -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. 
*/ void _m_a_i_n(void) { ASSERT(csa(0) == 0); @@ -23,4 +23,4 @@ void _m_a_i_n(void) ASSERT(csa(6) == 0); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/csb_e.c b/tests/plat/csb_e.c index c86d31fa6..38ce05402 100644 --- a/tests/plat/csb_e.c +++ b/tests/plat/csb_e.c @@ -11,7 +11,7 @@ int csa(int i) } } -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT(csa(0) == 0); @@ -23,4 +23,4 @@ void _m_a_i_n(void) ASSERT(csa(600) == 0); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/doublecmp_e.c b/tests/plat/doublecmp_e.c index f6c1582dc..b6fe5bbad 100644 --- a/tests/plat/doublecmp_e.c +++ b/tests/plat/doublecmp_e.c @@ -4,7 +4,7 @@ double one = 1.0; double zero = 0.0; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT(zero == zero); @@ -17,4 +17,4 @@ void _m_a_i_n(void) ASSERT(one >= one); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/from_d_to_si_e.c b/tests/plat/from_d_to_si_e.c index 7f51e6c5b..bc06c755c 100644 --- a/tests/plat/from_d_to_si_e.c +++ b/tests/plat/from_d_to_si_e.c @@ -8,7 +8,7 @@ double minusone = -1.0; double big = (double)INT_MAX; double minusbig = (double)INT_MIN; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT((int)zero == 0); @@ -18,4 +18,4 @@ void _m_a_i_n(void) ASSERT((int)minusbig == INT_MIN); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/from_d_to_ui_e.c b/tests/plat/from_d_to_ui_e.c index 811780b87..7d18ca9e5 100644 --- a/tests/plat/from_d_to_ui_e.c +++ b/tests/plat/from_d_to_ui_e.c @@ -6,7 +6,7 @@ double one = 1.0; double zero = 0.0; double big = (double)UINT_MAX; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. 
*/ void _m_a_i_n(void) { ASSERT((unsigned int)zero == 0); @@ -14,4 +14,4 @@ void _m_a_i_n(void) ASSERT((unsigned int)big == UINT_MAX); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/from_si_to_d_e.c b/tests/plat/from_si_to_d_e.c index b6c7a25ba..172361dfa 100644 --- a/tests/plat/from_si_to_d_e.c +++ b/tests/plat/from_si_to_d_e.c @@ -8,7 +8,7 @@ int minusone = -1; int big = INT_MAX; int minusbig = INT_MIN; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT((double)zero == 0.0); @@ -18,4 +18,4 @@ void _m_a_i_n(void) /* ASSERT((double)minusbig == (double)INT_MIN); FIXME: fails for now */ finished(); -} \ No newline at end of file +} diff --git a/tests/plat/from_ui_to_d_e.c b/tests/plat/from_ui_to_d_e.c index b8e017c99..383d9afad 100644 --- a/tests/plat/from_ui_to_d_e.c +++ b/tests/plat/from_ui_to_d_e.c @@ -6,7 +6,7 @@ unsigned int one_u = 1; unsigned int zero_u = 0; unsigned int big_u = UINT_MAX; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT((double)zero_u == 0.0); @@ -14,4 +14,4 @@ void _m_a_i_n(void) ASSERT((double)big_u == (double)UINT_MAX); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/intadd_e.c b/tests/plat/intadd_e.c index 8e4868a62..94549814c 100644 --- a/tests/plat/intadd_e.c +++ b/tests/plat/intadd_e.c @@ -6,7 +6,7 @@ int one = 1; int zero = 0; int minusone = -1; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. 
*/ void _m_a_i_n(void) { ASSERT((two + one) == 3); @@ -28,4 +28,4 @@ void _m_a_i_n(void) ASSERT(((unsigned int)-1 + (unsigned int)two) == 1); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/intcmp_e.c b/tests/plat/intcmp_e.c index dd7f1da75..72cfc06b1 100644 --- a/tests/plat/intcmp_e.c +++ b/tests/plat/intcmp_e.c @@ -4,7 +4,7 @@ int one = 1; int zero = 0; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT(zero == zero); @@ -62,4 +62,4 @@ void _m_a_i_n(void) ASSERT((unsigned int)1 >= (unsigned int)one); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/intdiv_e.c b/tests/plat/intdiv_e.c index c90964ced..cab76cdad 100644 --- a/tests/plat/intdiv_e.c +++ b/tests/plat/intdiv_e.c @@ -6,7 +6,7 @@ int two = 2; int one = 1; int zero = 0; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT((three / two) == 1); @@ -25,4 +25,4 @@ void _m_a_i_n(void) ASSERT((3 / -two) == -1); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/intrem_e.c b/tests/plat/intrem_e.c index 40f68d654..424152106 100644 --- a/tests/plat/intrem_e.c +++ b/tests/plat/intrem_e.c @@ -6,7 +6,7 @@ int two = 2; int one = 1; int zero = 0; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT((three % two) == 1); @@ -25,4 +25,4 @@ void _m_a_i_n(void) ASSERT((3 % -two) == 1); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/intshift_e.c b/tests/plat/intshift_e.c index 3cc6d52f9..08ef05ca1 100644 --- a/tests/plat/intshift_e.c +++ b/tests/plat/intshift_e.c @@ -6,7 +6,7 @@ int one = 1; int zero = 0; int minusone = -1; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. 
*/ void _m_a_i_n(void) { ASSERT((one <>(unsigned int)1) == (UINT_MAX>>1)); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/intsub_e.c b/tests/plat/intsub_e.c index d8f67d3a3..b0cf08ae6 100644 --- a/tests/plat/intsub_e.c +++ b/tests/plat/intsub_e.c @@ -7,7 +7,7 @@ int one = 1; int zero = 0; int minusone = -1; -/* Bypasses the CRT, so there's no stdio or BSS initialisation. */ +/* Bypasses the CRT, so there's no stdio. */ void _m_a_i_n(void) { ASSERT((two - one) == 1); @@ -29,4 +29,4 @@ void _m_a_i_n(void) ASSERT(((unsigned int)1 - (unsigned int)two) == UINT_MAX); finished(); -} \ No newline at end of file +} diff --git a/tests/plat/structcopy_e.c b/tests/plat/structcopy_e.c new file mode 100644 index 000000000..74a9e2d30 --- /dev/null +++ b/tests/plat/structcopy_e.c @@ -0,0 +1,113 @@ +#include "test.h" + +/* ACK's C compiler uses EM's loi, sti, blm, or an inline loop to copy + * these structs. The compiler doesn't call memcpy() or other + * functions in libc, so this test passes without linking the CRT. + */ + +struct c5 { /* not a whole number of words */ + char one[5]; +}; + +struct ii { /* two words */ + int one; + int two; +}; + +struct iii { /* three words */ + int one; + int two; + int three; +}; + +int equal5(char *a, char *b) { /* a, b must have 5 characters */ + int i; + + for (i = 0; i < 5; i++) + if (a[i] != b[i]) return 0; + return 1; +} + +struct c5 make_c5(char *str) { /* str must have 5 characters */ + struct c5 out; + int i; + + for (i = 0; i < 5; i++) + out.one[i] = str[i]; + return out; +} + +struct ii make_ii(int i, int j) { + struct ii out; + + out.one = i; + out.two = j; + return out; +} + +struct iii make_iii(struct ii in, int k) { + struct iii out; + + out.one = in.one; + out.two = in.two; + out.three = k; + return out; +} + +struct c5 rotate_left_c5(struct c5 in) { + int i; + char c = in.one[0]; + + /* Modifies our copy of _in_, not caller's copy. 
*/ + for (i = 0; i < 4; i++) + in.one[i] = in.one[i + 1]; + in.one[4] = c; + return in; +} + +struct iii rotate_left_iii(struct iii in) { + int i = in.one; + + /* Modifies our copy of _in_, not caller's copy. */ + in.one = in.two; + in.two = in.three; + in.three = i; + return in; +} + +/* Bypasses the CRT, so there's no stdio. */ +void _m_a_i_n(void) { + struct c5 earth, heart, dup_heart, rol_heart; + struct ii pair, dup_pair; + struct iii triple, dup_triple, rol_triple; + + earth = make_c5("earth"); + heart = make_c5("heart"); + dup_heart = heart; + rol_heart = rotate_left_c5(heart); + ASSERT(equal5(earth.one, "earth")); + ASSERT(equal5(heart.one, "heart")); + ASSERT(equal5(dup_heart.one, "heart")); + ASSERT(equal5(rol_heart.one, "earth")); + + pair = make_ii(29, 31); + dup_pair = pair; + triple = make_iii(pair, -9); + dup_triple = triple; + rol_triple = rotate_left_iii(triple); + ASSERT(pair.one == 29); + ASSERT(pair.two == 31); + ASSERT(dup_pair.one == 29); + ASSERT(dup_pair.two == 31); + ASSERT(triple.one == 29); + ASSERT(triple.two == 31); + ASSERT(triple.three == -9); + ASSERT(dup_triple.one == 29); + ASSERT(dup_triple.two == 31); + ASSERT(dup_triple.three == -9); + ASSERT(rol_triple.one == 31); + ASSERT(rol_triple.two == -9); + ASSERT(rol_triple.three == 29); + + finished(); +} From b3c0a767a5b548a5a392d90d24fbabdc8e9ba148 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sat, 27 Jan 2018 21:41:13 -0500 Subject: [PATCH 42/55] Sync qemuppc with linuxppc. - Don't reverse bitfields; do use ego (41f3bf7). - Use MACHOPT_F=-m2 (3dae9e4). - Remove old trap.s (26de4c1). At this commit, one can build qemuppc with mcg by editing the root build.lua to uncomment "qemuppc" in "vars.plats". If one also uncomments "qemuppc" from "vars.plats_with_tests", then mcg fails to build the tests. If one uses ncg (by editing plat/qemuppc/descr to change "mcg" to "ncg"), then the tests pass. 
--- plat/qemuppc/descr | 7 ++-- plat/qemuppc/libsys/trap.s | 65 -------------------------------------- 2 files changed, 2 insertions(+), 70 deletions(-) delete mode 100644 plat/qemuppc/libsys/trap.s diff --git a/plat/qemuppc/descr b/plat/qemuppc/descr index f5191b249..9d1a80427 100644 --- a/plat/qemuppc/descr +++ b/plat/qemuppc/descr @@ -19,11 +19,8 @@ var PLATFORM=qemuppc var PLATFORMDIR={EM}/share/ack/{PLATFORM} var CPP_F=-D__unix var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x01000000 -var C_LIB={PLATFORMDIR}/libc-ansi.a -# bitfields reversed for compatibility with (g)cc. -var CC_ALIGN=-Vr -var OLD_C_LIB={C_LIB} -var MACHOPT_F= +var MACHOPT_F=-m2 +var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr # Override the setting in fe so that files compiled for qemuppc can see # the platform-specific headers. diff --git a/plat/qemuppc/libsys/trap.s b/plat/qemuppc/libsys/trap.s deleted file mode 100644 index e00c4d561..000000000 --- a/plat/qemuppc/libsys/trap.s +++ /dev/null @@ -1,65 +0,0 @@ -# -! $Source: /cvsroot/tack/Ack/plat/linux386/libsys/_syscall.s,v $ -! $State: Exp $ -! $Revision: 1.1 $ - -! Declare segments (the order is important). - -.sect .text -.sect .rom -.sect .data -.sect .bss - -.sect .text - -#define IFFALSE 4 -#define IFTRUE 12 -#define ALWAYS 20 - -#define LT 0 -#define GT 1 -#define EQ 2 -#define OV 3 - -EARRAY = 0 -ERANGE = 1 -ESET = 2 -EIOVFL = 3 -EFOVFL = 4 -EFUNFL = 5 -EIDIVZ = 6 -EFDIVZ = 7 -EIUND = 8 -EFUND = 9 -ECONV = 10 -ESTACK = 16 -EHEAP = 17 -EILLINS = 18 -EODDZ = 19 -ECASE = 20 -EMEMFLT = 21 -EBADPTR = 22 -EBADPC = 23 -EBADLAE = 24 -EBADMON = 25 -EBADLIN = 26 -EBADGTO = 27 -EUNIMPL = 63 ! unimplemented em-instruction called - -.define .trap_ecase -.trap_ecase: - b .trp - -.define .trap_earray -.trap_earray: - b .trp - -.define .trap_erange -.trap_erange: - b .trap - -.define .trp -.define .trap -.trp: -.trap: - b .trp ! 
spin forever From 9077b3a5ab37034932642fd11cf3eee3f887849b Mon Sep 17 00:00:00 2001 From: George Koehler Date: Tue, 30 Jan 2018 15:53:26 -0500 Subject: [PATCH 43/55] Teach mcg to pass our tests. Tests pass if one edits the top build.lua to uncomment "qemuppc" from both vars.plats and vars.plats_with_tests, and one leaves mcg in plat/qemuppc/descr. Add or correct some EM instructions in treebuilder.c: - "lof", "stf": handle negative offsets in load() and store(). - "cuu": add using IR_FROMUI. - "lim", "sim": keep an entire word in ".ignmask", to be compatible with mach/powerpc/libem/trp.s and ncg. We also keep a word in ".ignmask" in ncg for both i386 and m68020. - "trp": pass trap number in register. See comment in helper_function_with_arg(). - "sig": push the old value of .trppc on the stack. - "and ?", "ior ?", "xor ?", "com ?", "cms ?", "set ?", "inn ?": connect to helper functions in libem. - "blm", "bls": drop call to memmove() and use new helper ".bls4", because tests/plat/structcopy_e.c can't call memmove(). - "xor s", "cms s": if s is large, fall back on helper function. - "rol", "ror": add by decomposing each rotate into 4 IR ops. - "rck s", "bls s": make fatal unless s is word size. - "loi": push multiple loads in the correct order. - "dup s", "exg s": if s is large, fall back on helper. - "dus": add using new helper ".dus4". - "lxl", "lxa": follow the static chain, not the dynamic chain. - "lor 1": materialise the stack before pushing the stack pointer. - "lor 2", "str 2": make fatal. - "los", "sts": drop calls to memcpy() and use helpers ".los4" and ".sts4", so lang/m2/libm2/LtoUset.e starts working. - "gto": correctly read descriptor. Change mach/powerpc/mcg/table: - ANY.L: add for "asp -8". - LOAD.L: work around register corruption. - COMPAREUL.I: add for "cms 8".
--- mach/powerpc/libem/bls4.s | 19 ++ mach/powerpc/libem/build.lua | 2 +- mach/powerpc/libem/dus4.s | 16 ++ mach/powerpc/mcg/table | 23 ++- mach/proto/mcg/treebuilder.c | 346 +++++++++++++++++------------------ 5 files changed, 223 insertions(+), 183 deletions(-) create mode 100644 mach/powerpc/libem/bls4.s create mode 100644 mach/powerpc/libem/dus4.s diff --git a/mach/powerpc/libem/bls4.s b/mach/powerpc/libem/bls4.s new file mode 100644 index 000000000..a36faca68 --- /dev/null +++ b/mach/powerpc/libem/bls4.s @@ -0,0 +1,19 @@ +.sect .text + +! Does a block move of words between non-overlapping buffers. +! Stack: ( src dst len -- ) + +.define .bls4 +.bls4: + lwz r3, 0(sp) ! len + lwz r4, 4(sp) ! dst + lwz r5, 8(sp) ! src + addi sp, sp, 12 + srwi r3, r3, 2 + mtspr ctr, r3 + addi r5, r5, -4 + addi r4, r4, -4 +1: lwzu r3, 4(r5) + stwu r3, 4(r4) + bdnz 1b + blr diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua index 2709a4770..5ed9b52e8 100644 --- a/mach/powerpc/libem/build.lua +++ b/mach/powerpc/libem/build.lua @@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do acklibrary { name = "lib_"..plat, srcs = { - "./*.s", -- fif4.s + "./*.s", -- dus4.s }, vars = { plat = plat }, deps = { diff --git a/mach/powerpc/libem/dus4.s b/mach/powerpc/libem/dus4.s new file mode 100644 index 000000000..9c751947a --- /dev/null +++ b/mach/powerpc/libem/dus4.s @@ -0,0 +1,16 @@ +.sect .text + +! Duplicates some words on top of stack. +! 
Stack: ( a size -- a a ) + +.define .dus4 +.dus4: + lwz r3, 0(sp) + addi sp, sp, 4 + srwi r4, r3, 2 + mtspr ctr, r4 + add r5, sp, r3 +1: lwzu r4, -4(r5) + stwu r4, -4(sp) + bdnz 1b + blr diff --git a/mach/powerpc/mcg/table b/mach/powerpc/mcg/table index b72990c36..ca44ce869 100644 --- a/mach/powerpc/mcg/table +++ b/mach/powerpc/mcg/table @@ -237,10 +237,13 @@ PATTERNS SETSP.I(in:(int)reg) emit "mr sp, %in" cost 4; - + out:(int)reg = ANY.I cost 1; + out:(long)reg = ANY.L + cost 1; + out:(int)reg = COPYF.I(in:(float)reg) emit "stfsu %in, -4(sp)" emit "lwz %out, 0(sp)" @@ -306,10 +309,21 @@ PATTERNS emit "lwz %out, %addr" cost 4; +#if 0 + /* FIXME: Doesn't work because %out.0 and %addr might share a + * register, so it corrupts %addr before it loads %out.1. */ out:(long)reg = LOAD.L(addr:address) emit "lwz %out.0, 4+%addr" emit "lwz %out.1, 0+%addr" cost 8; +#else + /* Works, but costs an extra instruction. */ + out:(long)reg = LOAD.L(addr:address) + emit "la %out.1, %addr" + emit "lwz %out.0, 4(%out.1)" + emit "lwz %out.1, 0(%out.1)" + cost 12; +#endif out:(int)ushort0 = LOADH.I(addr:address) emit "lhz %out, %addr" @@ -566,6 +580,13 @@ PATTERNS emit "! 
COMPARESI.I(cr, 0)" cost 4; + cr:(cr)cr = COMPAREUL.I(left:(long)reg, right:(long)reg) + emit "cmpl %cr, 0, %left.1, %right.1" + emit "bne 1f" + emit "cmpl %cr, 0, %left.0, %right.0" + emit "1:" + cost 12; + /* Booleans */ diff --git a/mach/proto/mcg/treebuilder.c b/mach/proto/mcg/treebuilder.c index eed770170..ac811fc14 100644 --- a/mach/proto/mcg/treebuilder.c +++ b/mach/proto/mcg/treebuilder.c @@ -274,7 +274,7 @@ static struct ir* store(int size, struct ir* address, int offset, struct ir* val else opcode = IR_STORE; - if (offset > 0) + if (offset != 0) address = new_ir2( IR_ADD, EM_pointersize, address, new_wordir(offset) @@ -304,7 +304,7 @@ static struct ir* load(int size, struct ir* address, int offset) else opcode = IR_LOAD; - if (offset > 0) + if (offset != 0) address = new_ir2( IR_ADD, EM_pointersize, address, new_wordir(offset) @@ -416,6 +416,31 @@ static void helper_function(const char* name) ); } +static void helper_function_with_arg(const char* name, struct ir* arg) +{ + /* Abuses IR_SETRET to set a register to pass one argument to a + * helper function. + * + * FIXME: As of January 2018, mach/powerpc/libem takes an + * argument in register r3 only for ".los4", ".sts4", ".trp". + * This is an accident. Should the argument be on the stack, or + * should other helpers use a register? 
*/ + + materialise_stack(); + appendir( + new_ir1( + IR_SETRET, arg->size, + arg + ) + ); + appendir( + new_ir1( + IR_CALL, 0, + new_labelir(name) + ) + ); +} + static void insn_simple(int opcode) { switch (opcode) @@ -437,6 +462,7 @@ static void insn_simple(int opcode) case op_cii: simple_convert(IR_FROMSI); break; case op_ciu: simple_convert(IR_FROMSI); break; case op_cui: simple_convert(IR_FROMUI); break; + case op_cuu: simple_convert(IR_FROMUI); break; case op_cfu: simple_convert(IR_FROMUF); break; case op_cfi: simple_convert(IR_FROMSF); break; case op_cif: simple_convert(IR_FROMSI); break; @@ -496,10 +522,12 @@ static void insn_simple(int opcode) case op_lim: { + /* Traps use only 16 bits of .ignmask, but we keep an + * entire word, even if a word has more than 2 bytes. */ push( - new_ir1( - (EM_wordsize == 2) ? IR_LOAD : IR_LOADH, EM_wordsize, - new_labelir(".ignmask") + load( + EM_wordsize, + new_labelir(".ignmask"), 0 ) ); break; @@ -507,26 +535,34 @@ static void insn_simple(int opcode) case op_sim: { - sequence_point(); appendir( - new_ir2( - (EM_wordsize == 2) ? IR_STORE : IR_STOREH, EM_wordsize, - new_labelir(".ignmask"), + store( + EM_wordsize, + new_labelir(".ignmask"), 0, pop(EM_wordsize) ) ); break; } - case op_trp: helper_function(".trp"); break; + case op_trp: + helper_function_with_arg(".trp", pop(EM_wordsize)); + break; case op_sig: { + struct ir* label = new_labelir(".trppc"); struct ir* value = pop(EM_pointersize); + push( + load( + EM_pointersize, + label, 0 + ) + ); appendir( store( EM_pointersize, - new_labelir(".trppc"), 0, + label, 0, value ) ); @@ -539,12 +575,13 @@ static void insn_simple(int opcode) break; } - /* FIXME: These instructions are really complex and barely used - * (Modula-2 and Pascal set support, I believe). Leave them until - * later. 
*/ - case op_set: helper_function(".unimplemented_set"); break; - case op_ior: helper_function(".unimplemented_ior"); break; - + case op_and: helper_function(".and"); break; + case op_ior: helper_function(".ior"); break; + case op_xor: helper_function(".xor"); break; + case op_com: helper_function(".com"); break; + case op_cms: helper_function(".cms"); break; + case op_set: helper_function(".set"); break; + case op_inn: helper_function(".inn"); break; case op_dch: push( @@ -670,6 +707,31 @@ static void simple_alu2(int opcode, int size, int irop, const char* fallback) } } +static void rotate(int opcode, int size, int irop, int irop_reverse) +{ + if (size > (2*EM_wordsize)) + fatal("treebuilder: can't do opcode %s with size %d", em_mnem[opcode - sp_fmnem], size); + else + { + struct ir* right = pop(size); + struct ir* left = pop(size); + struct ir* bits = new_wordir(8 * size); + + /* a rol b -> (a << b) | (a >> (32 - b)) */ + push( + new_ir2( + IR_OR, size, + new_ir2(irop, size, left, right), + new_ir2( + irop_reverse, size, + left, + new_ir2(IR_SUB, size, bits, right) + ) + ) + ); + } +} + static struct ir* extract_block_refs(struct basicblock* bb) { struct ir* outir = NULL; @@ -720,26 +782,28 @@ static struct ir* ptradd(struct ir* address, int offset) ); } -static void blockmove(struct ir* dest, struct ir* src, struct ir* size) +static struct ir* walk_static_chain(int level) { - /* memmove stack: ( size src dest -- ) */ - push(size); - push(src); - push(dest); + struct ir* ir; - materialise_stack(); - appendir( - new_ir1( - IR_CALL, 0, - new_labelir("memmove") - ) - ); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_wordir(EM_pointersize*2 + EM_wordsize) - ) + /* The static chain, when it exists, is the first argument of each + * procedure. The chain begins with the current frame at level 0, + * and continues until we reach the outermost procedure. 
*/ + ir = new_ir0( + IR_GETFP, EM_pointersize ); + while (level--) + { + /* Walk to the next frame pointer. */ + ir = load( + EM_pointersize, + new_ir1( + IR_FPTOAB, EM_pointersize, + ir + ), 0 + ); + } + return ir; } static void insn_ivalue(int opcode, arith value) @@ -765,8 +829,10 @@ static void insn_ivalue(int opcode, arith value) case op_and: simple_alu2(opcode, value, IR_AND, ".and"); break; case op_ior: simple_alu2(opcode, value, IR_OR, ".ior"); break; - case op_xor: simple_alu2(opcode, value, IR_EOR, NULL); break; + case op_xor: simple_alu2(opcode, value, IR_EOR, ".xor"); break; case op_com: simple_alu1(opcode, value, IR_NOT, ".com"); break; + case op_rol: rotate(opcode, value, IR_LSL, IR_LSR); break; + case op_ror: rotate(opcode, value, IR_LSR, IR_LSL); break; case op_adf: simple_alu2(opcode, value, IR_ADDF, NULL); break; case op_sbf: simple_alu2(opcode, value, IR_SUBF, NULL); break; @@ -774,12 +840,23 @@ static void insn_ivalue(int opcode, arith value) case op_dvf: simple_alu2(opcode, value, IR_DIVF, NULL); break; case op_ngf: simple_alu1(opcode, value, IR_NEGF, NULL); break; - case op_cmu: /* fall through */ - case op_cms: push(tristate_compare(value, IR_COMPAREUI)); break; + case op_cms: + if (value > (2*EM_wordsize)) + { + push(new_wordir(value)); + helper_function(".cms"); + break; + } + /* fall through */ + case op_cmu: push(tristate_compare(value, IR_COMPAREUI)); break; case op_cmi: push(tristate_compare(value, IR_COMPARESI)); break; case op_cmf: push(tristate_compare(value, IR_COMPAREF)); break; - case op_rck: helper_function(".rck"); break; + case op_rck: + if (value != EM_wordsize) + fatal("'rck %d' not supported", value); + helper_function(".rck"); + break; case op_set: push(new_wordir(value)); helper_function(".set"); break; case op_inn: push(new_wordir(value)); helper_function(".inn"); break; @@ -930,26 +1007,24 @@ static void insn_ivalue(int opcode, arith value) if (value > (EM_wordsize*2)) { - /* We're going to need to do multiple stores; 
fix the address + /* We're going to need to do multiple loads; fix the address * so it'll go into a register and we can do maths on it. */ appendir(ptr); } + /* Stack grows down. Load backwards. */ while (value > 0) { int s = EM_wordsize*2; if (value < s) s = value; - + value -= s; push( load( s, - ptr, offset + ptr, value ) ); - - value -= s; - offset += s; } assert(value == 0); @@ -1099,7 +1174,12 @@ static void insn_ivalue(int opcode, arith value) case op_dup: { sequence_point(); - if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize)) + if (value > (2*EM_wordsize)) + { + push(new_wordir(value)); + helper_function(".dus4"); + } + else if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize)) { struct ir* v1 = pop(EM_wordsize); struct ir* v2 = pop(EM_wordsize); @@ -1117,12 +1197,30 @@ static void insn_ivalue(int opcode, arith value) break; } + case op_dus: + { + if (value != EM_wordsize) + fatal("'dus %d' not supported", value); + helper_function(".dus4"); + break; + } + case op_exg: { - struct ir* v1 = pop(value); - struct ir* v2 = pop(value); - push(v1); - push(v2); + if (value > (2*EM_wordsize)) + { + push( + new_wordir(value) + ); + helper_function(".exg"); + } + else + { + struct ir* v1 = pop(value); + struct ir* v2 = pop(value); + push(v1); + push(v2); + } break; } @@ -1285,53 +1383,19 @@ static void insn_ivalue(int opcode, arith value) } case op_lxl: - { - struct ir* ir; - - /* Walk the static chain. */ - - ir = new_ir0( - IR_GETFP, EM_pointersize + push( + walk_static_chain(value) ); - - while (value--) - { - ir = new_ir1( - IR_CHAINFP, EM_pointersize, - ir - ); - } - - push(ir); break; - } case op_lxa: - { - struct ir* ir; - - /* Walk the static chain. 
*/ - - ir = new_ir0( - IR_GETFP, EM_pointersize - ); - - while (value--) - { - ir = new_ir1( - IR_CHAINFP, EM_pointersize, - ir - ); - } - push( new_ir1( IR_FPTOAB, EM_pointersize, - ir + walk_static_chain(value) ) ); break; - } case op_fef: { @@ -1394,6 +1458,7 @@ static void insn_ivalue(int opcode, arith value) break; case 1: + materialise_stack(); push( appendir( new_ir0( @@ -1403,10 +1468,6 @@ static void insn_ivalue(int opcode, arith value) ); break; - case 2: - helper_function(".unimplemented_lor_2"); - break; - default: fatal("'lor %d' not supported", value); } @@ -1436,10 +1497,6 @@ static void insn_ivalue(int opcode, arith value) ); break; - case 2: - helper_function(".unimplemented_str_2"); - break; - default: fatal("'str %d' not supported", value); } @@ -1448,100 +1505,27 @@ static void insn_ivalue(int opcode, arith value) } case op_blm: - { - /* Input stack: ( src dest -- ) */ - struct ir* dest = pop(EM_pointersize); - struct ir* src = pop(EM_pointersize); - blockmove(dest, src, new_wordir(value)); + push(new_wordir(value)); + helper_function(".bls4"); break; - } case op_bls: - { - /* Input stack: ( src dest size -- ) */ - struct ir* dest = pop(EM_pointersize); - struct ir* src = pop(EM_pointersize); - struct ir* size = pop(EM_wordsize); - blockmove(dest, src, size); + if (value != EM_wordsize) + fatal("'bls %d' not supported", value); + helper_function(".bls4"); break; - } case op_los: - { - /* Copy an arbitrary amount to the stack. 
*/ - struct ir* bytes = pop(EM_wordsize); - struct ir* address = pop(EM_pointersize); - - materialise_stack(); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_ir1( - IR_NEG, EM_wordsize, - bytes - ) - ) - ); - - push( - new_ir0( - IR_GETSP, EM_pointersize - ) - ); - push(address); - push(bytes); - materialise_stack(); - appendir( - new_ir1( - IR_CALL, 0, - new_labelir("memcpy") - ) - ); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_wordir(EM_pointersize*2 + EM_wordsize) - ) - ); + if (value != EM_wordsize) + fatal("'los %d' not supported", value); + helper_function_with_arg(".los4", pop(EM_wordsize)); break; - } case op_sts: - { - /* Copy an arbitrary amount from the stack. */ - struct ir* bytes = pop(EM_wordsize); - struct ir* dest = pop(EM_pointersize); - struct ir* src; - - materialise_stack(); - src = appendir( - new_ir0( - IR_GETSP, EM_pointersize - ) - ); - - push(dest); - push(src); - push(bytes); - materialise_stack(); - appendir( - new_ir1( - IR_CALL, 0, - new_labelir("memcpy") - ) - ); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_ir2( - IR_ADD, EM_wordsize, - new_wordir(EM_pointersize*2 + EM_wordsize), - bytes - ) - ) - ); + if (value != EM_wordsize) + fatal("'sts %d' not supported", value); + helper_function_with_arg(".sts4", pop(EM_wordsize)); break; - } case op_lin: { @@ -1677,17 +1661,17 @@ static void insn_lvalue(int opcode, const char* label, arith offset) case op_gto: { - struct ir* descriptor = pop(EM_pointersize); + struct ir* descriptor = address_of_external(label, offset); appendir( new_ir1( - IR_SETSP, EM_pointersize, + IR_SETFP, EM_pointersize, load(EM_pointersize, descriptor, EM_pointersize*2) ) ); appendir( new_ir1( - IR_SETFP, EM_pointersize, + IR_SETSP, EM_pointersize, load(EM_pointersize, descriptor, EM_pointersize*1) ) ); From 04ac91889c32128b54d4d40961eab7e8a124a51a Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 1 Feb 2018 12:20:31 -0500 Subject: [PATCH 44/55] Only lower "addi 
sp, sp, X" if X > 0. If X < 0, then lowering the addi might cause the code to use the stack space before allocating it. This is a bug because an asynchronous signal handler can overwrite the unallocated stack space. --- mach/powerpc/top/table | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/mach/powerpc/top/table b/mach/powerpc/top/table index cbc16c277..196cae128 100644 --- a/mach/powerpc/top/table +++ b/mach/powerpc/top/table @@ -8,6 +8,7 @@ LABEL_STARTER '.'; L1, L2, L3, L4, L5 { not_using_sp(VAL) }; RNZ { strcmp(VAL, "r0") }; /* not r0 */ +UP { positive(VAL) }; X, Y, Z { TRUE }; %%; @@ -20,22 +21,22 @@ addis RNZ, RNZ, 0 -> ; addi RNZ, RNZ, X : addi RNZ, RNZ, Y { plus(X, Y, Z) } -> addi RNZ, RNZ, Z ; -/* Lower "addi sp, sp, X" by lifting other instructions, looking for +/* Lower "addi sp, sp, UP" by lifting other instructions, looking for * chances to merge or delete _addi_ instructions, and assuming that * the code generator uses "sp" not "r1". 
*/ -addi sp, sp, X : ANY L1 { lift(ANY) } - -> ANY L1 : addi sp, sp, X ; -addi sp, sp, X : ANY L1, L2 { lift(ANY) } - -> ANY L1, L2 : addi sp, sp, X ; -addi sp, sp, X : ANY L1, L2, L3 { lift(ANY) } - -> ANY L1, L2, L3 : addi sp, sp, X ; -addi sp, sp, X : ANY L1, L2, L3, L4 { lift(ANY) } - -> ANY L1, L2, L3, L4 : addi sp, sp, X ; -addi sp, sp, X : ANY L1, L2, L3, L4, L5 { lift(ANY) } - -> ANY L1, L2, L3, L4, L5 : addi sp, sp, X ; -addi sp, sp, X : lmw Y, L1 { Y[0]=='r' && atoi(Y+1)>1 } - -> lmw Y, L1 : addi sp, sp, X ; +addi sp, sp, UP : ANY L1 { lift(ANY) } + -> ANY L1 : addi sp, sp, UP ; +addi sp, sp, UP : ANY L1, L2 { lift(ANY) } + -> ANY L1, L2 : addi sp, sp, UP ; +addi sp, sp, UP : ANY L1, L2, L3 { lift(ANY) } + -> ANY L1, L2, L3 : addi sp, sp, UP ; +addi sp, sp, UP : ANY L1, L2, L3, L4 { lift(ANY) } + -> ANY L1, L2, L3, L4 : addi sp, sp, UP ; +addi sp, sp, UP : ANY L1, L2, L3, L4, L5 { lift(ANY) } + -> ANY L1, L2, L3, L4, L5 : addi sp, sp, UP ; +addi sp, sp, UP : lmw Y, L1 { Y[0]=='r' && atoi(Y+1)>1 } + -> lmw Y, L1 : addi sp, sp, UP ; /* Merge _addi_ when popping from the stack. */ addi sp, sp, X : lwz L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } @@ -117,6 +118,15 @@ int not_using_sp(const char *s) { } +int positive(const char *s) { + long n; + char *end; + + n = strtol(s, &end, 10); + return *s != '\0' && *end == '\0' && n > 0; +} + + /* Instructions to lift(), sorted in strcmp() order. These are from * ../ncg/table, minus branch instructions. */ From a60738a50dbec68b219c9ad68a867127d42fffdd Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 5 Feb 2018 14:55:10 -0500 Subject: [PATCH 45/55] Don't use '-' in option string to getopt(). Using '-' might fail on platforms like FreeBSD. Commit 50a7031 stopped using '-' in the B compiler and ego. I now stop using '-' in mcg, because I can now check that mcg still works. 
--- mach/proto/mcg/main.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/mach/proto/mcg/main.c b/mach/proto/mcg/main.c index cf8a4435f..aa0fa4816 100644 --- a/mach/proto/mcg/main.c +++ b/mach/proto/mcg/main.c @@ -42,13 +42,14 @@ int main(int argc, char* const argv[]) const char* inputfilename = NULL; const char* outputfilename = NULL; FILE* output; + int i; program_name = argv[0]; opterr = 1; for (;;) { - int c = getopt(argc, argv, "-d:D:C:o:"); + int c = getopt(argc, argv, "d:D:C:o:"); if (c == -1) break; @@ -79,20 +80,22 @@ int main(int argc, char* const argv[]) fatal("already specified an output file"); outputfilename = optarg; break; - - case 1: - if (inputfilename) - fatal("unexpected argument '%s'", optarg); - inputfilename = optarg; } } + for (i = optind; i < argc; i++) + { + if (inputfilename) + fatal("unexpected argument '%s'", argv[i]); + inputfilename = argv[i]; + } + symbol_init(); - if (!EM_open((char*) inputfilename)) - fatal("couldn't open input '%s': %s", + if (!EM_open((char*) inputfilename)) + fatal("couldn't open input '%s': %s", inputfilename ? inputfilename : "", EM_error); - + if (outputfilename) { outputfile = fopen(outputfilename, "w"); From 0a6d3de7fe51b1f29d83dccc147532a6a59a23a7 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 5 Feb 2018 16:09:30 -0500 Subject: [PATCH 46/55] Use prototypes in ego/cs, ego/sp. 
--- util/ego/cs/cs_aux.c | 13 ++++------ util/ego/cs/cs_aux.h | 12 ++++----- util/ego/cs/cs_avail.c | 25 ++++++------------- util/ego/cs/cs_avail.h | 6 +++-- util/ego/cs/cs_debug.c | 13 ++++------ util/ego/cs/cs_debug.h | 12 ++++++--- util/ego/cs/cs_elim.c | 48 ++++++++++-------------------------- util/ego/cs/cs_elim.h | 3 ++- util/ego/cs/cs_entity.c | 14 ++++------- util/ego/cs/cs_entity.h | 9 ++++--- util/ego/cs/cs_getent.c | 6 ++--- util/ego/cs/cs_getent.h | 3 ++- util/ego/cs/cs_kill.c | 48 +++++++++++++----------------------- util/ego/cs/cs_kill.h | 15 +++++++---- util/ego/cs/cs_partit.c | 24 ++++++------------ util/ego/cs/cs_partit.h | 30 ++++++++++++++-------- util/ego/cs/cs_profit.c | 18 ++++---------- util/ego/cs/cs_stack.c | 13 +++------- util/ego/cs/cs_stack.h | 12 ++++++--- util/ego/cs/cs_vnm.c | 47 ++++++++++------------------------- util/ego/cs/cs_vnm.h | 3 ++- util/ego/descr/powerpc.descr | 4 +-- util/ego/share/aux.h | 2 +- util/ego/share/global.h | 6 ++--- util/ego/sp/sp.c | 39 ++++++----------------------- 25 files changed, 166 insertions(+), 259 deletions(-) diff --git a/util/ego/cs/cs_aux.c b/util/ego/cs/cs_aux.c index 337deeda7..aeb582c9b 100644 --- a/util/ego/cs/cs_aux.c +++ b/util/ego/cs/cs_aux.c @@ -11,8 +11,7 @@ #include "cs.h" #include "cs_entity.h" -offset array_elemsize(vn) - valnum vn; +offset array_elemsize(valnum vn) { /* Vn is the valuenumber of an entity that points to * an array-descriptor. The third element of this descriptor holds @@ -36,14 +35,12 @@ offset array_elemsize(vn) return aoff(enp->en_ext->o_dblock->d_values, 2); } -occur_p occ_elem(i) - Lindex i; +occur_p occ_elem(Lindex i) { return (occur_p) Lelem(i); } -entity_p en_elem(i) - Lindex i; +entity_p en_elem(Lindex i) { return (entity_p) Lelem(i); } @@ -54,14 +51,14 @@ entity_p en_elem(i) STATIC valnum val_no; -valnum newvalnum() +valnum newvalnum(void) { /* Return a completely new value number. 
*/ return ++val_no; } -start_valnum() +void start_valnum(void) { /* Restart value numbering. */ diff --git a/util/ego/cs/cs_aux.h b/util/ego/cs/cs_aux.h index 11950540e..1ce9373a0 100644 --- a/util/ego/cs/cs_aux.h +++ b/util/ego/cs/cs_aux.h @@ -3,28 +3,28 @@ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". */ -extern offset array_elemsize(); /* (valnum vm) +extern offset array_elemsize(valnum vm); + /* * Returns the size of array-elements, * if vn is the valuenumber of the * address of an array-descriptor. */ -extern occur_p occ_elem(); /* (Lindex i) +extern occur_p occ_elem(Lindex i); /* * Returns a pointer to the occurrence * of which i is an index in a set. */ -extern entity_p en_elem(); /* (Lindex i) +extern entity_p en_elem(Lindex i); /* * Returns a pointer to the entity * of which i is an index in a set. */ -extern valnum newvalnum(); /* () +extern valnum newvalnum(void); /* * Returns a completely new * value number. */ -extern start_valnum(); /* () +extern void start_valnum(void); /* * Restart value numbering. */ - diff --git a/util/ego/cs/cs_avail.c b/util/ego/cs/cs_avail.c index 1f766a85c..025132a2e 100644 --- a/util/ego/cs/cs_avail.c +++ b/util/ego/cs/cs_avail.c @@ -22,8 +22,7 @@ avail_p avails; /* The list of available expressions. */ -STATIC bool commutative(instr) - int instr; +STATIC bool commutative(int instr) { /* Is instr a commutative operator? */ @@ -37,9 +36,7 @@ STATIC bool commutative(instr) } } -STATIC bool same_avail(kind, avp1, avp2) - byte kind; - avail_p avp1, avp2; +STATIC bool same_avail(byte kind, avail_p avp1, avail_p avp2) { /* Two expressions are the same if they have the same operator, * the same size, and their operand(s) have the same value. 
@@ -75,8 +72,7 @@ STATIC bool same_avail(kind, avp1, avp2) /* NOTREACHED */ } -STATIC void check_local(avp) - avail_p avp; +STATIC void check_local(avail_p avp) { /* Check if the local in which the result of avp was stored, * still holds this result. Update if not. @@ -89,9 +85,7 @@ STATIC void check_local(avp) } } -STATIC entity_p result_local(size, l) - offset size; - line_p l; +STATIC entity_p result_local(offset size, line_p l) { /* If the result of an expression of size bytes is stored into a * local for which a registermessage was generated, return a pointer @@ -114,9 +108,7 @@ STATIC entity_p result_local(size, l) return (entity_p) 0; } -STATIC copy_avail(kind, src, dst) - int kind; - avail_p src, dst; +STATIC void copy_avail(int kind, avail_p src, avail_p dst) { /* Copy some attributes from src to dst. */ @@ -143,10 +135,7 @@ STATIC copy_avail(kind, src, dst) } } -avail_p av_enter(avp, ocp, kind) - avail_p avp; - occur_p ocp; - int kind; +avail_p av_enter(avail_p avp, occur_p ocp, int kind) { /* Put the available expression avp in the list, * if it is not already there. @@ -186,7 +175,7 @@ avail_p av_enter(avp, ocp, kind) return ravp; } -clr_avails() +void clr_avails(void) { /* Throw away the information about the available expressions. */ diff --git a/util/ego/cs/cs_avail.h b/util/ego/cs/cs_avail.h index a0515353a..3364be2a2 100644 --- a/util/ego/cs/cs_avail.h +++ b/util/ego/cs/cs_avail.h @@ -5,7 +5,8 @@ */ extern avail_p avails; /* The set of available expressions. */ -extern avail_p av_enter(); /* (avail_p avp, occur_p ocp, byte kind) +extern avail_p av_enter(avail_p avp, occur_p ocp, byte kind); + /* * Puts the available expression in avp * in the list of available expressions, * if it is not already there. Add ocp to set of @@ -18,6 +19,7 @@ extern avail_p av_enter(); /* (avail_p avp, occur_p ocp, byte kind) * Returns a pointer into the list. 
*/ -extern clr_avails(); /* Release all space occupied by the old list +extern void clr_avails(void); + /* Release all space occupied by the old list * of available expressions. */ diff --git a/util/ego/cs/cs_debug.c b/util/ego/cs/cs_debug.c index bf43d8c12..07890395b 100644 --- a/util/ego/cs/cs_debug.c +++ b/util/ego/cs/cs_debug.c @@ -17,8 +17,7 @@ extern char em_mnem[]; /* The mnemonics of the EM instructions. */ -STATIC void showinstr(lnp) - line_p lnp; +STATIC void showinstr(line_p lnp) { /* Makes the instruction in `lnp' human readable. Only lines that * can occur in expressions that are going to be eliminated are @@ -49,8 +48,7 @@ STATIC void showinstr(lnp) fprintf(stderr,"\n"); } -SHOWOCCUR(ocp) - occur_p ocp; +SHOWOCCUR(occur_p ocp) { /* Shows all instructions in an occurrence. */ @@ -69,8 +67,7 @@ SHOWOCCUR(ocp) #ifdef TRACE -SHOWAVAIL(avp) - avail_p avp; +void SHOWAVAIL(avail_p avp) { /* Shows an available expression. */ showinstr(avp->av_found); @@ -79,7 +76,7 @@ SHOWAVAIL(avp) } -OUTAVAILS() +void OUTAVAILS(void) { register avail_p ravp; @@ -110,7 +107,7 @@ STATIC char *enkinds[] = { "ignore mask" }; -OUTENTITIES() +void OUTENTITIES(void) { register Lindex i; diff --git a/util/ego/cs/cs_debug.h b/util/ego/cs/cs_debug.h index e45287f9b..2d85ebfe8 100644 --- a/util/ego/cs/cs_debug.h +++ b/util/ego/cs/cs_debug.h @@ -5,7 +5,8 @@ */ #ifdef VERBOSE -extern SHOWOCCUR(); /* (occur_p ocp) +extern void SHOWOCCUR(occur_p ocp); + /* * Shows all lines in an occurrence. */ @@ -17,15 +18,18 @@ extern SHOWOCCUR(); /* (occur_p ocp) #ifdef TRACE -extern OUTAVAILS(); /* () +extern void OUTAVAILS(void); + /* * Prints all available expressions. */ -extern OUTENTITIES(); /* () +extern void OUTENTITIES(void); + /* * Prints all entities. */ -extern SHOWAVAIL(); /* (avail_p avp) +extern void SHOWAVAIL(avail_p avp); + /* * Shows an available expression. 
*/ diff --git a/util/ego/cs/cs_elim.c b/util/ego/cs/cs_elim.c index 0a253830f..767517bd8 100644 --- a/util/ego/cs/cs_elim.c +++ b/util/ego/cs/cs_elim.c @@ -20,8 +20,7 @@ #include "cs_partit.h" #include "cs_debug.h" -STATIC dlink(l1, l2) - line_p l1, l2; +STATIC void dlink(line_p l1, line_p l2) { /* Doubly link the lines in l1 and l2. */ @@ -31,11 +30,10 @@ STATIC dlink(l1, l2) l2->l_prev = l1; } -STATIC remove_lines(first, last) - line_p first, last; +STATIC void remove_lines(line_p first, line_p last) { /* Throw away the lines between and including first and last. - * Don't worry about any pointers; the (must) have been taken care of. + * Don't worry about any pointers; they (must) have been taken care of. */ register line_p lnp, next; @@ -46,8 +44,7 @@ STATIC remove_lines(first, last) } } -STATIC bool contained(ocp1, ocp2) - occur_p ocp1, ocp2; +STATIC bool contained(occur_p ocp1, occur_p ocp2) { /* Determine whether ocp1 is contained within ocp2. */ @@ -61,9 +58,7 @@ STATIC bool contained(ocp1, ocp2) return FALSE; } -STATIC delete(ocp, start) - occur_p ocp; - avail_p start; +STATIC void delete(occur_p ocp, avail_p start) { /* Delete all occurrences that are contained within ocp. * They must have been entered in the list before start: @@ -90,10 +85,7 @@ STATIC delete(ocp, start) } } -STATIC complete_aar(lnp, instr, descr_vn) - line_p lnp; - int instr; - valnum descr_vn; +STATIC void complete_aar(line_p lnp, int instr, valnum descr_vn) { /* Lnp is an instruction that loads the address of an array-element. * Instr tells us what effect we should achieve; load (instr is op_lar) @@ -109,10 +101,7 @@ STATIC complete_aar(lnp, instr, descr_vn) dlink(lnp, lindir); } -STATIC replace(ocp, tmp, avp) - occur_p ocp; - offset tmp; - avail_p avp; +STATIC void replace(occur_p ocp, offset tmp, avail_p avp) { /* Replace the lines in the occurrence in ocp by a load of the * temporary with offset tmp. 
@@ -143,9 +132,7 @@ STATIC replace(ocp, tmp, avp) remove_lines(first, last); } -STATIC append(avp, tmp) - avail_p avp; - offset tmp; +STATIC void append(avail_p avp, offset tmp) { /* Avp->av_found points to a line with an operator in it. This * routine emits a sequence of instructions that saves the result @@ -177,9 +164,7 @@ STATIC append(avp, tmp) } } -STATIC set_replace(avp, tmp) - avail_p avp; - offset tmp; +STATIC void set_replace(avail_p avp, offset tmp) { /* Avp->av_occurs is now a set of occurrences, each of which will be * replaced by a reference to a local. @@ -199,8 +184,7 @@ STATIC set_replace(avp, tmp) } } -STATIC int reg_score(enp) - entity_p enp; +STATIC int reg_score(entity_p enp) { /* Enp is a local that will go into a register. * We return its score upto now. @@ -209,10 +193,7 @@ STATIC int reg_score(enp) return regv_arg(enp->en_loc, 4); } -STATIC line_p gen_mesreg(off, avp, pp) - offset off; - avail_p avp; - proc_p pp; +STATIC line_p gen_mesreg(offset off, avail_p avp, proc_p pp) { /* Generate a register message for the local that will hold the * result of the expression in avp, at the appropriate place in @@ -226,9 +207,7 @@ STATIC line_p gen_mesreg(off, avp, pp) return reg; } -STATIC change_score(mes, score) - line_p mes; - int score; +STATIC void change_score(line_p mes, int score) { /* Change the score in the register message in mes to score. */ @@ -242,8 +221,7 @@ STATIC change_score(mes, score) ap->a_a.a_offset = score; } -eliminate(pp) - proc_p pp; +void eliminate(proc_p pp) { /* Eliminate costly common subexpressions within procedure pp. * We scan the available expressions in - with respect to time found - diff --git a/util/ego/cs/cs_elim.h b/util/ego/cs/cs_elim.h index 4c6a61669..9c7d86477 100644 --- a/util/ego/cs/cs_elim.h +++ b/util/ego/cs/cs_elim.h @@ -3,7 +3,8 @@ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". 
*/ -extern eliminate(); /* (proc_p pp) +extern void eliminate(proc_p pp); + /* * Eliminate some of the recurrences of expressions * that were found by the valuenumbering * algorithm. diff --git a/util/ego/cs/cs_entity.c b/util/ego/cs/cs_entity.c index e4e49ff9a..a2cd5228d 100644 --- a/util/ego/cs/cs_entity.c +++ b/util/ego/cs/cs_entity.c @@ -18,8 +18,7 @@ lset entities; /* Our pseudo symbol-table. */ -entity_p find_entity(vn) - valnum vn; +entity_p find_entity(valnum vn) { /* Try to find the entity with valuenumber vn. */ @@ -33,8 +32,7 @@ entity_p find_entity(vn) return (entity_p) 0; } -STATIC bool same_entity(enp1, enp2) - entity_p enp1, enp2; +STATIC bool same_entity(entity_p enp1, entity_p enp2) { if (enp1->en_kind != enp2->en_kind) return FALSE; if (enp1->en_size != enp2->en_size) return FALSE; @@ -69,8 +67,7 @@ STATIC bool same_entity(enp1, enp2) } } -STATIC copy_entity(src, dst) - entity_p src, dst; +STATIC void copy_entity(entity_p src, entity_p dst) { dst->en_static = src->en_static; dst->en_kind = src->en_kind; @@ -111,8 +108,7 @@ STATIC copy_entity(src, dst) } } -entity_p en_enter(enp) - register entity_p enp; +entity_p en_enter(entity_p enp) { /* Put the entity in enp in the entity set, if it is not already there. * Return pointer to stored entity. @@ -133,7 +129,7 @@ entity_p en_enter(enp) return new; } -clr_entities() +void clr_entities(void) { /* Throw away all pseudo-symboltable information. */ diff --git a/util/ego/cs/cs_entity.h b/util/ego/cs/cs_entity.h index c669efb58..0a222f96e 100644 --- a/util/ego/cs/cs_entity.h +++ b/util/ego/cs/cs_entity.h @@ -5,16 +5,19 @@ */ extern lset entities; /* The pseudo-symboltable. */ -extern entity_p find_entity(); /* (valnum vn) +extern entity_p find_entity(valnum vn); + /* * Tries to find an entity with value number vn. */ -extern entity_p en_enter(); /* (entity_p enp) +extern entity_p en_enter(entity_p enp); + /* * Enter the entity in enp in the set of * entities if it was not already there. 
*/ -extern clr_entities(); /* () +extern void clr_entities(void); + /* * Release all space occupied by our * pseudo-symboltable. */ diff --git a/util/ego/cs/cs_getent.c b/util/ego/cs/cs_getent.c index ef8694536..144750802 100644 --- a/util/ego/cs/cs_getent.c +++ b/util/ego/cs/cs_getent.c @@ -67,8 +67,7 @@ STATIC struct inf_entity { #define ENKIND(ip) ip->inf_used #define SIZEINF(ip) ip->inf_size -STATIC struct inf_entity *getinf(n) - int n; +STATIC struct inf_entity *getinf(int n) { struct inf_entity *ip; @@ -78,8 +77,7 @@ STATIC struct inf_entity *getinf(n) return (struct inf_entity *) 0; } -entity_p getentity(lnp, l_out) - line_p lnp, *l_out; +entity_p getentity(line_p lnp, line_p *l_out) { /* Build the entities where lnp refers to, and enter them. * If a token needs to be popped, the first line that pushed diff --git a/util/ego/cs/cs_getent.h b/util/ego/cs/cs_getent.h index e37e37404..f1c4e955d 100644 --- a/util/ego/cs/cs_getent.h +++ b/util/ego/cs/cs_getent.h @@ -3,7 +3,8 @@ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". */ -extern entity_p getentity(); /* (line_p lnp, *l_out) +extern entity_p getentity(line_p lnp, line_p *l_out); + /* * Extract the entity lnp refers and enter it * in the table of entities. The main entity * lnp refers to is returned; sometimes there diff --git a/util/ego/cs/cs_kill.c b/util/ego/cs/cs_kill.c index 520366f23..fc3144397 100644 --- a/util/ego/cs/cs_kill.c +++ b/util/ego/cs/cs_kill.c @@ -16,9 +16,9 @@ #include "cs_debug.h" #include "cs_avail.h" #include "cs_entity.h" +#include "cs_kill.h" -STATIC base_valno(enp) - entity_p enp; +STATIC valnum base_valno(entity_p enp) { /* Return the value number of the (base) address of an indirectly * accessed entity. 
@@ -37,8 +37,7 @@ STATIC base_valno(enp) /* NOTREACHED */ } -STATIC entity_p find_base(vn) - valnum vn; +STATIC entity_p find_base(valnum vn) { /* Vn is the valuenumber of the (base) address of an indirectly * accessed entity. Return the entity that holds this address @@ -79,8 +78,7 @@ STATIC entity_p find_base(vn) return (entity_p) 0; } -STATIC bool obj_overlap(op1, op2) - obj_p op1, op2; +STATIC bool obj_overlap(obj_p op1, obj_p op2) { /* Op1 and op2 point to two objects in the same datablock. * Obj_overlap returns whether these objects might overlap. @@ -97,8 +95,7 @@ STATIC bool obj_overlap(op1, op2) #define same_datablock(o1, o2) ((o1)->o_dblock == (o2)->o_dblock) -STATIC bool addr_local(enp) - entity_p enp; +STATIC bool addr_local(entity_p enp) { /* Is enp the address of a stack item. */ @@ -108,17 +105,14 @@ STATIC bool addr_local(enp) enp->en_kind == ENAARGBASE; } -STATIC bool addr_external(enp) - entity_p enp; +STATIC bool addr_external(entity_p enp) { /* Is enp the address of an external. */ return enp != (entity_p) 0 && enp->en_kind == ENAEXTERNAL; } -STATIC kill_external(obp, indir) - obj_p obp; - int indir; +STATIC void kill_external(obj_p obp, int indir) { /* A store is done via the object in obp. If this store is direct * we kill directly accessed entities in the same data block only @@ -164,8 +158,7 @@ STATIC kill_external(obp, indir) } } -STATIC bool loc_overlap(enp1, enp2) - entity_p enp1, enp2; +STATIC bool loc_overlap(entity_p enp1, entity_p enp2) { /* Enp1 and enp2 point to two locals. Loc_overlap returns whether * they overlap. @@ -184,9 +177,7 @@ STATIC bool loc_overlap(enp1, enp2) enp1->en_loc + enp1->en_size > enp2->en_loc; } -STATIC kill_local(enp, indir) - entity_p enp; - bool indir; +STATIC void kill_local(entity_p enp, bool indir) { /* This time a store is done into an ENLOCAL. */ @@ -234,7 +225,7 @@ STATIC kill_local(enp, indir) } } -STATIC void kill_sim() +STATIC void kill_sim(void) { /* A store is done into the ENIGNMASK. 
*/ @@ -252,8 +243,7 @@ STATIC void kill_sim() } } -kill_direct(enp) - entity_p enp; +void kill_direct(entity_p enp) { /* A store will be done into enp. We must forget the values of all the * entities this one may overlap with. @@ -274,8 +264,7 @@ kill_direct(enp) } } -kill_indir(enp) - entity_p enp; +void kill_indir(entity_p enp) { /* An indirect store is done, in an ENINDIR, * an ENOFFSETTED or an ENARRELEM. @@ -306,7 +295,7 @@ kill_indir(enp) } } -kill_much() +extern void kill_much(void) { /* Kills all killable entities, * except the locals for which a registermessage was generated. @@ -324,8 +313,7 @@ kill_much() } } -STATIC bool bad_procflags(pp) - proc_p pp; +STATIC bool bad_procflags(proc_p pp) { /* Return whether the flags about the procedure in pp indicate * that we have little information about it. It might be that @@ -335,8 +323,7 @@ STATIC bool bad_procflags(pp) return !(pp->p_flags1 & PF_BODYSEEN) || (pp->p_flags1 & PF_CALUNKNOWN); } -STATIC kill_globset(s) - cset s; +STATIC void kill_globset(cset s) { /* S is a set of global variables that might be changed. * We act as if a direct store is done into each of them. @@ -349,8 +336,7 @@ STATIC kill_globset(s) } } -kill_call(pp) - proc_p pp; +void kill_call(proc_p pp) { /* Kill everything that might be destroyed by calling * the procedure in pp. @@ -367,7 +353,7 @@ kill_call(pp) } } -kill_all() +void kill_all(void) { /* Kills all entities. */ diff --git a/util/ego/cs/cs_kill.h b/util/ego/cs/cs_kill.h index 6fa6859b8..347e3eb16 100644 --- a/util/ego/cs/cs_kill.h +++ b/util/ego/cs/cs_kill.h @@ -3,27 +3,32 @@ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". */ -extern kill_call(); /* (proc_p pp) +extern void kill_call(proc_p pp); + /* * Kill all entities that might have an other value * after execution of the procedure in pp. 
*/ -extern kill_much(); /* () +extern void kill_much(void); + /* * Kill all killable entities except those for which * a register message was generated. * Constants, addresses, etc are not killable. */ -extern kill_indir(); /* (entity_p enp) +extern void kill_indir(entity_p enp); + /* * Kill all entities that might have an other value * after indirect assignment to the entity in enp. */ -extern kill_direct(); /* (entity_p enp) +extern void kill_direct(entity_p enp); + /* * Kill all entities that might have an other value * after direct assignment to the entity in enp. */ -extern kill_all(); /* () +extern void kill_all(void); + /* * Kill all entities. */ diff --git a/util/ego/cs/cs_partit.c b/util/ego/cs/cs_partit.c index 9a1bde042..e6d3b1fdf 100644 --- a/util/ego/cs/cs_partit.c +++ b/util/ego/cs/cs_partit.c @@ -178,8 +178,7 @@ STATIC struct { #define AVSIZE(l) (info[INSTR(l)].i_av) #define REGTYPE(n) (info[n].i_regtype) -int instrgroup(lnp) - line_p lnp; +int instrgroup(line_p lnp) { if (INSTR(lnp) == op_lor && SHORT(lnp) == 1) { /* We can't do anything with the stackpointer. */ @@ -192,8 +191,7 @@ int instrgroup(lnp) return GROUP(INSTR(lnp)); } -bool stack_group(instr) - int instr; +bool stack_group(int instr) { /* Is this an instruction that only does something to the top of * the stack? @@ -211,8 +209,7 @@ bool stack_group(instr) } } -STATIC offset argw(lnp) - line_p lnp; +STATIC offset argw(line_p lnp) { /* Some EM-instructions have their argument either on the same line, * or on top of the stack. We give up when the argument is on top of @@ -228,8 +225,7 @@ STATIC offset argw(lnp) } } -offset op11size(lnp) - line_p lnp; +offset op11size(line_p lnp) { /* Returns the size of the first argument of * the unary operator in lnp. @@ -248,8 +244,7 @@ offset op11size(lnp) /* NOTREACHED */ } -offset op12size(lnp) - line_p lnp; +offset op12size(line_p lnp) { /* Same for first of binary. 
*/ @@ -264,8 +259,7 @@ offset op12size(lnp) /* NOTREACHED */ } -offset op22size(lnp) - line_p lnp; +offset op22size(line_p lnp) { switch (OP2SIZE(lnp)) { case ARGW: @@ -319,8 +313,7 @@ offset op33size(lnp) return ws; } -offset avsize(lnp) - line_p lnp; +offset avsize(line_p lnp) { /* Returns the size of the result of the instruction in lnp. * If the instruction is a conversion this size is given on the stack. @@ -359,8 +352,7 @@ offset avsize(lnp) /* NOTREACHED */ } -int regtype(instr) - byte instr; +int regtype(byte instr) { switch (REGTYPE(instr & BMASK)) { case ANY: diff --git a/util/ego/cs/cs_partit.h b/util/ego/cs/cs_partit.h index 27e7a00bc..ffcc321cb 100644 --- a/util/ego/cs/cs_partit.h +++ b/util/ego/cs/cs_partit.h @@ -7,53 +7,63 @@ * "manageable chunks. */ -extern int instrgroup(); /* (line_p lnp) +extern int instrgroup(line_p lnp); + /* * Return the group into which the instruction * in lnp belongs to. */ -extern bool stack_group(); /* (int instr) +extern bool stack_group(int instr); + /* * Return whether instr is an instruction that * only changes the state of the stack, i.e. * is a "true" operator. */ -extern offset op11size(); /* (line_p lnp) +extern offset op11size(line_p lnp); + /* * Return the size of the operand of the unary * operator in lnp. */ -extern offset op12size(); /* (line_p lnp) +extern offset op12size(line_p lnp); + /* * Return the size of the first operand of the * binary operator in lnp. */ -extern offset op22size(); /* (line_p lnp) +extern offset op22size(line_p lnp); + /* * Return the size of the second operand of the * binary operator in lnp. */ -extern offset op13size(); /* (line_p lnp) +extern offset op13size(line_p lnp); + /* * Return the size of the first operand of the * ternary operator in lnp. */ -extern offset op23size(); /* (line_p lnp) +extern offset op23size(line_p lnp); + /* * Return the size of the second operand of the * ternary operator in lnp. 
*/ -extern offset op33size(); /* (line_p lnp) +extern offset op33size(line_p lnp); + /* * Return the size of the third operand of the * ternary operator in lnp. */ -extern offset avsize(); /* (line_p lnp) +extern offset avsize(line_p lnp); + /* * Return the size of the result of the * operator in lnp. */ -extern int regtype(); /* (byte instr) +extern int regtype(byte instr); + /* * Return in what kind of machine-register * the result of instr should be stored: * pointer, float, or any. diff --git a/util/ego/cs/cs_profit.c b/util/ego/cs/cs_profit.c index 259a6114d..50cb708fd 100644 --- a/util/ego/cs/cs_profit.c +++ b/util/ego/cs/cs_profit.c @@ -26,9 +26,7 @@ STATIC cset sli_counts; STATIC short LX_threshold; STATIC short AR_limit; -STATIC get_instrs(f, s_p) - FILE *f; - cset *s_p; +STATIC void get_instrs(FILE *f, cset *s_p) { /* Read a set of integers from inputfile f into *s_p. * Such a set must be delimited by a negative number. @@ -42,9 +40,7 @@ STATIC get_instrs(f, s_p) } } -STATIC choose_cset(f, s_p, max) - FILE *f; - cset *s_p; +STATIC void choose_cset(FILE *f, cset *s_p, int max) { /* Read two compact sets of integers from inputfile f. * Choose the first if we optimize with respect to time, @@ -115,8 +111,7 @@ void cs_machinit(void *vp) choose_cset(f, &forbidden, sp_lmnem); } -STATIC bool sli_no_eliminate(lnp) - line_p lnp; +STATIC bool sli_no_eliminate(line_p lnp) { /* Return whether the SLI-instruction in lnp is part of * an array-index computation, and should not be eliminated. @@ -130,8 +125,7 @@ STATIC bool sli_no_eliminate(lnp) ; } -STATIC bool gains(avp) - avail_p avp; +STATIC bool gains(avail_p avp) { /* Return whether we can gain something, when we eliminate * an expression such as in avp. 
We just glue together some @@ -161,9 +155,7 @@ STATIC bool gains(avp) return TRUE; } -STATIC bool okay_lines(avp, ocp) - avail_p avp; - occur_p ocp; +STATIC bool okay_lines(avail_p avp, occur_p ocp) { register line_p lnp, next; offset sz; diff --git a/util/ego/cs/cs_stack.c b/util/ego/cs/cs_stack.c index 7927438a5..670955d1e 100644 --- a/util/ego/cs/cs_stack.c +++ b/util/ego/cs/cs_stack.c @@ -23,8 +23,7 @@ STATIC token_p free_token; #define Stack_empty() (free_token == &Stack[0]) #define Top (free_token - 1) -Push(tkp) - token_p tkp; +void Push(token_p tkp) { if (tkp->tk_size == UNKNOWN_SIZE) { Empty_stack(); /* The contents of the Stack is useless. */ @@ -39,10 +38,7 @@ Push(tkp) #define WORD_MULTIPLE(n) ((n / ws) * ws + ( n % ws ? ws : 0 )) -void -Pop(tkp, size) - token_p tkp; - offset size; +void Pop(token_p tkp, offset size) { /* Pop a token with given size from the valuenumber stack into tkp. */ @@ -85,8 +81,7 @@ Pop(tkp, size) } } -Dup(lnp) - line_p lnp; +void Dup(line_p lnp) { /* Duplicate top bytes on the Stack. */ @@ -132,7 +127,7 @@ Dup(lnp) } } -clr_stack() +void clr_stack(void) { free_token = &Stack[0]; } diff --git a/util/ego/cs/cs_stack.h b/util/ego/cs/cs_stack.h index 64d59cf90..e5a79b858 100644 --- a/util/ego/cs/cs_stack.h +++ b/util/ego/cs/cs_stack.h @@ -3,21 +3,25 @@ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". */ -extern Push(); /* (token_p tkp) +extern void Push(token_p tkp); + /* * Push the token in tkp on the fake-stack. */ -extern Pop(); /* (token_p tkp; offset size) +extern void Pop(token_p tkp, offset size); + /* * Pop a token of size bytes from the fake-stack * into tkp. If such a token is not there * we put a dummy in tkp and adjust the fake-stack. */ -extern Dup(); /* (line_p lnp) +extern void Dup(line_p lnp); + /* * Reflect the changes made by the dup-instruction * in lnp to the EM-stack into the fake-stack. 
*/ -extern clr_stack(); /* () +extern void clr_stack(void); + /* * Clear the fake-stack. */ diff --git a/util/ego/cs/cs_vnm.c b/util/ego/cs/cs_vnm.c index a4813411c..4dbeb3df2 100644 --- a/util/ego/cs/cs_vnm.c +++ b/util/ego/cs/cs_vnm.c @@ -21,9 +21,7 @@ #include "cs_partit.h" #include "cs_getent.h" -STATIC push_entity(enp, lfirst) - entity_p enp; - line_p lfirst; +STATIC void push_entity(entity_p enp, line_p lfirst) { /* Build token and Push it. */ @@ -35,10 +33,8 @@ STATIC push_entity(enp, lfirst) Push(&tk); } -STATIC put_expensive_load(bp, lnp, lfirst, enp) - bblock_p bp; - line_p lnp, lfirst; - entity_p enp; +STATIC void put_expensive_load(bblock_p bp, line_p lnp, line_p lfirst, + entity_p enp) { struct avail av; occur_p ocp; @@ -52,10 +48,7 @@ STATIC put_expensive_load(bp, lnp, lfirst, enp) av_enter(&av, ocp, EXPENSIVE_LOAD); } -STATIC put_aar(bp, lnp, lfirst, enp) - bblock_p bp; - line_p lnp, lfirst; - entity_p enp; +STATIC void put_aar(bblock_p bp, line_p lnp, line_p lfirst, entity_p enp) { /* Enp points to an ENARRELEM. We do as if its address was computed. 
*/ @@ -74,9 +67,7 @@ STATIC put_aar(bp, lnp, lfirst, enp) av_enter(&av, ocp, TERNAIR_OP); } -STATIC push_avail(avp, lfirst) - avail_p avp; - line_p lfirst; +STATIC void push_avail(avail_p avp, line_p lfirst) { struct token tk; @@ -86,10 +77,7 @@ STATIC push_avail(avp, lfirst) Push(&tk); } -STATIC push_unair_op(bp, lnp, tkp1) - bblock_p bp; - line_p lnp; - token_p tkp1; +STATIC void push_unair_op(bblock_p bp, line_p lnp, token_p tkp1) { struct avail av; occur_p ocp; @@ -103,10 +91,7 @@ STATIC push_unair_op(bp, lnp, tkp1) push_avail(av_enter(&av, ocp, UNAIR_OP), tkp1->tk_lfirst); } -STATIC push_binair_op(bp, lnp, tkp1, tkp2) - bblock_p bp; - line_p lnp; - token_p tkp1, tkp2; +STATIC void push_binair_op(bblock_p bp, line_p lnp, token_p tkp1, token_p tkp2) { struct avail av; occur_p ocp; @@ -121,10 +106,8 @@ STATIC push_binair_op(bp, lnp, tkp1, tkp2) push_avail(av_enter(&av, ocp, BINAIR_OP), tkp1->tk_lfirst); } -STATIC push_ternair_op(bp, lnp, tkp1, tkp2, tkp3) - bblock_p bp; - line_p lnp; - token_p tkp1, tkp2, tkp3; +STATIC void push_ternair_op(bblock_p bp, line_p lnp, token_p tkp1, + token_p tkp2, token_p tkp3) { struct avail av; occur_p ocp; @@ -140,8 +123,7 @@ STATIC push_ternair_op(bp, lnp, tkp1, tkp2, tkp3) push_avail(av_enter(&av, ocp, TERNAIR_OP), tkp1->tk_lfirst); } -STATIC fiddle_stack(lnp) - line_p lnp; +STATIC void fiddle_stack(line_p lnp) { /* The instruction in lnp does something to the valuenumber-stack. */ @@ -232,8 +214,7 @@ STATIC proc_p find_proc(vn) return (proc_p) 0; } -STATIC side_effects(lnp) - line_p lnp; +STATIC void side_effects(line_p lnp) { /* Lnp contains a cai or cal instruction. We try to find the callee * and see what side-effects it has. @@ -255,8 +236,7 @@ STATIC side_effects(lnp) } } -hopeless(instr) - int instr; +STATIC void hopeless(int instr) { /* The effect of `instr' is too difficult to * compute. We assume worst case behaviour. 
@@ -281,8 +261,7 @@ hopeless(instr) } } -vnm(bp) - bblock_p bp; +void vnm(bblock_p bp) { register line_p lnp; register entity_p rep; diff --git a/util/ego/cs/cs_vnm.h b/util/ego/cs/cs_vnm.h index 0fbce5d72..0c86a77e8 100644 --- a/util/ego/cs/cs_vnm.h +++ b/util/ego/cs/cs_vnm.h @@ -3,7 +3,8 @@ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". */ -extern vnm(); /* (bblock_p bp) +extern void vnm(bblock_p bp); + /* * Performs the valuenumbering algorithm on the basic * block in bp. */ diff --git a/util/ego/descr/powerpc.descr b/util/ego/descr/powerpc.descr index e59990ea1..5fb9bb628 100644 --- a/util/ego/descr/powerpc.descr +++ b/util/ego/descr/powerpc.descr @@ -102,7 +102,7 @@ register save costs: 17 -> (102,136) 18 -> (108,144) 19 -> (114,152) - 20 -> (120,160) + 20 -> (120,160) 21 -> (126,168) 22 -> (132,176) 23 -> (138,184) @@ -137,7 +137,7 @@ reduce sli if shift count larger than: 0 first time then space: addressing modes: op_ads op_adp op_lof op_ldf op_loi op_dch op_lpb -1 op_ads op_adp op_lof op_ldf op_loi op_dch op_lpb -1 -cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 +cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 lexical tresholds: 1 1 indirection limit: 8 diff --git a/util/ego/share/aux.h b/util/ego/share/aux.h index 6a6770469..db2d3f8da 100644 --- a/util/ego/share/aux.h +++ b/util/ego/share/aux.h @@ -36,7 +36,7 @@ line_p reg_mes(offset tmp, short size, int typ, int score); bool dom(bblock_p b1, bblock_p b2); /* * See if b1 dominates b2. Note that a - * block always * dominates itself. + * block always dominates itself. 
*/ bblock_p common_dom(bblock_p a, bblock_p b); /* diff --git a/util/ego/share/global.h b/util/ego/share/global.h index f97df2fa2..4121a5b85 100644 --- a/util/ego/share/global.h +++ b/util/ego/share/global.h @@ -40,13 +40,13 @@ extern int ws; /* word size */ #define UNKNOWN_SIZE (-1) -extern proc_p curproc; /* current procedure */ +extern proc_p curproc; /* current procedure */ -extern char *filename; /* name of current input file */ +extern char *filename; /* name of current input file */ extern lset mesregs; /* set of MES ms_reg pseudos */ -extern short time_space_ratio; /* 0 if optimizing for space only, +extern short time_space_ratio; /* 0 if optimizing for space only, * 100 if optimizing for time only, * else something 'in between'. */ diff --git a/util/ego/sp/sp.c b/util/ego/sp/sp.c index 8538d3dfb..051281d7e 100644 --- a/util/ego/sp/sp.c +++ b/util/ego/sp/sp.c @@ -65,9 +65,8 @@ STATIC void sp_machinit(void *vp) } fscanf(f,"%d",&globl_sp_allowed); } -comb_asps(l1,l2,b) - line_p l1,l2; - bblock_p b; + +STATIC void comb_asps(line_p l1, line_p l2, bblock_p b) { assert(INSTR(l1) == op_asp); assert(INSTR(l2) == op_asp); @@ -78,11 +77,7 @@ comb_asps(l1,l2,b) rm_line(l1,b); } - - - -stack_pollution(b) - bblock_p b; +STATIC void stack_pollution(bblock_p b) { /* For every pair of successive ASP instructions in basic * block b, try to combine the two into one ASP. 
@@ -134,8 +129,7 @@ stack_pollution(b) } while (asp != (line_p) 0); } -STATIC bool block_save(b) - bblock_p b; +STATIC bool block_save(bblock_p b) { register line_p l; @@ -159,10 +153,7 @@ STATIC bool block_save(b) return stack_diff >= 0; } - - -STATIC mark_pred(b) - bblock_p b; +STATIC void mark_pred(bblock_p b) { Lindex i; bblock_p x; @@ -176,12 +167,7 @@ STATIC mark_pred(b) } } - - - - -STATIC mark_unsave_blocks(p) - proc_p p; +STATIC void mark_unsave_blocks(proc_p p) { register bblock_p b; @@ -193,8 +179,7 @@ STATIC mark_unsave_blocks(p) } } - -void sp_optimize(void *vp) +STATIC void sp_optimize(void *vp) { proc_p p = vp; register bblock_p b; @@ -206,21 +191,13 @@ void sp_optimize(void *vp) } } - - - -main(argc,argv) - int argc; - char *argv[]; +int main(int argc, char *argv[]) { go(argc,argv,no_action,sp_optimize,sp_machinit,no_action); report("stack adjustments deleted",Ssp); exit(0); } - - - /***** DEBUGGING: debug_stack_pollution(p) From a7bb4ec4b1b4fd77c312d2b5ce2bc0f8d4569a0d Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 1 Mar 2018 13:19:38 -0500 Subject: [PATCH 47/55] Fixes for compiling ego with -DTRACE - In share/debug.c, undo my mistake in commit 9037d13 by changing vfprintf back to fprintf in OUTTRACE. - In ud/ud.c, move the trace output from stdout to stderr, because stdout has ego's output file, which becomes opt2's input file. If trace output goes to stdout, it gets prepended to the output file, and opt2 errors with "wrong input file". I also edit both build.lua files so ego depends on its header files; this part isn't needed for -DTRACE. One can now use -DTRACE by adding it to the cflags in both build.lua files. 
--- util/ego/build.lua | 1 + util/ego/share/build.lua | 3 +-- util/ego/share/debug.c | 2 +- util/ego/ud/ud.c | 41 ++++++++++++++++++++-------------------- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/util/ego/build.lua b/util/ego/build.lua index 864447550..78895f508 100644 --- a/util/ego/build.lua +++ b/util/ego/build.lua @@ -3,6 +3,7 @@ local function build_ego(name) name = name, srcs = { "./"..name.."/*.c" }, deps = { + "./"..name.."/*.h", "util/ego/share+lib", "modules/src/em_data+lib", "h+emheaders", diff --git a/util/ego/share/build.lua b/util/ego/share/build.lua index ab1068d2c..5ca714897 100644 --- a/util/ego/share/build.lua +++ b/util/ego/share/build.lua @@ -48,6 +48,7 @@ clibrary { "./init_glob.c", }, deps = { + "./*.h", "+classdefs_h", "+pop_push_h", "h+emheaders", @@ -57,5 +58,3 @@ clibrary { ["+cflags"] = {"-DVERBOSE", "-DNOTCOMPACT"} } } - - diff --git a/util/ego/share/debug.c b/util/ego/share/debug.c index 81080f7cf..56514d149 100644 --- a/util/ego/share/debug.c +++ b/util/ego/share/debug.c @@ -45,7 +45,7 @@ void error(const char *s, ...) 
void OUTTRACE(const char *s, int n) { fprintf(stderr,"> "); - vfprintf(stderr,s,n); + fprintf(stderr,s,n); fprintf(stderr,"\n"); } #endif diff --git a/util/ego/ud/ud.c b/util/ego/ud/ud.c index c0fe613fd..087337144 100644 --- a/util/ego/ud/ud.c +++ b/util/ego/ud/ud.c @@ -269,13 +269,13 @@ pr_localtab() { short i; local_p lc; - printf("LOCAL-TABLE (%d)\n\n",nrlocals); + fprintf(stderr,"LOCAL-TABLE (%d)\n\n",nrlocals); for (i = 1; i <= nrlocals; i++) { lc = locals[i]; - printf("LOCAL %d\n",i); - printf(" offset= %ld\n",lc->lc_off); - printf(" size= %d\n",lc->lc_size); - printf(" flags= %d\n",lc->lc_flags); + fprintf(stderr,"LOCAL %d\n",i); + fprintf(stderr,"\toffset= %ld\n",lc->lc_off); + fprintf(stderr,"\tsize= %d\n",lc->lc_size); + fprintf(stderr,"\tflags= %d\n",lc->lc_flags); } } @@ -284,12 +284,13 @@ pr_globals() dblock_p d; obj_p obj; - printf("GLOBALS (%d)\n\n",nrglobals); - printf("ID GLOBNR\n"); + fprintf(stderr,"GLOBALS (%d)\n\n",nrglobals); + fprintf(stderr,"ID\tGLOBNR\n"); for (d = fdblock; d != (dblock_p) 0; d = d->d_next) { for (obj = d->d_objlist; obj != (obj_p) 0; obj = obj->o_next) { if (obj->o_globnr != 0) { - printf("%d %d\n", obj->o_id,obj->o_globnr); + fprintf(stderr,"%d\t%d\n", + obj->o_id,obj->o_globnr); } } } @@ -302,20 +303,20 @@ pr_defs() short i; line_p l; - printf("DEF TABLE\n\n"); + fprintf(stderr,"DEF TABLE\n\n"); for (i = 1; i <= nrexpldefs; i++) { l = defs[i]; - printf("%d %s ",EXPL_TO_DEFNR(i), + fprintf(stderr,"%d\t%s ",EXPL_TO_DEFNR(i), &em_mnem[(INSTR(l)-sp_fmnem)*4]); switch(TYPE(l)) { case OPSHORT: - printf("%d\n",SHORT(l)); + fprintf(stderr,"%d\n",SHORT(l)); break; case OPOFFSET: - printf("%ld\n",OFFSET(l)); + fprintf(stderr,"%ld\n",OFFSET(l)); break; case OPOBJECT: - printf("%d\n",OBJ(l)->o_id); + fprintf(stderr,"%d\n",OBJ(l)->o_id); break; default: assert(FALSE); @@ -331,13 +332,13 @@ pr_set(name,k,s,n) { short i; - printf("%s(%d) = {",name,k); + fprintf(stderr,"%s(%d) =\t{",name,k); for (i = 1; i <= n; i++) { if (Cis_elem(i,s)) 
{ - printf("%d ",i); + fprintf(stderr,"%d ",i); } } - printf ("}\n"); + fprintf(stderr,"}\n"); } pr_blocks(p) @@ -347,7 +348,7 @@ pr_blocks(p) short n; for (b = p->p_start; b != 0; b = b->b_next) { - printf ("\n"); + fprintf(stderr,"\n"); n = b->b_id; pr_set("GEN",n,GEN(b),nrdefs); pr_set("KILL",n,KILL(b),nrdefs); @@ -361,10 +362,10 @@ pr_copies() { short i; - printf("\nCOPY TABLE\n\n"); + fprintf(stderr,"\nCOPY TABLE\n\n"); for (i = 1; i <= nrdefs; i++) { if (def_to_copynr[i] != 0) { - printf("%d %d\n",i,def_to_copynr[i]); + fprintf(stderr,"%d\t%d\n",i,def_to_copynr[i]); } } } @@ -376,7 +377,7 @@ pr_cblocks(p) short n; for (b = p->p_start; b != 0; b = b->b_next) { - printf ("\n"); + fprintf(stderr,"\n"); n = b->b_id; pr_set("CGEN",n,C_GEN(b),nrcopies); pr_set("CKILL",n,C_KILL(b),nrcopies); From f26259caac62b30bc710f6aa28c6e08253b6c5b2 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 2 Mar 2018 16:06:21 -0500 Subject: [PATCH 48/55] Check AAR earlier to prevent LOI/STI unknown size. In ego, the CS phase may convert a LAR/SAR to AAR LOI/STI so it can optimize multiple occurrences of AAR of the same array element. This conversion should not happen if it would LOI/STI a large or unknown size. cs_profit.c okay_lines() checked the size of each occurrence of AAR except the first. If the first AAR was the implicit AAR in a LAR/SAR, then the conversion happened without checking the size. For unknown size, this made a bad LOI -1 or STI -1. Fix by checking the size earlier: if a LAR/SAR has a bad size, then don't enter it as an AAR. This Modula-2 code showed the bug. Given M.def: DEFINITION MODULE M; TYPE S = SET OF [0..95]; PROCEDURE F(a: ARRAY OF S; i, j: INTEGER); END M. and M.mod: (*$R-*) IMPLEMENTATION MODULE M; FROM SYSTEM IMPORT ADDRESS, ADR; PROCEDURE G(s: S; p, q: ADDRESS; t: S); BEGIN s := s; p := p; q := q; t := t; END G; PROCEDURE F(a: ARRAY OF S; i, j: INTEGER); BEGIN G(a[i + j], ADR(a[i + j]), ADR(a[i + j]), a[i + j]) END F; END M. 
then the bug caused an error: $ ack -mlinuxppc -O3 -c.e M.mod /tmp/Ack_b357d.g, line 57: Argument range error The bug had put LOI -1 in the code, then em_decode got an error because -1 is out of range for LOI. Procedure F has 4 occurrences of `a[i + j]`. The size of `a[i + j]` is 96 bits, or 12 bytes, but the EM code hides the size in an array descriptor, so the size is unknown to CS. The pragma `(*$R-*)` disables a range check on `i + j` so CS can work. EM uses AAR for the 2 `ADR(a[i + j])` and LAR for the other 2 `a[i + j]`. EM pushes the arguments to G in reverse order, so the last `a[i + j]` in Modula-2 is the first LAR in EM. CS found 4 occurrences of AAR. The first AAR was an implicit AAR in LAR. Because of the bug, CS converted this LAR 4 to AAR 4 LOI -1. --- util/ego/cs/cs_profit.c | 31 ++++++++++++++++++------------- util/ego/cs/cs_profit.h | 6 ++++++ util/ego/cs/cs_vnm.c | 17 ++++++++++++----- 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/util/ego/cs/cs_profit.c b/util/ego/cs/cs_profit.c index 50cb708fd..8845aaa29 100644 --- a/util/ego/cs/cs_profit.c +++ b/util/ego/cs/cs_profit.c @@ -111,6 +111,21 @@ void cs_machinit(void *vp) choose_cset(f, &forbidden, sp_lmnem); } +bool may_become_aar(avail_p avp) +{ + /* Check whether it is desirable to treat a LAR or SAR as an + * AAR LOI/STI. This depends on the size of the array-elements. + */ + offset sz; + + sz = array_elemsize(avp->av_othird); + if (sz == UNKNOWN_SIZE) + return FALSE; + if (time_space_ratio < 50) + return sz <= AR_limit; + return TRUE; +} + STATIC bool sli_no_eliminate(line_p lnp) { /* Return whether the SLI-instruction in lnp is part of @@ -157,8 +172,10 @@ STATIC bool gains(avail_p avp) STATIC bool okay_lines(avail_p avp, occur_p ocp) { + /* Check whether all lines in this occurrence can in + * principle be eliminated; no stores, messages, calls etc. 
+ */ register line_p lnp, next; - offset sz; for (lnp = ocp->oc_lfirst; lnp != (line_p) 0; lnp = next) { next = lnp != ocp->oc_llast ? lnp->l_next : (line_p) 0; @@ -171,18 +188,6 @@ STATIC bool okay_lines(avail_p avp, occur_p ocp) return FALSE; } } - /* All lines in this occurrence can in principle be eliminated; - * no stores, messages, calls etc. - * We now check whether it is desirable to treat a LAR or a SAR - * as an AAR LOI/STI. This depends on the size of the array-elements. - */ - if (INSTR(ocp->oc_llast) == op_lar || INSTR(ocp->oc_llast) == op_sar) { - sz = array_elemsize(avp->av_othird); - if (sz == UNKNOWN_SIZE) return FALSE; - if (avp->av_instr == (byte) op_aar && time_space_ratio < 50) { - return sz <= AR_limit; - } - } return TRUE; } diff --git a/util/ego/cs/cs_profit.h b/util/ego/cs/cs_profit.h index 7ec5e3c17..43f2bade9 100644 --- a/util/ego/cs/cs_profit.h +++ b/util/ego/cs/cs_profit.h @@ -7,6 +7,12 @@ void cs_machinit(void *vp); /* (FILE *f) * Read phase-specific information from f. */ +bool may_become_aar(avail_p avp); + /* + * Return whether a LAR/SAR may become + * an AAR LOI/STI. + */ + bool desirable(avail_p avp); /* * Return whether it is desirable to eliminate * the recurrences of the expression in avp. diff --git a/util/ego/cs/cs_vnm.c b/util/ego/cs/cs_vnm.c index 4dbeb3df2..67507f805 100644 --- a/util/ego/cs/cs_vnm.c +++ b/util/ego/cs/cs_vnm.c @@ -50,11 +50,13 @@ STATIC void put_expensive_load(bblock_p bp, line_p lnp, line_p lfirst, STATIC void put_aar(bblock_p bp, line_p lnp, line_p lfirst, entity_p enp) { - /* Enp points to an ENARRELEM. We do as if its address was computed. */ - + /* Enter the implicit AAR in a LAR or SAR, where enp points to + * the ENARRELEM, and AAR computes its address. 
+ */ struct avail av; occur_p ocp; + assert(INSTR(lnp) == op_lar || INSTR(lnp) == op_sar); assert(enp->en_kind == ENARRELEM); av.av_instr = op_aar; av.av_size = ps; @@ -62,9 +64,14 @@ STATIC void put_aar(bblock_p bp, line_p lnp, line_p lfirst, entity_p enp) av.av_osecond = enp->en_index; av.av_othird = enp->en_adesc; - ocp = newoccur(lfirst, lnp, bp); - - av_enter(&av, ocp, TERNAIR_OP); + /* Before we enter an available AAR, we must check whether we + * may convert this LAR/SAR to AAR LOI/STI. This is so we + * don't LOI/STI a large or unknown size. + */ + if (may_become_aar(&av)) { + ocp = newoccur(lfirst, lnp, bp); + av_enter(&av, ocp, TERNAIR_OP); + } } STATIC void push_avail(avail_p avp, line_p lfirst) From b1b737ed6cac47b82f267a2c3e7d4a36d40a02cf Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 5 Mar 2018 13:32:06 -0500 Subject: [PATCH 49/55] Optimize procedures that do both a / b and a % b. Enable this in CS for PowerPC; disable it for all other machines. PowerPC has no remainder instruction; the back end uses division to compute remainder. If CS finds both a / b and a % b, then CS now rewrites a % b as a - b * (a / b) and computes a / b only once. This removes an extra division in the PowerPC code, so it saves both time and space. I have not considered whether to enable this optimization for other machines. It might be less useful in machines with a remainder instruction. Also, if a % b occurs before a / b, the EM code gets a DUP. PowerPC ncg handles this DUP well; other back ends might not. 
--- util/ego/cs/cs.h | 13 +++--- util/ego/cs/cs_avail.c | 5 +- util/ego/cs/cs_elim.c | 90 ++++++++++++++++++++++++++++++------ util/ego/cs/cs_partit.c | 5 +- util/ego/cs/cs_profit.c | 12 +++++ util/ego/cs/cs_profit.h | 5 ++ util/ego/cs/cs_vnm.c | 37 +++++++++++++++ util/ego/descr/em22.descr | 1 + util/ego/descr/em24.descr | 1 + util/ego/descr/em44.descr | 1 + util/ego/descr/i386.descr | 1 + util/ego/descr/i86.descr | 1 + util/ego/descr/m68020.descr | 1 + util/ego/descr/m68k2.descr | 1 + util/ego/descr/m68k4.descr | 1 + util/ego/descr/pdp.descr | 1 + util/ego/descr/powerpc.descr | 1 + util/ego/descr/sparc.descr | 1 + util/ego/descr/vax4.descr | 1 + 19 files changed, 156 insertions(+), 23 deletions(-) diff --git a/util/ego/cs/cs.h b/util/ego/cs/cs.h index c749427a5..7a2ebde7b 100644 --- a/util/ego/cs/cs.h +++ b/util/ego/cs/cs.h @@ -88,12 +88,13 @@ struct occur { #define UNAIR_OP 6 #define BINAIR_OP 7 #define TERNAIR_OP 8 -#define KILL_ENTITY 9 -#define SIDE_EFFECTS 10 -#define FIDDLE_STACK 11 -#define IGNORE 12 -#define HOPELESS 13 -#define BBLOCK_END 14 +#define REMAINDER 9 +#define KILL_ENTITY 10 +#define SIDE_EFFECTS 11 +#define FIDDLE_STACK 12 +#define IGNORE 13 +#define HOPELESS 14 +#define BBLOCK_END 15 struct avail { avail_p av_before; /* Ptr to earlier discovered expressions. 
*/ diff --git a/util/ego/cs/cs_avail.c b/util/ego/cs/cs_avail.c index 025132a2e..b28cc496a 100644 --- a/util/ego/cs/cs_avail.c +++ b/util/ego/cs/cs_avail.c @@ -54,6 +54,7 @@ STATIC bool same_avail(byte kind, avail_p avp1, avail_p avp2) case UNAIR_OP: return avp1->av_operand == avp2->av_operand; case BINAIR_OP: + case REMAINDER: if (commutative(avp1->av_instr & BMASK)) return avp1->av_oleft == avp2->av_oleft && avp1->av_oright == avp2->av_oright @@ -124,6 +125,7 @@ STATIC void copy_avail(int kind, avail_p src, avail_p dst) dst->av_operand = src->av_operand; break; case BINAIR_OP: + case REMAINDER: dst->av_oleft = src->av_oleft; dst->av_oright = src->av_oright; break; @@ -160,7 +162,8 @@ avail_p av_enter(avail_p avp, occur_p ocp, int kind) /* Remember local, if any, that holds result. */ if (avp->av_instr != (byte) INSTR(last)) { /* Only possible when instr is the implicit AAR in - * a LAR or SAR. + * a LAR or SAR, or the implicit DVI in an RMI, or + * DVU in RMU. */ ravp->av_saveloc = (entity_p) 0; } else { diff --git a/util/ego/cs/cs_elim.c b/util/ego/cs/cs_elim.c index 767517bd8..7dce0df09 100644 --- a/util/ego/cs/cs_elim.c +++ b/util/ego/cs/cs_elim.c @@ -101,12 +101,49 @@ STATIC void complete_aar(line_p lnp, int instr, valnum descr_vn) dlink(lnp, lindir); } +STATIC void complete_dv_as_rm(line_p lnp, avail_p avp, bool first) +{ + /* Complete a / b as a % b = a - b * (a / b). For the first + * occurrence, lnp must stack q, where q = a / b. We prepend a + * DUP to change postfix a b / into a b a b /, then append a + * MLI/MLU and SBI/SBU to make a b a b / * -. + * + * For later occurrences, lnp must stack a b q. We append the + * MLI/MLU and SBI/SBU. + */ + line_p dv, dup, ml, sb; + offset size; + bool s; + + size = avp->av_size; + s = (avp->av_instr == (byte) op_dvi); + assert(s || avp->av_instr == (byte) op_dvu); + if (first) { + /* Prepend our DUP to avp->av_found, to get before the + * DVI if lnp points to the LOL in DVI STL LOL. 
+ */ + dup = int_line(2 * size); + dup->l_instr = op_dup; + dv = avp->av_found; + dlink(dv->l_prev, dup); + dlink(dup, dv); + } + ml = int_line(size); + sb = int_line(size); + ml->l_instr = (s ? op_mli : op_mlu); + sb->l_instr = (s ? op_sbi : op_sbu); + dlink(sb, lnp->l_next); + dlink(ml, sb); + dlink(lnp, ml); +} + STATIC void replace(occur_p ocp, offset tmp, avail_p avp) { /* Replace the lines in the occurrence in ocp by a load of the * temporary with offset tmp. */ register line_p lol, first, last; + register int instr; assert(avp->av_size == ws || avp->av_size == 2*ws); @@ -119,13 +156,24 @@ STATIC void replace(occur_p ocp, offset tmp, avail_p avp) if (first->l_prev == (line_p) 0) ocp->oc_belongs->b_start = lol; dlink(first->l_prev, lol); - if (avp->av_instr == (byte) op_aar) { - /* There may actually be a LAR or a SAR instruction; in that - * case we have to complete the array-instruction. - */ - register int instr = INSTR(last); - - if (instr != op_aar) complete_aar(lol, instr, avp->av_othird); + instr = INSTR(last); + switch (avp->av_instr & 0377) { + case op_aar: + /* There may actually be a LAR or a SAR + * instruction; in that case we have to + * complete the array-instruction. + */ + if (instr != op_aar) + complete_aar(lol, instr, avp->av_othird); + break; + case op_dvi: + if (instr == op_rmi) + complete_dv_as_rm(lol, avp, FALSE); + break; + case op_dvu: + if (instr == op_rmu) + complete_dv_as_rm(lol, avp, FALSE); + break; } /* Throw away the by now useless lines. */ @@ -142,6 +190,7 @@ STATIC void append(avail_p avp, offset tmp) * within a lar or sar, we must first generate the aar. 
*/ register line_p stl, lol; + register int instr; assert(avp->av_size == ws || avp->av_size == 2*ws); @@ -154,13 +203,26 @@ STATIC void append(avail_p avp, offset tmp) dlink(stl, lol); dlink(avp->av_found, stl); - if (avp->av_instr == (byte) op_aar) { - register int instr = INSTR(avp->av_found); - - if (instr != op_aar) { - complete_aar(lol, instr, avp->av_othird); - avp->av_found->l_instr = op_aar; - } + instr = INSTR(avp->av_found); + switch (avp->av_instr & 0377) { + case op_aar: + if (instr != op_aar) { + complete_aar(lol, instr, avp->av_othird); + avp->av_found->l_instr = op_aar; + } + break; + case op_dvi: + if (instr == op_rmi) { + complete_dv_as_rm(lol, avp, TRUE); + avp->av_found->l_instr = op_dvi; + } + break; + case op_dvu: + if (instr == op_rmu) { + complete_dv_as_rm(lol, avp, TRUE); + avp->av_found->l_instr = op_dvu; + } + break; } } diff --git a/util/ego/cs/cs_partit.c b/util/ego/cs/cs_partit.c index e6d3b1fdf..b020ebcfa 100644 --- a/util/ego/cs/cs_partit.c +++ b/util/ego/cs/cs_partit.c @@ -125,8 +125,8 @@ STATIC struct { /* nop */ HOPELESS, XXX, XXX, XXX, XXX, /* rck */ BBLOCK_END, XXX, XXX, XXX, XXX, /* ret */ BBLOCK_END, XXX, XXX, XXX, XXX, -/* rmi */ BINAIR_OP, ARGW, ARGW, ARGW, ANY, -/* rmu */ BINAIR_OP, ARGW, ARGW, ARGW, ANY, +/* rmi */ REMAINDER, ARGW, ARGW, ARGW, ANY, +/* rmu */ REMAINDER, ARGW, ARGW, ARGW, ANY, /* rol */ BINAIR_OP, ARGW, WS, ARGW, ANY, /* ror */ BINAIR_OP, ARGW, WS, ARGW, ANY, /* rtt */ BBLOCK_END, XXX, XXX, XXX, XXX, @@ -203,6 +203,7 @@ bool stack_group(int instr) case UNAIR_OP: case BINAIR_OP: case TERNAIR_OP: + case REMAINDER: return TRUE; default: return FALSE; diff --git a/util/ego/cs/cs_profit.c b/util/ego/cs/cs_profit.c index 8845aaa29..a92028c36 100644 --- a/util/ego/cs/cs_profit.c +++ b/util/ego/cs/cs_profit.c @@ -25,6 +25,7 @@ STATIC cset forbidden; STATIC cset sli_counts; STATIC short LX_threshold; STATIC short AR_limit; +STATIC bool RM_to_DV; STATIC void get_instrs(FILE *f, cset *s_p) { @@ -97,6 +98,12 @@ void 
cs_machinit(void *vp) fscanf(f, "%d", &space); AR_limit = space; + /* Read whether to convert a remainder RMI/RMU to a division + * DVI/DVU using the formula a % b = a - b * (a / b). + */ + fscanf(f, "%d %d", &time, &space); + RM_to_DV = time_space_ratio >= 50 ? time : space; + /* Read for what counts we must not eliminate an SLI instruction * when it is part of an array-index computation. */ @@ -126,6 +133,11 @@ bool may_become_aar(avail_p avp) return TRUE; } +bool may_become_dv(void) +{ + return RM_to_DV; +} + STATIC bool sli_no_eliminate(line_p lnp) { /* Return whether the SLI-instruction in lnp is part of diff --git a/util/ego/cs/cs_profit.h b/util/ego/cs/cs_profit.h index 43f2bade9..3d1972d24 100644 --- a/util/ego/cs/cs_profit.h +++ b/util/ego/cs/cs_profit.h @@ -13,6 +13,11 @@ bool may_become_aar(avail_p avp); * an AAR LOI/STI. */ +bool may_become_dv(void); /* + * Return whether an RMI/RMU may become + * a DVI/DVU: a % b = a - (a / b * b). + */ + bool desirable(avail_p avp); /* * Return whether it is desirable to eliminate * the recurrences of the expression in avp. diff --git a/util/ego/cs/cs_vnm.c b/util/ego/cs/cs_vnm.c index 67507f805..435dd4658 100644 --- a/util/ego/cs/cs_vnm.c +++ b/util/ego/cs/cs_vnm.c @@ -20,6 +20,7 @@ #include "cs_kill.h" #include "cs_partit.h" #include "cs_getent.h" +#include "cs_profit.h" STATIC void push_entity(entity_p enp, line_p lfirst) { @@ -130,6 +131,37 @@ STATIC void push_ternair_op(bblock_p bp, line_p lnp, token_p tkp1, push_avail(av_enter(&av, ocp, TERNAIR_OP), tkp1->tk_lfirst); } +STATIC void push_remainder(bblock_p bp, line_p lnp, token_p tkp1, token_p tkp2) +{ + /* Enter the implicit division tkp1 / tkp2, + * then push the remainder tkp1 % tkp2. + */ + struct avail av; + occur_p ocp; + + assert(INSTR(lnp) == op_rmi || INSTR(lnp) == op_rmu); + av.av_size = avsize(lnp); + av.av_oleft = tkp1->tk_vn; + av.av_oright = tkp2->tk_vn; + + /* Check whether we may convert RMI/RMU to DVI/DVU. 
*/ + if (may_become_dv()) { + /* The division is DVI in RMI, or DVU in RMU. */ + av.av_instr = (INSTR(lnp) == op_rmi ? op_dvi : op_dvu); + + /* In postfix, a b % becomes a b a b / * -. We must + * keep a and b on the stack, so the first instruction + * to eliminate is lnp, not tkp1->tk_lfirst. + */ + ocp = newoccur(lnp, lnp, bp); + av_enter(&av, ocp, BINAIR_OP); + } + + av.av_instr = INSTR(lnp); + ocp = newoccur(tkp1->tk_lfirst, lnp, bp); + push_avail(av_enter(&av, ocp, REMAINDER), tkp1->tk_lfirst); +} + STATIC void fiddle_stack(line_p lnp) { /* The instruction in lnp does something to the valuenumber-stack. */ @@ -317,6 +349,11 @@ void vnm(bblock_p bp) Pop(&tk1, op13size(lnp)); push_ternair_op(bp, lnp, &tk1, &tk2, &tk3); break; + case REMAINDER: + Pop(&tk2, op22size(lnp)); + Pop(&tk1, op12size(lnp)); + push_remainder(bp, lnp, &tk1, &tk2); + break; case KILL_ENTITY: kill_direct(rep); break; diff --git a/util/ego/descr/em22.descr b/util/ego/descr/em22.descr index f995d631c..d9c39226b 100644 --- a/util/ego/descr/em22.descr +++ b/util/ego/descr/em22.descr @@ -78,6 +78,7 @@ cheap operations: -1 -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/em24.descr b/util/ego/descr/em24.descr index a95751170..cbe0ab5c3 100644 --- a/util/ego/descr/em24.descr +++ b/util/ego/descr/em24.descr @@ -78,6 +78,7 @@ cheap operations: -1 -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/em44.descr b/util/ego/descr/em44.descr index 117f26591..b6dbebba3 100644 --- a/util/ego/descr/em44.descr +++ b/util/ego/descr/em44.descr @@ -78,6 +78,7 @@ cheap operations: -1 -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden 
operators: -1 -1 diff --git a/util/ego/descr/i386.descr b/util/ego/descr/i386.descr index 264151a60..d5a2014bf 100644 --- a/util/ego/descr/i386.descr +++ b/util/ego/descr/i386.descr @@ -93,6 +93,7 @@ cheap operations: op_cii op_cui op_ciu op_cuu -1 op_cii op_cui op_ciu op_cuu -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/i86.descr b/util/ego/descr/i86.descr index 8be3ec23e..9b27cf840 100644 --- a/util/ego/descr/i86.descr +++ b/util/ego/descr/i86.descr @@ -93,6 +93,7 @@ cheap operations: op_cii op_cui op_ciu op_cuu -1 op_cii op_cui op_ciu op_cuu -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/m68020.descr b/util/ego/descr/m68020.descr index 9d2f46b2b..f568e00e2 100644 --- a/util/ego/descr/m68020.descr +++ b/util/ego/descr/m68020.descr @@ -102,6 +102,7 @@ cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: 1 2 3 -1 1 2 3 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/m68k2.descr b/util/ego/descr/m68k2.descr index 58e433db8..6b144cba0 100644 --- a/util/ego/descr/m68k2.descr +++ b/util/ego/descr/m68k2.descr @@ -99,6 +99,7 @@ addressing modes: op_adp op_lof op_ldf op_loi op_dch op_lpb -1 cheap operations: -1 -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/m68k4.descr b/util/ego/descr/m68k4.descr index 8e1da4c5e..6b9d23dfa 100644 --- a/util/ego/descr/m68k4.descr +++ b/util/ego/descr/m68k4.descr @@ -102,6 +102,7 @@ cheap operations: 
op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/pdp.descr b/util/ego/descr/pdp.descr index e73b3aaf1..ec8f3abca 100644 --- a/util/ego/descr/pdp.descr +++ b/util/ego/descr/pdp.descr @@ -92,6 +92,7 @@ cheap operations: op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif -1 op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/powerpc.descr b/util/ego/descr/powerpc.descr index 5fb9bb628..cf613e96c 100644 --- a/util/ego/descr/powerpc.descr +++ b/util/ego/descr/powerpc.descr @@ -141,6 +141,7 @@ cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: yes yes do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/sparc.descr b/util/ego/descr/sparc.descr index 978c39ba3..79c33decb 100644 --- a/util/ego/descr/sparc.descr +++ b/util/ego/descr/sparc.descr @@ -100,6 +100,7 @@ cheap operations: op_cuu op_ciu op_cui op_cii -1 op_cuu op_ciu op_cui op_cii -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/vax4.descr b/util/ego/descr/vax4.descr index 5a39ea759..beaf0c427 100644 --- a/util/ego/descr/vax4.descr +++ b/util/ego/descr/vax4.descr @@ -113,6 +113,7 @@ cheap operations: op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif op_cmi op_cmu op_cmf op_cms op_cmp -1 lexical thresholds: 1 1 indirection limit: 8 +convert remainder to division?: no 
no do not eliminate sli if index on shiftcounts: 1 2 3 -1 1 2 3 -1 forbidden operators: -1 -1 From b1badf18511ef050ed7bb38c7b52ea52c28dcb3c Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 7 Mar 2018 13:37:31 -0500 Subject: [PATCH 50/55] Add instructions like "lwarx". Extend manual. Add more page numbers from PowerPC version 2.01. Remove "xnop" not in 2.01, add "mtcr" from 2.01. Add "lwarx" and the other instructions from Book II. I did not try all the newly added instructions, but these seem to work: dcbt, dcbtst, icbi, isync, lwarx, stwcx., mftb, mftbu In man/powerpc_as.6 (not installed), add a summary of the registers and addressing modes (like in i386_as.6), describe short forms, update description of hi16/ha16, add CAVEATS about instructions that some processors can't run. --- mach/powerpc/as/mach2.c | 11 ++- mach/powerpc/as/mach3.c | 80 ++++++++++++++++------ mach/powerpc/as/mach4.c | 50 +++++++++++--- man/powerpc_as.6 | 145 ++++++++++++++++++++++++++++++++++------ 4 files changed, 230 insertions(+), 56 deletions(-) diff --git a/mach/powerpc/as/mach2.c b/mach/powerpc/as/mach2.c index 4065334e6..e8e61ea0c 100644 --- a/mach/powerpc/as/mach2.c +++ b/mach/powerpc/as/mach2.c @@ -47,11 +47,15 @@ %token OP_FRT_FRB_C %token OP_FRT_RA_D %token OP_FRT_RA_RB +%token OP_L %token OP_LEV %token OP_LIA %token OP_LIL %token OP_LI32 +%token OP_RA_RB +%token OP_RA_RB_TH %token OP_RA_RS_C +%token OP_RA_RS_RA_C %token OP_RA_RS_RB_C %token OP_RA_RS_RB_MB5_ME5_C %token OP_RA_RS_RB_MB6_C @@ -61,14 +65,14 @@ %token OP_RA_RS_SH6_MB6_C %token OP_RA_RS_UI %token OP_RA_RS_UI_CC +%token OP_RS %token OP_RS_FXM %token OP_RS_RA %token OP_RS_RA_D %token OP_RS_RA_DS %token OP_RS_RA_NB %token OP_RS_RA_RB -%token OP_RS_RA_RB_C -%token OP_RS_RA_RA_C +%token OP_RS_RA_RB_CC %token OP_RS_RB %token OP_RS_SPR %token OP_RS_SR @@ -104,4 +108,5 @@ %type c %type e16 negate16 u8 u7 u6 u5 u4 u2 u1 -%type opt_bh cr_opt nb ds bda bdl lia lil spr_num +%type opt_bh cr_opt nb ds bda bdl lia lil +%type 
spr_num tbr_num opt_tbr diff --git a/mach/powerpc/as/mach3.c b/mach/powerpc/as/mach3.c index 91b088a6a..99507087d 100644 --- a/mach/powerpc/as/mach3.c +++ b/mach/powerpc/as/mach3.c @@ -103,6 +103,10 @@ 0, OP_HA, 0, "ha16", 0, OP_LO, 0, "lo16", +/* The next page numbers are from PowerPC User Instruction Set + * Architecture, Book I, Version 2.01. + */ + /* Branch processor instructions (page 20) */ 0, OP_LIL, 18<<26 | 0<<1 | 0<<0, "b", @@ -128,7 +132,7 @@ 0, OP_BT_BA_BB, 19<<26 | 417<<1, "crorc", 0, OP_BF_BFA, 19<<26 | 0<<1, "mcrf", -/* extended mnemonics for bc, bcctr, bclr */ +/* extended mnemonics for bc, bcctr, bclr (page 144) */ 0, OP_BH, 19<<26 | 20<<21 | 528<<1 | 0<<0, "bctr", 0, OP_BH, 19<<26 | 20<<21 | 528<<1 | 1<<0, "bctrl", 0, OP_BDL, 16<<26 | 16<<21 | 0<<1 | 0<<0, "bdnz", @@ -186,7 +190,7 @@ 0, OP_BI_BH, 19<<26 | 12<<21 | 16<<1 | 0<<0, "btlr", 0, OP_BI_BH, 19<<26 | 12<<21 | 16<<1 | 1<<0, "btlrl", -/* extended m with condition in BI */ +/* extended m with condition in BI (page 146) */ 0, OP_BICR_BDL, 16<<26 | 12<<21 | 2<<16 | 0<<1 | 0<<0, "beq", 0, OP_BICR_BDA, 16<<26 | 12<<21 | 2<<16 | 1<<1 | 0<<0, "beqa", 0, OP_BICR_BH, 19<<26 | 12<<21 | 2<<16 | 528<<1 | 0<<0, "beqctr", @@ -284,7 +288,7 @@ 0, OP_BICR_BH, 19<<26 | 12<<21 | 3<<16 | 16<<1 | 0<<0, "bunlr", 0, OP_BICR_BH, 19<<26 | 12<<21 | 3<<16 | 16<<1 | 1<<0, "bunlrl", -/* extended m for cr logic */ +/* extended m for cr logic (page 147) */ 0, OP_BT_BT_BT, 19<<26 | 289<<1, "crset", 0, OP_BT_BT_BT, 19<<26 | 193<<1, "crclr", 0, OP_BT_BA_BA, 19<<26 | 449<<1, "crmove", @@ -377,12 +381,12 @@ 0, OP_RT_RA_C, 31<<26 | 0<<10 | 104<<1, "neg", 0, OP_RT_RA_C, 31<<26 | 1<<10 | 104<<1, "nego", -/* extended m for addition */ +/* extended m for addition (pages 153, 154) */ 0, OP_RT_RA_D, 14<<26, "la", 0, OP_RT_SI, 14<<26 | 0<<16, "li", 0, OP_RT_SI, 15<<26 | 0<<16, "lis", -/* extended m for subtraction */ +/* extended m for subtraction (pages 147, 148) */ 0, OP_RT_RB_RA_C, 31<<26 | 0<<10 | 40<<1, "sub", 0, OP_RT_RB_RA_C, 
31<<26 | 1<<10 | 40<<1, "subo", 0, OP_RT_RB_RA_C, 31<<26 | 0<<10 | 8<<1, "subc", @@ -418,7 +422,7 @@ 0, OP_BF_L_RA_UI, 10<<26, "cmpli", 0, OP_BF_L_RA_RB, 31<<26 | 32<<1, "cmpl", -/* extended m for comparison */ +/* extended m for comparison (page 149) */ 0, OP_BF_RA_SI, 11<<26 | 1<<21, "cmpdi", 0, OP_BF_RA_RB, 31<<26 | 1<<21 | 0<<1, "cmpd", 0, OP_BF_RA_UI, 10<<26 | 1<<21, "cmpldi", @@ -434,7 +438,7 @@ 0, OP_TO_RA_RB, 31<<26 | 68<<1, "td", 0, OP_TO_RA_RB, 31<<26 | 4<<1, "tw", -/* extended m for traps */ +/* extended m for traps (page 150) */ 0, OP_TOX_RA_RB, 31<<26 | 4<<21 | 68<<1, "tdeq", 0, OP_TOX_RA_SI, 2<<26 | 4<<21, "tdeqi", 0, OP_TOX_RA_RB, 31<<26 | 12<<21 | 68<<1, "tdge", @@ -518,11 +522,10 @@ 0, OP_RA_RS_C, 31<<26 | 58<<1, "cntlzd", 0, OP_RA_RS_C, 31<<26 | 26<<1, "cntlzw", -/* extended m using logic */ -0, OP_RS_RA_RA_C, 31<<26 | 444<<1, "mr", +/* extended m using logic (pages 153, 154) */ +0, OP_RA_RS_RA_C, 31<<26 | 444<<1, "mr", 0, OP, 24<<26, "nop", -0, OP_RS_RA_RA_C, 31<<26 | 124<<1, "not", -0, OP, 26<<26, "xnop", +0, OP_RA_RS_RA_C, 31<<26 | 124<<1, "not", /* page 69 */ 0, OP_RA_RS_SH6_MB6_C, 30<<26 | 0<<2, "rldicl", @@ -535,7 +538,7 @@ 0, OP_RA_RS_SH6_MB6_C, 30<<26 | 3<<2, "rldimi", 0, OP_RA_RS_SH5_MB5_ME5_C, 20<<26, "rlwimi", -/* extended m for doubleword rotation */ +/* extended m for doubleword rotation (page 151) */ 0, OP_clrlsldi, 30<<26 | 2<<2, "clrlsldi", 0, OP_clrldi, 30<<26 | 0<<2, "clrldi", 0, OP_clrrdi, 30<<26 | 1<<2, "clrrdi", @@ -548,7 +551,7 @@ 0, OP_sldi, 30<<26 | 1<<2, "sldi", 0, OP_srdi, 30<<26 | 0<<2, "srdi", -/* extended m for word rotation */ +/* extended m for word rotation (page 152) */ 0, OP_clrlslwi, 21<<26, "clrlslwi", 0, OP_clrlwi, 21<<26, "clrlwi", 0, OP_clrrwi, 21<<26, "clrrwi", @@ -573,21 +576,25 @@ 0, OP_RA_RS_RB_C, 31<<26 | 792<<1, "sraw", /* page 78 */ -0, OP_RS_SPR, 31<<26 | 467<<1, "mtspr", -0, OP_RT_SPR, 31<<26 | 339<<1, "mfspr", -0, OP_RS_FXM, 31<<26 | 0<<21 | 144<<1, "mtcrf", -0, OP_RT, 31<<26 | 0<<21 | 19<<1, 
"mfcr", +0, OP_RS_SPR, 31<<26 | 467<<1, "mtspr", +0, OP_RT_SPR, 31<<26 | 339<<1, "mfspr", +0, OP_RS_FXM, 31<<26 | 0<<20 | 144<<1, "mtcrf", +0, OP_RT, 31<<26 | 0<<20 | 19<<1, "mfcr", -/* extended m for special purpose registers */ +/* extended m for special purpose registers (page 153) */ 0, OP_RT, 31<<26 | 9<<16 | 0<<11 | 339<<1, "mfctr", 0, OP_RT, 31<<26 | 8<<16 | 0<<11 | 339<<1, "mflr", 0, OP_RT, 31<<26 | 1<<16 | 0<<11 | 339<<1, "mfxer", -0, OP_RT, 31<<26 | 9<<16 | 0<<11 | 467<<1, "mtctr", -0, OP_RT, 31<<26 | 8<<16 | 0<<11 | 467<<1, "mtlr", -0, OP_RT, 31<<26 | 1<<16 | 0<<11 | 467<<1, "mtxer", +0, OP_RS, 31<<26 | 9<<16 | 0<<11 | 467<<1, "mtctr", +0, OP_RS, 31<<26 | 8<<16 | 0<<11 | 467<<1, "mtlr", +0, OP_RS, 31<<26 | 1<<16 | 0<<11 | 467<<1, "mtxer", + +/* extended m for condition register (page 154) */ +0, OP_RS, 31<<26 | 0<<20 | 255<<12 | 144<<1, "mtcr", /* Floating point instructions (page 83) */ +/* page 98 */ 0, OP_FRT_RA_D, 48<<26, "lfs", 0, OP_FRT_RA_RB, 31<<26 | 535<<1, "lfsx", 0, OP_FRT_RA_D, 49<<26, "lfsu", @@ -606,6 +613,7 @@ 0, OP_FRS_RA_RB, 31<<26 | 759<<1, "stfdux", 0, OP_FRS_RA_RB, 31<<26 | 983<<1, "stfiwx", +/* page 104 */ 0, OP_FRT_FRB_C, 63<<26 | 72<<1, "fmr", 0, OP_FRT_FRB_C, 63<<26 | 40<<1, "fneg", 0, OP_FRT_FRB_C, 63<<26 | 264<<1, "fabs", @@ -629,6 +637,7 @@ 0, OP_FRT_FRA_FRC_FRB_C, 63<<26 | 30<<1, "fnmsub", 0, OP_FRT_FRA_FRC_FRB_C, 59<<26 | 30<<1, "fnmsubs", +/* page 109 */ 0, OP_FRT_FRB_C, 63<<26 | 12<<1, "frsp", 0, OP_FRT_FRB_C, 63<<26 | 814<<1, "fctid", 0, OP_FRT_FRB_C, 63<<26 | 815<<1, "fctidz", @@ -652,4 +661,31 @@ 0, OP_FRT_FRB_C, 63<<26 | 26<<1, "frsqrte", 0, OP_FRT_FRA_FRC_FRB_C, 63<<26 | 23<<1, "fsel", -/* page 98 */ +/* Storage control instructions (Book II, page 15) */ + +/* Book II, page 17 */ +0, OP_RA_RB, 31<<26 | 982<<1, "icbi", +0, OP_RA_RB_TH /* page 35 */, 31<<26 | 278<<1, "dcbt", +0, OP_RA_RB, 31<<26 | 246<<1, "dcbtst", +0, OP_RA_RB, 31<<26 | 1014<<1, "dcbz", +0, OP_RA_RB, 31<<26 | 54<<1, "dcbst", +0, OP_RA_RB, 31<<26 | 
86<<1, "dcbf", +0, OP, 19<<26 | 150<<1, "isync", +0, OP_RT_RA_RB, 31<<26 | 20<<1, "lwarx", +0, OP_RT_RA_RB, 31<<26 | 84<<1, "ldarx", +0, OP_RS_RA_RB_CC, 31<<26 | 150<<1 | 1<<0, "stwcx", +0, OP_RS_RA_RB_CC, 31<<26 | 214<<1 | 1<<0, "stdcx", +0, OP_L, 31<<26 | 598<<1, "sync", +0, OP, 31<<26 | 1<<21 | 598<<1, "lwsync", +0, OP, 31<<26 | 2<<21 | 598<<1, "ptesync", +0, OP, 31<<26 | 854<<1, "eieio", + +/* Time base (Book II, page 30) */ + +0, OP_RT_TBR, 31<<26 | 371<<1, "mftb", +0, OP_RT, 31<<26 | 8<<11 | 13<<16 | 371<<1, "mftbu", + +/* External control (Book II, page 33) */ + +0, OP_RT_RA_RB, 31<<26 | 310<<1, "eciwx", +0, OP_RS_RA_RB, 31<<26 | 438<<1, "ecowx", diff --git a/mach/powerpc/as/mach4.c b/mach/powerpc/as/mach4.c index 8a0cca9de..b344ba8ce 100644 --- a/mach/powerpc/as/mach4.c +++ b/mach/powerpc/as/mach4.c @@ -42,7 +42,23 @@ operation | OP_FRT_RA_D FPR ',' e16 '(' GPR ')' { emit_hl($1 | ($2<<21) | ($6<<16) | $4); } | OP_FRT_RA_RB FPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_FRT_C c FPR { emit4($1 | $2 | ($3<<21)); } - | OP_RA_RS_C c GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16)); } + | OP_L { emit4($1); } + | OP_L u2 { emit4($1 | ($2<<21)); } + | OP_LEV { emit4($1); } + | OP_LEV u7 { emit4($1 | ($2<<5)); } + | OP_RA_RB GPR ',' GPR + { emit4($1 | ($2<<16) | ($4<<11)); } + | OP_RA_RB_TH GPR ',' GPR opt_bh + { emit4($1 | $5 | ($2<<16) | ($4<<11)); } + /* + * For instructions with "mnemonic RS, RA, ..." + * OP_RA_RS_... swaps RS and RA to (RA<<21) || (RS<<16) + * OP_RS_RA_... 
keeps RS and RA as (RS<<21) || (RA<<16) + */ + | OP_RA_RS_C c GPR ',' GPR + { emit4($1 | $2 | ($5<<21) | ($3<<16)); } + | OP_RA_RS_RA_C c GPR ',' GPR + { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); } | OP_RA_RS_RB_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); } | OP_RA_RS_RB_MB5_ME5_C c GPR ',' GPR ',' GPR ',' u5 ',' u5 @@ -75,20 +91,19 @@ operation | OP_RT_RB_RA_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($3<<21) | ($7<<16) | ($5<<11)); } | OP_RT_SI GPR ',' e16 { emit_hl($1 | ($2<<21) | $4); } | OP_RT_SPR GPR ',' spr_num { emit4($1 | ($2<<21) | ($4<<11)); } + | OP_RT_TBR GPR opt_tbr { emit4($1 | ($2<<21) | ($3<<11)); } + | OP_RS GPR { emit4($1 | ($2<<21)); } | OP_RS_FXM u7 ',' GPR { emit4($1 | ($4<<21) | ($2<<12)); } | OP_RS_RA_D GPR ',' e16 '(' GPR ')' { emit_hl($1 | ($2<<21) | ($6<<16) | $4); } | OP_RS_RA_DS GPR ',' ds '(' GPR ')' { emit_hl($1 | ($2<<21) | ($6<<16) | $4); } | OP_RS_RA_NB GPR ',' GPR ',' nb { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_RS_RA_RB GPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } - | OP_RS_RA_RB_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); } - | OP_RS_RA_RA_C c GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); } + | OP_RS_RA_RB_CC C GPR ',' GPR ',' GPR { emit4($1 | ($3<<21) | ($5<<16) | ($7<<11)); } | OP_RS_SPR spr_num ',' GPR { emit4($1 | ($4<<21) | ($2<<11)); } | OP_TO_RA_RB u5 ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_TO_RA_SI u5 ',' GPR ',' e16 { emit_hl($1 | ($2<<21) | ($4<<16) | $6); } | OP_TOX_RA_RB GPR ',' GPR { emit4($1 | ($2<<16) | ($4<<11)); } | OP_TOX_RA_SI GPR ',' e16 { emit_hl($1 | ($2<<16) | $4); } - | OP_LEV { emit4($1); } - | OP_LEV u7 { emit4($1 | ($2<<5)); } | OP_LIA lia { emit4($1 | $2); } | OP_LIL lil { emit4($1 | $2); } | OP_LI32 li32 /* emitted in subrule */ @@ -298,7 +313,7 @@ u2 } ; -/* Optional comma, branch hint. */ +/* Optional comma, branch hint (or touch hint). 
*/ opt_bh : /* nothing */ { $$ = 0; } | ',' u2 { $$ = ($2<<11); } @@ -409,13 +424,28 @@ lia } ; +/* + * Instructions "mfspr", "mtspr", and "mftb" encode the 10-bit special + * purpose register (spr) or time base register (tbr) by swapping the + * low 5 bits with the high 5 bits. The value from an SPR token has + * already been swapped. + */ + spr_num - : SPR { $$ = $1; } - | absexp + : SPR { $$ = $1; } + | tbr_num { $$ = $1; } + ; + +opt_tbr + : /* nothing */ { $$ = 8 | (12<<5); } + | ',' tbr_num { $$ = $2; } + ; + +tbr_num + : absexp { if (($1 < 0) || ($1 > 0x3ff)) - serror("spr number out of range"); - /* mfspr, mtspr swap the low and high 5 bits */ + serror("10-bit unsigned value out of range"); $$ = ($1 >> 5) | (($1 & 0x1f) << 5); } ; diff --git a/man/powerpc_as.6 b/man/powerpc_as.6 index 8198d6bce..f6bb90818 100644 --- a/man/powerpc_as.6 +++ b/man/powerpc_as.6 @@ -1,33 +1,136 @@ -.TH POWERPC_AS 1 +.TH POWERPC_AS 1 2018-03-07 .ad .SH NAME powerpc_as \- assembler for PowerPC - .SH SYNOPSIS as [options] argument ... - .SH DESCRIPTION This assembler is made with the general framework described in \fIuni_ass\fP(6). - +.PP +It can assemble the instructions from Book I and Book II of PowerPC +version 2.01. +This includes the branch, integer, and floating point instructions +from Book I; and the cache, synchronization, and time base +instructions from Book II. +.PP +There is no support for other instructions, such as supervisor-mode +instructions or vector instructions. +There is some support for 64-bit integer instructions, but the +assembler only has 32-bit symbols. .SH SYNTAX -Most 32-bit integer and floating point instructions are supported, but not many -short form instructions. Instructions which take 16-bit operands can additionally -use the following special functions: - -.IP hi16[value], ha16[value] -Returns the high half of the value of the expression; if the value is not absolute, -also generates the appropriate fixup. 
Use of either of these \fImust\fR be followed, -in the next instruction, by the corresponding use of \fBlo16[]\fR. Use \fBhi16[]\fR -if the low half is going to interpret its payload as an unsigned value, and -\fBha16[]\fR if it will be interpreted as a signed value (so that the high half can -be adjusted to match). - -.IP lo16[] -Returns the low half of the value of the expression. No fixup is generated. Use of -\fBlo16[]\fR must come in the instruction immediately after a use of \fBhi16[]\fR or -\fBha16[]\fR. - +.SS general purpose registers +There are 32 GPRs from \fBr0\fP to \fBr31\fP. +In this assembler, \fBsp\fP is an alias for \fBr1\fP, and \fBfp\fP is +an alias for \fBr2\fP, because \fIack\fP uses r1 as the stack pointer +and r2 as the frame pointer. +Other compilers don't use r2 as the frame pointer. +.PP +GPR syntax requires a register name, not a number. +For example, \(oqaddi\ r5,\ r4,\ 1\(cq works, but +\(oqaddi\ 5,\ 4,\ 1\(cq is a syntax error. +.PP +Certain instructions ignore the contents of \fBr0\fP and use zero. +This happens when using r0 as the second operand of \fIaddi\fP or +\fIaddis\fP, or when addressing \(oqexpr(r0)\(cq or +\(oqr0,\ gpr\(cq. +The syntax is still the name r0, not the number 0. +.SS floating point registers +There are 32 FPRs from \fBf0\fP to \fBf31\fP. +Each FPR has 64 bits and can hold a single-precision or +double-precision number. +FPR syntax requires a register name, not a number. +.SS special purpose registers +The three named SPRs are \fBctr\fP (count register), \fBlr\fP (link +register), and \fBxer\fP (exception register). +\(oqmfspr\(cq and \(oqmtspr\(cq allow these names or a number. +.SS condition register +There is a 32-bit condition register, where bit 0 is most significant, +and bit 31 is least significant. +This gets split into 8 registers of 4 bits each, from \fBcr0\fP (with +bits 0 to 3) to \fBcr7\fP (with bits 28 to 31). 
+Some instructions use the names cr0 to cr7, others use a bit numbered +0 to 31, and others use all 32 bits. +.SS addressing modes +\(oqexpr(gpr)\(cq addresses \fIexpr\fP + the contents of \fIgpr\fP, +except that \(oqexpr(r0)\(cq addresses \fIexpr\fP\ +\ 0. +A few instructions, like \(oqstwu\(cq, also update \fIgpr\fP by +setting it to the address. +.PP +\(oqgprA,\ gprB\(cq in certain instructions addresses the contents of +\fIgprA\fP + the contents of \fIgprB\fP, except that \(oqr0,\ gprB\(cq +addresses 0\ +\ the contents of \fIgprB\fP. +.SS 16-bit operands +Some instructions have a 16-bit operand. +This can be a bare \fIexpr\fP (which must fit signed or unsigned +16 bits), or it can be one of these special functions: +.IP "hi16[expr], ha16[expr]" +Returns the high half of the 32-bit value of the expression. +If the low half is negative (from 0x8000 to 0xffff), +then \fBha16[]\fP adjusts the high half by adding 1. +Use \fBhi16[]\fP if the instruction with \fBlo16[]\fP is going to +interpret its operand as an unsigned value, or \fBha16[]\fP if it will +interpret it as signed. +.IP +If \fIexpr\fP is not absolute, then the assembler must generate a +fixup for the linker. +The fixup only works if the instruction is +\(oqaddis gpr, r0, hx16[expr]\(cq or \(oqlis gpr, hx16[expr]\(cq. +.IP lo16[expr] +Returns the low half of the 32-bit value of the expression. +.SS short forms +Some instructions have short forms using extended mnemonics (or +simplified mnemonics) like \fIli\fP, \fIsrwi\fP, and many others. +.IP "li r6, 789" +is short for: addi r6, r0, 789 +.IP "srwi r3, r4, 2" +is short for: rlwinm r3, r4, 30, 2, 31 +.PP +This assembler doesn't support extended mnemonics with branch +prediction, such as \fIblt+\fP or \fIbne-\fP. +It always parses \(oq+\(cq and \(oq-\(cq as operators, +never as part of a mnemonic. +.SH EXAMPLES +There are two ways to load r3 with _symbol\ =\ 0x1234abcd. +One way is +.PP +.nf + lis r3, hi16[_symbol] + ori r3, r3, lo16[_symbol] ! 
r3 = 0x12340000 | 0x0000abcd +.fi +.PP +The other way is +.PP +.nf + lis r3, ha16[_symbol] + addi r3, r3, lo16[_symbol] ! r3 = 0x12350000 + 0xffffabcd +.fi +.PP +The next code adds 1 to a global variable. +.PP +.nf + lis r3, ha16[_var] + lwz r4, lo16[_var](r3) + addi r4, r4, 1 + stw r4, lo16[_var](r3) +.fi .SH "SEE ALSO" uni_ass(6), ack(1) +.PP +Freescale Semiconductor, \fIProgramming Environments Manual for 32-Bit +Implementations of the PowerPC Architecture\fP, Rev. 3, September 2005. +.PP +IBM, \fIPowerPC User Instruction Set Architecture, Book I\fP, Version +2.01, September 2003. +.PP +IBM, \fIPowerPC Virtual Environment Architecture, Book II\fP, Version +2.01, December 2003. +.SH CAVEATS +Beware that not every processor can run every instruction. +The 32-bit processors can't run 64-bit instructions like \fIlwa\fP, +\fIstd\fP, and \fIfctid\fP. +The PowerPC 601 can't run \fIstfiwx\fP, nor \fIfres\fP, \fIfrsqrte\fP, +\fIfsel\fP. +Many models, like the PowerPC G4, can't run \fIfsqrt\fP nor +\fIfsqrts\fP. From 0720671f7ab6bd2c8a60d2138b4befb52a1862db Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 8 Mar 2018 11:49:40 -0500 Subject: [PATCH 51/55] Fix wr_ranlib() for big-endian machines. With this change, I built and ran ack on a big-endian PowerPC Linux machine. I used gcc 4.9.4 to build ack, and I only built the linuxppc back end. Before this change, wr_ranlib() corrupted a value by changing it from 0x66 to 0x66000066. This value was too big, so led made a fatal error, "bad ranlib string offset". 
--- modules/src/object/wr_ranlib.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/modules/src/object/wr_ranlib.c b/modules/src/object/wr_ranlib.c index 91274d71c..b515ffb3b 100644 --- a/modules/src/object/wr_ranlib.c +++ b/modules/src/object/wr_ranlib.c @@ -10,16 +10,27 @@ wr_ranlib(fd, ran, cnt1) struct ranlib *ran; long cnt1; { - { - register long cnt = cnt1; - register struct ranlib *r = ran; - register char *c = (char *) r; + struct ranlib *r; + long cnt, val; + char *c; - while (cnt--) { - put4(r->ran_off,c); c += 4; - put4(r->ran_pos,c); c += 4; - r++; - } + /* + * We overwrite the structs in r with the bytes in c, so we + * don't need to allocate another buffer. + * + * put4(r->ran_off, c) can fail if r->ran_off and c overlap in + * memory, if this is a big-endian machine. It tries to swap + * the bytes from big to little endian, but overwrites some + * bytes before reading them. To prevent this, we must copy + * each value before we overwrite it. + */ + r = ran; + c = (char *)r; + cnt = cnt1; + while (cnt--) { + val = r->ran_off; put4(val, c); c += 4; + val = r->ran_pos; put4(val, c); c += 4; + r++; } wr_bytes(fd, (char *) ran, cnt1 * SZ_RAN); } From 860df1b067051d59e7db02d68522adc5b859c7db Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 8 Mar 2018 12:04:02 -0500 Subject: [PATCH 52/55] Read from new, not old, buffer after realloc. This got caught by MALLOC_OPTIONS=S in OpenBSD. The B compiler filled the buffer while compiling hilo.b. Then realloc moved the buffer and unmapped the old buffer. The compiler tried to read the old buffer and segfaulted. 
--- modules/src/em_code/insert.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/modules/src/em_code/insert.c b/modules/src/em_code/insert.c index 36950c3ea..00c628dcb 100644 --- a/modules/src/em_code/insert.c +++ b/modules/src/em_code/insert.c @@ -99,20 +99,19 @@ C_out_parts(pp) } else { /* copy the chunk to output */ -#ifdef INCORE - register char *s = C_BASE + pp->pp_begin; - char *se = C_BASE + pp->pp_end; - - while (s < se) { - put(*s++); - } -#else register long b = pp->pp_begin; while (b < pp->pp_end) { +#ifdef INCORE + /* C_BASE is not constant, put() may + move C_BASE, so each iteration of + this loop must read C_BASE again. + */ + put(C_BASE[b++]); +#else put(getbyte(b++)); - } #endif + } } prev = pp; pp = pp->pp_next; From 12643f17401f031f7301af9c373d44b51bbe28ab Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 8 Mar 2018 18:51:07 -0500 Subject: [PATCH 53/55] Solve some gcc warnings in ego. Some of these are from gcc -Wimplicit --- util/ego/cs/cs.c | 6 ++---- util/ego/cs/cs_debug.c | 3 ++- util/ego/cs/cs_profit.c | 1 + util/ego/share/files.c | 4 +++- util/ego/share/files.h | 4 ++-- util/ego/share/get.c | 2 +- util/ego/share/go.c | 2 +- util/ego/share/go.h | 2 +- util/ego/share/types.h | 2 +- 9 files changed, 14 insertions(+), 12 deletions(-) diff --git a/util/ego/cs/cs.c b/util/ego/cs/cs.c index dfcccbbf7..068ddc3dc 100644 --- a/util/ego/cs/cs.c +++ b/util/ego/cs/cs.c @@ -25,7 +25,7 @@ int Scs; /* Number of optimizations found. 
*/ -STATIC cs_clear() +STATIC void cs_clear() { clr_avails(); clr_entities(); @@ -74,9 +74,7 @@ STATIC void cs_optimize(void *vp) } } -main(argc, argv) - int argc; - char *argv[]; +int main(int argc, char *argv[]) { Scs = 0; go(argc, argv, no_action, cs_optimize, cs_machinit, no_action); diff --git a/util/ego/cs/cs_debug.c b/util/ego/cs/cs_debug.c index 07890395b..3d5509ddc 100644 --- a/util/ego/cs/cs_debug.c +++ b/util/ego/cs/cs_debug.c @@ -11,6 +11,7 @@ #include "cs.h" #include "cs_aux.h" #include "cs_avail.h" +#include "cs_debug.h" #include "cs_entity.h" #ifdef VERBOSE @@ -48,7 +49,7 @@ STATIC void showinstr(line_p lnp) fprintf(stderr,"\n"); } -SHOWOCCUR(occur_p ocp) +void SHOWOCCUR(occur_p ocp) { /* Shows all instructions in an occurrence. */ diff --git a/util/ego/cs/cs_profit.c b/util/ego/cs/cs_profit.c index a92028c36..2efabcb03 100644 --- a/util/ego/cs/cs_profit.c +++ b/util/ego/cs/cs_profit.c @@ -14,6 +14,7 @@ #include "../share/cset.h" #include "../share/lset.h" #include "cs.h" +#include "cs_alloc.h" #include "cs_aux.h" #include "cs_debug.h" #include "cs_avail.h" diff --git a/util/ego/share/files.c b/util/ego/share/files.c index e45f9b7fb..2dd10b25f 100644 --- a/util/ego/share/files.c +++ b/util/ego/share/files.c @@ -9,9 +9,11 @@ */ #include +#include "types.h" +#include "debug.h" #include "files.h" -struct files* findfiles(int argc, const char** argv) +struct files* findfiles(int argc, char * const *argv) { static struct files files; diff --git a/util/ego/share/files.h b/util/ego/share/files.h index 46b19917a..ab2076ea1 100644 --- a/util/ego/share/files.h +++ b/util/ego/share/files.h @@ -33,11 +33,11 @@ struct files /* The rest of the arguments. 
*/ - const char** argv; + char * const *argv; int argc; }; -struct files* findfiles(int argc, const char** argv); +struct files* findfiles(int argc, char * const *argv); FILE *openfile(const char *name, const char *mode); /* diff --git a/util/ego/share/get.c b/util/ego/share/get.c index 94c7aabe2..a433b0946 100644 --- a/util/ego/share/get.c +++ b/util/ego/share/get.c @@ -285,7 +285,7 @@ dblock_p getdtable(const char *dname) /* getbblocks */ -STATIC argstring(short length, argb_p abp) +STATIC void argstring(short length, argb_p abp) { while (length--) { diff --git a/util/ego/share/go.c b/util/ego/share/go.c index 9a2107d3d..0ccd3c6e9 100644 --- a/util/ego/share/go.c +++ b/util/ego/share/go.c @@ -42,7 +42,7 @@ STATIC void mach_init(char* machfile, void (*phase_machinit)(void *)) fclose(f); } -void go(int argc, const char** argv, +void go(int argc, char * const *argv, void (*initialize)(void *), void (*optimize)(void *), void (*phase_machinit)(void *), void (*proc_flag)(void *)) { diff --git a/util/ego/share/go.h b/util/ego/share/go.h index 3bb8c1f54..55f1b48e8 100644 --- a/util/ego/share/go.h +++ b/util/ego/share/go.h @@ -22,7 +22,7 @@ * and 'optimize' is called with the current procedure * as parameter. 
*/ -void go(int argc, const char** argv, +void go(int argc, char * const *argv, void (*initialize)(void *null), void (*optimize)(void *), /* (proc_p *p) */ void (*phase_machinit)(void *), /* (FILE *f) */ diff --git a/util/ego/share/types.h b/util/ego/share/types.h index cae4d6074..cabc5818d 100644 --- a/util/ego/share/types.h +++ b/util/ego/share/types.h @@ -46,7 +46,7 @@ typedef struct elemholder *lset; typedef struct bitvector *cset; typedef elem_p Lindex; typedef short Cindex; -typedef char *Lelem_t; +typedef void *Lelem_t; typedef short Celem_t; typedef union pext_t *pext_p; From ebba76e08ff15aa2621319513239a51e6caaf0a7 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 11 Mar 2018 20:10:13 -0400 Subject: [PATCH 54/55] Don't read INSTR(l) after oldline(l) frees it. This bug got in my way while I was looking for another read-after-free bug in the CS phase. --- util/ego/ca/ca.c | 1 + 1 file changed, 1 insertion(+) diff --git a/util/ego/ca/ca.c b/util/ego/ca/ca.c index 095736665..1bf73d24a 100644 --- a/util/ego/ca/ca.c +++ b/util/ego/ca/ca.c @@ -72,6 +72,7 @@ proc_p* p_out; { /* register message without arguments */ oldline(l); + continue; } else { From 85fcbde22fe1068e9ef2d73a57d8a813e24eef12 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 12 Mar 2018 20:58:31 -0400 Subject: [PATCH 55/55] Check LOI expressions to prevent a read after free. CS eliminates outer expressions before inner ones, as `x * y * z` before `x * y`. It does this by reversing the order of expressions in the code. This almost always works, but it sometimes doesn't work if a STI changes the value number of a LOI. In code like `expr1 LOI expr2 STI expr2 LOI`, CS might eliminate the inner `expr2` before the outer `expr2 LOI`. This caused a read after free because the occurrence of `expr2 LOI` pointed to the eliminated lines of `expr2`. This bug went unnoticed until my recent changes caused CS to crash with a double free. 
I did not get the crash in OpenBSD, but I saw the crash in Travis, then David Given reproduced the crash in Linux. See the discussion in https://github.com/davidgiven/ack/pull/73 --- util/ego/cs/cs_elim.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/util/ego/cs/cs_elim.c b/util/ego/cs/cs_elim.c index 7dce0df09..b83371416 100644 --- a/util/ego/cs/cs_elim.c +++ b/util/ego/cs/cs_elim.c @@ -142,8 +142,9 @@ STATIC void replace(occur_p ocp, offset tmp, avail_p avp) /* Replace the lines in the occurrence in ocp by a load of the * temporary with offset tmp. */ - register line_p lol, first, last; - register int instr; + avail_p ravp; + line_p lol, first, last; + int instr; assert(avp->av_size == ws || avp->av_size == 2*ws); @@ -176,6 +177,33 @@ STATIC void replace(occur_p ocp, offset tmp, avail_p avp) break; } + /* Some occurrence rocp of an expression before avp might have + * rocp->oc_lfirst == first. If so, then we must set + * rocp->oc_lfirst = lol before we throw away first. + * + * This is almost not possible, but it can happen in code with + * expr1 LOI expr2 STI expr2 LOI, where the STI causes both + * LOIs to have the same value number. Then the first LOI + * might come before the first expr2, so we might replace + * expr2 before we replace expr2 LOI. Then the occurrence of + * expr2 LOI must not point to the eliminated lines of expr2. + */ + for (ravp = avp->av_before; ravp != (avail_p) 0; + ravp = ravp->av_before) { + /* We only check LOI expressions. */ + if (ravp->av_instr == op_loi) { + occur_p rocp; + Lindex i; + + for (i = Lfirst(ravp->av_occurs); i != (Lindex) 0; + i = Lnext(i, ravp->av_occurs)) { + rocp = occ_elem(i); + if (rocp->oc_lfirst == first) + rocp->oc_lfirst = lol; + } + } + } + /* Throw away the by now useless lines. */ remove_lines(first, last); }