From 03b067e1d59e4663e52489fd3ec78f3a11759dd6 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sat, 17 Sep 2016 23:55:55 -0400 Subject: [PATCH 01/25] Add the missing .lar4 and .sar4 for powerpc. Inspired by the sparc code (mach/sparc/libem/lar.s). My powerpc code might still have bugs, but it's enough for examples/hilo.mod to work. May need to 'make clean' or touch a build.lua file, so ackbuilder can notice the new lar4.s and sar4.s files and build them. --- mach/powerpc/libem/aar4.s | 4 ++++ mach/powerpc/libem/lar4.s | 43 +++++++++++++++++++++++++++++++++++++ mach/powerpc/libem/sar4.s | 45 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+) create mode 100644 mach/powerpc/libem/lar4.s create mode 100644 mach/powerpc/libem/sar4.s diff --git a/mach/powerpc/libem/aar4.s b/mach/powerpc/libem/aar4.s index 33b67e0dc..6a48aa058 100644 --- a/mach/powerpc/libem/aar4.s +++ b/mach/powerpc/libem/aar4.s @@ -13,6 +13,10 @@ ! r3 = ptr to descriptor ! r4 = index ! r5 = address of array +! Yields: +! r3 = address of element +! r0 = size of element (used by .lar4, .sar4) +! Preserves r10 for .lar4, .sar4 .define .aar4 .aar4: diff --git a/mach/powerpc/libem/lar4.s b/mach/powerpc/libem/lar4.s new file mode 100644 index 000000000..6375979c4 --- /dev/null +++ b/mach/powerpc/libem/lar4.s @@ -0,0 +1,43 @@ +# +#include "powerpc.h" + +.sect .text + +! Load from bounds-checked array. +! +! On entry: +! r3 = ptr to descriptor +! r4 = index +! r5 = address of array + +.define .lar4 +.lar4: + mfspr r10, lr + bl .aar4 + mtspr lr, r10 + ! r3 = ptr to element + ! r0 = size of element + + cmpi cr0, 0, r0, 1 + bc IFFALSE, EQ, 1f + ! Load 1 byte. + lbz r4, 0(r3) + stwu r4, -4(sp) + bclr ALWAYS, 0, 0 +1: + cmpi cr0, 0, r0, 2 + bc IFFALSE, EQ, 2f + ! Load 2 bytes. + lhz r4, 0(r3) + stwu r4, -4(sp) + bclr ALWAYS, 0, 0 +2: + ! Load r0 bytes, where r0 must be a positive multiple of 4. + subf sp, r0, sp ! move stack pointer down + or r5, r0, r0 ! index r5 = length r0 +3: + addic. 
r5, r5, -4 ! r5 -= 4 + lwzx r4, r5, r3 + stwx r4, r5, sp + bc IFTRUE, GT, 3b ! loop if r5 > 0 + bclr ALWAYS, 0, 0 diff --git a/mach/powerpc/libem/sar4.s b/mach/powerpc/libem/sar4.s new file mode 100644 index 000000000..0c1368af1 --- /dev/null +++ b/mach/powerpc/libem/sar4.s @@ -0,0 +1,45 @@ +# +#include "powerpc.h" + +.sect .text + +! Store to bounds-checked array. +! +! On entry: +! r3 = ptr to descriptor +! r4 = index +! r5 = address of array + +.define .sar4 +.sar4: + mfspr r10, lr + bl .aar4 + mtspr lr, r10 + ! r3 = ptr to element + ! r0 = size of element + + cmpi cr0, 0, r0, 1 + bc IFFALSE, EQ, 1f + ! Store 1 byte. + lwz r4, 0(sp) + addi sp, sp, 4 + stb r4, 0(r3) + bclr ALWAYS, 0, 0 +1: + cmpi cr0, 0, r0, 2 + bc IFFALSE, EQ, 2f + ! Store 2 bytes. + lwz r4, 0(sp) + addi sp, sp, 4 + sth r4, 0(r3) + bclr ALWAYS, 0, 0 +2: + ! Store r0 bytes, where r0 must be a positive multiple of 4. + or r5, r0, r0 ! index r5 = length r0 +3: + addic. r5, r5, -4 ! r5 -= 4 + lwzx r4, r5, sp + stwx r4, r5, r3 + bc IFTRUE, GT, 3b ! loop if r5 > 0 + add sp, r0, sp ! move stack pointer up + bclr ALWAYS, 0, 0 From e4ce7da0a207c15f2c9824cac8851a7fd8d68b31 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 18 Sep 2016 00:07:30 -0400 Subject: [PATCH 02/25] Fix hilo.p for big-endian platforms. Unless it is packed, a Pascal char is a C int. Using C types, hilo.p passed an int *buf to uread(), which expected a char *buf. Then uread() wrote the char on the end of the int. This worked on little-endian platforms. This failed on big-endian platforms, as writing the value to the big end of an int multiplied it by 16777216. The fix is to use a packed array [0..0] of char in Pascal. I also change 'string' to a packed array, though this is not a necessary part of the fix. 
--- examples/hilo.p | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/hilo.p b/examples/hilo.p index b13bbd1a0..be953a09e 100644 --- a/examples/hilo.p +++ b/examples/hilo.p @@ -8,7 +8,8 @@ program hilo(input, output); type - string = array [0..255] of char; + string = packed array [0..255] of char; + charstar = packed array [0..0] of char; var playing : Boolean; @@ -34,18 +35,18 @@ function random(range : integer) : integer; that conflicts with a Pascal keyword. Luckily there's a private function uread() in the ACK Pascal library that we can use instead. } -function uread(fd : integer; var buffer : char; count : integer) : integer; +function uread(fd : integer; var buffer : charstar; count : integer) : integer; extern; function readchar : char; var - c : char; + c : charstar; dummy : integer; begin - c := chr(0); + c[0] := chr(0); dummy := uread(0, c, 1); - readchar := c; + readchar := c[0]; end; procedure readstring(var buffer : string; var length : integer); From 9ec2918e146ccb260b5195838fe1f34d56340b3b Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 18 Sep 2016 14:37:42 -0400 Subject: [PATCH 03/25] In ncgg, increase MAXREGS from 80 to 200. I need this to add more registers to powerpc. --- util/ncgg/param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/ncgg/param.h b/util/ncgg/param.h index d39dd6b38..369e51d25 100644 --- a/util/ncgg/param.h +++ b/util/ncgg/param.h @@ -15,7 +15,7 @@ #define BORS(x,y) y #endif -#define MAXREGS BORS(80,30) +#define MAXREGS BORS(200,30) #define MAXPROPS BORS(120,20) #define MAXTOKENS BORS(100,60) #define MAXATT 6 From 9db305b3385456b9189b845da79c8f390a0ab57c Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 18 Sep 2016 15:08:55 -0400 Subject: [PATCH 04/25] Enable the Hall check again, and get powerpc to pass it. Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg gives many errors of "Previous rule impossible on empty stack". 
David Given reported this problem in 2013: https://sourceforge.net/p/tack/mailman/message/30814694/ Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable the Hall check. This commit enables it again. In ncgg, the Hall check is checking that a rule is possible with an empty fake stack. It would be possible if ncg can coerce the values from the real stack to the fake stack. The powerpc table defined coercions from STACK to {FS, %a} and {FD, %a}, but the Hall check didn't understand the coercions and rejected each rule "with FS" or "with FD". This commit removes the FS and FD tokens and adds a new group of FSREG registers for single-precision floats, while keeping FREG registers for double precision. The registers overlap, with each FSREG containing one FREG, because it is the same register in PowerPC hardware. FS tokens become FSREG registers and FD tokens become FREG registers. The Hall check understands the coercions from STACK to FSREG and FREG. The idea to define separate but overlapping registers comes from the PDP-11 table (mach/pdp/ncg/table). This commit also removes F0 from the FREG group. This is my attempt to keep F0 off the fake stack, because one of the stacking rules uses F0 as a scratch register (FSCRATCH). --- mach/powerpc/ncg/table | 317 ++++++++++++++++++++++------------------- util/ncgg/cgg.y | 4 +- 2 files changed, 176 insertions(+), 145 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 742e8250f..0ad8ef006 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -44,6 +44,7 @@ PROPERTIES REG /* any allocatable GPR */ FPR /* any FPR */ FREG /* any allocatable FPR */ + FSREG /* any allocatable single-precision FPR */ SPR /* any SPR */ CR /* any CR */ @@ -127,7 +128,40 @@ REGISTERS F3("f3") : FPR, FREG, FPR3. F2("f2") : FPR, FREG, FPR2. F1("f1") : FPR, FREG, FPR1. - F0("f0") : FPR, FREG, FPR0. + F0("f0") : FPR, FPR0. + + FS31("f31")=F31 : FSREG. + FS30("f30")=F30 : FSREG. 
+ FS29("f29")=F29 : FSREG. + FS28("f28")=F28 : FSREG. + FS27("f27")=F27 : FSREG. + FS26("f26")=F26 : FSREG. + FS25("f25")=F25 : FSREG. + FS24("f24")=F24 : FSREG. + FS23("f23")=F23 : FSREG. + FS22("f22")=F22 : FSREG. + FS21("f21")=F21 : FSREG. + FS20("f20")=F20 : FSREG. + FS19("f19")=F19 : FSREG. + FS18("f18")=F18 : FSREG. + FS17("f17")=F17 : FSREG. + FS16("f16")=F16 : FSREG. + FS15("f15")=F15 : FSREG. + FS14("f14")=F14 : FSREG. + FS13("f13")=F13 : FSREG. + FS12("f12")=F12 : FSREG. + FS11("f11")=F11 : FSREG. + FS10("f10")=F10 : FSREG. + FS9("f9")=F9 : FSREG. + FS8("f8")=F8 : FSREG. + FS7("f7")=F7 : FSREG. + FS6("f6")=F6 : FSREG. + FS5("f5")=F5 : FSREG. + FS4("f4")=F4 : FSREG. + FS3("f3")=F3 : FSREG. + FS2("f2")=F2 : FSREG. + FS1("f1")=F1 : FSREG. + /* FS0("f0")=F0 */ LR("lr") : SPR. CTR("ctr") : SPR. @@ -190,11 +224,6 @@ TOKENS XOR_RR = { GPR reg1; GPR reg2; } 4. XOR_RC = { GPR reg; INT val; } 4. -/* Floats */ - - FD = { FPR reg; } 8 reg. - FS = { FPR reg; } 4 reg. - /* Comments */ LABELI = { ADDR msg; INT num; } 4 msg " " num. @@ -250,27 +279,29 @@ INSTRUCTIONS eqv GPRI:wo, GPRI:ro, GPRI:ro. extsb GPRI:wo, GPRI:ro. extsh GPRI:wo, GPRI:ro. - fadd FD:wo, FD:ro, FD:ro. - fadds FS:wo, FS:ro, FS:ro. - fcmpo CR:wo, FD:ro, FD:ro. - fdiv FD:wo, FD:ro, FD:ro. - fdivs FS:wo, FS:ro, FS:ro. - fneg FS+FD:wo, FS+FD:ro. - fmul FD:wo, FD:ro, FD:ro. - fmuls FS:wo, FS:ro, FS:ro. - frsp FS:wo, FD:ro. - fsub FD:wo, FD:ro, FD:ro. - fsubs FS:wo, FS:ro, FS:ro. - fmr FS+FD:wo, FS+FD:ro. + fadd FREG:wo, FREG:ro, FREG:ro. + fadds FSREG:wo, FSREG:ro, FSREG:ro. + fcmpo CR:wo, FPR:ro, FPR:ro. + fdiv FREG:wo, FREG:ro, FREG:ro. + fdivs FSREG:wo, FSREG:ro, FSREG:ro. + fneg FREG:wo, FREG:ro. + fneg FSREG:wo, FSREG:ro. + fmul FREG:wo, FREG:ro, FREG:ro. + fmuls FSREG:wo, FSREG:ro, FSREG:ro. + frsp FSREG:wo, FREG:ro. + fsub FREG:wo, FREG:ro, FREG:ro. + fsubs FSREG:wo, FSREG:ro, FSREG:ro. + fmr FPR:wo, FPR:ro. + fmr FSREG:wo, FSREG:ro. la GPRI:wo, LABEL:ro. lbzx GPRI:wo, GPR:ro, GPR:ro. 
lbz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfd FD:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfdu FD:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfdx FD:wo, GPR:ro, GPR:ro. - lfs FS:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfsu FS:wo, GPRINDIRECT+GPRINDIRECTLO:rw. - lfsx FS:wo, GPR:ro, GPR:ro. + lfd FPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lfdu FPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lfdx FPR:wo, GPR:ro, GPR:ro. + lfs FSREG:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lfsu FSREG:wo, GPRINDIRECT+GPRINDIRECTLO:rw. + lfsx FSREG:wo, GPR:ro, GPR:ro. lhzx GPRI:wo, GPR:ro, GPR:ro. lhax GPRI:wo, GPR:ro, GPR:ro. lha GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. @@ -297,12 +328,12 @@ INSTRUCTIONS srw GPRI:wo, GPRI:ro, GPRI:ro. stb GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. stbx GPRI:ro, GPR:ro, GPR:ro. - stfd FD:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfdu FD:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfdx FD:ro, GPR:ro, GPR:ro. - stfs FS:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfsu FS:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfsx FS:ro, GPR:ro, GPR:ro. + stfd FPR:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stfdu FPR:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stfdx FPR:ro, GPR:ro, GPR:ro. + stfs FSREG:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stfsu FSREG:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stfsx FSREG:ro, GPR:ro, GPR:ro. sth GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. sthx GPRI:ro, GPR:ro, GPR:ro. stw GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. 
@@ -474,25 +505,25 @@ MOVES move {LABEL, %1.adr}, SCRATCH lwz %2, {GPRINDIRECT, SCRATCH, 0} - from IND_RC_W smalls(%off) to FS + from IND_RC_W smalls(%off) to FSREG gen - COMMENT("move IND_RC_W->FS small") + COMMENT("move IND_RC_W->FSREG small") lfs %2, {GPRINDIRECT, %1.reg, %1.off} - from IND_RC_W to FS + from IND_RC_W to FSREG gen - COMMENT("move IND_RC_W->FS large") + COMMENT("move IND_RC_W->FSREG large") addis SCRATCH, %1.reg, {CONST, his(%1.off)} lfs %2, {GPRINDIRECT, SCRATCH, los(%1.off)} - from IND_RR_W to FS + from IND_RR_W to FSREG gen - COMMENT("move IND_RR_W->FS") + COMMENT("move IND_RR_W->FSREG") lfsx %2, %1.reg1, %1.reg2 - from IND_LABEL_W to FS + from IND_LABEL_W to FSREG gen - COMMENT("move IND_LABEL_W->FS") + COMMENT("move IND_LABEL_W->FSREG") move {LABEL, %1.adr}, SCRATCH lfs %2, {GPRINDIRECT, SCRATCH, 0} @@ -520,73 +551,73 @@ MOVES move {LABEL, %2.adr}, SCRATCH stw %1, {GPRINDIRECT, SCRATCH, 0} - from FS to IND_RC_W smalls(%off) + from FSREG to IND_RC_W smalls(%off) gen - COMMENT("move FS->IND_RC_W small") + COMMENT("move FSREG->IND_RC_W small") stfs %1, {GPRINDIRECT, %2.reg, %2.off} - from FS to IND_RC_W + from FSREG to IND_RC_W gen - COMMENT("move FS->IND_RC_W large") + COMMENT("move FSREG->IND_RC_W large") addis SCRATCH, %2.reg, {CONST, his(%2.off)} stfs %1, {GPRINDIRECT, SCRATCH, los(%2.off)} - from FS to IND_RR_W + from FSREG to IND_RR_W gen - COMMENT("move FS->IND_RR_W") + COMMENT("move FSREG->IND_RR_W") stfsx %1, %2.reg1, %2.reg2 - from FS to IND_LABEL_W + from FSREG to IND_LABEL_W gen - COMMENT("move FS->IND_LABEL_D") + COMMENT("move FSREG->IND_LABEL_D") move {LABEL, %2.adr}, SCRATCH stfs %1, {GPRINDIRECT, SCRATCH, 0} /* Read double */ - from IND_RC_D smalls(%off) to FD + from IND_RC_D smalls(%off) to FPR gen - COMMENT("move IND_RC_D->FD small") + COMMENT("move IND_RC_D->FPR small") lfd %2, {GPRINDIRECT, %1.reg, %1.off} - from IND_RC_D to FD + from IND_RC_D to FPR gen - COMMENT("move IND_RC_D->FD large") + COMMENT("move IND_RC_D->FPR 
large") addis SCRATCH, %1.reg, {CONST, his(%1.off)} lfd %2, {GPRINDIRECT, SCRATCH, los(%1.off)} - from IND_RR_D to FD + from IND_RR_D to FPR gen - COMMENT("move IND_RR_D->FD") + COMMENT("move IND_RR_D->FPR") lfdx %2, %1.reg1, %1.reg2 - from IND_LABEL_D to FD + from IND_LABEL_D to FPR gen - COMMENT("move IND_LABEL_D->FD") + COMMENT("move IND_LABEL_D->FPR") move {LABEL, %1.adr}, SCRATCH lfd %2, {GPRINDIRECT, SCRATCH, 0} /* Write double */ - from FD to IND_RC_D smalls(%off) + from FPR to IND_RC_D smalls(%off) gen - COMMENT("move FD->IND_RC_D small") + COMMENT("move FPR->IND_RC_D small") stfd %1, {GPRINDIRECT, %2.reg, %2.off} - from FD to IND_RC_D + from FPR to IND_RC_D gen - COMMENT("move FD->IND_RC_D large") + COMMENT("move FPR->IND_RC_D large") addis SCRATCH, %2.reg, {CONST, his(%2.off)} stfd %1, {GPRINDIRECT, SCRATCH, los(%2.off)} - from FD to IND_RR_D + from FPR to IND_RR_D gen - COMMENT("move FD->IND_RR_W") + COMMENT("move FPR->IND_RR_W") stfdx %1, %2.reg1, %2.reg2 - from FD to IND_LABEL_D + from FPR to IND_LABEL_D gen - COMMENT("move FD->IND_LABEL_D") + COMMENT("move FPR->IND_LABEL_D") move {LABEL, %2.adr}, SCRATCH stfd %1, {GPRINDIRECT, SCRATCH, 0} @@ -628,7 +659,7 @@ MOVES from TRISTATE_FF to CR0 gen COMMENT("move TRISTATE_FF->CR0") - fcmpo %2, {FD, %1.reg1}, {FD, %1.reg2} + fcmpo %2, %1.reg1, %1.reg2 from GPR to CR0 gen @@ -766,17 +797,17 @@ STACKINGRULES from IND_ALL_D to STACK gen - move %1, {FD, FSCRATCH} - stfdu {FD, FSCRATCH}, {GPRINDIRECT, SP, 0-8} + move %1, FSCRATCH + stfdu FSCRATCH, {GPRINDIRECT, SP, 0-8} - from FD to STACK + from FPR to STACK gen - COMMENT("stack FD") + COMMENT("stack FPR") stfdu %1, {GPRINDIRECT, SP, 0-8} - from FS to STACK + from FSREG to STACK gen - COMMENT("stack FS") + COMMENT("stack FSREG") stfsu %1, {GPRINDIRECT, SP, 0-4} from TOKEN to STACK @@ -836,33 +867,33 @@ COERCIONS move %1, {GPRE, %a} yields %a - from FS - uses FREG + from FSREG + uses FSREG gen - fmr {FS, %a}, %1 - yields {FS, %a} + fmr %a, %1 + yields %a - from FD - 
uses FREG + from FPR + uses FPR gen - fmr {FD, %a}, %1 - yields {FD, %a} + fmr %a, %1 + yields %a from STACK uses FREG gen - COMMENT("coerce STACK->FD") - lfd {FD, %a}, {GPRINDIRECT, SP, 0} + COMMENT("coerce STACK->FREG") + lfd %a, {GPRINDIRECT, SP, 0} addi SP, SP, {CONST, 8} - yields {FD, %a} + yields %a from STACK - uses FREG + uses FSREG gen - COMMENT("coerce STACK->FS") - lfs {FS, %a}, {GPRINDIRECT, SP, 0} + COMMENT("coerce STACK->FSREG") + lfs %a, {GPRINDIRECT, SP, 0} addi SP, SP, {CONST, 4} - yields {FS, %a} + yields %a from IND_ALL_W uses REG @@ -871,16 +902,16 @@ COERCIONS yields %a from IND_ALL_W - uses FREG + uses FSREG gen - move %1, {FS, %a} - yields {FS, %a} + move %1, %a + yields %a from IND_ALL_D uses FREG gen - move %1, {FD, %a} - yields {FD, %a} + move %1, %a + yields %a @@ -1242,27 +1273,27 @@ PATTERNS move %2.reg, {IND_RC_H, %1.reg, %1.off} pat sti $1==INT32 /* Store word indirect */ - with GPR GPR+FS + with GPR GPR+FSREG gen move %2, {IND_RC_W, %1, 0} - with SUM_RR GPR+FS + with SUM_RR GPR+FSREG gen move %2, {IND_RR_W, %1.reg1, %1.reg2} - with SUM_RC GPR+FS + with SUM_RC GPR+FSREG gen move %2, {IND_RC_W, %1.reg, %1.off} - with LABEL GPR+FS + with LABEL GPR+FSREG gen move %2, {IND_LABEL_W, %1.adr} pat sti $1==INT64 /* Store double-word indirect */ - with GPR FD + with GPR FREG gen move %2, {IND_RC_D, %1, 0} - with SUM_RR FD + with SUM_RR FREG gen move %2, {IND_RR_D, %1.reg1, %1.reg2} - with SUM_RC FD + with SUM_RC FREG gen move %2, {IND_RC_D, %1.reg, %1.off} with GPR GPR GPR @@ -1273,7 +1304,7 @@ PATTERNS gen move %2, {IND_RC_W, %1.reg, %1.off} move %3, {IND_RC_W, %1.reg, %1.off+4} - with LABEL FD + with LABEL FREG gen move %2, {IND_LABEL_D, %1.adr} @@ -2004,47 +2035,47 @@ PATTERNS loe ".fs_00000000" pat adf $1==INT32 /* Add single */ - with FS FS - uses reusing %1, FREG + with FSREG FSREG + uses reusing %1, FSREG gen - fadds {FS, %a}, %2, %1 - yields {FS, %a} + fadds %a, %2, %1 + yields %a pat sbf $1==INT32 /* Subtract single */ - with FS FS - 
uses reusing %1, FREG + with FSREG FSREG + uses reusing %1, FSREG gen - fsubs {FS, %a}, %2, %1 - yields {FS, %a} + fsubs %a, %2, %1 + yields %a pat mlf $1==INT32 /* Multiply single */ - with FS FS - uses reusing %1, FREG + with FSREG FSREG + uses reusing %1, FSREG gen - fmuls {FS, %a}, %2, %1 - yields {FS, %a} + fmuls %a, %2, %1 + yields %a pat dvf $1==INT32 /* Divide single */ - with FS FS - uses reusing %1, FREG + with FSREG FSREG + uses reusing %1, FSREG gen - fdivs {FS, %a}, %2, %1 - yields {FS, %a} + fdivs %a, %2, %1 + yields %a pat ngf $1==INT32 /* Negate single */ - with FS - uses reusing %1, FREG + with FSREG + uses reusing %1, FSREG gen - fneg {FS, %a}, %1 - yields {FS, %a} + fneg %a, %1 + yields %a pat cmf $1==INT32 /* Compare single */ - with FS FS - yields {TRISTATE_FF, %2.reg, %1.reg} + with FSREG FSREG + yields {TRISTATE_FF, %2.1, %1.1} pat loc loc cff $1==INT32 && $2==INT64 /* Convert single to double */ - with FS - yields {FD, %1.reg} + with FSREG + yields %1.1 pat loc loc cfu $1==INT32 && $2==INT32 /* Convert single to unsigned int */ with STACK @@ -2078,50 +2109,50 @@ PATTERNS lde ".fd_00000000" pat adf $1==INT64 /* Add double */ - with FD FD + with FREG FREG uses FREG gen - fadd {FD, %a}, %2, %1 - yields {FD, %a} + fadd %a, %2, %1 + yields %a pat sbf $1==INT64 /* Subtract double */ - with FD FD + with FREG FREG uses FREG gen - fsub {FD, %a}, %2, %1 - yields {FD, %a} + fsub %a, %2, %1 + yields %a pat mlf $1==INT64 /* Multiply double */ - with FD FD + with FREG FREG uses reusing %1, FREG gen - fmul {FD, %a}, %2, %1 - yields {FD, %a} + fmul %a, %2, %1 + yields %a pat dvf $1==INT64 /* Divide double */ - with FD FD + with FREG FREG uses reusing %1, FREG gen - fdiv {FD, %a}, %2, %1 - yields {FD, %a} + fdiv %a, %2, %1 + yields %a pat ngf $1==INT64 /* Negate double */ - with FD + with FREG uses reusing %1, FREG gen - fneg {FD, %a}, %1 - yields {FD, %a} + fneg %a, %1 + yields %a pat cmf $1==INT64 /* Compare double */ - with FD FD - yields {TRISTATE_FF, 
%2.reg, %1.reg} + with FREG FREG + yields {TRISTATE_FF, %2, %1} pat loc loc cff $1==INT64 && $2==INT32 /* Convert double to single */ - with FD - uses reusing %1, FREG + with FREG + uses reusing %1, FSREG gen - frsp {FS, %a}, %1 - yields {FS, %a} + frsp %a, %1 + yields %a pat loc loc cfu $1==INT64 && $2==INT32 /* Convert double to unsigned int */ with STACK @@ -2145,7 +2176,7 @@ PATTERNS bl {LABEL, ".cuf8"} pat fef $1==INT64 /* Split double */ - with FD + with FREG gen addi SP, SP, {CONST, 0-8} stfd %1, {GPRINDIRECT, SP, 0} diff --git a/util/ncgg/cgg.y b/util/ncgg/cgg.y index 24948abad..17a6cf966 100644 --- a/util/ncgg/cgg.y +++ b/util/ncgg/cgg.y @@ -635,8 +635,8 @@ coderule maxempatlen=empatlen; } patterns - { /* if (!saferulefound) - error("Previous rule impossible on empty stack"); */ + { if (!saferulefound) + error("Previous rule impossible on empty stack"); outpatterns(); } | PROC IDENT example From 5b69777647bef86c0e3cbcf88311498188dfef68 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 18 Sep 2016 17:03:23 -0400 Subject: [PATCH 05/25] Rename our pseudo-opcode 'la' to 'li32'. GNU as has "la %r4,8(%r3)" as an alias for "addi %r4,%r3,8", meaning to load the address of the thing at 8(%r3). Our 'la', now 'li32', makes an addis/ori pair to load an immediate 32-bit value. For example, "li32 r4,23456789" loads a big number. 
--- mach/powerpc/as/mach2.c | 2 +- mach/powerpc/as/mach3.c | 2 +- mach/powerpc/as/mach4.c | 4 ++-- mach/powerpc/libem/aar4.s | 2 +- mach/powerpc/libem/cfu8.s | 6 +++--- mach/powerpc/libem/cif8.s | 2 +- mach/powerpc/libem/cuf8.s | 2 +- mach/powerpc/ncg/table | 4 ++-- plat/linuxppc/libsys/_syscall.s | 2 +- plat/linuxppc/libsys/trap.s | 6 +++--- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/mach/powerpc/as/mach2.c b/mach/powerpc/as/mach2.c index d1a959fc3..7cd4a4ff4 100644 --- a/mach/powerpc/as/mach2.c +++ b/mach/powerpc/as/mach2.c @@ -80,7 +80,7 @@ %token OP_TO_RA_RB %token OP_TO_RA_SI -%token OP_LA +%token OP_LI32 /* Other token types */ diff --git a/mach/powerpc/as/mach3.c b/mach/powerpc/as/mach3.c index 278a7e607..153e5d4d2 100644 --- a/mach/powerpc/as/mach3.c +++ b/mach/powerpc/as/mach3.c @@ -98,7 +98,7 @@ /* Special instructions */ -0, OP_LA, 0, "la", +0, OP_LI32, 0, "li32", /* Branch processor instructions (page 20) */ diff --git a/mach/powerpc/as/mach4.c b/mach/powerpc/as/mach4.c index acb4abf1b..2fe584992 100644 --- a/mach/powerpc/as/mach4.c +++ b/mach/powerpc/as/mach4.c @@ -58,7 +58,7 @@ operation | OP_LEV u7 { emit4($1 | ($2<<5)); } | OP_LIA lia { emit4($1 | $2); } | OP_LIL lil { emit4($1 | $2); } - | OP_LA la /* emitted in subrule */ + | OP_LI32 li32 /* emitted in subrule */ ; c @@ -193,7 +193,7 @@ bda } ; -la +li32 : GPR ',' expr { newrelo($3.typ, RELOPPC | FIXUPFLAGS); diff --git a/mach/powerpc/libem/aar4.s b/mach/powerpc/libem/aar4.s index 6a48aa058..2c65af643 100644 --- a/mach/powerpc/libem/aar4.s +++ b/mach/powerpc/libem/aar4.s @@ -20,7 +20,7 @@ .define .aar4 .aar4: - la r0, .trap_earray + li32 r0, .trap_earray mtspr ctr, r0 ! load CTR with trap address lwz r0, 0(r3) diff --git a/mach/powerpc/libem/cfu8.s b/mach/powerpc/libem/cfu8.s index 2e082c252..758df8572 100644 --- a/mach/powerpc/libem/cfu8.s +++ b/mach/powerpc/libem/cfu8.s @@ -13,15 +13,15 @@ .define .cfu8 .cfu8: - la r3, .fd_00000000 + li32 r3, .fd_00000000 lfd f0, 0(r3) ! 
f0 = 0.0 lfd f1, 0(sp) ! value to be converted - la r3, .fd_FFFFFFFF + li32 r3, .fd_FFFFFFFF lfd f3, 0(r3) ! f3 = 0xFFFFFFFF - la r3, .fd_80000000 + li32 r3, .fd_80000000 lfd f4, 0(r3) ! f4 = 0x80000000 fsel f2, f1, f1, f0 diff --git a/mach/powerpc/libem/cif8.s b/mach/powerpc/libem/cif8.s index 2e7b48d17..d2c82e54e 100644 --- a/mach/powerpc/libem/cif8.s +++ b/mach/powerpc/libem/cif8.s @@ -24,7 +24,7 @@ lfd f0, 0(sp) ! load value - la r3, pivot + li32 r3, pivot lfd f1, 0(r3) ! load pivot value fsub f0, f0, f1 ! adjust diff --git a/mach/powerpc/libem/cuf8.s b/mach/powerpc/libem/cuf8.s index ea5ec263a..5d5a12988 100644 --- a/mach/powerpc/libem/cuf8.s +++ b/mach/powerpc/libem/cuf8.s @@ -20,7 +20,7 @@ lfd f0, 0(sp) ! load value - la r3, pivot + li32 r3, pivot lfd f1, 0(r3) ! load pivot value fsub f0, f0, f1 ! adjust diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 0ad8ef006..60cf93c07 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -293,7 +293,6 @@ INSTRUCTIONS fsubs FSREG:wo, FSREG:ro, FSREG:ro. fmr FPR:wo, FPR:ro. fmr FSREG:wo, FSREG:ro. - la GPRI:wo, LABEL:ro. lbzx GPRI:wo, GPR:ro, GPR:ro. lbz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. lfd FPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. @@ -306,6 +305,7 @@ INSTRUCTIONS lhax GPRI:wo, GPR:ro, GPR:ro. lha GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. lhz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + li32 GPRI:wo, LABEL:ro. lwzu GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. lwzx GPRI:wo, GPR:ro, GPR:ro. lwz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. 
@@ -385,7 +385,7 @@ MOVES from LABEL to GPR gen COMMENT("move LABEL->GPR") - la %2, {LABEL, %1.adr} + li32 %2, {LABEL, %1.adr} /* Sign extension */ diff --git a/plat/linuxppc/libsys/_syscall.s b/plat/linuxppc/libsys/_syscall.s index ccafe2460..c7e818830 100644 --- a/plat/linuxppc/libsys/_syscall.s +++ b/plat/linuxppc/libsys/_syscall.s @@ -42,7 +42,7 @@ __syscall: bc IFTRUE, GT, 2f 3: - la r4, _errno + li32 r4, _errno stw r3, 0(r4) addi r3, r0, -1 bclr ALWAYS, 0, 0 diff --git a/plat/linuxppc/libsys/trap.s b/plat/linuxppc/libsys/trap.s index af36acf2d..09d3b0b21 100644 --- a/plat/linuxppc/libsys/trap.s +++ b/plat/linuxppc/libsys/trap.s @@ -65,13 +65,13 @@ EUNIMPL = 63 ! unimplemented em-instruction called addi r4, r0, 1 rlwnm r4, r4, r3, 0, 31 ! calculate trap bit - la r5, .ignmask + li32 r5, .ignmask lwz r5, 0(r5) ! load ignore mask and. r4, r4, r5 ! compare bclr IFFALSE, EQ, 0 ! return if non-zero 1: - la r4, .trppc + li32 r4, .trppc lwz r5, 0(r4) ! load user trap routine or. r5, r5, r5 ! test bc IFTRUE, EQ, fatal ! if no user trap routine, bail out @@ -92,7 +92,7 @@ EUNIMPL = 63 ! unimplemented em-instruction called fatal: addi r3, r0, 1 - la r4, message + li32 r4, message addi r5, r0, 6 addi r0, r0, 4 ! write() sc 0 From 865ef629ddc3bc81d2a276df0cfe64a8ad182c83 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 21 Sep 2016 16:26:30 -0400 Subject: [PATCH 06/25] Multiple tweaks to plat/linuxppc/descr Don't define __POWERPC. I don't know any other compiler that defines __POWERPC and don't want to invent a new macro. Apple's gcc 4.0.1 from Xcode 2.5 defines __ppc__, _ARCH_PPC, __POWERPC__. Debian's gcc 4.9.2-10 defines _ARCH_PPC, __PPC__, __powerpc__, __PPC, __powerpc, PPC, powerpc. Move the base vm address from 0x80000000 down to 0x10000000, as this is where Debian loads /bin/true. This is still higher than the base addresses for linux386 and linux68k. Sync led's arguments with linux386. 
--- plat/linuxppc/descr | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/plat/linuxppc/descr b/plat/linuxppc/descr index b70680201..3086a826c 100644 --- a/plat/linuxppc/descr +++ b/plat/linuxppc/descr @@ -1,6 +1,4 @@ -# $Source: /cvsroot/tack/Ack/plat/linux386/descr,v $ -# $State: Exp $ -# $Revision: 1.1 $ +# plat/linuxppc/descr var w=4 var wa=4 @@ -19,8 +17,8 @@ var xa={x} var ARCH=powerpc var PLATFORM=linuxppc var PLATFORMDIR={EM}/share/ack/{PLATFORM} -var CPP_F=-D__unix -D__POWERPC -var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x80000054 +var CPP_F=-D__unix +var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x10000054 var C_LIB={PLATFORMDIR}/libc-ansi.a # bitfields reversed for compatibility with (g)cc. var CC_ALIGN=-Vr @@ -65,8 +63,9 @@ name led mapflag -l* LNAME={PLATFORMDIR}/lib* mapflag -fp FLOATS={EM}/{LIB}fp args {ALIGN} {SEPID?} \ - {PLATFORMDIR}/boot.o \ - ({RTS}:.ocm.b.c={PLATFORMDIR}/c-ansi.o) \ + (.e:{HEAD}={PLATFORMDIR}/boot.o) \ + ({RTS}:.ocm.b={PLATFORMDIR}/c-ansi.o) \ + ({RTS}:.c={PLATFORMDIR}/c-ansi.o) \ ({RTS}:.mod={PLATFORMDIR}/modula2.o) \ ({RTS}:.p={PLATFORMDIR}/pascal.o) \ -o > < \ @@ -75,9 +74,10 @@ name led (.mod:{TAIL}={PLATFORMDIR}/libmodula2.a) \ (.ocm:{TAIL}={PLATFORMDIR}/liboccam.a) \ (.ocm.b.mod.c.p:{TAIL}={PLATFORMDIR}/libc.a) \ - {PLATFORMDIR}/libem.a \ - {PLATFORMDIR}/libsys.a \ - {PLATFORMDIR}/libend.a + {FLOATS?} \ + (.e:{TAIL}={PLATFORMDIR}/libem.a \ + {PLATFORMDIR}/libsys.a \ + {PLATFORMDIR}/libend.a) linker end name cv From 1e3dde915ad9356efa7c6d0d0ab430f2063ad3eb Mon Sep 17 00:00:00 2001 From: George Koehler Date: Tue, 27 Sep 2016 16:46:11 -0400 Subject: [PATCH 07/25] Remove the "invalid" stacking rule. When ncg fell back on this rule, it did emit the string "invalid" in the assembly code and caused a syntax error in the assembler. 
Adjust the stacking rules so we can stack LOCAL, CONST, and LABEL without falling back on the "invalid" rule, and so we can stack them when we have no free register except the scratch register. --- mach/powerpc/ncg/table | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 60cf93c07..547fe070b 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -233,7 +233,6 @@ TOKENS SETS - TOKEN = LABEL + CONST + LOCAL. GPRI = GPR + GPRE. SUM_ALL = SUM_RC + SUM_RR. @@ -348,8 +347,7 @@ INSTRUCTIONS gpr_ro_gpr_gpr GPRI:ro, GPRI:ro, GPRI:ro. gpr_wo_gprindirect GPRI:wo, GPRINDIRECT:ro. gpr_wo_gpr_gpr GPRI:wo, GPRI:ro, GPRI:ro. - - invalid "invalid". + comment "!" LABEL+LABELI:ro. @@ -753,25 +751,28 @@ TESTS STACKINGRULES - + + from LOCAL to STACK + gen + COMMENT("stack LOCAL") + stwu {GPRE, regvar(%1.off)}, {GPRINDIRECT, SP, 0-4} + from GPR to STACK gen COMMENT("stack GPR") stwu %1, {GPRINDIRECT, SP, 0-4} from CONST to STACK - uses REG gen COMMENT("stack CONST") - move %1, %a - stwu %a, {GPRINDIRECT, SP, 0-4} + move %1, SCRATCH + stwu SCRATCH, {GPRINDIRECT, SP, 0-4} from LABEL to STACK - uses REG gen COMMENT("stack LABEL") - move %1, {GPRE, %a} - stwu %a, {GPRINDIRECT, SP, 0-4} + move %1, SCRATCH + stwu SCRATCH, {GPRINDIRECT, SP, 0-4} from SEX_B to STACK gen @@ -809,13 +810,9 @@ STACKINGRULES gen COMMENT("stack FSREG") stfsu %1, {GPRINDIRECT, SP, 0-4} - - from TOKEN to STACK - gen - invalid. - - - + + + COERCIONS from REG From a71eee391420c2e7c284e217190290520d3e1ec3 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 28 Sep 2016 00:13:35 -0400 Subject: [PATCH 08/25] For "pat ass", move fake stack to real stack before adjusting SP. This fixes code that tried to "addi SP, SP, 4" to drop a value that was in a register, not on the real stack. Add a rule to optimize "asp 4" (which becomes "loc 4" "ass") when the value being dropped is already in a GPR. 
--- mach/powerpc/ncg/table | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 547fe070b..a5585bed7 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -2004,12 +2004,19 @@ PATTERNS pat str $1==2 /* Store HP */ leaving ste ".reghp" - + + pat loc ass $1==4 /* Drop 4 bytes from stack */ + with exact GPR + /* nop */ + with STACK + gen + addi SP, SP, {CONST, 4} + pat ass /* Adjust stack by variable amount */ - with CONST + with CONST STACK gen move {SUM_RC, SP, %1.val}, {GPRE, SP} - with GPR + with GPR STACK gen move {SUM_RR, SP, %1}, {GPRE, SP} From 6ae415d48b44da1dd8e9d2fe6333bb1bdb85c2cf Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 29 Sep 2016 15:52:54 -0400 Subject: [PATCH 09/25] Rewrite fef 8 in powerpc assembly. In EM, fef splits a float into exponent and fraction. The old C code, given an infinite float, got stuck in an infinite loop. The new assembly code doesn't loop; it extracts the IEEE exponent. --- mach/powerpc/libem/build.lua | 1 - mach/powerpc/libem/fef8.c | 46 ---------------------------- mach/powerpc/libem/fef8.s | 58 ++++++++++++++++++++++++++++++++++++ mach/powerpc/ncg/table | 10 +++---- 4 files changed, 62 insertions(+), 53 deletions(-) delete mode 100644 mach/powerpc/libem/fef8.c create mode 100644 mach/powerpc/libem/fef8.s diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua index d17adcd92..56278aa55 100644 --- a/mach/powerpc/libem/build.lua +++ b/mach/powerpc/libem/build.lua @@ -3,7 +3,6 @@ for _, plat in ipairs(vars.plats) do name = "lib_"..plat, srcs = { "./*.s", - "./*.c" }, vars = { plat = plat }, deps = { diff --git a/mach/powerpc/libem/fef8.c b/mach/powerpc/libem/fef8.c deleted file mode 100644 index 244d0fac8..000000000 --- a/mach/powerpc/libem/fef8.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * $Source$ - * $State$ - * $Revision$ - */ - -/* no headers allowed! 
*/ - -/* Given a double, calculates the mantissa and exponent. - * - * This function is intended to be called internally by the code generator, - * so the calling convention is odd. - */ - -int __fef8(double* fp) -{ - double f = *fp; - int exponent, sign; - - if (f == 0.0) - return 0; - - if (f < 0.0) - { - sign = -1; - f = -f; - } - else - sign = 0; - - exponent = 0; - while (f >= 1.0) - { - f /= 2.0; - exponent++; - } - - while (f < 0.5) - { - f *= 2.0; - exponent--; - } - - *fp = (sign) ? -f : f; - return exponent; -} diff --git a/mach/powerpc/libem/fef8.s b/mach/powerpc/libem/fef8.s new file mode 100644 index 000000000..fc72b04f2 --- /dev/null +++ b/mach/powerpc/libem/fef8.s @@ -0,0 +1,58 @@ +#include "powerpc.h" + +.sect .text + +! Split a double-precision float into fraction and exponent, like +! frexp(3) in C. On entry: +! r3 = float, high word (bits 0..31) +! r4 = float, low word (bits 32..63) +! Yields: +! r3 = fraction, high word (bits 0..31) +! r4 = fraction, low word (bits 32..63) +! r5 = exponent +! Kills: cr0 f0 f1 r6 r7 + +.define .fef8 +.fef8: + ! IEEE double-precision format: + ! sign exponent fraction + ! 0 1..11 12..63 + rlwinm r6, r3, 12, 21, 31 ! r6 = IEEE exponent + addis r7, r0, 0x7ff0 ! r7 = exponent mask + addi r5, r6, -1022 ! r5 = true exponent + cmpi cr0, 0, r6, 2047 + bclr IFTRUE, EQ, 0 ! return if infinity or NaN + cmpi cr0, 0, r6, 0 + bc IFFALSE, EQ, 1f ! jump if normalized number + + ! Got denormalized number or zero, probably zero. + rlwinm r6, r3, 0, 12, 31 + addi r5, r0, 0 ! r5 = true exponent = 0 + or. r6, r6, r4 ! r6 = high|low fraction + bclr IFTRUE, EQ, 0 ! return if zero + + ! Got denormalized number, not zero. + stwu r4, -4(sp) + stwu r3, -4(sp) + li32 r6, _2_64 + lfd f0, 0(sp) + lfd f1, 0(r6) + fmul f0, f0, f1 ! multiply it by 2**64 + stfd f0, 0(sp) + lwz r3, 0(sp) + lwz r4, 4(sp) + rlwinm r6, r3, 12, 21, 31 ! r6 = IEEE exponent + addi sp, sp, 8 + addi r5, r6, -1022 - 64 ! r5 = true exponent +1: + ! 
Put fraction in [0.5, 1) or (-1, -0.5] by setting its + ! exponent to true 0, IEEE 1022. + andc r3, r3, r7 ! clear old exponent + oris r3, r3, 1022 << 4 ! set new exponent + bclr ALWAYS, 0, 0 + +.sect .rom +_2_64: + ! (double) 2**64 + .data4 0x43f00000 + .data4 0x00000000 diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index a5585bed7..93b7722ee 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -2180,13 +2180,11 @@ PATTERNS bl {LABEL, ".cuf8"} pat fef $1==INT64 /* Split double */ - with FREG + with GPR3 GPR4 + kills FPR0, FPR1, GPR6, GPR7 gen - addi SP, SP, {CONST, 0-8} - stfd %1, {GPRINDIRECT, SP, 0} - stwu SP, {GPRINDIRECT, SP, 0-4} - bl {LABEL, "___fef8"} - stw R3, {GPRINDIRECT, SP, 0} + bl {LABEL, ".fef8"} + yields R4 R3 R5 pat fif $1==INT64 /* Multiply and split double (?) */ with STACK From e22c8881e7e0fc683e3fddb8aa9844f4a4c7e8ff Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 30 Sep 2016 11:50:50 -0400 Subject: [PATCH 10/25] Add a rule for sdl ldl $1==$2 to work around a bug. In our powerpc table, sdl fails to kill the old value of the local. This is a bug, because a later ldl can load the old value instead of the newly stored value. By rewriting "sdl 0" "ldl 0" as "dup 8" "sdl 0", the newly added rule works around the bug, but only when the ldl is immediately after the sdl. This rule improves code that uses double-precision floating point. The output of printf("%f", 6.0) in C changes from all zero digits to "6000000" but still doesn't print the decimal point. The result of atof("-123.456") becomes correct. In startrek, I can now move the Enterprise, but I still can't fire phasers without crashing the game. We already have a rule for stl lol $1==$2. We had two copies of the rule, so I am deleting the second copy. 
--- mach/powerpc/ncg/table | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 93b7722ee..1cb51c2d5 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -923,10 +923,14 @@ PATTERNS pat dup $1==INT32 /* Duplicate word on top of stack */ with GPR yields %1 %1 + with FSREG + yields %1 %1 pat dup $1==INT64 /* Duplicate double-word on top of stack */ with GPR GPR yields %2 %1 %2 %1 + with FREG + yields %1 %1 pat exg $1==INT32 /* Exchange top two words on stack */ with GPR GPR @@ -936,7 +940,12 @@ PATTERNS leaving dup 4 stl $1 - + + pat sdl ldl $1==$2 /* Store then load double local */ + leaving + dup 8 + sdl $1 + pat lal sti lal loi $1==$3 && $2==$4 /* Store then load local, of a different size */ leaving dup INT32 @@ -1045,12 +1054,7 @@ PATTERNS leaving lol $1 sti INT32 - - pat stl lol $1==$2 /* Save then load (generated by C compiler) */ - leaving - dup 4 - stl $1 - + pat zrl /* Zero local */ leaving loc 0 From b427d33f9f093f4e2b342b6025b94a360a76e2d0 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 30 Sep 2016 13:21:42 -0400 Subject: [PATCH 11/25] Define the begdata, begrom, begbss symbols for linuxppc. I copied the definitions from linux386 and linux68k. This change also moves _errno and the other common symbols in boot.s from .text to .bss. Common symbols belong in .bss, but the assembler seems dumb enough to put them in any section. --- plat/linuxppc/boot.s | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/plat/linuxppc/boot.s b/plat/linuxppc/boot.s index 66cb38306..b188b0404 100644 --- a/plat/linuxppc/boot.s +++ b/plat/linuxppc/boot.s @@ -41,7 +41,14 @@ begtext: stwu r3, -4(sp) b __m_a_i_n - + +! Define symbols at the beginning of our various segments, so that we can find +! them. (Except .text, which has already been done.) + +.sect .data; begdata: +.sect .rom; begrom: +.sect .bss; begbss: + ! Some magic data. 
All EM systems need these. .define _errno From ce5faba91953f5d4340199b2246bef68b620a6e7 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 30 Sep 2016 13:40:36 -0400 Subject: [PATCH 12/25] Remove .linenumber and .filename; use hol0 and hol0+4. We need this because some .e files in lang/ are using 'loe 0' and 'lae 4' to load the line number from hol0 and filename from hol0+4. --- mach/powerpc/ncg/table | 6 +++--- plat/linuxppc/boot.s | 4 ---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 1cb51c2d5..6d2fc40b7 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -1893,16 +1893,16 @@ PATTERNS pat fil /* Set current filename */ leaving lae $1 - ste ".filename" + ste "hol0+4" pat lin /* Set current line number */ leaving loc $1 - ste ".linenumber" + ste "hol0" pat lni /* Increment line number */ leaving - ine ".linenumber" + ine "hol0" pat lim /* Load EM trap ignore mask */ leaving diff --git a/plat/linuxppc/boot.s b/plat/linuxppc/boot.s index b188b0404..2da5dd556 100644 --- a/plat/linuxppc/boot.s +++ b/plat/linuxppc/boot.s @@ -57,7 +57,3 @@ begtext: .define .trppc, .ignmask .comm .trppc, 4 ! ptr to user trap handler .comm .ignmask, 4 ! user trap ignore mask - -.define .linenumber, .filename -.comm .linenumber, 4 ! current linenumber (used for debugging) -.comm .filename, 4 ! ptr to current filename (used for debugging) From 7cccd88b71bb7ee82e75ac13dc1eb797951f2836 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 6 Oct 2016 20:47:42 -0400 Subject: [PATCH 13/25] Rename SCRATCH to RSCRATCH. Never stack RSCRATCH nor FSCRATCH. Rename the scratch gpr (currently r11) from SCRATCH to RSCRATCH so I can search for RSCRATCH without finding FSCRATCH. I also want to avoid confusion with the SCRATCH keyword of the old code generator (cg which came before ncg). 
Change the stacking rules to prevent stacking of RSCRATCH or FSCRATCH or any other GPR or FPR that isn't an allocatable REG or FREG. Then ncgg rejects any rule that tries to stack a GPR or FPR, so change such rules to stack a REG or FREG. --- mach/powerpc/ncg/table | 162 ++++++++++++++++++++--------------------- 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 6d2fc40b7..6b03ffa90 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -167,7 +167,7 @@ REGISTERS CTR("ctr") : SPR. C0("cr0") : CR, CR0. -#define SCRATCH R11 +#define RSCRATCH R11 #define FSCRATCH F0 @@ -430,8 +430,8 @@ MOVES from IND_RC_B to GPR gen COMMENT("move IND_RC_B->GPR large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lbz %2, {GPRINDIRECT, SCRATCH, los(%1.off)} + addis RSCRATCH, %1.reg, {CONST, his(%1.off)} + lbz %2, {GPRINDIRECT, RSCRATCH, los(%1.off)} from GPR to IND_RC_B smalls(%off) gen @@ -441,8 +441,8 @@ MOVES from GPR to IND_RC_B gen COMMENT("move GPR->IND_RC_B large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - stb %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + addis RSCRATCH, %2.reg, {CONST, his(%2.off)} + stb %1, {GPRINDIRECT, RSCRATCH, los(%2.off)} /* Read/write short */ @@ -454,8 +454,8 @@ MOVES from IND_RC_H to GPR gen COMMENT("move IND_RC_H->GPR large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lhz %2, {GPRINDIRECT, SCRATCH, los(%1.off)} + addis RSCRATCH, %1.reg, {CONST, his(%1.off)} + lhz %2, {GPRINDIRECT, RSCRATCH, los(%1.off)} from IND_RC_H_S smalls(%off) to GPR gen @@ -465,8 +465,8 @@ MOVES from IND_RC_H_S to GPR gen COMMENT("move IND_RC_H_S->GPR large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lha %2, {GPRINDIRECT, SCRATCH, los(%1.off)} + addis RSCRATCH, %1.reg, {CONST, his(%1.off)} + lha %2, {GPRINDIRECT, RSCRATCH, los(%1.off)} from GPR to IND_RC_H smalls(%off) gen @@ -476,8 +476,8 @@ MOVES from GPR to IND_RC_H gen COMMENT("move GPR->IND_RC_H large") - addis SCRATCH, %2.reg, 
{CONST, his(%2.off)} - sth %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + addis RSCRATCH, %2.reg, {CONST, his(%2.off)} + sth %1, {GPRINDIRECT, RSCRATCH, los(%2.off)} /* Read word */ @@ -500,8 +500,8 @@ MOVES from IND_LABEL_W to GPR gen COMMENT("move IND_LABEL_W->GPR") - move {LABEL, %1.adr}, SCRATCH - lwz %2, {GPRINDIRECT, SCRATCH, 0} + move {LABEL, %1.adr}, RSCRATCH + lwz %2, {GPRINDIRECT, RSCRATCH, 0} from IND_RC_W smalls(%off) to FSREG gen @@ -511,8 +511,8 @@ MOVES from IND_RC_W to FSREG gen COMMENT("move IND_RC_W->FSREG large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lfs %2, {GPRINDIRECT, SCRATCH, los(%1.off)} + addis RSCRATCH, %1.reg, {CONST, his(%1.off)} + lfs %2, {GPRINDIRECT, RSCRATCH, los(%1.off)} from IND_RR_W to FSREG gen @@ -522,8 +522,8 @@ MOVES from IND_LABEL_W to FSREG gen COMMENT("move IND_LABEL_W->FSREG") - move {LABEL, %1.adr}, SCRATCH - lfs %2, {GPRINDIRECT, SCRATCH, 0} + move {LABEL, %1.adr}, RSCRATCH + lfs %2, {GPRINDIRECT, RSCRATCH, 0} /* Write word */ @@ -535,8 +535,8 @@ MOVES from GPR to IND_RC_W gen COMMENT("move GPR->IND_RC_W large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - stw %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + addis RSCRATCH, %2.reg, {CONST, his(%2.off)} + stw %1, {GPRINDIRECT, RSCRATCH, los(%2.off)} from GPR to IND_RR_W gen @@ -546,8 +546,8 @@ MOVES from GPR to IND_LABEL_W gen COMMENT("move GPR->IND_LABEL_D") - move {LABEL, %2.adr}, SCRATCH - stw %1, {GPRINDIRECT, SCRATCH, 0} + move {LABEL, %2.adr}, RSCRATCH + stw %1, {GPRINDIRECT, RSCRATCH, 0} from FSREG to IND_RC_W smalls(%off) gen @@ -557,8 +557,8 @@ MOVES from FSREG to IND_RC_W gen COMMENT("move FSREG->IND_RC_W large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - stfs %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + addis RSCRATCH, %2.reg, {CONST, his(%2.off)} + stfs %1, {GPRINDIRECT, RSCRATCH, los(%2.off)} from FSREG to IND_RR_W gen @@ -568,8 +568,8 @@ MOVES from FSREG to IND_LABEL_W gen COMMENT("move FSREG->IND_LABEL_D") - move {LABEL, %2.adr}, SCRATCH - stfs %1, 
{GPRINDIRECT, SCRATCH, 0} + move {LABEL, %2.adr}, RSCRATCH + stfs %1, {GPRINDIRECT, RSCRATCH, 0} /* Read double */ @@ -581,8 +581,8 @@ MOVES from IND_RC_D to FPR gen COMMENT("move IND_RC_D->FPR large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lfd %2, {GPRINDIRECT, SCRATCH, los(%1.off)} + addis RSCRATCH, %1.reg, {CONST, his(%1.off)} + lfd %2, {GPRINDIRECT, RSCRATCH, los(%1.off)} from IND_RR_D to FPR gen @@ -592,8 +592,8 @@ MOVES from IND_LABEL_D to FPR gen COMMENT("move IND_LABEL_D->FPR") - move {LABEL, %1.adr}, SCRATCH - lfd %2, {GPRINDIRECT, SCRATCH, 0} + move {LABEL, %1.adr}, RSCRATCH + lfd %2, {GPRINDIRECT, RSCRATCH, 0} /* Write double */ @@ -605,8 +605,8 @@ MOVES from FPR to IND_RC_D gen COMMENT("move FPR->IND_RC_D large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - stfd %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + addis RSCRATCH, %2.reg, {CONST, his(%2.off)} + stfd %1, {GPRINDIRECT, RSCRATCH, los(%2.off)} from FPR to IND_RR_D gen @@ -616,8 +616,8 @@ MOVES from FPR to IND_LABEL_D gen COMMENT("move FPR->IND_LABEL_D") - move {LABEL, %2.adr}, SCRATCH - stfd %1, {GPRINDIRECT, SCRATCH, 0} + move {LABEL, %2.adr}, RSCRATCH + stfd %1, {GPRINDIRECT, RSCRATCH, 0} /* Extract condition code field (actually produces (CC&3)<<2) */ @@ -640,8 +640,8 @@ MOVES from TRISTATE_RC_S to CR0 gen COMMENT("move TRISTATE_RC_S->CR0 large") - move {CONST, %1.val}, SCRATCH - cmp %2, {CONST, 0}, %1.reg, SCRATCH + move {CONST, %1.val}, RSCRATCH + cmp %2, {CONST, 0}, %1.reg, RSCRATCH from TRISTATE_RC_U smallu(%val) to CR0 gen @@ -651,8 +651,8 @@ MOVES from TRISTATE_RC_U to CR0 gen COMMENT("move TRISTATE_RC_U->CR0") - move {CONST, %1.val}, SCRATCH - cmpl %2, {CONST, 0}, %1.reg, SCRATCH + move {CONST, %1.val}, RSCRATCH + cmpl %2, {CONST, 0}, %1.reg, RSCRATCH from TRISTATE_FF to CR0 gen @@ -662,23 +662,23 @@ MOVES from GPR to CR0 gen COMMENT("move GPR->CR0") - orX SCRATCH, %1, %1 /* alas, can't call test */ + orX RSCRATCH, %1, %1 /* alas, can't call test */ from TRISTATE_RR_S + 
TRISTATE_RC_S + TRISTATE_FF to GPR gen COMMENT("move TRISTATE_R*_S->GPR") move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tristate_s_table"}, %2 - lwzx %2, %2, SCRATCH + lwzx %2, %2, RSCRATCH from TRISTATE_RR_U + TRISTATE_RC_U to GPR gen COMMENT("move TRISTATE_R*_U->GPR") move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tristate_u_table"}, %2 - lwzx %2, %2, SCRATCH + lwzx %2, %2, RSCRATCH /* Logicals */ @@ -700,8 +700,8 @@ MOVES from AND_RC to GPR gen COMMENT("move AND_RC->GPR") - move {CONST, %1.val}, SCRATCH - and %2, %1.reg, SCRATCH + move {CONST, %1.val}, RSCRATCH + and %2, %1.reg, RSCRATCH from OR_RR to GPR gen @@ -716,8 +716,8 @@ MOVES from OR_RC to GPR gen COMMENT("move OR_RC->GPR") - move {CONST, %1.val}, SCRATCH - or %2, %1.reg, SCRATCH + move {CONST, %1.val}, RSCRATCH + or %2, %1.reg, RSCRATCH from XOR_RR to GPR gen @@ -732,8 +732,8 @@ MOVES from XOR_RC to GPR gen COMMENT("move XOR_RC->GPR") - move {CONST, %1.val}, SCRATCH - xor %2, %1.reg, SCRATCH + move {CONST, %1.val}, RSCRATCH + xor %2, %1.reg, RSCRATCH /* Miscellaneous */ @@ -746,7 +746,7 @@ TESTS to test GPR gen - orX SCRATCH, %1, %1 + orX RSCRATCH, %1, %1 @@ -757,51 +757,51 @@ STACKINGRULES COMMENT("stack LOCAL") stwu {GPRE, regvar(%1.off)}, {GPRINDIRECT, SP, 0-4} - from GPR to STACK + from REG to STACK gen - COMMENT("stack GPR") + COMMENT("stack REG") stwu %1, {GPRINDIRECT, SP, 0-4} from CONST to STACK gen COMMENT("stack CONST") - move %1, SCRATCH - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + move %1, RSCRATCH + stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} from LABEL to STACK gen COMMENT("stack LABEL") - move %1, SCRATCH - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + move %1, RSCRATCH + stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} from SEX_B to STACK gen COMMENT("stack SEX_B") - extsb SCRATCH, %1.reg - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + extsb RSCRATCH, %1.reg + stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} from SEX_H to STACK gen COMMENT("stack SEX_H") - extsh SCRATCH, %1.reg - stwu SCRATCH, 
{GPRINDIRECT, SP, 0-4} + extsh RSCRATCH, %1.reg + stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL to STACK gen - move %1, {GPRE, SCRATCH} - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + move %1, {GPRE, RSCRATCH} + stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} from IND_ALL_W to STACK gen - move %1, SCRATCH - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + move %1, RSCRATCH + stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} from IND_ALL_D to STACK gen move %1, FSCRATCH stfdu FSCRATCH, {GPRINDIRECT, SP, 0-8} - from FPR to STACK + from FREG to STACK gen COMMENT("stack FPR") stfdu %1, {GPRINDIRECT, SP, 0-8} @@ -870,8 +870,8 @@ COERCIONS fmr %a, %1 yields %a - from FPR - uses FPR + from FREG + uses FREG gen fmr %a, %1 yields %a @@ -921,19 +921,19 @@ PATTERNS yields {CONST, $1} pat dup $1==INT32 /* Duplicate word on top of stack */ - with GPR + with REG yields %1 %1 with FSREG yields %1 %1 pat dup $1==INT64 /* Duplicate double-word on top of stack */ - with GPR GPR + with REG REG yields %2 %1 %2 %1 with FREG yields %1 %1 pat exg $1==INT32 /* Exchange top two words on stack */ - with GPR GPR + with REG REG yields %1 %2 pat stl lol $1==$2 /* Store then load local */ @@ -1635,9 +1635,9 @@ PATTERNS uses reusing %1, REG gen move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".teq_table"}, %a - lwzx %a, %a, SCRATCH + lwzx %a, %a, RSCRATCH yields %a pat tne /* top = (top != 0) */ @@ -1645,9 +1645,9 @@ PATTERNS uses reusing %1, REG gen move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tne_table"}, %a - lwzx %a, %a, SCRATCH + lwzx %a, %a, RSCRATCH yields %a pat tlt /* top = (top < 0) */ @@ -1655,9 +1655,9 @@ PATTERNS uses reusing %1, REG gen move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tlt_table"}, %a - lwzx %a, %a, SCRATCH + lwzx %a, %a, RSCRATCH yields %a pat tle /* top = (top <= 0) */ @@ -1665,9 +1665,9 @@ PATTERNS uses reusing %1, REG gen move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tle_table"}, %a - lwzx %a, 
%a, SCRATCH + lwzx %a, %a, RSCRATCH yields %a pat tgt /* top = (top > 0) */ @@ -1675,9 +1675,9 @@ PATTERNS uses reusing %1, REG gen move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tgt_table"}, %a - lwzx %a, %a, SCRATCH + lwzx %a, %a, RSCRATCH yields %a pat tge /* top = (top >= 0) */ @@ -1685,9 +1685,9 @@ PATTERNS uses reusing %1, REG gen move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tge_table"}, %a - lwzx %a, %a, SCRATCH + lwzx %a, %a, RSCRATCH yields %a From 409ba7fb1b1e96e7e6e0f83fe45fe6c231f2e216 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 6 Oct 2016 22:59:27 -0400 Subject: [PATCH 14/25] Remove most of GPRE from mach/powerpc/ncg/table We only need GPRE in a few places where we write {GPRE, regvar(...)} because ncgg can't parse plain regvar(...). In all other places, a plain GPR works. Also remove gpr_gpr_gpr and a few other unused and fake instructions from the list of instructions. --- mach/powerpc/ncg/table | 138 ++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 76 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 6b03ffa90..b36a29f2f 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -233,8 +233,6 @@ TOKENS SETS - GPRI = GPR + GPRE. - SUM_ALL = SUM_RC + SUM_RR. TRISTATE_ALL = TRISTATE_RC_S + TRISTATE_RC_U + TRISTATE_RR_S + @@ -255,29 +253,29 @@ SETS INSTRUCTIONS - add GPRI:wo, GPRI:ro, GPRI:ro. - addX "add." GPRI:wo, GPRI:ro, GPRI:ro. - addi GPRI:wo, GPRI:ro, CONST:ro. - addis GPRI:wo, GPRI:ro, CONST+HILABEL:ro. - and GPRI:wo, GPRI:ro, GPRI:ro. - andc GPRI:wo, GPRI:ro, GPRI:ro. - andiX "andi." GPRI:wo, GPRI:ro, CONST:ro kills :cc. - andisX "andis." GPRI:wo, GPRI:ro, CONST:ro kills :cc. + add GPR:wo, GPR:ro, GPR:ro. + addX "add." GPR:wo, GPR:ro, GPR:ro. + addi GPR:wo, GPR:ro, CONST:ro. + addis GPR:wo, GPR:ro, CONST+HILABEL:ro. + and GPR:wo, GPR:ro, GPR:ro. + andc GPR:wo, GPR:ro, GPR:ro. + andiX "andi." GPR:wo, GPR:ro, CONST:ro kills :cc. 
+ andisX "andis." GPR:wo, GPR:ro, CONST:ro kills :cc. b LABEL:ro. bc CONST:ro, CONST:ro, LABEL:ro. bcctr CONST:ro, CONST:ro, CONST:ro. bcctrl CONST:ro, CONST:ro, CONST:ro. bclr CONST:ro, CONST:ro, CONST:ro. bl LABEL:ro. - cmp CR:ro, CONST:ro, GPRI:ro, GPR:ro kills :cc. - cmpi CR:ro, CONST:ro, GPRI:ro, CONST:ro kills :cc. - cmpl CR:ro, CONST:ro, GPRI:ro, GPR:ro kills :cc. - cmpli CR:ro, CONST:ro, GPRI:ro, CONST:ro kills :cc. - divw GPRI:wo, GPRI:ro, GPRI:ro. - divwu GPRI:wo, GPRI:ro, GPRI:ro. - eqv GPRI:wo, GPRI:ro, GPRI:ro. - extsb GPRI:wo, GPRI:ro. - extsh GPRI:wo, GPRI:ro. + cmp CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc. + cmpi CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc. + cmpl CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc. + cmpli CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc. + divw GPR:wo, GPR:ro, GPR:ro. + divwu GPR:wo, GPR:ro, GPR:ro. + eqv GPR:wo, GPR:ro, GPR:ro. + extsb GPR:wo, GPR:ro. + extsh GPR:wo, GPR:ro. fadd FREG:wo, FREG:ro, FREG:ro. fadds FSREG:wo, FSREG:ro, FSREG:ro. fcmpo CR:wo, FPR:ro, FPR:ro. @@ -292,61 +290,54 @@ INSTRUCTIONS fsubs FSREG:wo, FSREG:ro, FSREG:ro. fmr FPR:wo, FPR:ro. fmr FSREG:wo, FSREG:ro. - lbzx GPRI:wo, GPR:ro, GPR:ro. - lbz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lbzx GPR:wo, GPR:ro, GPR:ro. + lbz GPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. lfd FPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. lfdu FPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. lfdx FPR:wo, GPR:ro, GPR:ro. lfs FSREG:wo, GPRINDIRECT+GPRINDIRECTLO:ro. lfsu FSREG:wo, GPRINDIRECT+GPRINDIRECTLO:rw. lfsx FSREG:wo, GPR:ro, GPR:ro. - lhzx GPRI:wo, GPR:ro, GPR:ro. - lhax GPRI:wo, GPR:ro, GPR:ro. - lha GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lhz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - li32 GPRI:wo, LABEL:ro. - lwzu GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lwzx GPRI:wo, GPR:ro, GPR:ro. - lwz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - nand GPRI:wo, GPRI:ro, GPRI:ro. - neg GPRI:wo, GPRI:ro. - nor GPRI:wo, GPRI:ro, GPRI:ro. - mfcr GPRI:wo. - mullw GPRI:wo, GPRI:ro, GPRI:ro. - mfspr GPRI:wo, SPR:ro. 
- mtspr SPR:wo, GPRI:ro. - or GPRI:wo, GPRI:ro, GPRI:ro. - orc GPRI:wo, GPRI:ro, GPRI:ro. - ori GPRI:wo, GPRI:ro, CONST+LOLABEL:ro. - orX "or." GPRI:wo, GPRI:ro, GPRI:ro kills :cc. - rlwinm GPRI:wo, GPRI:ro, CONST:ro, CONST:ro, CONST:ro. - slw GPRI:wo, GPRI:ro, GPRI:ro. - subf GPRI:wo, GPRI:ro, GPRI:ro. - sraw GPRI:wo, GPRI:ro, GPRI:ro. - srawi GPRI:wo, GPRI:ro, CONST:ro. - srw GPRI:wo, GPRI:ro, GPRI:ro. - stb GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stbx GPRI:ro, GPR:ro, GPR:ro. + lhzx GPR:wo, GPR:ro, GPR:ro. + lhax GPR:wo, GPR:ro, GPR:ro. + lha GPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lhz GPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + li32 GPR:wo, LABEL:ro. + lwzu GPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lwzx GPR:wo, GPR:ro, GPR:ro. + lwz GPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + nand GPR:wo, GPR:ro, GPR:ro. + neg GPR:wo, GPR:ro. + nor GPR:wo, GPR:ro, GPR:ro. + mfcr GPR:wo. + mullw GPR:wo, GPR:ro, GPR:ro. + mfspr GPR:wo, SPR:ro. + mtspr SPR:wo, GPR:ro. + or GPR:wo, GPR:ro, GPR:ro. + orc GPR:wo, GPR:ro, GPR:ro. + ori GPR:wo, GPR:ro, CONST+LOLABEL:ro. + orX "or." GPR:wo, GPR:ro, GPR:ro kills :cc. + rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro. + slw GPR:wo, GPR:ro, GPR:ro. + subf GPR:wo, GPR:ro, GPR:ro. + sraw GPR:wo, GPR:ro, GPR:ro. + srawi GPR:wo, GPR:ro, CONST:ro. + srw GPR:wo, GPR:ro, GPR:ro. + stb GPR:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stbx GPR:ro, GPR:ro, GPR:ro. stfd FPR:ro, GPRINDIRECT+GPRINDIRECTLO:rw. stfdu FPR:ro, GPRINDIRECT+GPRINDIRECTLO:rw. stfdx FPR:ro, GPR:ro, GPR:ro. stfs FSREG:ro, GPRINDIRECT+GPRINDIRECTLO:rw. stfsu FSREG:ro, GPRINDIRECT+GPRINDIRECTLO:rw. stfsx FSREG:ro, GPR:ro, GPR:ro. - sth GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - sthx GPRI:ro, GPR:ro, GPR:ro. - stw GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stwx GPRI:ro, GPR:ro, GPR:ro. - stwu GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - xor GPRI:wo, GPRI:ro, GPRI:ro. - xori GPRI:wo, GPRI:ro, CONST:ro. - - gpr_gpr_gpr GPRI:wo, GPRI:ro, GPRI:ro. - gpr_gpr_si GPRI:wo, GPRI:ro, CONST:ro. 
- gpr_ro_gprindirect GPRI:ro, GPRINDIRECT:rw. - gpr_ro_gpr_gpr GPRI:ro, GPRI:ro, GPRI:ro. - gpr_wo_gprindirect GPRI:wo, GPRINDIRECT:ro. - gpr_wo_gpr_gpr GPRI:wo, GPRI:ro, GPRI:ro. + sth GPR:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + sthx GPR:ro, GPR:ro, GPR:ro. + stw GPR:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stwx GPR:ro, GPR:ro, GPR:ro. + stwu GPR+GPRE:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + xor GPR:wo, GPR:ro, GPR:ro. + xori GPR:wo, GPR:ro, CONST:ro. comment "!" LABEL+LABELI:ro. @@ -365,18 +356,18 @@ MOVES from GPR to GPRE gen COMMENT("move GPR->GPRE") - or %2, %1, %1 + or %2.reg, %1, %1 /* Constants */ from CONST smalls(%val) to GPR gen - COMMENT("move CONST->GPRE") + COMMENT("move CONST->GPR") addi %2, R0, {CONST, lo(%1.val)} from CONST to GPR gen - COMMENT("move CONST->GPRE") + COMMENT("move CONST->GPR") addis %2, R0, {CONST, hi(%1.val)} ori %2, %2, {CONST, lo(%1.val)} @@ -414,12 +405,7 @@ MOVES gen COMMENT("move SUM_RR->GPR") add %2, %1.reg1, %1.reg2 - - from SUM_RR to GPR - gen - COMMENT("move SUM_RR->GPRE") - add %2, %1.reg1, %1.reg2 - + /* Read/write byte */ from IND_RC_B smalls(%off) to GPR @@ -788,7 +774,7 @@ STACKINGRULES from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL to STACK gen - move %1, {GPRE, RSCRATCH} + move %1, RSCRATCH stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} from IND_ALL_W to STACK @@ -833,7 +819,7 @@ COERCIONS uses REG gen COMMENT("coerce LABEL->REG") - move %1, {GPRE, %a} + move %1, %a yields %a from STACK @@ -861,7 +847,7 @@ COERCIONS from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL uses REG gen - move %1, {GPRE, %a} + move %1, %a yields %a from FSREG @@ -2019,10 +2005,10 @@ PATTERNS pat ass /* Adjust stack by variable amount */ with CONST STACK gen - move {SUM_RC, SP, %1.val}, {GPRE, SP} + move {SUM_RC, SP, %1.val}, SP with GPR STACK gen - move {SUM_RR, SP, %1}, {GPRE, SP} + move {SUM_RR, SP, %1}, SP pat asp /* Adjust stack by constant amount */ leaving From 65c2a8a0aefca014278b0dd8c3ff6ec9d66ccfa8 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 7 Oct 2016 
20:52:13 -0400 Subject: [PATCH 15/25] Remove stackadjust and stackoffset() from ncg. This feature has never been used since its introduction, more than 3 years ago, in David Given's commit c93cb69 of May 8, 2013. The commit was for "PowerPC and M68K work". I am not undoing the entire commit. I am only removing the stackadjust and stackoffset() feature. This commit removes the feature from my branch kernigh-linuxppc. This removal includes the mach/proto/ncg parts. The default branch already removed most of the feature, but kept the mach/proto/ncg parts. That removal happened in commit 81778b6 of May 13, 2013 (which was a merge; git diff af0dede 81778b6). The branch dtrg-experimental-powerpc merged the default branch but without the removal. That merge was commit 4703db0f of Sep 15, 2016 (git diff 8c94b13 4703db0). My branch kernigh-linuxppc is off branch dtrg-experimental-powerpc, so I can no longer get the removal by merging default. David Given described the stackadjust feature in https://sourceforge.net/p/tack/mailman/message/30814691/ The instruction stackadjust would add a value to the offset, and the function stackoffset() would return this offset. One would use this to track sp - fp, then omit the frame pointer by not keeping fp in a register. 
--- h/cgg_cg.h | 2 -- mach/proto/ncg/codegen.c | 17 ----------------- mach/proto/ncg/extern.h | 3 --- util/ncgg/cgg.y | 10 +++------- util/ncgg/coerc.c | 9 --------- util/ncgg/extern.h | 1 - util/ncgg/keywords | 2 -- util/ncgg/output.c | 9 --------- util/ncgg/pseudo.h | 1 - 9 files changed, 3 insertions(+), 51 deletions(-) diff --git a/h/cgg_cg.h b/h/cgg_cg.h index a7802ad33..6cc04b007 100644 --- a/h/cgg_cg.h +++ b/h/cgg_cg.h @@ -39,7 +39,6 @@ #define DO_TOSTACK 23 #define DO_KILLREG 24 #define DO_LABDEF 25 -#define DO_STACKADJUST 26 #ifndef MAXATT #define MAXATT TOKENSIZE @@ -134,7 +133,6 @@ typedef struct exprnode *node_p; #define EX_ISROM 44 #define EX_TOPELTSIZE 45 #define EX_FALLTHROUGH 46 -#define EX_STACKOFFSET 47 typedef struct { /* to stack coercions */ diff --git a/mach/proto/ncg/codegen.c b/mach/proto/ncg/codegen.c index cf7379ccf..15d99d393 100644 --- a/mach/proto/ncg/codegen.c +++ b/mach/proto/ncg/codegen.c @@ -909,23 +909,6 @@ normalfailed: if (stackpad!=tokpatlen) { break; } -#endif -#ifdef USE_NOFRAMEPOINTER - case DO_STACKADJUST: { - result_t result; - int nodeno; - - DEBUG("STACKADJUST"); - /* The offset is an expression, which we need to evaluate. 
*/ - - getint(nodeno,codep); - compute(&enodes[nodeno], &result); - assert(result.e_typ==EV_INT); - - if (toplevel) - stackoffset += result.e_v.e_con; - break; - } #endif } } diff --git a/mach/proto/ncg/extern.h b/mach/proto/ncg/extern.h index aa5e42489..3f376d4d1 100644 --- a/mach/proto/ncg/extern.h +++ b/mach/proto/ncg/extern.h @@ -20,9 +20,6 @@ extern rl_p curreglist; /* side effect of findcoerc() */ #ifndef NDEBUG extern int Debug; /* on/off debug printout */ #endif -#ifdef USE_NOFRAMEPOINTER -extern int stackoffset; /* offset from localbase to sp */ -#endif /* * Next descriptions are external declarations for tables created diff --git a/util/ncgg/cgg.y b/util/ncgg/cgg.y index 17a6cf966..4f9cbb00c 100644 --- a/util/ncgg/cgg.y +++ b/util/ncgg/cgg.y @@ -38,7 +38,7 @@ int Xstackflag=0; /* set in coercions, moves, and tests. %1 means something */ struct varinfo *gen_inst(),*gen_move(),*gen_test(),*gen_preturn(),*gen_tlab(); -struct varinfo *gen_label(), *gen_stackadjust(), *make_erase(); +struct varinfo *gen_label(), *make_erase(); expr_t make_expr(),ident_expr(),subreg_expr(),tokm_expr(),all_expr(); expr_t perc_ident_expr(),sum_expr(),regvar_expr(); @@ -74,9 +74,9 @@ iocc_t iops[20]; %token TOPELTSIZE FALLTHROUGH LABELDEF %token PROC CALL EXAMPLE %token FROM TO -%token TEST MOVE STACK RETURN STACKADJUST +%token TEST MOVE STACK RETURN %token PATTERNS PAT WITH EXACT KILLS USES REUSING GEN YIELDS LEAVING -%token DEFINED SAMESIGN SFIT UFIT ROM LOWW HIGHW ISROM STACKOFFSET +%token DEFINED SAMESIGN SFIT UFIT ROM LOWW HIGHW ISROM %token CMPEQ CMPNE CMPLT CMPGT CMPLE CMPGE OR2 AND2 LSHIFT RSHIFT NOT COMP %token INREG REGVAR REG_ANY REG_FLOAT REG_LOOP REG_POINTER %token ADORNACCESS @@ -849,8 +849,6 @@ gen_instruction { $$ = gen_label($2-1); use_tes++; } | RETURN { $$ = gen_preturn(); } - | STACKADJUST expr - { $$ = gen_stackadjust($2.ex_index); use_noframepointer++; } ; optstar : /* empty */ @@ -1030,8 +1028,6 @@ expr { $$ = make_expr(TYPINT,EX_LOWW,$3-1,0); } | HIGHW '(' 
emarg ')' { $$ = make_expr(TYPINT,EX_HIGHW,$3-1,0); } - | STACKOFFSET '(' ')' - { $$ = make_expr(TYPINT,EX_STACKOFFSET, 0, 0); } /* Excluded, because it causes a shift-reduce conflict (problems with a tokenset_no followed by an optexpr) | '-' expr %prec UMINUS diff --git a/util/ncgg/coerc.c b/util/ncgg/coerc.c index eb5f6ee7a..893f81be3 100644 --- a/util/ncgg/coerc.c +++ b/util/ncgg/coerc.c @@ -127,15 +127,6 @@ struct varinfo *gen_preturn() { return(vp); } -struct varinfo *gen_stackadjust(int expr) { - register struct varinfo *vp; - - NEW(vp,struct varinfo); - vp->vi_int[0] = INSSTACKADJUST; - vp->vi_int[1] = expr; - return(vp); -} - struct varinfo *gen_tlab(n) { register struct varinfo *vp; diff --git a/util/ncgg/extern.h b/util/ncgg/extern.h index 909e04774..561591627 100644 --- a/util/ncgg/extern.h +++ b/util/ncgg/extern.h @@ -37,7 +37,6 @@ extern int regclass; extern int maxtokensize; extern int nprocargs, maxprocargs; extern int use_tes; -extern int use_noframepointer; extern char *mystrcpy(); extern char *myalloc(); diff --git a/util/ncgg/keywords b/util/ncgg/keywords index 641fd45b9..606f7c839 100644 --- a/util/ncgg/keywords +++ b/util/ncgg/keywords @@ -43,8 +43,6 @@ reusing REUSING rom ROM samesign SAMESIGN sfit SFIT -stackadjust STACKADJUST -stackoffset STACKOFFSET topeltsize TOPELTSIZE test TEST to TO diff --git a/util/ncgg/output.c b/util/ncgg/output.c index 2a905c339..50458e369 100644 --- a/util/ncgg/output.c +++ b/util/ncgg/output.c @@ -12,8 +12,6 @@ int tabledebug=0; /* do not generate code for table debugging */ #endif int verbose=0; /* print all statistics */ int use_tes; /* use top element size information */ -int use_noframepointer; /* use stackadjust mechanism to remove requirement - for frame pointer */ char *c_file= "tables.c"; char *h_file= "tables.H"; char *cd_file= "code"; @@ -614,8 +612,6 @@ outdefs() { cdef("TABLEDEBUG",1); if (use_tes) cdef("USE_TES",1); - if (use_noframepointer) - cdef("USE_NOFRAMEPOINTER",1); } outars() { @@ -856,11 
+852,6 @@ varinfo *kills,*allocates,*generates,*yields,*leaving; codeint(vp->vi_int[1]); codenl(); break; - case INSSTACKADJUST: - code8(DO_STACKADJUST); - codeint(vp->vi_int[1]); - codenl(); - break; } } codecoco(cocono); diff --git a/util/ncgg/pseudo.h b/util/ncgg/pseudo.h index 91013354a..24b335c50 100644 --- a/util/ncgg/pseudo.h +++ b/util/ncgg/pseudo.h @@ -12,4 +12,3 @@ #define INSERASE (-6) #define INSREMOVE (-7) #define INSLABDEF (-8) -#define INSSTACKADJUST (-9) From 29cb008faa794a5a06b0ddf08906b34daaa54beb Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 14 Oct 2016 23:59:26 -0400 Subject: [PATCH 16/25] In powerpc table, fix macros los() and his(). Change the operator in his() from a - minus to a + plus. When los(n) becomes negative, then his(n) needs to add 0x10000, not subtract it. Also change los(n) to do the sign extension, because smalls(los(n)) should be true, not false. Also change hi(n) and lo(n) to wrap n in parentheses, as (n), because these are macros and n might still contain operators. --- mach/powerpc/ncg/table | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index b36a29f2f..e68c3ff3a 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -18,14 +18,14 @@ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ #define smalls(n) sfit(n, 16) #define smallu(n) ufit(n, 16) -#define lo(n) (n & 0xFFFF) -#define hi(n) ((n>>16) & 0xFFFF) +#define lo(n) ((n) & 0xFFFF) +#define hi(n) (((n)>>16) & 0xFFFF) /* Use these for instructions that treat the low half as signed --- his() * includes a modifier to produce the correct value when the low half gets * sign extended. Er, do make sure you load the low half second. 
*/ -#define los(n) (n & 0xFFFF) -#define his(n) ((hi(n) - (lo(n)>>15)) & 0xFFFF) +#define los(n) (lo(n) | (((0-(lo(n)>>15)) & ~0xFFFF))) +#define his(n) ((hi(n) + (lo(n)>>15)) & 0xFFFF) #define IFFALSE {CONST, 4} #define IFTRUE {CONST, 12} From baa152217e65f26a3d2a40b7bbcc3f2ebf052292 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sat, 15 Oct 2016 20:00:48 -0400 Subject: [PATCH 17/25] Remove unused parts of mach/powerpc/ncg/table Remove unused tokens GPRINDIRECTLO, HILABEL, LOLABEL, LABELI. Also remove an #if 0 ... #endif group of patterns. --- mach/powerpc/ncg/table | 68 +++++++++++++----------------------------- 1 file changed, 20 insertions(+), 48 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index e68c3ff3a..01c4b240f 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -176,9 +176,6 @@ TOKENS /* Used only in instruction descriptions (to generate the correct syntax). */ GPRINDIRECT = { GPR reg; INT off; } 4 off "(" reg ")". - GPRINDIRECTLO = { GPR reg; ADDR adr; } 4 ">" adr "(" reg ")". /* Warning! Do not use on labels. */ - HILABEL = { ADDR adr; } 4 "<" adr. - LOLABEL = { ADDR adr; } 4 ">" adr. /* Primitives */ @@ -224,12 +221,6 @@ TOKENS XOR_RR = { GPR reg1; GPR reg2; } 4. XOR_RC = { GPR reg; INT val; } 4. -/* Comments */ - - LABELI = { ADDR msg; INT num; } 4 msg " " num. - - - SETS @@ -256,7 +247,7 @@ INSTRUCTIONS add GPR:wo, GPR:ro, GPR:ro. addX "add." GPR:wo, GPR:ro, GPR:ro. addi GPR:wo, GPR:ro, CONST:ro. - addis GPR:wo, GPR:ro, CONST+HILABEL:ro. + addis GPR:wo, GPR:ro, CONST:ro. and GPR:wo, GPR:ro, GPR:ro. andc GPR:wo, GPR:ro, GPR:ro. andiX "andi." GPR:wo, GPR:ro, CONST:ro kills :cc. @@ -291,21 +282,21 @@ INSTRUCTIONS fmr FPR:wo, FPR:ro. fmr FSREG:wo, FSREG:ro. lbzx GPR:wo, GPR:ro, GPR:ro. - lbz GPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfd FPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfdu FPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lbz GPR:wo, GPRINDIRECT:ro. + lfd FPR:wo, GPRINDIRECT:ro. 
+ lfdu FPR:wo, GPRINDIRECT:ro. lfdx FPR:wo, GPR:ro, GPR:ro. - lfs FSREG:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfsu FSREG:wo, GPRINDIRECT+GPRINDIRECTLO:rw. + lfs FSREG:wo, GPRINDIRECT:ro. + lfsu FSREG:wo, GPRINDIRECT:rw. lfsx FSREG:wo, GPR:ro, GPR:ro. lhzx GPR:wo, GPR:ro, GPR:ro. lhax GPR:wo, GPR:ro, GPR:ro. - lha GPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lhz GPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lha GPR:wo, GPRINDIRECT:ro. + lhz GPR:wo, GPRINDIRECT:ro. li32 GPR:wo, LABEL:ro. - lwzu GPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lwzu GPR:wo, GPRINDIRECT:ro. lwzx GPR:wo, GPR:ro, GPR:ro. - lwz GPR:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lwz GPR:wo, GPRINDIRECT:ro. nand GPR:wo, GPR:ro, GPR:ro. neg GPR:wo, GPR:ro. nor GPR:wo, GPR:ro, GPR:ro. @@ -315,7 +306,7 @@ INSTRUCTIONS mtspr SPR:wo, GPR:ro. or GPR:wo, GPR:ro, GPR:ro. orc GPR:wo, GPR:ro, GPR:ro. - ori GPR:wo, GPR:ro, CONST+LOLABEL:ro. + ori GPR:wo, GPR:ro, CONST:ro. orX "or." GPR:wo, GPR:ro, GPR:ro kills :cc. rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro. slw GPR:wo, GPR:ro, GPR:ro. @@ -323,23 +314,23 @@ INSTRUCTIONS sraw GPR:wo, GPR:ro, GPR:ro. srawi GPR:wo, GPR:ro, CONST:ro. srw GPR:wo, GPR:ro, GPR:ro. - stb GPR:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stb GPR:ro, GPRINDIRECT:rw. stbx GPR:ro, GPR:ro, GPR:ro. - stfd FPR:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfdu FPR:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stfd FPR:ro, GPRINDIRECT:rw. + stfdu FPR:ro, GPRINDIRECT:rw. stfdx FPR:ro, GPR:ro, GPR:ro. - stfs FSREG:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfsu FSREG:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stfs FSREG:ro, GPRINDIRECT:rw. + stfsu FSREG:ro, GPRINDIRECT:rw. stfsx FSREG:ro, GPR:ro, GPR:ro. - sth GPR:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + sth GPR:ro, GPRINDIRECT:rw. sthx GPR:ro, GPR:ro, GPR:ro. - stw GPR:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stw GPR:ro, GPRINDIRECT:rw. stwx GPR:ro, GPR:ro, GPR:ro. - stwu GPR+GPRE:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stwu GPR+GPRE:ro, GPRINDIRECT:rw. xor GPR:wo, GPR:ro, GPR:ro. xori GPR:wo, GPR:ro, CONST:ro. 
- comment "!" LABEL+LABELI:ro. + comment "!" LABEL:ro. @@ -1945,25 +1936,6 @@ PATTERNS move {IND_RC_W, %a, 0}, %a mtspr CTR, %a bcctr ALWAYS, {CONST, 0}, {CONST, 0} - -#if 0 - - pat gto /* longjmp */ - with STACK - gen - ld {LABEL, $1+2} - wspec {CONST, 1} - ld {LABEL, $1+4} - wspec {CONST, 0} - ld {LABEL, $1+0} - wspec {CONST, 2} - - pat str $1==1 /* Store special GPRister */ - with GPR0 - gen - wspec {CONST, $1} - -#endif pat lor $1==0 /* Load FP */ uses REG From 7c64dab491e191a7a67a977d62cd8c332f87725a Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 16 Oct 2016 13:58:54 -0400 Subject: [PATCH 18/25] Refactor how powerpc ncg pushes constants. When loc (load constant) pushes a constant, it now checks the value of the constant and pushes any of 7 tokens. These tokens allow stack patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other interesting forms of constants. Use the new constant tokens in the rules for adi, sbi, and, ior, xor. Adjust a few other rules to understand the new tokens. Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC, XOR_RC to GPR no longer touch the scratch register, because the constant is not too big. --- mach/powerpc/ncg/table | 275 +++++++++++++++++++++++++++-------------- 1 file changed, 179 insertions(+), 96 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 01c4b240f..133460a6b 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -176,19 +176,30 @@ TOKENS /* Used only in instruction descriptions (to generate the correct syntax). */ GPRINDIRECT = { GPR reg; INT off; } 4 off "(" reg ")". + CONST = { INT val; } 4 val. /* Primitives */ LABEL = { ADDR adr; } 4 adr. - CONST = { INT val; } 4 val. LOCAL = { INT off; } 4. /* Allows us to use regvar() to refer to registers */ GPRE = { GPR reg; } 4 reg. 
+/* Constants on the stack */ + + CONST_N8000 = { INT val; } 4. + CONST_N7FFF_N0001 = { INT val; } 4. + CONST_0000_7FFF = { INT val; } 4. + CONST_8000 = { INT val; } 4. + CONST_8001_FFFF = { INT val; } 4. + CONST_HZ = { INT val; } 4. + CONST_HL = { INT val; } 4. + /* Expression partial results */ - + + SUM_RIS = { GPR reg; INT offhi; } 4. SUM_RC = { GPR reg; INT off; } 4. SUM_RR = { GPR reg1; GPR reg2; } 4. @@ -215,15 +226,26 @@ TOKENS NOT_R = { GPR reg; } 4. AND_RR = { GPR reg1; GPR reg2; } 4. - AND_RC = { GPR reg; INT val; } 4. OR_RR = { GPR reg1; GPR reg2; } 4. - OR_RC = { GPR reg; INT val; } 4. + OR_RIS = { GPR reg; INT valhi; } 4. + OR_RC = { GPR reg; INT val; } 4. XOR_RR = { GPR reg1; GPR reg2; } 4. - XOR_RC = { GPR reg; INT val; } 4. + XOR_RIS = { GPR reg; INT valhi; } 4. + XOR_RC = { GPR reg; INT val; } 4. SETS + /* signed 16-bit integer */ + CONST2 = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF. + /* integer that, when negated, fits signed 16-bit */ + CONST2_WHEN_NEG = CONST_N7FFF_N0001 + CONST_0000_7FFF + CONST_8000. + /* unsigned 16-bit integer */ + UCONST2 = CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF. + /* any constant on stack */ + CONST_ALL = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF + + CONST_8000 + CONST_8001_FFFF + CONST_HZ + CONST_HL. + SUM_ALL = SUM_RC + SUM_RR. TRISTATE_ALL = TRISTATE_RC_S + TRISTATE_RC_U + TRISTATE_RR_S + @@ -231,7 +253,7 @@ SETS SEX_ALL = SEX_B + SEX_H. - LOGICAL_ALL = NOT_R + AND_RR + AND_RC + OR_RR + OR_RC + XOR_RR + + LOGICAL_ALL = NOT_R + AND_RR + OR_RR + OR_RC + XOR_RR + XOR_RC. IND_ALL_W = IND_RC_W + IND_RR_W + IND_LABEL_W. @@ -307,6 +329,7 @@ INSTRUCTIONS or GPR:wo, GPR:ro, GPR:ro. orc GPR:wo, GPR:ro, GPR:ro. ori GPR:wo, GPR:ro, CONST:ro. + oris GPR:wo, GPR:ro, CONST:ro. orX "or." GPR:wo, GPR:ro, GPR:ro kills :cc. rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro. slw GPR:wo, GPR:ro, GPR:ro. @@ -329,6 +352,7 @@ INSTRUCTIONS stwu GPR+GPRE:ro, GPRINDIRECT:rw. xor GPR:wo, GPR:ro, GPR:ro. 
xori GPR:wo, GPR:ro, CONST:ro. + xoris GPR:wo, GPR:ro, CONST:ro. comment "!" LABEL:ro. @@ -351,17 +375,19 @@ MOVES /* Constants */ - from CONST smalls(%val) to GPR + from CONST_ALL smalls(%val) to GPR gen - COMMENT("move CONST->GPR") - addi %2, R0, {CONST, lo(%1.val)} - - from CONST to GPR + COMMENT("move CONST_ALL->GPR smalls") + addi %2, R0, {CONST, %1.val} + + from CONST_ALL + CONST to GPR gen - COMMENT("move CONST->GPR") + COMMENT("move CONST_ALL->GPR") addis %2, R0, {CONST, hi(%1.val)} ori %2, %2, {CONST, lo(%1.val)} - + /* Can't use addi %2, %2, {CONST, los(%1.val)} + * because %2 might be R0. */ + from LABEL to GPR gen COMMENT("move LABEL->GPR") @@ -381,17 +407,16 @@ MOVES /* Register + something */ - from SUM_RC smalls(%off) to GPR - gen - COMMENT("move SUM_RC->GPR smalls") - addi %2, %1.reg, {CONST, lo(%1.off)} - + from SUM_RIS to GPR + gen + COMMENT("move SUM_RIS->GPR") + addis %2, %1.reg, {CONST, %1.offhi} + from SUM_RC to GPR - gen - COMMENT("move SUM_RC->GPR large") - addi %2, %1.reg, {CONST, los(%1.off)} - addis %2, %2, {CONST, his(%1.off)} - + gen + COMMENT("move SUM_RC->GPR") + addi %2, %1.reg, {CONST, %1.off} + from SUM_RR to GPR gen COMMENT("move SUM_RR->GPR") @@ -669,52 +694,39 @@ MOVES COMMENT("move AND_RR->GPR") and %2, %1.reg1, %1.reg2 - from AND_RC smallu(%val) to GPR - gen - COMMENT("move AND_RC->GPR small") - andiX %2, %1.reg, {CONST, %1.val} - - from AND_RC to GPR - gen - COMMENT("move AND_RC->GPR") - move {CONST, %1.val}, RSCRATCH - and %2, %1.reg, RSCRATCH - from OR_RR to GPR gen COMMENT("move OR_RR->GPR") or %2, %1.reg1, %1.reg2 - from OR_RC smallu(%val) to GPR + from OR_RIS to GPR gen - COMMENT("move OR_RC->GPR small") - ori %2, %1.reg, {CONST, %1.val} + COMMENT("move OR_RIS->GPR") + oris %2, %1.reg, {CONST, %1.valhi} from OR_RC to GPR gen COMMENT("move OR_RC->GPR") - move {CONST, %1.val}, RSCRATCH - or %2, %1.reg, RSCRATCH + ori %2, %1.reg, {CONST, %1.val} from XOR_RR to GPR gen COMMENT("move XOR_RR->GPR") xor %2, %1.reg1, %1.reg2 - 
from XOR_RC smallu(%val) to GPR + from XOR_RIS to GPR gen - COMMENT("move XOR_RC->GPR small") - xori %2, %1.reg, {CONST, %1.val} + COMMENT("move XOR_RIS->GPR") + xoris %2, %1.reg, {CONST, %1.valhi} from XOR_RC to GPR gen COMMENT("move XOR_RC->GPR") - move {CONST, %1.val}, RSCRATCH - xor %2, %1.reg, RSCRATCH + xori %2, %1.reg, {CONST, %1.val} /* Miscellaneous */ - from OP_ALL_W + LABEL + CONST to GPRE + from OP_ALL_W + LABEL + CONST_ALL to GPRE gen move %1, %2.reg @@ -738,19 +750,13 @@ STACKINGRULES gen COMMENT("stack REG") stwu %1, {GPRINDIRECT, SP, 0-4} - - from CONST to STACK + + from CONST_ALL + LABEL to STACK gen - COMMENT("stack CONST") + COMMENT("stack CONST_ALL + LABEL") move %1, RSCRATCH stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} - - from LABEL to STACK - gen - COMMENT("stack LABEL") - move %1, RSCRATCH - stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} - + from SEX_B to STACK gen COMMENT("stack SEX_B") @@ -798,11 +804,11 @@ COERCIONS COMMENT("coerce REG->REG") move %1, %a yields %a - - from CONST + + from CONST_ALL uses REG gen - COMMENT("coerce CONST->REG") + COMMENT("coerce CONST_ALL->REG") move %1, %a yields %a @@ -894,8 +900,20 @@ PATTERNS /* Intrinsics */ - pat loc /* Load constant */ - yields {CONST, $1} + pat loc $1==(0-0x8000) /* Load constant */ + yields {CONST_N8000, $1} + pat loc $1>=(0-0x7FFF) && $1<=(0-1) + yields {CONST_N7FFF_N0001, $1} + pat loc $1>=0 && $1<=0x7FFF + yields {CONST_0000_7FFF, $1} + pat loc $1==0x8000 + yields {CONST_8000, $1} + pat loc $1>=0x8001 && $1<=0xFFFF + yields {CONST_8001_FFFF, $1} + pat loc lo($1)==0 + yields {CONST_HZ, $1} + pat loc + yields {CONST_HL, $1} pat dup $1==INT32 /* Duplicate word on top of stack */ with REG @@ -984,9 +1002,13 @@ PATTERNS /* Local variables */ - pat lal /* Load address of local */ + pat lal smalls($1) /* Load address of local */ yields {SUM_RC, FP, $1} + pat lal /* Load address of local */ + uses REG={SUM_RIS, FP, his($1)} + yields {SUM_RC, %a, los($1)} + pat lol inreg($1)>0 /* Load from local */ yields 
{LOCAL, $1} @@ -1001,7 +1023,7 @@ PATTERNS loi INT32*2 pat stl inreg($1)>0 /* Store to local */ - with CONST + LABEL + GPR + OP_ALL_W + with CONST_ALL + LABEL + GPR + OP_ALL_W kills regvar($1), LOCAL %off==$1 gen move %1, {GPRE, regvar($1)} @@ -1356,28 +1378,42 @@ PATTERNS pat adi $1==4 /* Add word (second + top) */ with REG REG yields {SUM_RR, %1, %2} - with CONST REG + with CONST2 REG yields {SUM_RC, %2, %1.val} - with REG CONST + with REG CONST2 yields {SUM_RC, %1, %2.val} - with CONST SUM_RC - yields {SUM_RC, %2.reg, %2.off+%1.val} - with CONST LABEL + with CONST_HZ REG + uses reusing %2, REG={SUM_RIS, %2, his(%1.val)} + yields %a + with REG CONST_HZ + uses reusing %1, REG={SUM_RIS, %1, his(%2.val)} + yields %a + with CONST_ALL-CONST2-CONST_HZ REG + uses reusing %2, REG={SUM_RIS, %2, his(%1.val)} + yields {SUM_RC, %a, los(%1.val)} + with REG CONST_ALL-CONST2-CONST_HZ + uses reusing %1, REG={SUM_RIS, %1, his(%2.val)} + yields {SUM_RC, %a, los(%2.val)} + with CONST_ALL LABEL yields {LABEL, %2.adr+%1.val} - + pat sbi $1==4 /* Subtract word (second - top) */ with REG REG uses reusing %2, REG gen subf %a, %1, %2 yields %a - with CONST REG + with CONST2_WHEN_NEG REG yields {SUM_RC, %2, 0-%1.val} - with CONST SUM_RC - yields {SUM_RC, %2.reg, %2.off-%1.val} - with CONST LABEL + with CONST_HZ REG + uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} + yields %a + with CONST_ALL-CONST2_WHEN_NEG-CONST_HZ REG + uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} + yields {SUM_RC, %a, los(0-%1.val)} + with CONST_ALL LABEL yields {LABEL, %2.adr+(0-%1.val)} - + pat ngi $1==4 /* Negate word */ with REG uses reusing %1, REG @@ -1437,47 +1473,87 @@ PATTERNS yields %a with GPR GPR yields {AND_RR, %1, %2} - with GPR CONST - yields {AND_RC, %1, %2.val} - with CONST GPR - yields {AND_RC, %2, %1.val} - + with GPR UCONST2 + uses reusing %1, REG + gen + andiX %a, %1, {CONST, %2.val} + yields %a + with UCONST2 GPR + uses reusing %2, REG + gen + andiX %a, %2, {CONST, %1.val} + yields %a + 
with GPR CONST_HZ + uses reusing %1, REG + gen + andisX %a, %1, {CONST, hi(%2.val)} + yields %a + with CONST_HZ GPR + uses reusing %2, REG + gen + andisX %a, %2, {CONST, hi(%1.val)} + yields %a + pat and !defined($1) /* AND set */ with STACK gen bl {LABEL, ".and"} - + pat ior $1==4 /* OR word */ - with GPR NOT_R + with REG NOT_R uses reusing %1, REG gen orc %a, %1, %2.reg yields %a - with NOT_R GPR + with NOT_R REG uses reusing %2, REG gen orc %a, %2, %1.reg yields %a - with GPR GPR + with REG REG yields {OR_RR, %1, %2} - with GPR CONST + with REG UCONST2 yields {OR_RC, %1, %2.val} - with CONST GPR + with UCONST2 REG yields {OR_RC, %2, %1.val} - + with REG CONST_HZ + uses reusing %1, REG={OR_RIS, %1, hi(%2.val)} + yields %a + with CONST_HZ REG + uses reusing %2, REG={OR_RIS, %2, hi(%1.val)} + yields %a + with REG CONST_ALL-UCONST2-CONST_HZ + uses reusing %1, REG={OR_RIS, %1, hi(%2.val)} /* %a = %1 | high half */ + yields {OR_RC, %a, lo(%2.val)} + with CONST_ALL-UCONST2-CONST_HZ REG + uses reusing %2, REG={OR_RIS, %2, hi(%1.val)} /* %a = %2 | high half */ + yields {OR_RC, %a, lo(%1.val)} + pat ior !defined($1) /* OR set */ with STACK gen bl {LABEL, ".ior"} - + pat xor $1==4 /* XOR word */ - with GPR GPR + with REG REG yields {XOR_RR, %1, %2} - with GPR CONST + with REG UCONST2 yields {XOR_RC, %1, %2.val} - with CONST GPR + with UCONST2 REG yields {XOR_RC, %2, %1.val} - + with REG CONST_HZ + uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)} + yields %a + with CONST_HZ REG + uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)} + yields %a + with REG CONST_ALL-UCONST2-CONST_HZ + uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)} /* %a = %1 ^ high half */ + yields {XOR_RC, %a, lo(%2.val)} + with CONST_ALL-UCONST2-CONST_HZ REG + uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)} /* %a = %2 ^ high half */ + yields {XOR_RC, %a, lo(%1.val)} + pat xor !defined($1) /* XOR set */ with STACK gen @@ -1508,7 +1584,7 @@ PATTERNS bl {LABEL, ".com"} pat sli $1==4 /* Shift left (second << top) */ - with CONST GPR + with CONST_ALL GPR uses reusing %2, REG gen rlwinm %a, %2, {CONST, (%1.val & 0x1F)},
{CONST, 0}, {CONST, 31-(%1.val & 0x1F)} @@ -1520,7 +1596,7 @@ PATTERNS yields %a pat sri $1==4 /* Shift right signed (second >> top) */ - with CONST GPR + with CONST_ALL GPR uses reusing %2, REG gen srawi %a, %2, {CONST, %1.val & 0x1F} @@ -1532,7 +1608,7 @@ PATTERNS yields %a pat sru $1==4 /* Shift right unsigned (second >> top) */ - with CONST GPR + with CONST_ALL GPR uses reusing %2, REG gen rlwinm %a, %2, {CONST, 32-(%1.val & 0x1F)}, {CONST, (%1.val & 0x1F)}, {CONST, 31} @@ -1742,13 +1818,13 @@ PATTERNS /* Compare and jump */ pat cmi /* Signed tristate compare */ - with CONST GPR + with CONST_ALL GPR yields {TRISTATE_RC_S, %2, %1.val} with GPR GPR yields {TRISTATE_RR_S, %2, %1} pat cmu /* Unsigned tristate compare */ - with CONST GPR + with CONST_ALL GPR yields {TRISTATE_RC_U, %2, %1.val} with GPR GPR yields {TRISTATE_RR_U, %2, %1} @@ -1975,9 +2051,16 @@ PATTERNS addi SP, SP, {CONST, 4} pat ass /* Adjust stack by variable amount */ - with CONST STACK + with CONST2 STACK gen move {SUM_RC, SP, %1.val}, SP + with CONST_HZ STACK + gen + move {SUM_RIS, SP, his(%1.val)}, SP /* addis: add high half */ + with CONST_ALL-CONST2-CONST_HZ STACK + gen + move {SUM_RIS, SP, his(%1.val)}, SP /* addis: add high half */ + move {SUM_RC, SP, los(%1.val)}, SP with GPR STACK gen move {SUM_RR, SP, %1}, SP From 5b5f774a64ac7f4ff4485fd084bc806098191d84 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 16 Oct 2016 16:02:25 -0400 Subject: [PATCH 19/25] Simplify moves to and from IND_RC_* Now that SUM_RC always has a signed 16-bit constant, it happens that the various IND_RC_* tokens also have a signed 16-bit constant, so we no longer need to touch the scratch register.
--- mach/powerpc/ncg/table | 128 ++++++++++------------------------------- 1 file changed, 31 insertions(+), 97 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 133460a6b..05ebcfc08 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -424,75 +424,39 @@ MOVES /* Read/write byte */ - from IND_RC_B smalls(%off) to GPR - gen - COMMENT("move IND_RC_B->GPR small") - lbz %2, {GPRINDIRECT, %1.reg, %1.off} - from IND_RC_B to GPR gen - COMMENT("move IND_RC_B->GPR large") - addis RSCRATCH, %1.reg, {CONST, his(%1.off)} - lbz %2, {GPRINDIRECT, RSCRATCH, los(%1.off)} - - from GPR to IND_RC_B smalls(%off) - gen - COMMENT("move GPR->IND_RC_B small") - stb %1, {GPRINDIRECT, %2.reg, %2.off} - + COMMENT("move IND_RC_B->GPR") + lbz %2, {GPRINDIRECT, %1.reg, %1.off} + from GPR to IND_RC_B gen - COMMENT("move GPR->IND_RC_B large") - addis RSCRATCH, %2.reg, {CONST, his(%2.off)} - stb %1, {GPRINDIRECT, RSCRATCH, los(%2.off)} - -/* Read/write short */ + COMMENT("move GPR->IND_RC_B") + stb %1, {GPRINDIRECT, %2.reg, %2.off} + +/* Read/write halfword (short) */ - from IND_RC_H smalls(%off) to GPR - gen - COMMENT("move IND_RC_H->GPR small") - lhz %2, {GPRINDIRECT, %1.reg, %1.off} - from IND_RC_H to GPR gen - COMMENT("move IND_RC_H->GPR large") - addis RSCRATCH, %1.reg, {CONST, his(%1.off)} - lhz %2, {GPRINDIRECT, RSCRATCH, los(%1.off)} - - from IND_RC_H_S smalls(%off) to GPR - gen - COMMENT("move IND_RC_H_S->GPR small") - lha %2, {GPRINDIRECT, %1.reg, %1.off} - + COMMENT("move IND_RC_H->GPR") + lhz %2, {GPRINDIRECT, %1.reg, %1.off} + from IND_RC_H_S to GPR gen - COMMENT("move IND_RC_H_S->GPR large") - addis RSCRATCH, %1.reg, {CONST, his(%1.off)} - lha %2, {GPRINDIRECT, RSCRATCH, los(%1.off)} - - from GPR to IND_RC_H smalls(%off) - gen - COMMENT("move GPR->IND_RC_H small") - sth %1, {GPRINDIRECT, %2.reg, %2.off} - + COMMENT("move IND_RC_H_S->GPR") + lha %2, {GPRINDIRECT, %1.reg, %1.off} + from GPR to IND_RC_H gen - COMMENT("move GPR->IND_RC_H 
large") - addis RSCRATCH, %2.reg, {CONST, his(%2.off)} - sth %1, {GPRINDIRECT, RSCRATCH, los(%2.off)} - + COMMENT("move GPR->IND_RC_H") + sth %1, {GPRINDIRECT, %2.reg, %2.off} + /* Read word */ - from IND_RC_W smalls(%off) to GPR - gen - COMMENT("move IND_RC_W->GPR small") - lwz %2, {GPRINDIRECT, %1.reg, %1.off} - from IND_RC_W to GPR gen - COMMENT("move IND_RC_W->GPR large") - addis %2, %1.reg, {CONST, his(%1.off)} - lwz %2, {GPRINDIRECT, %2, los(%1.off)} + COMMENT("move IND_RC_W->GPR") + lwz %2, {GPRINDIRECT, %1.reg, %1.off} from IND_RR_W to GPR gen @@ -504,17 +468,11 @@ MOVES COMMENT("move IND_LABEL_W->GPR") move {LABEL, %1.adr}, RSCRATCH lwz %2, {GPRINDIRECT, RSCRATCH, 0} - - from IND_RC_W smalls(%off) to FSREG - gen - COMMENT("move IND_RC_W->FSREG small") - lfs %2, {GPRINDIRECT, %1.reg, %1.off} - + from IND_RC_W to FSREG gen - COMMENT("move IND_RC_W->FSREG large") - addis RSCRATCH, %1.reg, {CONST, his(%1.off)} - lfs %2, {GPRINDIRECT, RSCRATCH, los(%1.off)} + COMMENT("move IND_RC_W->FSREG") + lfs %2, {GPRINDIRECT, %1.reg, %1.off} from IND_RR_W to FSREG gen @@ -529,16 +487,10 @@ MOVES /* Write word */ - from GPR to IND_RC_W smalls(%off) - gen - COMMENT("move GPR->IND_RC_W small") - stw %1, {GPRINDIRECT, %2.reg, %2.off} - from GPR to IND_RC_W gen - COMMENT("move GPR->IND_RC_W large") - addis RSCRATCH, %2.reg, {CONST, his(%2.off)} - stw %1, {GPRINDIRECT, RSCRATCH, los(%2.off)} + COMMENT("move GPR->IND_RC_W") + stw %1, {GPRINDIRECT, %2.reg, %2.off} from GPR to IND_RR_W gen @@ -550,17 +502,11 @@ MOVES COMMENT("move GPR->IND_LABEL_D") move {LABEL, %2.adr}, RSCRATCH stw %1, {GPRINDIRECT, RSCRATCH, 0} - - from FSREG to IND_RC_W smalls(%off) - gen - COMMENT("move FSREG->IND_RC_W small") - stfs %1, {GPRINDIRECT, %2.reg, %2.off} - + from FSREG to IND_RC_W gen - COMMENT("move FSREG->IND_RC_W large") - addis RSCRATCH, %2.reg, {CONST, his(%2.off)} - stfs %1, {GPRINDIRECT, RSCRATCH, los(%2.off)} + COMMENT("move FSREG->IND_RC_W") + stfs %1, {GPRINDIRECT, %2.reg, %2.off} from 
FSREG to IND_RR_W gen @@ -575,17 +521,11 @@ MOVES /* Read double */ - from IND_RC_D smalls(%off) to FPR - gen - COMMENT("move IND_RC_D->FPR small") - lfd %2, {GPRINDIRECT, %1.reg, %1.off} - from IND_RC_D to FPR gen - COMMENT("move IND_RC_D->FPR large") - addis RSCRATCH, %1.reg, {CONST, his(%1.off)} - lfd %2, {GPRINDIRECT, RSCRATCH, los(%1.off)} - + COMMENT("move IND_RC_D->FPR") + lfd %2, {GPRINDIRECT, %1.reg, %1.off} + from IND_RR_D to FPR gen COMMENT("move IND_RR_D->FPR") @@ -599,16 +539,10 @@ MOVES /* Write double */ - from FPR to IND_RC_D smalls(%off) - gen - COMMENT("move FPR->IND_RC_D small") - stfd %1, {GPRINDIRECT, %2.reg, %2.off} - from FPR to IND_RC_D gen - COMMENT("move FPR->IND_RC_D large") - addis RSCRATCH, %2.reg, {CONST, his(%2.off)} - stfd %1, {GPRINDIRECT, RSCRATCH, los(%2.off)} + COMMENT("move FPR->IND_RC_D") + stfd %1, {GPRINDIRECT, %2.reg, %2.off} from FPR to IND_RR_D gen From 19f0eb86a45852274622348fa56d912d2261d55a Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 16 Oct 2016 16:33:24 -0400 Subject: [PATCH 20/25] Remove IND_LABEL_W and IND_LABEL_D Because li32 always loads a label into a GPR, it is sufficient to coerce LABEL to REG, then use IND_RC_W or IND_RC_D for indirection through the label. --- mach/powerpc/ncg/table | 57 +++--------------------------------------- 1 file changed, 4 insertions(+), 53 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 05ebcfc08..2c8418220 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -218,10 +218,8 @@ TOKENS IND_RC_H_S = { GPR reg; INT off; } 4. IND_RC_W = { GPR reg; INT off; } 4. IND_RR_W = { GPR reg1; GPR reg2; } 4. - IND_LABEL_W = { ADDR adr; } 4. IND_RC_D = { GPR reg; INT off; } 8. IND_RR_D = { GPR reg1; GPR reg2; } 8. - IND_LABEL_D = { ADDR adr; } 8. NOT_R = { GPR reg; } 4. @@ -256,9 +254,9 @@ SETS LOGICAL_ALL = NOT_R + AND_RR + OR_RR + OR_RC + XOR_RR + XOR_RC. - IND_ALL_W = IND_RC_W + IND_RR_W + IND_LABEL_W. + IND_ALL_W = IND_RC_W + IND_RR_W. 
- IND_ALL_D = IND_RC_D + IND_RR_D + IND_LABEL_D. + IND_ALL_D = IND_RC_D + IND_RR_D. OP_ALL_W = SUM_ALL + TRISTATE_ALL + SEX_ALL + LOGICAL_ALL + IND_ALL_W. @@ -462,12 +460,6 @@ MOVES gen COMMENT("move IND_RR_W->GPR") lwzx %2, %1.reg1, %1.reg2 - - from IND_LABEL_W to GPR - gen - COMMENT("move IND_LABEL_W->GPR") - move {LABEL, %1.adr}, RSCRATCH - lwz %2, {GPRINDIRECT, RSCRATCH, 0} from IND_RC_W to FSREG gen @@ -478,13 +470,7 @@ MOVES gen COMMENT("move IND_RR_W->FSREG") lfsx %2, %1.reg1, %1.reg2 - - from IND_LABEL_W to FSREG - gen - COMMENT("move IND_LABEL_W->FSREG") - move {LABEL, %1.adr}, RSCRATCH - lfs %2, {GPRINDIRECT, RSCRATCH, 0} - + /* Write word */ from GPR to IND_RC_W @@ -496,12 +482,6 @@ MOVES gen COMMENT("move GPR->IND_RR_W") stwx %1, %2.reg1, %2.reg2 - - from GPR to IND_LABEL_W - gen - COMMENT("move GPR->IND_LABEL_D") - move {LABEL, %2.adr}, RSCRATCH - stw %1, {GPRINDIRECT, RSCRATCH, 0} from FSREG to IND_RC_W gen @@ -513,12 +493,6 @@ MOVES COMMENT("move FSREG->IND_RR_W") stfsx %1, %2.reg1, %2.reg2 - from FSREG to IND_LABEL_W - gen - COMMENT("move FSREG->IND_LABEL_D") - move {LABEL, %2.adr}, RSCRATCH - stfs %1, {GPRINDIRECT, RSCRATCH, 0} - /* Read double */ from IND_RC_D to FPR @@ -531,12 +505,6 @@ MOVES COMMENT("move IND_RR_D->FPR") lfdx %2, %1.reg1, %1.reg2 - from IND_LABEL_D to FPR - gen - COMMENT("move IND_LABEL_D->FPR") - move {LABEL, %1.adr}, RSCRATCH - lfd %2, {GPRINDIRECT, RSCRATCH, 0} - /* Write double */ from FPR to IND_RC_D @@ -548,13 +516,7 @@ MOVES gen COMMENT("move FPR->IND_RR_W") stfdx %1, %2.reg1, %2.reg2 - - from FPR to IND_LABEL_D - gen - COMMENT("move FPR->IND_LABEL_D") - move {LABEL, %2.adr}, RSCRATCH - stfd %1, {GPRINDIRECT, RSCRATCH, 0} - + /* Extract condition code field (actually produces (CC&3)<<2) */ from CR0 to GPR @@ -1142,8 +1104,6 @@ PATTERNS yields {IND_RC_W, %1.reg, %1.off} with SUM_RR yields {IND_RR_W, %1.reg1, %1.reg2} - with LABEL - yields {IND_LABEL_W, %1.adr} pat loi $1==INT64 /* Load double-word indirect */ with GPR @@ 
-1152,8 +1112,6 @@ PATTERNS yields {IND_RC_D, %1.reg, %1.off} with SUM_RR yields {IND_RR_D, %1.reg1, %1.reg2} - with LABEL - yields {IND_LABEL_D, %1.adr} pat loi /* Load arbitrary size */ leaving @@ -1216,9 +1174,6 @@ PATTERNS with SUM_RC GPR+FSREG gen move %2, {IND_RC_W, %1.reg, %1.off} - with LABEL GPR+FSREG - gen - move %2, {IND_LABEL_W, %1.adr} pat sti $1==INT64 /* Store double-word indirect */ with GPR FREG @@ -1238,10 +1193,6 @@ PATTERNS gen move %2, {IND_RC_W, %1.reg, %1.off} move %3, {IND_RC_W, %1.reg, %1.off+4} - with LABEL FREG - gen - move %2, {IND_LABEL_D, %1.adr} - pat sti /* Store arbitrary size */ leaving From e2ccc8f94237d836a87dec274dfc06b406c8bfb3 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 16 Oct 2016 18:13:39 -0400 Subject: [PATCH 21/25] Add "kills MEMORY" to powerpc sti rules. Adjust some of the loi rules (and associated moves) so we can identify the tokens that must be in MEMORY. With this commit, I can navigate the Enterprise even if I comment out my work-around from e22c888. --- mach/powerpc/ncg/table | 139 +++++++++++++++++++++++++---------------- 1 file changed, 86 insertions(+), 53 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 2c8418220..4a99c9d61 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -214,8 +214,11 @@ TOKENS SEX_H = { GPR reg; } 4. IND_RC_B = { GPR reg; INT off; } 4. + IND_RR_B = { GPR reg1; GPR reg2; } 4. IND_RC_H = { GPR reg; INT off; } 4. + IND_RR_H = { GPR reg1; GPR reg2; } 4. IND_RC_H_S = { GPR reg; INT off; } 4. + IND_RR_H_S = { GPR reg1; GPR reg2; } 4. IND_RC_W = { GPR reg; INT off; } 4. IND_RR_W = { GPR reg1; GPR reg2; } 4. IND_RC_D = { GPR reg; INT off; } 8. @@ -253,11 +256,18 @@ SETS LOGICAL_ALL = NOT_R + AND_RR + OR_RR + OR_RC + XOR_RR + XOR_RC. - - IND_ALL_W = IND_RC_W + IND_RR_W. + /* indirect 4-byte value */ + IND_ALL_W = IND_RC_W + IND_RR_W. + /* indirect 8-byte value */ IND_ALL_D = IND_RC_D + IND_RR_D. 
- + /* any indirect value that fits in a GPR */ + IND_ALL_BHW = IND_RC_B + IND_RR_B + IND_RC_H + IND_RR_H + + IND_RC_H_S + IND_RR_H_S + IND_ALL_W. + + /* anything killed by sti (store indirect) */ + MEMORY = IND_ALL_BHW + IND_ALL_D. + OP_ALL_W = SUM_ALL + TRISTATE_ALL + SEX_ALL + LOGICAL_ALL + IND_ALL_W. @@ -301,18 +311,18 @@ INSTRUCTIONS fsubs FSREG:wo, FSREG:ro, FSREG:ro. fmr FPR:wo, FPR:ro. fmr FSREG:wo, FSREG:ro. - lbzx GPR:wo, GPR:ro, GPR:ro. lbz GPR:wo, GPRINDIRECT:ro. + lbzx GPR:wo, GPR:ro, GPR:ro. lfd FPR:wo, GPRINDIRECT:ro. lfdu FPR:wo, GPRINDIRECT:ro. lfdx FPR:wo, GPR:ro, GPR:ro. lfs FSREG:wo, GPRINDIRECT:ro. lfsu FSREG:wo, GPRINDIRECT:rw. lfsx FSREG:wo, GPR:ro, GPR:ro. - lhzx GPR:wo, GPR:ro, GPR:ro. - lhax GPR:wo, GPR:ro, GPR:ro. lha GPR:wo, GPRINDIRECT:ro. + lhax GPR:wo, GPR:ro, GPR:ro. lhz GPR:wo, GPRINDIRECT:ro. + lhzx GPR:wo, GPR:ro, GPR:ro. li32 GPR:wo, LABEL:ro. lwzu GPR:wo, GPRINDIRECT:ro. lwzx GPR:wo, GPR:ro, GPR:ro. @@ -420,35 +430,64 @@ MOVES COMMENT("move SUM_RR->GPR") add %2, %1.reg1, %1.reg2 -/* Read/write byte */ +/* Read byte */ from IND_RC_B to GPR gen COMMENT("move IND_RC_B->GPR") lbz %2, {GPRINDIRECT, %1.reg, %1.off} + from IND_RR_B to GPR + gen + COMMENT("move IND_RR_B->GPR") + lbzx %2, %1.reg1, %1.reg2 + +/* Write byte */ + from GPR to IND_RC_B gen COMMENT("move GPR->IND_RC_B") stb %1, {GPRINDIRECT, %2.reg, %2.off} -/* Read/write halfword (short) */ + from GPR to IND_RR_B + gen + COMMENT("move GPR->IND_RR_B") + stbx %1, %2.reg1, %2.reg2 + +/* Read halfword (short) */ from IND_RC_H to GPR gen COMMENT("move IND_RC_H->GPR") lhz %2, {GPRINDIRECT, %1.reg, %1.off} + from IND_RR_H to GPR + gen + COMMENT("move IND_RR_H->GPR") + lhzx %2, %1.reg1, %1.reg2 + from IND_RC_H_S to GPR gen COMMENT("move IND_RC_H_S->GPR") lha %2, {GPRINDIRECT, %1.reg, %1.off} + from IND_RR_H_S to GPR + gen + COMMENT("move IND_RR_H_S->GPR") + lhax %2, %1.reg1, %1.reg2 + +/* Write halfword */ + from GPR to IND_RC_H gen COMMENT("move GPR->IND_RC_H") sth %1, {GPRINDIRECT, 
%2.reg, %2.off} + from GPR to IND_RR_H + gen + COMMENT("move GPR->IND_RR_H") + sthx %1, %2.reg1, %2.reg2 + /* Read word */ from IND_RC_W to GPR @@ -670,7 +709,7 @@ STACKINGRULES move %1, RSCRATCH stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} - from IND_ALL_W to STACK + from IND_ALL_BHW to STACK gen move %1, RSCRATCH stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} @@ -771,7 +810,7 @@ COERCIONS addi SP, SP, {CONST, 4} yields %a - from IND_ALL_W + from IND_ALL_BHW uses REG gen move %1, %a @@ -1048,55 +1087,29 @@ PATTERNS pat loi $1==INT8 /* Load byte indirect */ with GPR - uses REG - gen - lbz %a, {GPRINDIRECT, %1, 0} - yields %a + yields {IND_RC_B, %1, 0} with SUM_RR - uses reusing %1, REG - gen - lbzx %a, %1.reg1, %1.reg2 - yields %a + yields {IND_RR_B, %1.reg1, %1.reg2} with SUM_RC - uses REG - gen - move {IND_RC_B, %1.reg, %1.off}, %a - yields %a - - pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32 /* Load half-word indirect and sign extend */ + yields {IND_RC_B, %1.reg, %1.off} + + pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32 + /* Load half-word indirect and sign extend */ with GPR - uses REG - gen - lha %a, {GPRINDIRECT, %1, 0} - yields %a + yields {IND_RC_H_S, %1, 0} with SUM_RR - uses reusing %1, REG - gen - lhax %a, %1.reg1, %1.reg2 - yields %a + yields {IND_RR_H_S, %1.reg1, %1.reg2} with SUM_RC - uses REG - gen - move {IND_RC_H_S, %1.reg, %1.off}, %a - yields %a - + yields {IND_RC_H_S, %1.reg, %1.off} + pat loi $1==INT16 /* Load half-word indirect */ with GPR - uses REG - gen - lhz %a, {GPRINDIRECT, %1, 0} - yields %a + yields {IND_RC_H, %1, 0} with SUM_RR - uses reusing %1, REG - gen - lhzx %a, %1.reg1, %1.reg2 - yields %a + yields {IND_RR_H, %1.reg1, %1.reg2} with SUM_RC - uses REG - gen - move {IND_RC_H, %1.reg, %1.off}, %a - yields %a - + yields {IND_RC_H, %1.reg, %1.off} + pat loi $1==INT32 /* Load word indirect */ with GPR yields {IND_RC_W, %1, 0} @@ -1123,73 +1136,93 @@ PATTERNS kills ALL gen bl {LABEL, ".los"} - + pat sti $1==INT8 /* Store byte 
indirect */ with GPR GPR + kills MEMORY gen stb %2, {GPRINDIRECT, %1, 0} with SUM_RR GPR + kills MEMORY gen stbx %2, %1.reg1, %1.reg2 with SUM_RC GPR + kills MEMORY gen move %2, {IND_RC_B, %1.reg, %1.off} with GPR SEX_B + kills MEMORY gen stb %2.reg, {GPRINDIRECT, %1, 0} with SUM_RR SEX_B + kills MEMORY gen stbx %2.reg, %1.reg1, %1.reg2 with SUM_RC SEX_B + kills MEMORY gen move %2.reg, {IND_RC_B, %1.reg, %1.off} pat sti $1==INT16 /* Store half-word indirect */ with GPR GPR + kills MEMORY gen sth %2, {GPRINDIRECT, %1, 0} with SUM_RR GPR + kills MEMORY gen sthx %2, %1.reg1, %1.reg2 with SUM_RC GPR + kills MEMORY gen move %2, {IND_RC_H, %1.reg, %1.off} with GPR SEX_H + kills MEMORY gen sth %2.reg, {GPRINDIRECT, %1, 0} with SUM_RR SEX_H + kills MEMORY gen sthx %2.reg, %1.reg1, %1.reg2 with SUM_RC SEX_H + kills MEMORY gen move %2.reg, {IND_RC_H, %1.reg, %1.off} pat sti $1==INT32 /* Store word indirect */ with GPR GPR+FSREG + kills MEMORY gen move %2, {IND_RC_W, %1, 0} with SUM_RR GPR+FSREG + kills MEMORY gen move %2, {IND_RR_W, %1.reg1, %1.reg2} with SUM_RC GPR+FSREG + kills MEMORY gen move %2, {IND_RC_W, %1.reg, %1.off} pat sti $1==INT64 /* Store double-word indirect */ with GPR FREG + kills MEMORY gen move %2, {IND_RC_D, %1, 0} with SUM_RR FREG + kills MEMORY gen move %2, {IND_RR_D, %1.reg1, %1.reg2} with SUM_RC FREG + kills MEMORY gen move %2, {IND_RC_D, %1.reg, %1.off} with GPR GPR GPR + kills MEMORY gen stw %2, {GPRINDIRECT, %1, 0} stw %3, {GPRINDIRECT, %1, 4} with SUM_RC GPR GPR + kills MEMORY gen move %2, {IND_RC_W, %1.reg, %1.off} move %3, {IND_RC_W, %1.reg, %1.off+4} @@ -1198,8 +1231,8 @@ PATTERNS leaving loc $1 sts INT32 - - pat sts /* Load arbitrary size */ + + pat sts /* Store arbitrary size */ with GPR3 GPR4 STACK kills ALL gen From f33b30ed3c185bd784b595ec2cc4d1ce8b29d897 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 17 Oct 2016 00:39:59 -0400 Subject: [PATCH 22/25] Rewrite .fif8 to avoid powerpc64 fctid This fixes the SIGILL (illegal 
instruction) in startrek when firing phasers. The 32-bit processors in my PowerPC Mac and in QEMU don't have fctid, a 64-bit instruction. I got the idea from mach/proto/fp/fif8.c to extract the exponent, clear some bits to get an integer, then subtract the integer from the original value to get the fraction. --- mach/powerpc/libem/fif8.s | 93 ++++++++++++++++++++++++++------------- mach/powerpc/ncg/table | 13 +++--- 2 files changed, 70 insertions(+), 36 deletions(-) diff --git a/mach/powerpc/libem/fif8.s b/mach/powerpc/libem/fif8.s index 052c38cf2..a26c77830 100644 --- a/mach/powerpc/libem/fif8.s +++ b/mach/powerpc/libem/fif8.s @@ -1,38 +1,71 @@ -# -! $Source$ -! $State$ -! $Revision$ - #include "powerpc.h" - + .sect .text -! Multiplies two floats, and returns the fraction and integer. +! Multiplies two double-precision floats, then splits the product into +! integer and fraction, like modf(3) in C. On entry: +! f1 = float +! f2 = other float +! Yields: +! f1 = fraction +! f2 = integer +! Kills: cr0 f1 f2 r3 r4 r5 r6 .define .fif8 .fif8: - lfd f0, 8(sp) - lfd f1, 0(sp) - fmul f0, f0, f1 - fabs f1, f0 ! f0 = result - - ! The following chunk does f1 = floor(f1). See page 158 of the book. - - mtfsfi cr7, 3 ! set rounding mode to -inf. - mtfsb0 23 - fctid f2, f1 - fcfid f2, f2 - mcrfs cr7, cr5 - bc IFFALSE, 31, toobig - fmr f1, f2 -toobig: + fmul f1, f1, f2 + stfdu f1, -8(sp) ! push f1 = product + lwz r3, 0(sp) ! r3 = high word + lwz r4, 4(sp) ! r4 = low word - fabs f2, f1 ! f2 = fabs(f1) - fsub f2, f2, f1 - stfd f2, 8(sp) - - fneg f2, f1 - fsel f2, f0, f1, f2 - stfd f2, 0(sp) - + ! IEEE double-precision format: + ! sign exponent fraction + ! 0 1..11 12..63 + ! Subtract 1023 from the IEEE exponent. If the result is from + ! 0 to 51, then the IEEE fraction has that many integer bits. + ! (IEEE has an implicit 1 before its fraction. If the IEEE + ! fraction has 0 integer bits, we still have an integer.) + rlwinm r5, r3, 12, 21, 31 ! r5 = IEEE exponent + addic. 
r5, r5, -1023 ! r5 = nr of integer bits + bc IFTRUE, LT, no_int + cmpi cr0, 0, r5, 21 + bc IFTRUE, LT, small_int + cmpi cr0, 0, r5, 52 + bc IFTRUE, LT, big_int + + ! f1 is an integer without fraction. Jump to calculate + ! fraction f1 = f1 - f2. It will be zero (or perhaps NaN). + fmr f2, f1 + b subtract + +no_int: + ! f1 is a fraction without integer. + fsub f2, f1, f1 ! integer = zero + b done + +small_int: + ! f1 has r5 = 0 to 20 integer bits in the IEEE fraction. + ! High word has 20 - r5 fraction bits. + addi r6, r0, 20 + subf r6, r5, r6 + srw r3, r3, r6 + addi r4, r0, 0 ! clear low word + slw r3, r3, r6 ! clear fraction in high word + b move_int + +big_int: + ! f1 has r5 = 21 to 51 integer bits. + ! Low word has 52 - r5 fraction bits. + addi r6, r0, 52 + subf r6, r5, r6 + srw r4, r4, r6 + slw r4, r4, r6 ! clear fraction in low word +move_int: + stw r3, 0(sp) + stw r4, 4(sp) + lfd f2, 0(sp) ! f2 = integer +subtract: + fsub f1, f1, f2 ! fraction = value - integer +done: + addi sp, sp, 8 ! restore stack pointer bclr ALWAYS, 0, 0 diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 4a99c9d61..08ddd7d2e 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -2141,16 +2141,17 @@ PATTERNS with STACK gen bl {LABEL, ".cuf8"} - - pat fef $1==INT64 /* Split double */ + + pat fef $1==INT64 /* Split exponent, fraction */ with GPR3 GPR4 kills FPR0, FPR1, GPR6, GPR7 gen bl {LABEL, ".fef8"} yields R4 R3 R5 - - pat fif $1==INT64 /* Multiply and split double (?) */ - with STACK + + pat fif $1==INT64 /* Multiply then split integer, fraction */ + with FPR1 FPR2 + kills FPR1, FPR2, GPR3, GPR4, GPR5, GPR6 gen bl {LABEL, ".fif8"} - + yields F1 F2 From c7b68033ef0394d6583d5799a9f57becdbf3d509 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 17 Oct 2016 14:57:21 -0400 Subject: [PATCH 23/25] Add costs to powerpc instructions. Also show how andi., andis., or., set condition codes. 
--- mach/powerpc/ncg/table | 122 ++++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 55 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 08ddd7d2e..f9612664e 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -274,14 +274,26 @@ SETS INSTRUCTIONS + /* We give time as cycles of total latency from Freescale + * Semiconductor, MPC7450 RISC Microprocessor Family Reference + * Manual, Rev. 5, section 6.6. + * + * We have only 4-byte alignment for doubles; 8-byte alignment is + * optimal. We guess the misalignment penalty by adding 1 cycle to + * the cost of loading or storing a double: + * lfd lfdu lfdx: 4 -> 5 + * stfd stfdu stfdx: 3 -> 4 + */ + cost(4, 1) /* space, time */ + add GPR:wo, GPR:ro, GPR:ro. addX "add." GPR:wo, GPR:ro, GPR:ro. addi GPR:wo, GPR:ro, CONST:ro. addis GPR:wo, GPR:ro, CONST:ro. and GPR:wo, GPR:ro, GPR:ro. andc GPR:wo, GPR:ro, GPR:ro. - andiX "andi." GPR:wo, GPR:ro, CONST:ro kills :cc. - andisX "andis." GPR:wo, GPR:ro, CONST:ro kills :cc. + andiX "andi." GPR:wo:cc, GPR:ro, CONST:ro. + andisX "andis." GPR:wo:cc, GPR:ro, CONST:ro. b LABEL:ro. bc CONST:ro, CONST:ro, LABEL:ro. bcctr CONST:ro, CONST:ro, CONST:ro. @@ -292,77 +304,77 @@ INSTRUCTIONS cmpi CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc. cmpl CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc. cmpli CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc. - divw GPR:wo, GPR:ro, GPR:ro. - divwu GPR:wo, GPR:ro, GPR:ro. + divw GPR:wo, GPR:ro, GPR:ro cost(4, 23). + divwu GPR:wo, GPR:ro, GPR:ro cost(4, 23). eqv GPR:wo, GPR:ro, GPR:ro. extsb GPR:wo, GPR:ro. extsh GPR:wo, GPR:ro. - fadd FREG:wo, FREG:ro, FREG:ro. - fadds FSREG:wo, FSREG:ro, FSREG:ro. - fcmpo CR:wo, FPR:ro, FPR:ro. - fdiv FREG:wo, FREG:ro, FREG:ro. - fdivs FSREG:wo, FSREG:ro, FSREG:ro. - fneg FREG:wo, FREG:ro. - fneg FSREG:wo, FSREG:ro. - fmul FREG:wo, FREG:ro, FREG:ro. - fmuls FSREG:wo, FSREG:ro, FSREG:ro. - frsp FSREG:wo, FREG:ro. - fsub FREG:wo, FREG:ro, FREG:ro. 
- fsubs FSREG:wo, FSREG:ro, FSREG:ro. - fmr FPR:wo, FPR:ro. - fmr FSREG:wo, FSREG:ro. - lbz GPR:wo, GPRINDIRECT:ro. - lbzx GPR:wo, GPR:ro, GPR:ro. - lfd FPR:wo, GPRINDIRECT:ro. - lfdu FPR:wo, GPRINDIRECT:ro. - lfdx FPR:wo, GPR:ro, GPR:ro. - lfs FSREG:wo, GPRINDIRECT:ro. - lfsu FSREG:wo, GPRINDIRECT:rw. - lfsx FSREG:wo, GPR:ro, GPR:ro. - lha GPR:wo, GPRINDIRECT:ro. - lhax GPR:wo, GPR:ro, GPR:ro. - lhz GPR:wo, GPRINDIRECT:ro. - lhzx GPR:wo, GPR:ro, GPR:ro. - li32 GPR:wo, LABEL:ro. - lwzu GPR:wo, GPRINDIRECT:ro. - lwzx GPR:wo, GPR:ro, GPR:ro. - lwz GPR:wo, GPRINDIRECT:ro. + fadd FREG:wo, FREG:ro, FREG:ro cost(4, 5). + fadds FSREG:wo, FSREG:ro, FSREG:ro cost(4, 5). + fcmpo CR:wo, FPR:ro, FPR:ro cost(4, 5). + fdiv FREG:wo, FREG:ro, FREG:ro cost(4, 35). + fdivs FSREG:wo, FSREG:ro, FSREG:ro cost(4, 21). + fmr FPR:wo, FPR:ro cost(4, 5). + fmr FSREG:wo, FSREG:ro cost(4, 5). + fmul FREG:wo, FREG:ro, FREG:ro cost(4, 5). + fmuls FSREG:wo, FSREG:ro, FSREG:ro cost(4, 5). + fneg FREG:wo, FREG:ro cost(4, 5). + fneg FSREG:wo, FSREG:ro cost(4, 5). + frsp FSREG:wo, FREG:ro cost(4, 5). + fsub FREG:wo, FREG:ro, FREG:ro cost(4, 5). + fsubs FSREG:wo, FSREG:ro, FSREG:ro cost(4, 5). + lbz GPR:wo, GPRINDIRECT:ro cost(4, 3). + lbzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). + lfd FPR:wo, GPRINDIRECT:ro cost(4, 5). + lfdu FPR:wo, GPRINDIRECT:ro cost(4, 5). + lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5). + lfs FSREG:wo, GPRINDIRECT:ro cost(4, 4). + lfsu FSREG:wo, GPRINDIRECT:rw cost(4, 4). + lfsx FSREG:wo, GPR:ro, GPR:ro cost(4, 4). + lha GPR:wo, GPRINDIRECT:ro cost(4, 3). + lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3). + lhz GPR:wo, GPRINDIRECT:ro cost(4, 3). + lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). + li32 GPR:wo, LABEL:ro cost(8, 2). + lwzu GPR:wo, GPRINDIRECT:ro cost(4, 3). + lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). + lwz GPR:wo, GPRINDIRECT:ro cost(4, 3). nand GPR:wo, GPR:ro, GPR:ro. neg GPR:wo, GPR:ro. nor GPR:wo, GPR:ro, GPR:ro. - mfcr GPR:wo. - mullw GPR:wo, GPR:ro, GPR:ro. - mfspr GPR:wo, SPR:ro. 
- mtspr SPR:wo, GPR:ro. + mfcr GPR:wo cost(4,2). + mullw GPR:wo, GPR:ro, GPR:ro cost(4, 4). + mfspr GPR:wo, SPR:ro cost(4, 3). + mtspr SPR:wo, GPR:ro cost(4, 2). or GPR:wo, GPR:ro, GPR:ro. orc GPR:wo, GPR:ro, GPR:ro. ori GPR:wo, GPR:ro, CONST:ro. oris GPR:wo, GPR:ro, CONST:ro. - orX "or." GPR:wo, GPR:ro, GPR:ro kills :cc. + orX "or." GPR:wo:cc, GPR:ro, GPR:ro. rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro. slw GPR:wo, GPR:ro, GPR:ro. subf GPR:wo, GPR:ro, GPR:ro. - sraw GPR:wo, GPR:ro, GPR:ro. - srawi GPR:wo, GPR:ro, CONST:ro. + sraw GPR:wo, GPR:ro, GPR:ro cost(4, 2). + srawi GPR:wo, GPR:ro, CONST:ro cost(4, 2). srw GPR:wo, GPR:ro, GPR:ro. - stb GPR:ro, GPRINDIRECT:rw. - stbx GPR:ro, GPR:ro, GPR:ro. - stfd FPR:ro, GPRINDIRECT:rw. - stfdu FPR:ro, GPRINDIRECT:rw. - stfdx FPR:ro, GPR:ro, GPR:ro. - stfs FSREG:ro, GPRINDIRECT:rw. - stfsu FSREG:ro, GPRINDIRECT:rw. - stfsx FSREG:ro, GPR:ro, GPR:ro. - sth GPR:ro, GPRINDIRECT:rw. - sthx GPR:ro, GPR:ro, GPR:ro. - stw GPR:ro, GPRINDIRECT:rw. - stwx GPR:ro, GPR:ro, GPR:ro. - stwu GPR+GPRE:ro, GPRINDIRECT:rw. + stb GPR:ro, GPRINDIRECT:rw cost(4, 3). + stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3). + stfd FPR:ro, GPRINDIRECT:rw cost(4, 4). + stfdu FPR:ro, GPRINDIRECT:rw cost(4, 4). + stfdx FPR:ro, GPR:ro, GPR:ro cost(4, 4). + stfs FSREG:ro, GPRINDIRECT:rw cost(4, 3). + stfsu FSREG:ro, GPRINDIRECT:rw cost(4, 3). + stfsx FSREG:ro, GPR:ro, GPR:ro cost(4, 3). + sth GPR:ro, GPRINDIRECT:rw cost(4, 3). + sthx GPR:ro, GPR:ro, GPR:ro cost(4, 3). + stw GPR:ro, GPRINDIRECT:rw cost(4, 3). + stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3). + stwu GPR+GPRE:ro, GPRINDIRECT:rw cost(4, 3). xor GPR:wo, GPR:ro, GPR:ro. xori GPR:wo, GPR:ro, CONST:ro. xoris GPR:wo, GPR:ro, CONST:ro. - comment "!" LABEL:ro. + comment "!" LABEL:ro cost(0, 0). From cfbc537959dcb03941e0e1b4e9b6751c077fa66d Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 17 Oct 2016 20:31:59 -0400 Subject: [PATCH 24/25] In powerpc ncg, add a speed hack for sti 8. 
ncg is too slow with this many registers. A stack pattern "with GPR GPR GPR" or "with REG REG REG" takes too long to pick registers, causing ncg to take about 2 seconds on each sti 8. I introduce REG_PAIR and there are only 4 such pairs. For programs that use sti 8 (including C programs that copy 8-byte structs), this speed hack improves the ncg run from several seconds to almost instantaneous. Also add a few COMMENT(...) lines in stacking rules. --- mach/powerpc/ncg/table | 78 +++++++++++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 13 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index f9612664e..5617bd272 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -42,6 +42,7 @@ PROPERTIES GPR /* any GPR */ REG /* any allocatable GPR */ + REG_PAIR /* speed hack for sti 8 */ FPR /* any FPR */ FREG /* any allocatable FPR */ FSREG /* any allocatable single-precision FPR */ @@ -97,6 +98,12 @@ REGISTERS SP("sp") : GPR, GPRSP. R0("r0") : GPR, GPR0. + /* speed hack for sti 8 */ + PAIR_R9_R10=R9+R10 : REG_PAIR. + PAIR_R7_R8=R7+R8 : REG_PAIR. + PAIR_R5_R6=R5+R6 : REG_PAIR. + PAIR_R3_R4=R3+R4 : REG_PAIR. + F31("f31") : FPR, FREG, FPR31. F30("f30") : FPR, FREG, FPR30. F29("f29") : FPR, FREG, FPR29. 
@@ -698,6 +705,12 @@ STACKINGRULES COMMENT("stack REG") stwu %1, {GPRINDIRECT, SP, 0-4} + from REG_PAIR to STACK + gen + COMMENT("stack REG_PAIR") + stwu %1.2, {GPRINDIRECT, SP, 0-4} + stwu %1.1, {GPRINDIRECT, SP, 0-4} + from CONST_ALL + LABEL to STACK gen COMMENT("stack CONST_ALL + LABEL") @@ -718,16 +731,19 @@ STACKINGRULES from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL to STACK gen + COMMENT("stack SUM_ALL + TRISTATE_ALL + LOGICAL_ALL") move %1, RSCRATCH stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} from IND_ALL_BHW to STACK gen + COMMENT("stack IND_ALL_BHW") move %1, RSCRATCH stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} from IND_ALL_D to STACK gen + COMMENT("stack IND_ALL_D") move %1, FSCRATCH stfdu FSCRATCH, {GPRINDIRECT, SP, 0-8} @@ -773,7 +789,16 @@ COERCIONS lwz %a, {GPRINDIRECT, SP, 0} addi SP, SP, {CONST, 4} yields %a - + + from STACK + uses REG_PAIR + gen + COMMENT("coerce STACK->REG_PAIR") + lwz %a.1, {GPRINDIRECT, SP, 0} + lwz %a.2, {GPRINDIRECT, SP, 4} + addi SP, SP, {CONST, 8} + yields %a + from SEX_B uses REG gen @@ -821,26 +846,31 @@ COERCIONS lfs %a, {GPRINDIRECT, SP, 0} addi SP, SP, {CONST, 4} yields %a - + from IND_ALL_BHW uses REG gen move %1, %a yields %a - + from IND_ALL_W uses FSREG gen move %1, %a yields %a - + + /* + * from IND_RC_D to REG_PAIR is not possible, because + * %1.off+4 might overflow a signed 16-bit integer in + * move {IND_RC_W, %1.reg, %1.off+4}, %a.2 + */ + from IND_ALL_D uses FREG gen move %1, %a yields %a - - + PATTERNS @@ -1216,7 +1246,7 @@ PATTERNS move %2, {IND_RC_W, %1.reg, %1.off} pat sti $1==INT64 /* Store double-word indirect */ - with GPR FREG + with REG FREG kills MEMORY gen move %2, {IND_RC_D, %1, 0} @@ -1228,16 +1258,38 @@ PATTERNS kills MEMORY gen move %2, {IND_RC_D, %1.reg, %1.off} - with GPR GPR GPR + /* + * This pattern would be too slow: + * with REG REG REG + * ncg can't handle that many registers, and would + * take about 2 seconds on each sti 8. So we use + * REG_PAIR as a speed hack for sti 8. 
+ */ + with REG REG_PAIR kills MEMORY gen - stw %2, {GPRINDIRECT, %1, 0} - stw %3, {GPRINDIRECT, %1, 4} - with SUM_RC GPR GPR + move %2.1, {IND_RC_W, %1, 0} + move %2.2, {IND_RC_W, %1, 4} + /* + * Next 2 patterns exist because there is no coercion + * from IND_ALL_D to REG_PAIR. + */ + with REG IND_RC_D kills MEMORY + uses REG={SUM_RC, %2.reg, %2.off}, REG_PAIR gen - move %2, {IND_RC_W, %1.reg, %1.off} - move %3, {IND_RC_W, %1.reg, %1.off+4} + move {IND_RC_W, %a, 0}, %b.1 + move {IND_RC_W, %a, 4}, %b.2 + move %b.1, {IND_RC_W, %1, 0} + move %b.2, {IND_RC_W, %1, 4} + with REG IND_RR_D + kills MEMORY + uses REG={SUM_RR, %2.reg1, %2.reg2}, REG_PAIR + gen + move {IND_RC_W, %a, 0}, %b.1 + move {IND_RC_W, %a, 4}, %b.2 + move %b.1, {IND_RC_W, %1, 0} + move %b.2, {IND_RC_W, %1, 4} pat sti /* Store arbitrary size */ leaving From 99dee0ad24b454d561e6348d162c54769fb21786 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Tue, 18 Oct 2016 21:16:47 -0400 Subject: [PATCH 25/25] Remove f14 to f31 from FREG and FSREG. This would have happened later, if f14 to f31 became regvar (like r13 to r31 are now). I am doing it now because ncg is too slow for rules "with FREG FREG uses FREG". We use such rules for adf 8 and other EM instructions that operate on 2 floats. Like my last commit cfbc537, this commit speeds ncg by removing choices for register allocation. --- mach/powerpc/ncg/table | 60 ++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 5617bd272..ed107aceb 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -104,24 +104,29 @@ REGISTERS PAIR_R5_R6=R5+R6 : REG_PAIR. PAIR_R3_R4=R3+R4 : REG_PAIR. - F31("f31") : FPR, FREG, FPR31. - F30("f30") : FPR, FREG, FPR30. - F29("f29") : FPR, FREG, FPR29. - F28("f28") : FPR, FREG, FPR28. - F27("f27") : FPR, FREG, FPR27. - F26("f26") : FPR, FREG, FPR26. - F25("f25") : FPR, FREG, FPR25. - F24("f24") : FPR, FREG, FPR24. 
- F23("f23") : FPR, FREG, FPR23. - F22("f22") : FPR, FREG, FPR22. - F21("f21") : FPR, FREG, FPR21. - F20("f20") : FPR, FREG, FPR20. - F19("f19") : FPR, FREG, FPR19. - F18("f18") : FPR, FREG, FPR18. - F17("f17") : FPR, FREG, FPR17. - F16("f16") : FPR, FREG, FPR16. - F15("f15") : FPR, FREG, FPR15. - F14("f14") : FPR, FREG, FPR14. + /* + * F14 to F31 are reserved for regvar, if we ever implement + * it. Don't add them to FREG; the register allocator would + * be too slow. + */ + F31("f31") : FPR, FPR31. + F30("f30") : FPR, FPR30. + F29("f29") : FPR, FPR29. + F28("f28") : FPR, FPR28. + F27("f27") : FPR, FPR27. + F26("f26") : FPR, FPR26. + F25("f25") : FPR, FPR25. + F24("f24") : FPR, FPR24. + F23("f23") : FPR, FPR23. + F22("f22") : FPR, FPR22. + F21("f21") : FPR, FPR21. + F20("f20") : FPR, FPR20. + F19("f19") : FPR, FPR19. + F18("f18") : FPR, FPR18. + F17("f17") : FPR, FPR17. + F16("f16") : FPR, FPR16. + F15("f15") : FPR, FPR15. + F14("f14") : FPR, FPR14. F13("f13") : FPR, FREG, FPR13. F12("f12") : FPR, FREG, FPR12. F11("f11") : FPR, FREG, FPR11. @@ -137,24 +142,6 @@ REGISTERS F1("f1") : FPR, FREG, FPR1. F0("f0") : FPR, FPR0. - FS31("f31")=F31 : FSREG. - FS30("f30")=F30 : FSREG. - FS29("f29")=F29 : FSREG. - FS28("f28")=F28 : FSREG. - FS27("f27")=F27 : FSREG. - FS26("f26")=F26 : FSREG. - FS25("f25")=F25 : FSREG. - FS24("f24")=F24 : FSREG. - FS23("f23")=F23 : FSREG. - FS22("f22")=F22 : FSREG. - FS21("f21")=F21 : FSREG. - FS20("f20")=F20 : FSREG. - FS19("f19")=F19 : FSREG. - FS18("f18")=F18 : FSREG. - FS17("f17")=F17 : FSREG. - FS16("f16")=F16 : FSREG. - FS15("f15")=F15 : FSREG. - FS14("f14")=F14 : FSREG. FS13("f13")=F13 : FSREG. FS12("f12")=F12 : FSREG. FS11("f11")=F11 : FSREG. @@ -168,7 +155,6 @@ REGISTERS FS3("f3")=F3 : FSREG. FS2("f2")=F2 : FSREG. FS1("f1")=F1 : FSREG. - /* FS0("f0")=F0 */ LR("lr") : SPR. CTR("ctr") : SPR.