From ba9b021253488207fb38defe36cc60b564135661 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 13 Feb 2017 15:22:00 -0500 Subject: [PATCH] Use .los4 in lar 4 and .sts4 in sar 4. Our libem had two implementations of loading a block onto the stack, one for lar 4 and one for los 4. Now lar 4 and los 4 share the code in .los4. Likewise, sar 4 and sts 4 share the code in .sts4. Rename .los to .los4 and .sts to .sts4, because they implement los 4 and sts 4. Remove the special case for loading or storing 4 bytes, because we can do it with 1 iteration of the loop. Remove the lines to "align size" where the size must already be a multiple of 4. Fix the upper bound check in .aar4. Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the real stack, except that .los4 and .sts4 take the size in register r3. Have .aar4 set r3 to the size of the array element. So lar 4 is just .aar4 then .los4, and sar 4 is just .aar4 then .sts4. ncg no longer calls .lar4 and .sar4 in libem, because it inlines the code; but I keep .lar4 and .sar4 in libem, because mcg references them. They might or might not work in mcg. 
--- mach/powerpc/libem/aar4.s | 34 ++++++++++++------------ mach/powerpc/libem/build.lua | 2 +- mach/powerpc/libem/lar4.s | 32 +++-------------------- mach/powerpc/libem/los.s | 47 --------------------------------- mach/powerpc/libem/los4.s | 34 ++++++++++++++++++++++++ mach/powerpc/libem/sar4.s | 34 +++--------------------- mach/powerpc/libem/sts.s | 50 ------------------------------------ mach/powerpc/libem/sts4.s | 35 +++++++++++++++++++++++++ mach/powerpc/ncg/table | 50 ++++++++++++++++++------------------ 9 files changed, 118 insertions(+), 200 deletions(-) delete mode 100644 mach/powerpc/libem/los.s create mode 100644 mach/powerpc/libem/los4.s delete mode 100644 mach/powerpc/libem/sts.s create mode 100644 mach/powerpc/libem/sts4.s diff --git a/mach/powerpc/libem/aar4.s b/mach/powerpc/libem/aar4.s index 5e4155091..fc8620d02 100644 --- a/mach/powerpc/libem/aar4.s +++ b/mach/powerpc/libem/aar4.s @@ -1,14 +1,9 @@ .sect .text -! Index into a bounds-checked array. +! Get address of element of bounds-checked array. ! -! On entry: -! r3 = ptr to descriptor -! r4 = index -! r5 = address of array -! Yields: -! r3 = address of element -! r0 = size of element (used by .lar4, .sar4) +! Stack: ( array-adr index descriptor-adr -- element-adr ) +! Sets r3 = size of element for .los4, .sts4 ! Preserves r10 for .lar4, .sar4 .define .aar4 @@ -17,16 +12,21 @@ ori r0, r0, lo16[.trap_earray] mtspr ctr, r0 ! load CTR with trap address - lwz r0, 0(r3) - subf. r4, r0, r4 ! adjust range + lwz r4, 0(sp) ! r4 = address of descriptor + lwz r5, 4(sp) ! r5 = index + lwz r6, 8(sp) ! r6 = address of array + + lwz r0, 0(r4) + subf. r5, r0, r5 ! subtract lower bound from index bltctr ! check lower bound - lwz r0, 4(r3) - cmplw r4, r3 - bgectr ! check upper bound - - lwz r0, 8(r3) - mullw r4, r4, r0 ! scale index - add r3, r4, r5 ! calculate element address + lwz r0, 4(r4) + cmplw r5, r0 + bgtctr ! check upper bound + lwz r3, 8(r4) ! r3 = size of element + mullw r5, r5, r3 ! 
scale index by size + add r6, r6, r5 + stw r6, 8(sp) ! push address of element + addi sp, sp, 8 blr diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua index 466a28fb3..3d1a1062e 100644 --- a/mach/powerpc/libem/build.lua +++ b/mach/powerpc/libem/build.lua @@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do acklibrary { name = "lib_"..plat, srcs = { - "./*.s", + "./*.s", -- los4.s, sts4.s }, vars = { plat = plat }, deps = { diff --git a/mach/powerpc/libem/lar4.s b/mach/powerpc/libem/lar4.s index 2f5c3683c..27ae5a681 100644 --- a/mach/powerpc/libem/lar4.s +++ b/mach/powerpc/libem/lar4.s @@ -2,39 +2,13 @@ ! Load from bounds-checked array. ! -! On entry: -! r3 = ptr to descriptor -! r4 = index -! r5 = address of array +! Stack: ( array-adr index descriptor-adr -- element ) .define .lar4 .lar4: mfspr r10, lr bl .aar4 + ! pass r3 = size from .aar4 to .los4 + bl .los4 mtspr lr, r10 - ! r3 = ptr to element - ! r0 = size of element - - cmpwi r0, 1 - bne 1f - ! Load 1 byte. - lbz r4, 0(r3) - stwu r4, -4(sp) - blr -1: - cmpwi r0, 2 - bne 2f - ! Load 2 bytes. - lhz r4, 0(r3) - stwu r4, -4(sp) - blr -2: - ! Load r0 bytes, where r0 must be a positive multiple of 4. - subf sp, r0, sp ! move stack pointer down - or r5, r0, r0 ! index r5 = length r0 -3: - addic. r5, r5, -4 ! r5 -= 4 - lwzx r4, r5, r3 - stwx r4, r5, sp - bgt 3b ! loop if r5 > 0 blr diff --git a/mach/powerpc/libem/los.s b/mach/powerpc/libem/los.s deleted file mode 100644 index 2d412bce8..000000000 --- a/mach/powerpc/libem/los.s +++ /dev/null @@ -1,47 +0,0 @@ -.sect .text - -! Loads a variable-sized structure onto the stack. -! -! r3 = size -! r4 = address - -.define .los -.los: - ! These sizes are handled specially. - - cmplwi r3, 1 - ble size1 - - cmplwi r3, 2 - ble size2 - - cmplwi r3, 4 - ble size4 - - ! Variable-sized structure. - - addi r3, r3, 3 - clrrwi r3, r3, 2 ! align size - - add r4, r4, r3 ! adjust address to top of block - - srwi r3, r3, 2 ! 
convert size to the number of words - mtspr ctr, r3 - -1: - lwzu r5, -4(r4) - stwu r5, -4(sp) - bdnz 1b ! decrement CTR, jump if non-zero - blr - -size1: - lbz r3, 0(r4) - b 1f -size2: - lhz r3, 0(r4) - b 1f -size4: - lwz r3, 0(r4) -1: - stwu r3, -4(sp) - blr diff --git a/mach/powerpc/libem/los4.s b/mach/powerpc/libem/los4.s new file mode 100644 index 000000000..9b53d112c --- /dev/null +++ b/mach/powerpc/libem/los4.s @@ -0,0 +1,34 @@ +.sect .text + +! Loads a variable-sized block onto the stack. +! +! On entry: r3 = size +! Stack: ( address -- block ) +! Preserves r10 for .lar4 + +.define .los4 +.los4: + lwz r4, 0(sp) ! r4 = address + + ! Sizes 1 and 2 are handled specially. + cmplwi r3, 1 + ble 1f + cmplwi r3, 2 + ble 2f + + ! Else the size must be a multiple of 4. + srwi r5, r3, 2 + mtspr ctr, r5 ! ctr = number of words + addi sp, sp, 4 + add r4, r4, r3 ! adjust address to end of block +4: lwzu r5, -4(r4) + stwu r5, -4(sp) + bdnz 4b ! decrement ctr, jump if non-zero + blr + +1: lbz r5, 0(r4) + stw r5, 0(sp) + blr +2: lhz r5, 0(r4) + stw r5, 0(sp) + blr diff --git a/mach/powerpc/libem/sar4.s b/mach/powerpc/libem/sar4.s index 7c9778958..d8917ab4a 100644 --- a/mach/powerpc/libem/sar4.s +++ b/mach/powerpc/libem/sar4.s @@ -2,41 +2,13 @@ ! Store to bounds-checked array. ! -! On entry: -! r3 = ptr to descriptor -! r4 = index -! r5 = address of array +! Stack: ( element array-adr index descriptor-adr -- ) .define .sar4 .sar4: mfspr r10, lr bl .aar4 + ! pass r3 = size from .aar4 to .sts4 + bl .sts4 mtspr lr, r10 - ! r3 = ptr to element - ! r0 = size of element - - cmpwi r0, 1 - bne 1f - ! Store 1 byte. - lwz r4, 0(sp) - addi sp, sp, 4 - stb r4, 0(r3) - blr -1: - cmpwi r0, 2 - bne 2f - ! Store 2 bytes. - lwz r4, 0(sp) - addi sp, sp, 4 - sth r4, 0(r3) - blr -2: - ! Store r0 bytes, where r0 must be a positive multiple of 4. - or r5, r0, r0 ! index r5 = length r0 -3: - addic. r5, r5, -4 ! r5 -= 4 - lwzx r4, r5, sp - stwx r4, r5, r3 - bgt 3b ! loop if r5 > 0 - add sp, r0, sp ! 
move stack pointer up blr diff --git a/mach/powerpc/libem/sts.s b/mach/powerpc/libem/sts.s deleted file mode 100644 index 411b0fb66..000000000 --- a/mach/powerpc/libem/sts.s +++ /dev/null @@ -1,50 +0,0 @@ -.sect .text - -! Stores a variable-sized structure from the stack. -! -! r3 = size -! r4 = address - -.define .sts -.sts: - ! These sizes are handled specially. - - lwz r5, 0(sp) - - cmplwi r3, 1 - ble size1 - - cmplwi r3, 2 - ble size2 - - cmplwi r3, 4 - ble size4 - - ! Variable-sized structure. - - addi r3, r3, 3 - clrrwi r3, r3, 2 ! align size - - srwi r3, r3, 2 ! convert size to the number of words - mtspr ctr, r3 - -1: - lwz r5, 0(sp) - addi sp, sp, 4 - stw r5, 0(r4) - addi r4, r4, 4 - - bdnz 1b ! decrement CTR, jump if non-zero - blr - -size1: - stb r5, 0(r4) - b 1f -size2: - sth r5, 0(r4) - b 1f -size4: - stw r5, 0(r4) -1: - addi sp, sp, 4 - blr diff --git a/mach/powerpc/libem/sts4.s b/mach/powerpc/libem/sts4.s new file mode 100644 index 000000000..82bdc4db7 --- /dev/null +++ b/mach/powerpc/libem/sts4.s @@ -0,0 +1,35 @@ +.sect .text + +! Stores a variable-sized block from the stack. +! +! On entry: r3 = size +! Stack: ( block address -- ) +! Preserves r10 for .sar4 + +.define .sts4 +.sts4: + lwz r4, 0(sp) ! r4 = address + + ! Sizes 1 and 2 are handled specially. + cmplwi r3, 1 + ble 1f + cmplwi r3, 2 + ble 2f + + ! Else the size must be a multiple of 4. + srwi r5, r3, 2 + mtspr ctr, r5 ! ctr = number of words + addi r4, r4, -4 ! adjust address to before block +4: lwzu r5, 4(sp) + stwu r5, 4(r4) + bdnz 4b ! 
decrement ctr, jump if non-zero + addi sp, sp, 4 + blr + +1: lwz r5, 4(sp) + stb r5, 0(r4) + b 3f +2: lwz r5, 4(sp) + sth r5, 0(r4) +3: addi sp, sp, 8 + blr diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index a5ac86284..5dda77cdf 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -1042,13 +1042,13 @@ PATTERNS pat loi /* Load arbitrary size */ leaving loc $1 - los INT32 + los 4 - pat los $1==INT32 /* Load arbitrary size */ - with GPR3 GPR4 STACK + pat los $1==4 /* Load arbitrary size */ + with GPR3 STACK kills ALL gen - bl {LABEL, ".los"} + bl {LABEL, ".los4"} pat sti $1==INT8 /* Store byte indirect */ with REG REG @@ -1141,13 +1141,13 @@ PATTERNS pat sti /* Store arbitrary size */ leaving loc $1 - sts INT32 + sts 4 - pat sts $1==INT32 /* Store arbitrary size */ - with GPR3 GPR4 STACK + pat sts $1==4 /* Store arbitrary size */ + with GPR3 STACK kills ALL gen - bl {LABEL, ".sts"} + bl {LABEL, ".sts4"} /* Arithmetic wrappers */ @@ -1459,40 +1459,40 @@ PATTERNS yields %a - /* Arrays */ - pat aar $1==INT32 /* Index array */ - with GPR3 GPR4 GPR5 + pat aar $1==4 /* Address of array element */ + leaving + cal ".aar4" + + pat lar $1==4 /* Load from array */ + with STACK kills ALL gen bl {LABEL, ".aar4"} - yields R3 + /* pass r3 = size from .aar4 to .los4 */ + bl {LABEL, ".los4"} - pat lae lar $2==INT32 && nicesize(rom($1, 3)) /* Load array */ + pat lae lar $2==4 && nicesize(rom($1, 3)) leaving lae $1 - aar INT32 + aar 4 loi rom($1, 3) - pat lar $1==INT32 /* Load array */ - with GPR3 GPR4 GPR5 STACK + pat sar $1==4 /* Store to array */ + with STACK kills ALL gen - bl {LABEL, ".lar4"} + bl {LABEL, ".aar4"} + /* pass r3 = size from .aar4 to .sts4 */ + bl {LABEL, ".sts4"} - pat lae sar $2==INT32 && nicesize(rom($1, 3)) /* Store array */ + pat lae sar $2==4 && nicesize(rom($1, 3)) leaving lae $1 - aar INT32 + aar 4 sti rom($1, 3) - pat sar $1==INT32 /* Store array */ - with GPR3 GPR4 GPR5 STACK - kills ALL - gen - bl {LABEL, ".sar4"} - /* Sets */