Revise stack shuffles and integer conversions in PowerPC ncg.

Allow asp 4, exg 4 to shuffle tokens without coercing them into
registers; but comment why dup 4, dup 8 coerce tokens into registers.

Allow dup, dus, exg with larger sizes; and add tests dup_e.e and
exg_e.e to check that dup 20, dus, exg 20 work as well in powerpc as
in i80 and i86.

Then powerpc failed to compile loc 2 loc 4 cuu in dup_e.e.  Revise the
integer conversions, so powerpc can compile and pass the test.
This commit is contained in:
George Koehler 2017-12-09 17:21:06 -05:00
parent 48788287b8
commit 504d2aa34e
6 changed files with 338 additions and 53 deletions

View file

@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do
acklibrary {
name = "lib_"..plat,
srcs = {
"./*.s", -- rm ret.s
"./*.s", -- exg.s
},
vars = { plat = plat },
deps = {

22
mach/powerpc/libem/exg.s Normal file
View file

@ -0,0 +1,22 @@
.sect .text

! .exg: swap the two values on top of the stack in place.
!   Stack: ( a b size -- b a )
! The size word is read from 0(sp); a and b are each `size` bytes long
! and are swapped one 4-byte word at a time.
! Overwrites r3, r4, r5, r6, r7 and ctr.

.define .exg
.exg:
	lwz r3, 0(sp)		! r3 = size in bytes
	add r5, sp, r3		! r5 = sp + size, one word below value a
	mr r4, sp		! r4 = sp, one word below value b
	srwi r7, r3, 2
	mtspr ctr, r7		! ctr = size / 4 = words to swap
	! Each pass steps both pointers up one word, then swaps
	! the two words they point at.
1:	lwzu r7, 4(r5)		! r7 = next word of a
	lwzu r6, 4(r4)		! r6 = next word of b
	stw r7, 0(r4)		! word of a goes where b was
	stw r6, 0(r5)		! word of b goes where a was
	bdnz 1b			! repeat until ctr reaches 0
	addi sp, sp, 4		! pop the size word
	blr

View file

@ -217,6 +217,9 @@ SETS
XOR_RIS + XOR_RC + XOR_RR + NAND_RR + NOR_RR + EQV_RR +
XEQ + XNE + XGT + XGE + XLT + XLE.
/* any register or token of each size */
ANY4 = ANY_BHW + FSREG.
ANY8 = IND_ALL_D + FREG.
INSTRUCTIONS
@ -756,7 +759,7 @@ COERCIONS
PATTERNS
/* Intrinsics */
/* Constants */
pat loc $1==(0-0x8000) /* Load constant */
yields {CONST_N8000, $1}
@ -773,22 +776,78 @@ PATTERNS
pat loc
yields {CONST_HL, $1}
pat dup $1==INT32 /* Duplicate word on top of stack */
with REG
yields %1 %1
with FSREG
/* Stack shuffles */
/* The peephole optimizer does: loc $1 ass 4 -> asp $1
* To optimize multiplication, it uses: dup 8 asp 4
*/
pat asp $1==4 /* Adjust stack by constant */
with exact ANY4
/* drop %1 */
with STACK
gen addi sp, sp, {C, 4}
pat asp smalls($1)
with STACK
gen addi sp, sp, {C, $1}
pat asp lo($1)==0
with STACK
/* The low 16 bits of $1 are zero, so the whole adjustment is
 * hi($1) << 16.  That needs addis (add immediate shifted);
 * addi would add hi($1) itself and move sp by the wrong amount.
 */
gen addis sp, sp, {C, hi($1)}
pat asp
with STACK
gen
addis sp, sp, {C, his($1)}
addi sp, sp, {C, los($1)}
pat ass $1==4 /* Adjust stack by variable */
with REG STACK
gen add sp, sp, %1
/* To duplicate a token, we coerce the token into a register,
* then duplicate the register. This decreases code size.
*/
pat dup $1==4 /* Duplicate word on top of stack */
with REG+FSREG
yields %1 %1
pat dup $1==INT64 /* Duplicate double-word on top of stack */
with REG REG
pat dup $1==8 /* Duplicate double-word */
with REG+FSREG REG+FSREG
yields %2 %1 %2 %1
with FREG
yields %1 %1
pat exg $1==INT32 /* Exchange top two words on stack */
with REG REG
pat dup /* Duplicate other size */
leaving
loc $1
dus 4
pat dus $1==4 /* Duplicate variable size */
with REG STACK
/* ( a size%1 -- a a ) */
/* %1 holds the size in bytes; the value a is read from memory
 * starting at sp.  %a serves first as the word count and then as
 * the word being copied; %b is the read pointer.
 */
uses REG, REG
gen
srwi %a, %1, {C, 2}
mtspr ctr, %a /* ctr = size / 4 */
add %b, sp, %1 /* %b = sp + size, just past the end of a */
1: lwzu %a, {IND_RC_W, %b, 0-4} /* move %b down one word, load it */
stwu %a, {IND_RC_W, sp, 0-4} /* move sp down one word, store it */
bdnz {LABEL, "1b"} /* loop ctr times */
pat exg $1==4 /* Exchange top two words */
with ANY4 ANY4
yields %1 %2
pat exg defined($1) /* Exchange other size */
leaving
loc $1
cal ".exg"
pat exg !defined($1)
leaving
cal ".exg"
pat ste loe $1==$2 /* Store then load external */
leaving
dup 4
@ -797,32 +856,30 @@ PATTERNS
/* Type conversions */
pat loc loc ciu /* signed X -> unsigned X */
pat loc loc ciu /* signed -> unsigned */
leaving
loc $1
loc $2
cuu
pat loc loc cuu $1==$2 /* unsigned X -> unsigned X */
pat loc loc cui /* unsigned -> signed */
leaving
loc $1
loc $2
cuu
pat loc loc cuu $1<=4 && $2<=4 /* unsigned -> unsigned */
/* nop */
pat loc loc cii $1==$2 /* signed X -> signed X */
/* nop */
pat loc loc cii $1<=4 && $2<=$1
/* signed -> signed of smaller or same size,
* no sign extension */
pat loc loc cui $1==$2 /* unsigned X -> signed X */
/* nop */
pat loc loc cui $1==INT8 && $2==INT32 /* unsigned char -> signed int */
/* nop */
pat loc loc cui $1==INT16 && $2==INT32 /* unsigned short -> signed int */
/* nop */
pat loc loc cii $1==INT8 && $2==INT32 /* signed char -> signed int */
pat loc loc cii $1==1 && $2<=4 /* sign-extend char */
with REG
yields {SEX_B, %1}
pat loc loc cii $1==2 && $2==4 /* signed char -> signed short */
pat loc loc cii $1==2 && $2<=4 /* sign-extend short */
with REG
yields {SEX_H, %1}
@ -2038,33 +2095,6 @@ PATTERNS
gen
move %1, sp
pat loc ass $1==4 && $2==4 /* Drop 4 bytes from stack */
with exact REG
/* nop */
with STACK
gen
addi sp, sp, {C, 4}
pat ass $1==4 /* Adjust stack by variable amount */
with CONST2 STACK
gen
move {SUM_RC, sp, %1.val}, sp
with CONST_HZ STACK
gen
move {SUM_RC, sp, his(%1.val)}, sp
with CONST_STACK-CONST2-CONST_HZ STACK
gen
move {SUM_RC, sp, his(%1.val)}, sp
move {SUM_RC, sp, los(%1.val)}, sp
with REG STACK
gen
move {SUM_RR, sp, %1}, sp
pat asp /* Adjust stack by constant amount */
leaving
loc $1
ass 4
pat lae rck $2==4 /* Range check */
with REG
kills ALL

View file

@ -10,6 +10,8 @@ definerule("plat_testsuite",
-- target names will resolve there.
local testfiles = filenamesof(
"tests/plat/*.c",
"tests/plat/dup_e.e",
"tests/plat/exg_e.e",
"tests/plat/inn_e.e",
"tests/plat/rotate_e.e",
"tests/plat/*.p",

145
tests/plat/dup_e.e Normal file
View file

@ -0,0 +1,145 @@
#
mes 2, EM_WSIZE, EM_PSIZE
/*
* Test _dup_ and _dus_ by loading 20 bytes from _src_, then making
* and checking some duplicates.
*/
exa src
exa size
src
con 3593880729I4, 782166578I4, 4150666996I4, 2453272937I4, 3470523049I4
size
con 20I2
exp $check
exp $_m_a_i_n
/*
 * Entry point: build three 20-byte copies of src on the stack, one by
 * loi, one by dup and one by dus, then let $check compare them.
 */
pro $_m_a_i_n, 0
/* Push 3 copies of src on stack. */
lae src
loi 20 /* 1st copy */
dup 20 /* 2nd copy */
lae size
loi 2 /* load the 2-byte value stored at size (20) */
loc 2
loc EM_WSIZE
cuu /* widen it, unsigned, from 2 bytes to a word */
dus EM_WSIZE /* 3rd copy */
cal $check /* the copies are left on the stack for $check to read */
cal $finished /* presumably supplied by the test harness; not defined here */
end /* $_m_a_i_n */
/*
 * $check: compare the three stacked copies with src, one byte at a
 * time.  Locals are four pointers (p4 walks src, p3/p2/p1 walk the
 * 3rd/2nd/1st copy), the current src byte b and the loop counter i.
 * When byte i of copy k differs from src, $fail is called with the
 * 4-byte value k * 256 + i.
 */
pro $check, 4 * EM_PSIZE + 2 * EM_WSIZE
#define p1 (-1 * EM_PSIZE)
#define p2 (-2 * EM_PSIZE)
#define p3 (-3 * EM_PSIZE)
#define p4 (-4 * EM_PSIZE)
#define b (p4 - 1 * EM_WSIZE)
#define i (p4 - 2 * EM_WSIZE)
/* Set pointers to all 4 copies. */
/* The copies are addressed with lal at offsets 0, 20 and 40. */
lae src
lal p4
sti EM_PSIZE /* p4 = src */
lal 0
lal p3
sti EM_PSIZE /* p3 = 3rd copy */
lal 20
lal p2
sti EM_PSIZE /* p2 = 2nd copy */
lal 40
lal p1
sti EM_PSIZE /* p1 = 1st copy */
/* Loop 20 times to verify each byte. */
loc 0
stl i
4
lal p4
loi EM_PSIZE
loi 1
loc 1
loc EM_WSIZE
cii /* sign-extend the byte to a word */
stl b /* b = byte from src */
lol b
lal p3
loi EM_PSIZE
loi 1 /* byte from 3rd copy */
loc 1
loc EM_WSIZE
cii
beq *3 /* bytes match: skip the failure report */
loc (3 * 256)
lol i
adi EM_WSIZE /* failure code = 3 * 256 + i */
loc EM_WSIZE
loc 4
cuu /* convert the code to 4 bytes for $fail */
cal $fail
asp 4 /* pop the 4-byte argument */
3
lol b
lal p2
loi EM_PSIZE
loi 1 /* byte from 2nd copy */
loc 1
loc EM_WSIZE
cii
beq *2 /* bytes match: skip the failure report */
loc (2 * 256)
lol i
adi EM_WSIZE /* failure code = 2 * 256 + i */
loc EM_WSIZE
loc 4
cuu
cal $fail
asp 4
2
lol b
lal p1
loi EM_PSIZE
loi 1 /* byte from 1st copy */
loc 1
loc EM_WSIZE
cii
beq *1 /* bytes match: skip the failure report */
loc (1 * 256)
lol i
adi EM_WSIZE /* failure code = 1 * 256 + i */
loc EM_WSIZE
loc 4
cuu
cal $fail
asp 4
1
/* Advance all four pointers to the next byte. */
lal p4
loi EM_PSIZE
adp 1
lal p4
sti EM_PSIZE /* increment p4 */
lal p3
loi EM_PSIZE
adp 1
lal p3
sti EM_PSIZE /* increment p3 */
lal p2
loi EM_PSIZE
adp 1
lal p2
sti EM_PSIZE /* increment p2 */
lal p1
loi EM_PSIZE
adp 1
lal p1
sti EM_PSIZE /* increment p1 */
inl i
lol i
loc 20
blt *4 /* loop 20 times */
ret 0
end /* $check */

86
tests/plat/exg_e.e Normal file
View file

@ -0,0 +1,86 @@
#
mes 2, EM_WSIZE, EM_PSIZE
/*
* Test _exg_ by loading 40 bytes from _src_, then exchanging 20 and
* 20 bytes, and checking the result.
*/
exa src
src
con 1539465570I4, 1344465418I4, 1317578918I4, 1163467696I4, 2645261331I4
con 3981585269I4, 1433968975I4, 4256886989I4, 4114909542I4, 1817334375I4
exp $check
exp $_m_a_i_n
/*
 * Entry point: push the 40 bytes of src, exchange the top 20 bytes
 * with the 20 bytes beneath them, then let $check verify the result.
 */
pro $_m_a_i_n, 0
lae src
loi 40 /* push all 40 bytes of src */
exg 20 /* exchange the two 20-byte halves */
cal $check /* the exchanged bytes are left on the stack for $check */
cal $finished /* presumably supplied by the test harness; not defined here */
end /* $_m_a_i_n */
/*
 * $check: compare the exchanged 40 bytes on the stack with src, one
 * byte at a time.  p2 walks src from its first byte.  p1 walks the
 * exchanged copy starting at offset 20 and jumps back to offset 0
 * after 20 bytes, so src[0..19] is expected in the upper half of the
 * copy and src[20..39] in the lower half.  On a mismatch at index i,
 * $fail is called with i as a 4-byte value.
 */
pro $check, 2 * EM_PSIZE + EM_WSIZE
#define p1 (-1 * EM_PSIZE)
#define p2 (-2 * EM_PSIZE)
#define i (p2 - EM_WSIZE)
lae src
lal p2
sti EM_PSIZE /* p2 = src */
lal 0
adp 20
lal p1
sti EM_PSIZE /* p1 = exchanged copy + 20 */
/* Loop 40 times to verify each byte. */
loc 0
stl i
1
lal p2
loi EM_PSIZE
loi 1 /* byte from src */
loc 1
loc EM_WSIZE
cii /* sign-extend the byte to a word */
lal p1
loi EM_PSIZE
loi 1 /* byte from exchanged copy */
loc 1
loc EM_WSIZE
cii
beq *2 /* bytes match: skip the failure report */
lol i
loc EM_WSIZE
loc 4
cuu /* convert i to 4 bytes for $fail */
cal $fail
asp 4 /* pop the 4-byte argument */
2
lal p2
loi EM_PSIZE
adp 1
lal p2
sti EM_PSIZE /* increment p2 */
/* The value of p1 stays on the stack across the branch below. */
lal p1
loi EM_PSIZE /* p1 */
inl i
/* When i reaches 20, p1 would reach end of exchanged copy. */
lol i
loc 20
beq *3
adp 1 /* p1 + 1 */
bra *4
3
adp -39 /* p1 - 39, beginning of exchanged copy */
4
lal p1
sti EM_PSIZE /* move p1 */
lol i
loc 40
blt *1 /* loop 40 times */
ret 0
end /* $check */