Use lwzu, stwu to tighten more loops.
Because lwzu or stwu moves the pointer, I can remove an addi instruction from the loop, so the loop is slightly faster. I wrote a benchmark in Modula-2 that exercises some of these loops. I measured its time on my old PowerPC Mac. Its user time decreases from 8.401s to 8.217s with the tighter loops.
This commit is contained in:
parent
ac2b0710c8
commit
459a9b5949
|
@ -1,22 +1,20 @@
|
|||
.sect .text
|
||||
|
||||
! Set intersection.
|
||||
! Stack: ( b a size -- a*b )
|
||||
! Stack: ( a b size -- a*b )
|
||||
|
||||
.define .and
|
||||
.and:
|
||||
lwz r3, 0(sp) ! r3 = size
|
||||
srwi r7, r3, 2
|
||||
mtspr ctr, r7 ! ctr = size / 4
|
||||
addi r4, sp, 4 ! r4 = ptr to set a
|
||||
add r5, r4, r3 ! r5 = ptr to set b
|
||||
li r6, 0 ! r6 = index
|
||||
1:
|
||||
lwzx r7, r4, r6
|
||||
lwzx r8, r5, r6
|
||||
and r8, r7, r8 ! intersection of words
|
||||
stwx r8, r5, r6
|
||||
addi r6, r6, 4
|
||||
add r4, sp, r3 ! r4 = pointer before set a
|
||||
|
||||
! Loop with r4 in set a and sp in set b.
|
||||
1: lwzu r5, 4(r4)
|
||||
lwzu r6, 4(sp)
|
||||
and r7, r5, r6 ! intersection of words
|
||||
stw r7, 0(r4)
|
||||
bdnz 1b ! loop ctr times
|
||||
mr sp, r5
|
||||
addi sp, sp, 4 ! drop last word of set b
|
||||
blr
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
.sect .text
|
||||
|
||||
! Compare sets a, b.
|
||||
! Stack: ( b a size -- result )
|
||||
! Stack: ( a b size -- result )
|
||||
! Result is 0 if equal, nonzero if not equal.
|
||||
|
||||
.define .cms
|
||||
|
@ -9,22 +9,19 @@
|
|||
lwz r3, 0(sp) ! r3 = size of each set
|
||||
srwi r7, r3, 2
|
||||
mtspr ctr, r7 ! ctr = size / 4
|
||||
addi r4, sp, 4 ! r4 = ptr to set a
|
||||
add r5, r4, r3 ! r5 = ptr to set b
|
||||
li r6, 0 ! r6 = index
|
||||
1:
|
||||
lwzx r7, r4, r6
|
||||
lwzx r8, r5, r6
|
||||
cmpw cr0, r7, r8 ! compare words in sets
|
||||
addi r6, r6, 4
|
||||
bne cr0, 2f ! branch if not equal
|
||||
add r4, sp, r3 ! r4 = pointer before set a
|
||||
add r7, r4, r3 ! r7 = pointer to store result
|
||||
|
||||
! Loop with r4 in set a and sp in set b.
|
||||
1: lwzu r5, 4(r4)
|
||||
lwzu r6, 4(sp)
|
||||
cmpw r5, r6 ! compare words
|
||||
bne 2f ! branch if not equal
|
||||
bdnz 1b ! loop ctr times
|
||||
li r9, 0 ! equal: return 0
|
||||
|
||||
li r3, 0 ! equal: return 0
|
||||
b 3f
|
||||
2:
|
||||
li r9, 1 ! not equal: return 1
|
||||
3:
|
||||
slwi r7, r3, 1
|
||||
add sp, sp, r7 ! adjust stack pointer
|
||||
stw r9, 0(sp) ! push result
|
||||
2: li r3, 1 ! not equal: return 1
|
||||
3: mr sp, r7
|
||||
stw r3, 0(sp) ! push result
|
||||
blr
|
||||
|
|
|
@ -5,16 +5,15 @@
|
|||
|
||||
.define .com
|
||||
.com:
|
||||
lwz r3, 0 (sp) ! size
|
||||
addi sp, sp, 4
|
||||
lwz r3, 0(sp) ! r3 = size
|
||||
srwi r7, r3, 2
|
||||
mtspr ctr, r7 ! ctr = size / 4
|
||||
mr r4, sp ! r4 = pointer before set a
|
||||
|
||||
mr r4, sp ! r4 = pointer to set a
|
||||
srwi r5, r3, 2
|
||||
mtspr ctr, r5 ! ctr = r3 / 4
|
||||
1:
|
||||
lwz r6, 0(r4)
|
||||
nor r6, r6, r6 ! complement of word
|
||||
stw r6, 0(r4)
|
||||
addi r4, r4, 4
|
||||
! Loop with r4 in set a.
|
||||
1: lwzu r5, 4(r4)
|
||||
nor r7, r5, r5 ! complement of word
|
||||
stw r7, 0(r4)
|
||||
bdnz 1b ! loop ctr times
|
||||
addi sp, sp, 4 ! drop size from stack
|
||||
blr
|
||||
|
|
|
@ -1,22 +1,20 @@
|
|||
.sect .text
|
||||
|
||||
! Set union.
|
||||
! Stack: ( b a size -- a+b )
|
||||
! Stack: ( a b size -- a+b )
|
||||
|
||||
.define .ior
|
||||
.ior:
|
||||
lwz r3, 0(sp) ! r3 = size
|
||||
srwi r7, r3, 2
|
||||
mtspr ctr, r7 ! ctr = size / 4
|
||||
addi r4, sp, 4 ! r4 = ptr to set a
|
||||
add r5, r4, r3 ! r5 = ptr to set b
|
||||
li r6, 0 ! r6 = index
|
||||
1:
|
||||
lwzx r7, r4, r6
|
||||
lwzx r8, r5, r6
|
||||
or r8, r7, r8 ! union of words
|
||||
stwx r8, r5, r6
|
||||
addi r6, r6, 4
|
||||
add r4, sp, r3 ! r4 = pointer before set a
|
||||
|
||||
! Loop with r4 in set a and sp in set b.
|
||||
1: lwzu r5, 4(r4)
|
||||
lwzu r6, 4(sp)
|
||||
or r7, r5, r6 ! union of words
|
||||
stw r7, 0(r4)
|
||||
bdnz 1b ! loop ctr times
|
||||
mr sp, r5
|
||||
addi sp, sp, 4 ! drop last word of set b
|
||||
blr
|
||||
|
|
|
@ -1,22 +1,20 @@
|
|||
.sect .text
|
||||
|
||||
! Set symmetric difference.
|
||||
! Stack: ( b a size -- a/b )
|
||||
! Stack: ( a b size -- a/b )
|
||||
|
||||
.define .xor
|
||||
.xor:
|
||||
lwz r3, 0(sp) ! r3 = size
|
||||
srwi r7, r3, 2
|
||||
mtspr ctr, r7 ! ctr = size / 4
|
||||
addi r4, sp, 4 ! r4 = ptr to set a
|
||||
add r5, r4, r3 ! r5 = ptr to set b
|
||||
li r6, 0 ! r6 = index
|
||||
1:
|
||||
lwzx r7, r4, r6
|
||||
lwzx r8, r5, r6
|
||||
xor r8, r7, r8 ! symmetric difference of words
|
||||
stwx r8, r5, r6
|
||||
addi r6, r6, 4
|
||||
add r4, sp, r3 ! r4 = pointer before set a
|
||||
|
||||
! Loop with r4 in set a and sp in set b.
|
||||
1: lwzu r5, 4(r4)
|
||||
lwzu r6, 4(sp)
|
||||
xor r7, r5, r6 ! symmetric difference of words
|
||||
stw r7, 0(r4)
|
||||
bdnz 1b ! loop ctr times
|
||||
mr sp, r5
|
||||
addi sp, sp, 4 ! drop last word of set b
|
||||
blr
|
||||
|
|
|
@ -6,14 +6,11 @@
|
|||
.define .zer
|
||||
.zer:
|
||||
lwz r3, 0(sp) ! r3 = size
|
||||
srwi r5, r3, 2
|
||||
mtspr ctr, r5 ! ctr = word size - 4
|
||||
li r4, 0 ! r4 = 0
|
||||
addi sp, sp, 4
|
||||
subf sp, r3, sp ! sp = ptr to new set
|
||||
li r6, 0 ! r6 = index
|
||||
1:
|
||||
stwx r4, sp, r6 ! store zero in set
|
||||
addi r6, r6, 4
|
||||
srwi r7, r3, 2
|
||||
mtspr ctr, r7 ! ctr = size / 4
|
||||
addi sp, sp, 4 ! drop size from stack
|
||||
li r4, 0
|
||||
|
||||
1: stwu r4, -4(sp) ! push zero
|
||||
bdnz 1b ! loop ctr times
|
||||
blr
|
||||
|
|
|
@ -1897,6 +1897,11 @@ PATTERNS
|
|||
gen move %2, r4
|
||||
leaving ret 0
|
||||
|
||||
/*
|
||||
* These rules for blm/bls are wrong if length is zero.
|
||||
* So are several procedures in libem.
|
||||
*/
|
||||
|
||||
pat blm /* Block move constant length */
|
||||
leaving
|
||||
loc $1
|
||||
|
@ -1904,15 +1909,15 @@ PATTERNS
|
|||
|
||||
pat bls /* Block move variable length */
|
||||
with REG REG REG
|
||||
uses reusing %1, REG, REG={CONST_0000_7FFF, 0}
|
||||
/* ( src%3 dst%2 len%1 -- ) */
|
||||
uses reusing %1, REG, REG, REG
|
||||
gen
|
||||
/* Wrong if size is zero */
|
||||
srwi %1, %1, {CONST, 2}
|
||||
mtspr ctr, %1
|
||||
1:
|
||||
lwzx %a, %3, %b
|
||||
stwx %a, %2, %b
|
||||
addi %b, %b, {CONST, 4}
|
||||
srwi %a, %1, {CONST, 2}
|
||||
mtspr ctr, %a
|
||||
addi %b, %3, {CONST, 0-4}
|
||||
addi %c, %2, {CONST, 0-4}
|
||||
1: lwzu %a, {IND_RC_W, %b, 4}
|
||||
stwu %a, {IND_RC_W, %c, 4}
|
||||
bdnz {LABEL, "1b"}
|
||||
|
||||
pat csa /* Array-lookup switch */
|
||||
|
@ -1987,8 +1992,7 @@ PATTERNS
|
|||
REG={CONST_0000_7FFF, $1-1}
|
||||
gen
|
||||
mtspr ctr, %b
|
||||
1:
|
||||
lwz %a, {IND_RC_W, %a, SL_OFFSET}
|
||||
1: lwz %a, {IND_RC_W, %a, SL_OFFSET}
|
||||
bdnz {LABEL, "1b"}
|
||||
yields %a
|
||||
|
||||
|
|
Loading…
Reference in a new issue