Use lwzu, stwu to tighten more loops.

Because lwzu or stwu moves the pointer, I can remove an addi
instruction from the loop, so the loop is slightly faster.

I wrote a benchmark in Modula-2 that exercises some of these loops.  I
measured its time on my old PowerPC Mac.  Its user time decreased from
8.401s to 8.217s with the tighter loops.
This commit is contained in:
George Koehler 2017-10-18 12:12:42 -04:00
parent ac2b0710c8
commit 459a9b5949
7 changed files with 70 additions and 79 deletions

View file

@ -1,22 +1,20 @@
.sect .text .sect .text
! Set intersection. ! Set intersection.
! Stack: ( b a size -- a*b ) ! Stack: ( a b size -- a*b )
.define .and .define .and
.and: .and:
lwz r3, 0(sp) ! r3 = size lwz r3, 0(sp) ! r3 = size
srwi r7, r3, 2 srwi r7, r3, 2
mtspr ctr, r7 ! ctr = size / 4 mtspr ctr, r7 ! ctr = size / 4
addi r4, sp, 4 ! r4 = ptr to set a add r4, sp, r3 ! r4 = pointer before set a
add r5, r4, r3 ! r5 = ptr to set b
li r6, 0 ! r6 = index ! Loop with r4 in set a and sp in set b.
1: 1: lwzu r5, 4(r4)
lwzx r7, r4, r6 lwzu r6, 4(sp)
lwzx r8, r5, r6 and r7, r5, r6 ! intersection of words
and r8, r7, r8 ! intersection of words stw r7, 0(r4)
stwx r8, r5, r6
addi r6, r6, 4
bdnz 1b ! loop ctr times bdnz 1b ! loop ctr times
mr sp, r5 addi sp, sp, 4 ! drop last word of set b
blr blr

View file

@ -1,7 +1,7 @@
.sect .text .sect .text
! Compare sets a, b. ! Compare sets a, b.
! Stack: ( b a size -- result ) ! Stack: ( a b size -- result )
! Result is 0 if equal, nonzero if not equal. ! Result is 0 if equal, nonzero if not equal.
.define .cms .define .cms
@ -9,22 +9,19 @@
lwz r3, 0(sp) ! r3 = size of each set lwz r3, 0(sp) ! r3 = size of each set
srwi r7, r3, 2 srwi r7, r3, 2
mtspr ctr, r7 ! ctr = size / 4 mtspr ctr, r7 ! ctr = size / 4
addi r4, sp, 4 ! r4 = ptr to set a add r4, sp, r3 ! r4 = pointer before set a
add r5, r4, r3 ! r5 = ptr to set b add r7, r4, r3 ! r7 = pointer to store result
li r6, 0 ! r6 = index
1: ! Loop with r4 in set a and sp in set b.
lwzx r7, r4, r6 1: lwzu r5, 4(r4)
lwzx r8, r5, r6 lwzu r6, 4(sp)
cmpw cr0, r7, r8 ! compare words in sets cmpw r5, r6 ! compare words
addi r6, r6, 4 bne 2f ! branch if not equal
bne cr0, 2f ! branch if not equal
bdnz 1b ! loop ctr times bdnz 1b ! loop ctr times
li r9, 0 ! equal: return 0
li r3, 0 ! equal: return 0
b 3f b 3f
2: 2: li r3, 1 ! not equal: return 1
li r9, 1 ! not equal: return 1 3: mr sp, r7
3: stw r3, 0(sp) ! push result
slwi r7, r3, 1
add sp, sp, r7 ! adjust stack pointer
stw r9, 0(sp) ! push result
blr blr

View file

@ -5,16 +5,15 @@
.define .com .define .com
.com: .com:
lwz r3, 0 (sp) ! size lwz r3, 0(sp) ! r3 = size
addi sp, sp, 4 srwi r7, r3, 2
mtspr ctr, r7 ! ctr = size / 4
mr r4, sp ! r4 = pointer before set a
mr r4, sp ! r4 = pointer to set a ! Loop with r4 in set a.
srwi r5, r3, 2 1: lwzu r5, 4(r4)
mtspr ctr, r5 ! ctr = r3 / 4 nor r7, r5, r5 ! complement of word
1: stw r7, 0(r4)
lwz r6, 0(r4)
nor r6, r6, r6 ! complement of word
stw r6, 0(r4)
addi r4, r4, 4
bdnz 1b ! loop ctr times bdnz 1b ! loop ctr times
addi sp, sp, 4 ! drop size from stack
blr blr

View file

@ -1,22 +1,20 @@
.sect .text .sect .text
! Set union. ! Set union.
! Stack: ( b a size -- a+b ) ! Stack: ( a b size -- a+b )
.define .ior .define .ior
.ior: .ior:
lwz r3, 0(sp) ! r3 = size lwz r3, 0(sp) ! r3 = size
srwi r7, r3, 2 srwi r7, r3, 2
mtspr ctr, r7 ! ctr = size / 4 mtspr ctr, r7 ! ctr = size / 4
addi r4, sp, 4 ! r4 = ptr to set a add r4, sp, r3 ! r4 = pointer before set a
add r5, r4, r3 ! r5 = ptr to set b
li r6, 0 ! r6 = index ! Loop with r4 in set a and sp in set b.
1: 1: lwzu r5, 4(r4)
lwzx r7, r4, r6 lwzu r6, 4(sp)
lwzx r8, r5, r6 or r7, r5, r6 ! union of words
or r8, r7, r8 ! union of words stw r7, 0(r4)
stwx r8, r5, r6
addi r6, r6, 4
bdnz 1b ! loop ctr times bdnz 1b ! loop ctr times
mr sp, r5 addi sp, sp, 4 ! drop last word of set b
blr blr

View file

@ -1,22 +1,20 @@
.sect .text .sect .text
! Set symmetric difference. ! Set symmetric difference.
! Stack: ( b a size -- a/b ) ! Stack: ( a b size -- a/b )
.define .xor .define .xor
.xor: .xor:
lwz r3, 0(sp) ! r3 = size lwz r3, 0(sp) ! r3 = size
srwi r7, r3, 2 srwi r7, r3, 2
mtspr ctr, r7 ! ctr = size / 4 mtspr ctr, r7 ! ctr = size / 4
addi r4, sp, 4 ! r4 = ptr to set a add r4, sp, r3 ! r4 = pointer before set a
add r5, r4, r3 ! r5 = ptr to set b
li r6, 0 ! r6 = index ! Loop with r4 in set a and sp in set b.
1: 1: lwzu r5, 4(r4)
lwzx r7, r4, r6 lwzu r6, 4(sp)
lwzx r8, r5, r6 xor r7, r5, r6 ! symmetric difference of words
xor r8, r7, r8 ! symmetric difference of words stw r7, 0(r4)
stwx r8, r5, r6
addi r6, r6, 4
bdnz 1b ! loop ctr times bdnz 1b ! loop ctr times
mr sp, r5 addi sp, sp, 4 ! drop last word of set b
blr blr

View file

@ -6,14 +6,11 @@
.define .zer .define .zer
.zer: .zer:
lwz r3, 0(sp) ! r3 = size lwz r3, 0(sp) ! r3 = size
srwi r5, r3, 2 srwi r7, r3, 2
mtspr ctr, r5 ! ctr = word size - 4 mtspr ctr, r7 ! ctr = size / 4
li r4, 0 ! r4 = 0 addi sp, sp, 4 ! drop size from stack
addi sp, sp, 4 li r4, 0
subf sp, r3, sp ! sp = ptr to new set
li r6, 0 ! r6 = index 1: stwu r4, -4(sp) ! push zero
1:
stwx r4, sp, r6 ! store zero in set
addi r6, r6, 4
bdnz 1b ! loop ctr times bdnz 1b ! loop ctr times
blr blr

View file

@ -1897,6 +1897,11 @@ PATTERNS
gen move %2, r4 gen move %2, r4
leaving ret 0 leaving ret 0
/*
* These rules for blm/bls are wrong if length is zero.
* So are several procedures in libem.
*/
pat blm /* Block move constant length */ pat blm /* Block move constant length */
leaving leaving
loc $1 loc $1
@ -1904,15 +1909,15 @@ PATTERNS
pat bls /* Block move variable length */ pat bls /* Block move variable length */
with REG REG REG with REG REG REG
uses reusing %1, REG, REG={CONST_0000_7FFF, 0} /* ( src%3 dst%2 len%1 -- ) */
uses reusing %1, REG, REG, REG
gen gen
/* Wrong if size is zero */ srwi %a, %1, {CONST, 2}
srwi %1, %1, {CONST, 2} mtspr ctr, %a
mtspr ctr, %1 addi %b, %3, {CONST, 0-4}
1: addi %c, %2, {CONST, 0-4}
lwzx %a, %3, %b 1: lwzu %a, {IND_RC_W, %b, 4}
stwx %a, %2, %b stwu %a, {IND_RC_W, %c, 4}
addi %b, %b, {CONST, 4}
bdnz {LABEL, "1b"} bdnz {LABEL, "1b"}
pat csa /* Array-lookup switch */ pat csa /* Array-lookup switch */
@ -1987,8 +1992,7 @@ PATTERNS
REG={CONST_0000_7FFF, $1-1} REG={CONST_0000_7FFF, $1-1}
gen gen
mtspr ctr, %b mtspr ctr, %b
1: 1: lwz %a, {IND_RC_W, %a, SL_OFFSET}
lwz %a, {IND_RC_W, %a, SL_OFFSET}
bdnz {LABEL, "1b"} bdnz {LABEL, "1b"}
yields %a yields %a