Use .los4 in lar 4 and .sts4 in sar 4.

Our libem had two implementations of loading a block onto the stack, one
for lar 4 and one for los 4.  Now lar 4 and los 4 share the code in
.los4.  Likewise, sar 4 and sts 4 share the code in .sts4.

Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4.  Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop.  Remove the lines
to "align size" where the size must already be a multiple of 4.

Fix the upper bound check in .aar4.

Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element.  So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.

ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them.  They might or might not work in mcg.
This commit is contained in:
George Koehler 2017-02-13 15:22:00 -05:00
parent 54949f713f
commit ba9b021253
9 changed files with 118 additions and 200 deletions

View file

@ -1,14 +1,9 @@
.sect .text .sect .text
! Index into a bounds-checked array. ! Get address of element of bounds-checked array.
! !
! On entry: ! Stack: ( array-adr index descriptor-adr -- element-adr )
! r3 = ptr to descriptor ! Sets r3 = size of element for .los4, .sts4
! r4 = index
! r5 = address of array
! Yields:
! r3 = address of element
! r0 = size of element (used by .lar4, .sar4)
! Preserves r10 for .lar4, .sar4 ! Preserves r10 for .lar4, .sar4
.define .aar4 .define .aar4
@ -17,16 +12,21 @@
ori r0, r0, lo16[.trap_earray] ori r0, r0, lo16[.trap_earray]
mtspr ctr, r0 ! load CTR with trap address mtspr ctr, r0 ! load CTR with trap address
lwz r0, 0(r3) lwz r4, 0(sp) ! r4 = address of descriptor
subf. r4, r0, r4 ! adjust range lwz r5, 4(sp) ! r5 = index
lwz r6, 8(sp) ! r6 = address of array
lwz r0, 0(r4)
subf. r5, r0, r5 ! subtract lower bound from index
bltctr ! check lower bound bltctr ! check lower bound
lwz r0, 4(r3) lwz r0, 4(r4)
cmplw r4, r3 cmplw r5, r0
bgectr ! check upper bound bgtctr ! check upper bound
lwz r0, 8(r3)
mullw r4, r4, r0 ! scale index
add r3, r4, r5 ! calculate element address
lwz r3, 8(r4) ! r3 = size of element
mullw r5, r5, r3 ! scale index by size
add r6, r6, r5
stw r6, 8(sp) ! push address of element
addi sp, sp, 8
blr blr

View file

@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do
acklibrary { acklibrary {
name = "lib_"..plat, name = "lib_"..plat,
srcs = { srcs = {
"./*.s", "./*.s", -- los4.s, sts4.s
}, },
vars = { plat = plat }, vars = { plat = plat },
deps = { deps = {

View file

@ -2,39 +2,13 @@
! Load from bounds-checked array. ! Load from bounds-checked array.
! !
! On entry: ! Stack: ( array-adr index descriptor-adr -- element )
! r3 = ptr to descriptor
! r4 = index
! r5 = address of array
.define .lar4 .define .lar4
.lar4: .lar4:
mfspr r10, lr mfspr r10, lr
bl .aar4 bl .aar4
! pass r3 = size from .aar4 to .los4
bl .los4
mtspr lr, r10 mtspr lr, r10
! r3 = ptr to element
! r0 = size of element
cmpwi r0, 1
bne 1f
! Load 1 byte.
lbz r4, 0(r3)
stwu r4, -4(sp)
blr
1:
cmpwi r0, 2
bne 2f
! Load 2 bytes.
lhz r4, 0(r3)
stwu r4, -4(sp)
blr
2:
! Load r0 bytes, where r0 must be a positive multiple of 4.
subf sp, r0, sp ! move stack pointer down
or r5, r0, r0 ! index r5 = length r0
3:
addic. r5, r5, -4 ! r5 -= 4
lwzx r4, r5, r3
stwx r4, r5, sp
bgt 3b ! loop if r5 > 0
blr blr

View file

@ -1,47 +0,0 @@
.sect .text
! Loads a variable-sized structure onto the stack.
!
! r3 = size
! r4 = address
!
! The size compares are unsigned, so sizes 0..1 push one byte, size 2
! pushes one halfword, and sizes 3..4 push one word; each of these is
! pushed as a single full word.  Any larger size is rounded up to a
! multiple of 4 and copied a word at a time.
! Overwrites r3, r4, r5 and ctr.
.define .los
.los:
! These sizes are handled specially.
cmplwi r3, 1
ble size1
cmplwi r3, 2
ble size2
cmplwi r3, 4
ble size4
! Variable-sized structure.
addi r3, r3, 3
clrrwi r3, r3, 2 ! align size
add r4, r4, r3 ! adjust address to top of block
srwi r3, r3, 2 ! convert size to the number of words
mtspr ctr, r3
! Copy from the highest word down, so that the word at the lowest
! address is pushed last and ends up on top of the stack.
1:
lwzu r5, -4(r4)
stwu r5, -4(sp)
bdnz 1b ! decrement CTR, jump if non-zero
blr
size1:
lbz r3, 0(r4)
b 1f
size2:
lhz r3, 0(r4)
b 1f
size4:
lwz r3, 0(r4)
! Common exit for the three small sizes: push the loaded value.
1:
stwu r3, -4(sp)
blr

34
mach/powerpc/libem/los4.s Normal file
View file

@ -0,0 +1,34 @@
.sect .text
! Loads a variable-sized block onto the stack.
!
! The address on top of the stack is replaced by the r3 bytes found at
! that address.  Sizes 0 and 1 load one byte and size 2 loads one
! halfword; each is stored as a single full word in the slot that held
! the address.  Any larger size is copied a word at a time; nothing
! here rounds the size up, so it must already be a multiple of 4.
!
! On entry: r3 = size
! Stack: ( address -- block )
! Overwrites r4, r5 and ctr.
! Preserves r10 for .lar4
.define .los4
.los4:
lwz r4, 0(sp) ! r4 = address
! Sizes 1 and 2 are handled specially.
cmplwi r3, 1
ble 1f
cmplwi r3, 2
ble 2f
! Else the size must be a multiple of 4.
srwi r5, r3, 2
mtspr ctr, r5 ! ctr = number of words
addi sp, sp, 4 ! pop the address; the block is pushed in its place
add r4, r4, r3 ! adjust address to end of block
! Copy from the highest word down, so that the word at the lowest
! address is pushed last and ends up on top of the stack.
4: lwzu r5, -4(r4)
stwu r5, -4(sp)
bdnz 4b ! decrement ctr, jump if non-zero
blr
! Byte: overwrite the address slot in place, sp does not move.
1: lbz r5, 0(r4)
stw r5, 0(sp)
blr
! Halfword: overwrite the address slot in place, sp does not move.
2: lhz r5, 0(r4)
stw r5, 0(sp)
blr

View file

@ -2,41 +2,13 @@
! Store to bounds-checked array. ! Store to bounds-checked array.
! !
! On entry: ! Stack: ( element array-adr index descriptor-adr -- )
! r3 = ptr to descriptor
! r4 = index
! r5 = address of array
.define .sar4 .define .sar4
.sar4: .sar4:
mfspr r10, lr mfspr r10, lr
bl .aar4 bl .aar4
! pass r3 = size from .aar4 to .sts4
bl .sts4
mtspr lr, r10 mtspr lr, r10
! r3 = ptr to element
! r0 = size of element
cmpwi r0, 1
bne 1f
! Store 1 byte.
lwz r4, 0(sp)
addi sp, sp, 4
stb r4, 0(r3)
blr
1:
cmpwi r0, 2
bne 2f
! Store 2 bytes.
lwz r4, 0(sp)
addi sp, sp, 4
sth r4, 0(r3)
blr
2:
! Store r0 bytes, where r0 must be a positive multiple of 4.
or r5, r0, r0 ! index r5 = length r0
3:
addic. r5, r5, -4 ! r5 -= 4
lwzx r4, r5, sp
stwx r4, r5, r3
bgt 3b ! loop if r5 > 0
add sp, r0, sp ! move stack pointer up
blr blr

View file

@ -1,50 +0,0 @@
.sect .text
! Stores a variable-sized structure from the stack.
!
! r3 = size
! r4 = address
!
! The size compares are unsigned, so sizes 0..1 store one byte, size 2
! stores one halfword, and sizes 3..4 store one word; each of these
! pops a single word from the stack.  Any larger size is rounded up to
! a multiple of 4 and copied a word at a time.
! Overwrites r3, r4, r5 and ctr.
.define .sts
.sts:
! These sizes are handled specially.
lwz r5, 0(sp) ! fetch the top word now; the three small cases store from r5
cmplwi r3, 1
ble size1
cmplwi r3, 2
ble size2
cmplwi r3, 4
ble size4
! Variable-sized structure.
addi r3, r3, 3
clrrwi r3, r3, 2 ! align size
srwi r3, r3, 2 ! convert size to the number of words
mtspr ctr, r3
! Pop one word and store it at r4, moving both pointers upward.
1:
lwz r5, 0(sp)
addi sp, sp, 4
stw r5, 0(r4)
addi r4, r4, 4
bdnz 1b ! decrement CTR, jump if non-zero
blr
size1:
stb r5, 0(r4)
b 1f
size2:
sth r5, 0(r4)
b 1f
size4:
stw r5, 0(r4)
! Common exit for the three small sizes: pop the word that was stored.
1:
addi sp, sp, 4
blr

35
mach/powerpc/libem/sts4.s Normal file
View file

@ -0,0 +1,35 @@
.sect .text
! Stores a variable-sized block from the stack.
!
! Pops the address from the top of the stack, then pops the block
! beneath it and stores it at that address.  Sizes 0 and 1 store the
! low byte of one stack word and size 2 stores its low halfword.  Any
! larger size is copied a word at a time; nothing here rounds the size
! up, so it must already be a multiple of 4.
!
! On entry: r3 = size
! Stack: ( block address -- )
! Overwrites r4, r5 and ctr.
! Preserves r10 for .sar4
.define .sts4
.sts4:
lwz r4, 0(sp) ! r4 = address
! Sizes 1 and 2 are handled specially.
cmplwi r3, 1
ble 1f
cmplwi r3, 2
ble 2f
! Else the size must be a multiple of 4.
srwi r5, r3, 2
mtspr ctr, r5 ! ctr = number of words
addi r4, r4, -4 ! adjust address to before block
! Each pass first advances sp and r4 by one word, then copies that
! word.  The first advance of sp steps over the address slot.
4: lwzu r5, 4(sp)
stwu r5, 4(r4)
bdnz 4b ! decrement ctr, jump if non-zero
addi sp, sp, 4 ! sp still points at the last word copied; drop it
blr
! Byte: the value is the word just below the address slot.
1: lwz r5, 4(sp)
stb r5, 0(r4)
b 3f
! Halfword: the value is the word just below the address slot.
2: lwz r5, 4(sp)
sth r5, 0(r4)
! Pop the address and the one-word value.
3: addi sp, sp, 8
blr

View file

@ -1042,13 +1042,13 @@ PATTERNS
pat loi /* Load arbitrary size */ pat loi /* Load arbitrary size */
leaving leaving
loc $1 loc $1
los INT32 los 4
pat los $1==INT32 /* Load arbitrary size */ pat los $1==4 /* Load arbitrary size */
with GPR3 GPR4 STACK with GPR3 STACK
kills ALL kills ALL
gen gen
bl {LABEL, ".los"} bl {LABEL, ".los4"}
pat sti $1==INT8 /* Store byte indirect */ pat sti $1==INT8 /* Store byte indirect */
with REG REG with REG REG
@ -1141,13 +1141,13 @@ PATTERNS
pat sti /* Store arbitrary size */ pat sti /* Store arbitrary size */
leaving leaving
loc $1 loc $1
sts INT32 sts 4
pat sts $1==INT32 /* Store arbitrary size */ pat sts $1==4 /* Store arbitrary size */
with GPR3 GPR4 STACK with GPR3 STACK
kills ALL kills ALL
gen gen
bl {LABEL, ".sts"} bl {LABEL, ".sts4"}
/* Arithmetic wrappers */ /* Arithmetic wrappers */
@ -1459,40 +1459,40 @@ PATTERNS
yields %a yields %a
/* Arrays */ /* Arrays */
pat aar $1==INT32 /* Index array */ pat aar $1==4 /* Address of array element */
with GPR3 GPR4 GPR5 leaving
cal ".aar4"
pat lar $1==4 /* Load from array */
with STACK
kills ALL kills ALL
gen gen
bl {LABEL, ".aar4"} bl {LABEL, ".aar4"}
yields R3 /* pass r3 = size from .aar4 to .los4 */
bl {LABEL, ".los4"}
pat lae lar $2==INT32 && nicesize(rom($1, 3)) /* Load array */ pat lae lar $2==4 && nicesize(rom($1, 3))
leaving leaving
lae $1 lae $1
aar INT32 aar 4
loi rom($1, 3) loi rom($1, 3)
pat lar $1==INT32 /* Load array */ pat sar $1==4 /* Store to array */
with GPR3 GPR4 GPR5 STACK with STACK
kills ALL kills ALL
gen gen
bl {LABEL, ".lar4"} bl {LABEL, ".aar4"}
/* pass r3 = size from .aar4 to .sts4 */
bl {LABEL, ".sts4"}
pat lae sar $2==INT32 && nicesize(rom($1, 3)) /* Store array */ pat lae sar $2==4 && nicesize(rom($1, 3))
leaving leaving
lae $1 lae $1
aar INT32 aar 4
sti rom($1, 3) sti rom($1, 3)
pat sar $1==INT32 /* Store array */
with GPR3 GPR4 GPR5 STACK
kills ALL
gen
bl {LABEL, ".sar4"}
/* Sets */ /* Sets */