Use .los4 in lar 4 and .sts4 in sar 4.

Our libem had two implementations of loading a block onto the stack,
one for lar 4 and one for los 4.  Now lar 4 and los 4 share the code in
.los4.  Likewise, sar 4 and sts 4 share the code in .sts4.

Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4.  Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop.  Remove the lines
to "align size" where the size must already be a multiple of 4.

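Fix the upper bound check in .aar4.  The old code compared the index
against r3 (the descriptor pointer) instead of the bound it had just
loaded into r0, and trapped with bge; it now compares against r0 and
traps with bgt.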

Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element.  So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.

ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them.  They might or might not work in mcg.
This commit is contained in:
George Koehler 2017-02-13 15:22:00 -05:00
parent 54949f713f
commit ba9b021253
9 changed files with 118 additions and 200 deletions

View file

@ -1,14 +1,9 @@
.sect .text
! Index into a bounds-checked array.
! Get address of element of bounds-checked array.
!
! On entry:
! r3 = ptr to descriptor
! r4 = index
! r5 = address of array
! Yields:
! r3 = address of element
! r0 = size of element (used by .lar4, .sar4)
! Stack: ( array-adr index descriptor-adr -- element-adr )
! Sets r3 = size of element for .los4, .sts4
! Preserves r10 for .lar4, .sar4
.define .aar4
@ -17,16 +12,21 @@
ori r0, r0, lo16[.trap_earray]
mtspr ctr, r0 ! load CTR with trap address
lwz r0, 0(r3)
subf. r4, r0, r4 ! adjust range
lwz r4, 0(sp) ! r4 = address of descriptor
lwz r5, 4(sp) ! r5 = index
lwz r6, 8(sp) ! r6 = address of array
lwz r0, 0(r4)
subf. r5, r0, r5 ! subtract lower bound from index
bltctr ! check lower bound
lwz r0, 4(r3)
cmplw r4, r3
bgectr ! check upper bound
lwz r0, 8(r3)
mullw r4, r4, r0 ! scale index
add r3, r4, r5 ! calculate element address
lwz r0, 4(r4)
cmplw r5, r0
bgtctr ! check upper bound
lwz r3, 8(r4) ! r3 = size of element
mullw r5, r5, r3 ! scale index by size
add r6, r6, r5
stw r6, 8(sp) ! push address of element
addi sp, sp, 8
blr

View file

@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do
acklibrary {
name = "lib_"..plat,
srcs = {
"./*.s",
"./*.s", -- los4.s, sts4.s
},
vars = { plat = plat },
deps = {

View file

@ -2,39 +2,13 @@
! Load from bounds-checked array.
!
! On entry:
! r3 = ptr to descriptor
! r4 = index
! r5 = address of array
! Stack: ( array-adr index descriptor-adr -- element )
.define .lar4
.lar4:
mfspr r10, lr
bl .aar4
! pass r3 = size from .aar4 to .los4
bl .los4
mtspr lr, r10
! r3 = ptr to element
! r0 = size of element
cmpwi r0, 1
bne 1f
! Load 1 byte.
lbz r4, 0(r3)
stwu r4, -4(sp)
blr
1:
cmpwi r0, 2
bne 2f
! Load 2 bytes.
lhz r4, 0(r3)
stwu r4, -4(sp)
blr
2:
! Load r0 bytes, where r0 must be a positive multiple of 4.
subf sp, r0, sp ! move stack pointer down
or r5, r0, r0 ! index r5 = length r0
3:
addic. r5, r5, -4 ! r5 -= 4
lwzx r4, r5, r3
stwx r4, r5, sp
bgt 3b ! loop if r5 > 0
blr

View file

@ -1,47 +0,0 @@
.sect .text

! .los -- push a variable-sized structure onto the stack.
!
! On entry:
!   r3 = size in bytes
!   r4 = address of the structure
!
! Scratch: r3, r5, ctr.

.define .los
.los:
    ! Sizes up to 4 need just one load and one push.
    cmplwi r3, 1
    ble 1f
    cmplwi r3, 2
    ble 2f
    cmplwi r3, 4
    ble 4f

    ! Larger structure: round the size up to whole words, then copy
    ! word by word from the top of the block downwards.
    addi r3, r3, 3
    clrrwi r3, r3, 2        ! r3 = size rounded up to a multiple of 4
    add r4, r4, r3          ! r4 = one past the end of the block
    srwi r3, r3, 2          ! r3 = number of words
    mtspr ctr, r3
8:
    lwzu r5, -4(r4)
    stwu r5, -4(sp)
    bdnz 8b                 ! loop until ctr reaches zero
    blr

1:
    lbz r3, 0(r4)           ! single byte
    stwu r3, -4(sp)
    blr
2:
    lhz r3, 0(r4)           ! single halfword
    stwu r3, -4(sp)
    blr
4:
    lwz r3, 0(r4)           ! single word
    stwu r3, -4(sp)
    blr

34
mach/powerpc/libem/los4.s Normal file
View file

@ -0,0 +1,34 @@
.sect .text

! .los4 -- replace the address on top of the stack with the block
! that it points to.
!
! On entry: r3 = size of the block in bytes
! Stack: ( address -- block )
! Scratch: r4, r5, ctr.  r10 is not touched, so .lar4 can keep its
! return address there.

.define .los4
.los4:
    lwz r4, 0(sp)           ! r4 = source address, still on the stack

    ! Sizes 1 and 2 take a single load whose result overwrites the
    ! address slot.
    cmplwi r3, 1
    ble 1f
    cmplwi r3, 2
    ble 2f

    ! Any other size must be a multiple of 4.  Copy whole words,
    ! walking from the end of the block back to its start.
    add r4, r4, r3          ! r4 = one past the end of the block
    srwi r5, r3, 2
    mtspr ctr, r5           ! ctr = number of words
    addi sp, sp, 4          ! drop the address
5:  lwzu r5, -4(r4)
    stwu r5, -4(sp)
    bdnz 5b                 ! loop until ctr reaches zero
    blr

1:  lbz r5, 0(r4)           ! one byte
    b 3f
2:  lhz r5, 0(r4)           ! one halfword
3:  stw r5, 0(sp)           ! value takes the place of the address
    blr

View file

@ -2,41 +2,13 @@
! Store to bounds-checked array.
!
! On entry:
! r3 = ptr to descriptor
! r4 = index
! r5 = address of array
! Stack: ( element array-adr index descriptor-adr -- )
.define .sar4
.sar4:
mfspr r10, lr
bl .aar4
! pass r3 = size from .aar4 to .sts4
bl .sts4
mtspr lr, r10
! r3 = ptr to element
! r0 = size of element
cmpwi r0, 1
bne 1f
! Store 1 byte.
lwz r4, 0(sp)
addi sp, sp, 4
stb r4, 0(r3)
blr
1:
cmpwi r0, 2
bne 2f
! Store 2 bytes.
lwz r4, 0(sp)
addi sp, sp, 4
sth r4, 0(r3)
blr
2:
! Store r0 bytes, where r0 must be a positive multiple of 4.
or r5, r0, r0 ! index r5 = length r0
3:
addic. r5, r5, -4 ! r5 -= 4
lwzx r4, r5, sp
stwx r4, r5, r3
bgt 3b ! loop if r5 > 0
add sp, r0, sp ! move stack pointer up
blr

View file

@ -1,50 +0,0 @@
.sect .text

! .sts -- pop a variable-sized structure off the stack and store it.
!
! On entry:
!   r3 = size in bytes
!   r4 = destination address
!
! Scratch: r3, r4, r5, ctr.

.define .sts
.sts:
    lwz r5, 0(sp)           ! top word, used by the small-size paths

    ! Sizes up to 4 need just one store and one pop.
    cmplwi r3, 1
    ble 1f
    cmplwi r3, 2
    ble 2f
    cmplwi r3, 4
    ble 4f

    ! Larger structure: round the size up to whole words, then move
    ! one word per iteration from the stack to the destination.
    addi r3, r3, 3
    clrrwi r3, r3, 2        ! r3 = size rounded up to a multiple of 4
    srwi r3, r3, 2          ! r3 = number of words
    mtspr ctr, r3
8:
    lwz r5, 0(sp)
    addi sp, sp, 4
    stw r5, 0(r4)
    addi r4, r4, 4
    bdnz 8b                 ! loop until ctr reaches zero
    blr

1:
    stb r5, 0(r4)           ! single byte
    addi sp, sp, 4
    blr
2:
    sth r5, 0(r4)           ! single halfword
    addi sp, sp, 4
    blr
4:
    stw r5, 0(r4)           ! single word
    addi sp, sp, 4
    blr

35
mach/powerpc/libem/sts4.s Normal file
View file

@ -0,0 +1,35 @@
.sect .text

! .sts4 -- pop an address, then pop a block and store it at that
! address.
!
! On entry: r3 = size of the block in bytes
! Stack: ( block address -- )
! Scratch: r4, r5, ctr.  r10 is not touched, so .sar4 can keep its
! return address there.

.define .sts4
.sts4:
    lwz r4, 0(sp)           ! r4 = destination address

    ! Sizes 1 and 2 take a single store of the word above the address.
    cmplwi r3, 1
    ble 1f
    cmplwi r3, 2
    ble 2f

    ! Any other size must be a multiple of 4.  Both pointers are
    ! advanced by the update-form load and store.
    addi r4, r4, -4         ! r4 = 4 bytes before the destination
    srwi r5, r3, 2
    mtspr ctr, r5           ! ctr = number of words
6:  lwzu r5, 4(sp)
    stwu r5, 4(r4)
    bdnz 6b                 ! loop until ctr reaches zero
    addi sp, sp, 4          ! sp was left on the last word; pop it
    blr

1:  lwz r5, 4(sp)
    stb r5, 0(r4)           ! one byte
    addi sp, sp, 8          ! pop address and value
    blr

2:  lwz r5, 4(sp)
    sth r5, 0(r4)           ! one halfword
    addi sp, sp, 8          ! pop address and value
    blr

View file

@ -1042,13 +1042,13 @@ PATTERNS
pat loi /* Load arbitrary size */
leaving
loc $1
los INT32
los 4
pat los $1==INT32 /* Load arbitrary size */
with GPR3 GPR4 STACK
pat los $1==4 /* Load arbitrary size */
with GPR3 STACK
kills ALL
gen
bl {LABEL, ".los"}
bl {LABEL, ".los4"}
pat sti $1==INT8 /* Store byte indirect */
with REG REG
@ -1141,13 +1141,13 @@ PATTERNS
pat sti /* Store arbitrary size */
leaving
loc $1
sts INT32
sts 4
pat sts $1==INT32 /* Store arbitrary size */
with GPR3 GPR4 STACK
pat sts $1==4 /* Store arbitrary size */
with GPR3 STACK
kills ALL
gen
bl {LABEL, ".sts"}
bl {LABEL, ".sts4"}
/* Arithmetic wrappers */
@ -1459,40 +1459,40 @@ PATTERNS
yields %a
/* Arrays */
pat aar $1==INT32 /* Index array */
with GPR3 GPR4 GPR5
pat aar $1==4 /* Address of array element */
leaving
cal ".aar4"
pat lar $1==4 /* Load from array */
with STACK
kills ALL
gen
bl {LABEL, ".aar4"}
yields R3
/* pass r3 = size from .aar4 to .los4 */
bl {LABEL, ".los4"}
pat lae lar $2==INT32 && nicesize(rom($1, 3)) /* Load array */
pat lae lar $2==4 && nicesize(rom($1, 3))
leaving
lae $1
aar INT32
aar 4
loi rom($1, 3)
pat lar $1==INT32 /* Load array */
with GPR3 GPR4 GPR5 STACK
pat sar $1==4 /* Store to array */
with STACK
kills ALL
gen
bl {LABEL, ".lar4"}
bl {LABEL, ".aar4"}
/* pass r3 = size from .aar4 to .sts4 */
bl {LABEL, ".sts4"}
pat lae sar $2==INT32 && nicesize(rom($1, 3)) /* Store array */
pat lae sar $2==4 && nicesize(rom($1, 3))
leaving
lae $1
aar INT32
aar 4
sti rom($1, 3)
pat sar $1==INT32 /* Store array */
with GPR3 GPR4 GPR5 STACK
kills ALL
gen
bl {LABEL, ".sar4"}
/* Sets */