Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block onto the stack, one for lar 4 and one for los 4. Now lar 4 and los 4 share the code in .los4. Likewise, sar 4 and sts 4 share the code in .sts4. Rename .los to .los4 and .sts to .sts4, because they implement los 4 and sts 4. Remove the special case for loading or storing 4 bytes, because we can do it with 1 iteration of the loop. Remove the lines to "align size" where the size must already be a multiple of 4. Fix the upper bound check in .aar4. Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the real stack, except that .los4 and .sts4 take the size in register r3. Have .aar4 set r3 to the size of the array element. So lar 4 is just .aar4 then .los4, and sar 4 is just .aar4 then .sts4. ncg no longer calls .lar4 and .sar4 in libem, because it inlines the code; but I keep .lar4 and .sar4 in libem, because mcg references them. They might or might not work in mcg.
This commit is contained in:
parent
54949f713f
commit
ba9b021253
|
@ -1,14 +1,9 @@
|
|||
.sect .text
|
||||
|
||||
! Index into a bounds-checked array.
|
||||
! Get address of element of bounds-checked array.
|
||||
!
|
||||
! On entry:
|
||||
! r3 = ptr to descriptor
|
||||
! r4 = index
|
||||
! r5 = address of array
|
||||
! Yields:
|
||||
! r3 = address of element
|
||||
! r0 = size of element (used by .lar4, .sar4)
|
||||
! Stack: ( array-adr index descriptor-adr -- element-adr )
|
||||
! Sets r3 = size of element for .los4, .sts4
|
||||
! Preserves r10 for .lar4, .sar4
|
||||
|
||||
.define .aar4
|
||||
|
@ -17,16 +12,21 @@
|
|||
ori r0, r0, lo16[.trap_earray]
|
||||
mtspr ctr, r0 ! load CTR with trap address
|
||||
|
||||
lwz r0, 0(r3)
|
||||
subf. r4, r0, r4 ! adjust range
|
||||
lwz r4, 0(sp) ! r4 = address of descriptor
|
||||
lwz r5, 4(sp) ! r5 = index
|
||||
lwz r6, 8(sp) ! r6 = address of array
|
||||
|
||||
lwz r0, 0(r4)
|
||||
subf. r5, r0, r5 ! subtract lower bound from index
|
||||
bltctr ! check lower bound
|
||||
|
||||
lwz r0, 4(r3)
|
||||
cmplw r4, r3
|
||||
bgectr ! check upper bound
|
||||
|
||||
lwz r0, 8(r3)
|
||||
mullw r4, r4, r0 ! scale index
|
||||
add r3, r4, r5 ! calculate element address
|
||||
lwz r0, 4(r4)
|
||||
cmplw r5, r0
|
||||
bgtctr ! check upper bound
|
||||
|
||||
lwz r3, 8(r4) ! r3 = size of element
|
||||
mullw r5, r5, r3 ! scale index by size
|
||||
add r6, r6, r5
|
||||
stw r6, 8(sp) ! push address of element
|
||||
addi sp, sp, 8
|
||||
blr
|
||||
|
|
|
@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do
|
|||
acklibrary {
|
||||
name = "lib_"..plat,
|
||||
srcs = {
|
||||
"./*.s",
|
||||
"./*.s", -- los4.s, sts4.s
|
||||
},
|
||||
vars = { plat = plat },
|
||||
deps = {
|
||||
|
|
|
@ -2,39 +2,13 @@
|
|||
|
||||
! Load from bounds-checked array.
|
||||
!
|
||||
! On entry:
|
||||
! r3 = ptr to descriptor
|
||||
! r4 = index
|
||||
! r5 = address of array
|
||||
! Stack: ( array-adr index descriptor-adr -- element )
|
||||
|
||||
.define .lar4
|
||||
.lar4:
|
||||
mfspr r10, lr
|
||||
bl .aar4
|
||||
! pass r3 = size from .aar4 to .los4
|
||||
bl .los4
|
||||
mtspr lr, r10
|
||||
! r3 = ptr to element
|
||||
! r0 = size of element
|
||||
|
||||
cmpwi r0, 1
|
||||
bne 1f
|
||||
! Load 1 byte.
|
||||
lbz r4, 0(r3)
|
||||
stwu r4, -4(sp)
|
||||
blr
|
||||
1:
|
||||
cmpwi r0, 2
|
||||
bne 2f
|
||||
! Load 2 bytes.
|
||||
lhz r4, 0(r3)
|
||||
stwu r4, -4(sp)
|
||||
blr
|
||||
2:
|
||||
! Load r0 bytes, where r0 must be a positive multiple of 4.
|
||||
subf sp, r0, sp ! move stack pointer down
|
||||
or r5, r0, r0 ! index r5 = length r0
|
||||
3:
|
||||
addic. r5, r5, -4 ! r5 -= 4
|
||||
lwzx r4, r5, r3
|
||||
stwx r4, r5, sp
|
||||
bgt 3b ! loop if r5 > 0
|
||||
blr
|
||||
|
|
|
@ -1,47 +0,0 @@
|
|||
.sect .text
|
||||
|
||||
! Loads a variable-sized structure onto the stack.
|
||||
!
|
||||
! r3 = size
|
||||
! r4 = address
|
||||
|
||||
.define .los
|
||||
.los:
|
||||
! These sizes are handled specially.
|
||||
|
||||
cmplwi r3, 1
|
||||
ble size1
|
||||
|
||||
cmplwi r3, 2
|
||||
ble size2
|
||||
|
||||
cmplwi r3, 4
|
||||
ble size4
|
||||
|
||||
! Variable-sized structure.
|
||||
|
||||
addi r3, r3, 3
|
||||
clrrwi r3, r3, 2 ! align size
|
||||
|
||||
add r4, r4, r3 ! adjust address to top of block
|
||||
|
||||
srwi r3, r3, 2 ! convert size to the number of words
|
||||
mtspr ctr, r3
|
||||
|
||||
1:
|
||||
lwzu r5, -4(r4)
|
||||
stwu r5, -4(sp)
|
||||
bdnz 1b ! decrement CTR, jump if non-zero
|
||||
blr
|
||||
|
||||
size1:
|
||||
lbz r3, 0(r4)
|
||||
b 1f
|
||||
size2:
|
||||
lhz r3, 0(r4)
|
||||
b 1f
|
||||
size4:
|
||||
lwz r3, 0(r4)
|
||||
1:
|
||||
stwu r3, -4(sp)
|
||||
blr
|
34
mach/powerpc/libem/los4.s
Normal file
34
mach/powerpc/libem/los4.s
Normal file
|
@ -0,0 +1,34 @@
|
|||
.sect .text
|
||||
|
||||
! Loads a variable-sized block from memory onto the stack.
|
||||
!
|
||||
! On entry: r3 = size in bytes (1, 2, or a multiple of 4); r3 is not modified
|
||||
! Stack: ( address -- block ) the address word is consumed, the block replaces it
|
||||
! Preserves r10 for .lar4; uses r4, r5 and (for sizes above 2) ctr as scratch
|
||||
|
||||
.define .los4
|
||||
.los4:
|
||||
lwz r4, 0(sp) ! r4 = address
|
||||
|
||||
! Sizes 1 and 2 are handled specially: they yield one zero-extended word.
|
||||
cmplwi r3, 1 ! unsigned compare, so size 0 also takes the byte path
|
||||
ble 1f
|
||||
|
||||
cmplwi r3, 2 ! only size 2 can reach here and be <= 2
|
||||
ble 2f
|
||||
|
||||
|
||||
! Else the size must be a multiple of 4 (the shift below drops any remainder).
|
||||
srwi r5, r3, 2 ! r5 = size / 4
|
||||
mtspr ctr, r5 ! ctr = number of words
|
||||
addi sp, sp, 4 ! pop the address, already held in r4
|
||||
add r4, r4, r3 ! adjust address to end of block
|
||||
4: lwzu r5, -4(r4) ! step r4 down one word, then load it
|
||||
stwu r5, -4(sp) ! push that word; the block's first word is pushed last
|
||||
bdnz 4b ! decrement ctr, jump if non-zero
|
||||
blr
|
||||
|
||||
1: lbz r5, 0(r4) ! r5 = zero-extended byte at address
|
||||
stw r5, 0(sp) ! overwrite the address slot, sp unchanged
|
||||
blr
|
||||
2: lhz r5, 0(r4) ! r5 = zero-extended halfword at address
|
||||
stw r5, 0(sp) ! overwrite the address slot, sp unchanged
|
||||
blr
|
|
@ -2,41 +2,13 @@
|
|||
|
||||
! Store to bounds-checked array.
|
||||
!
|
||||
! On entry:
|
||||
! r3 = ptr to descriptor
|
||||
! r4 = index
|
||||
! r5 = address of array
|
||||
! Stack: ( element array-adr index descriptor-adr -- )
|
||||
|
||||
.define .sar4
|
||||
.sar4:
|
||||
mfspr r10, lr
|
||||
bl .aar4
|
||||
! pass r3 = size from .aar4 to .sts4
|
||||
bl .sts4
|
||||
mtspr lr, r10
|
||||
! r3 = ptr to element
|
||||
! r0 = size of element
|
||||
|
||||
cmpwi r0, 1
|
||||
bne 1f
|
||||
! Store 1 byte.
|
||||
lwz r4, 0(sp)
|
||||
addi sp, sp, 4
|
||||
stb r4, 0(r3)
|
||||
blr
|
||||
1:
|
||||
cmpwi r0, 2
|
||||
bne 2f
|
||||
! Store 2 bytes.
|
||||
lwz r4, 0(sp)
|
||||
addi sp, sp, 4
|
||||
sth r4, 0(r3)
|
||||
blr
|
||||
2:
|
||||
! Store r0 bytes, where r0 must be a positive multiple of 4.
|
||||
or r5, r0, r0 ! index r5 = length r0
|
||||
3:
|
||||
addic. r5, r5, -4 ! r5 -= 4
|
||||
lwzx r4, r5, sp
|
||||
stwx r4, r5, r3
|
||||
bgt 3b ! loop if r5 > 0
|
||||
add sp, r0, sp ! move stack pointer up
|
||||
blr
|
||||
|
|
|
@ -1,50 +0,0 @@
|
|||
.sect .text
|
||||
|
||||
! Stores a variable-sized structure from the stack.
|
||||
!
|
||||
! r3 = size
|
||||
! r4 = address
|
||||
|
||||
.define .sts
|
||||
.sts:
|
||||
! These sizes are handled specially.
|
||||
|
||||
lwz r5, 0(sp)
|
||||
|
||||
cmplwi r3, 1
|
||||
ble size1
|
||||
|
||||
cmplwi r3, 2
|
||||
ble size2
|
||||
|
||||
cmplwi r3, 4
|
||||
ble size4
|
||||
|
||||
! Variable-sized structure.
|
||||
|
||||
addi r3, r3, 3
|
||||
clrrwi r3, r3, 2 ! align size
|
||||
|
||||
srwi r3, r3, 2 ! convert size to the number of words
|
||||
mtspr ctr, r3
|
||||
|
||||
1:
|
||||
lwz r5, 0(sp)
|
||||
addi sp, sp, 4
|
||||
stw r5, 0(r4)
|
||||
addi r4, r4, 4
|
||||
|
||||
bdnz 1b ! decrement CTR, jump if non-zero
|
||||
blr
|
||||
|
||||
size1:
|
||||
stb r5, 0(r4)
|
||||
b 1f
|
||||
size2:
|
||||
sth r5, 0(r4)
|
||||
b 1f
|
||||
size4:
|
||||
stw r5, 0(r4)
|
||||
1:
|
||||
addi sp, sp, 4
|
||||
blr
|
35
mach/powerpc/libem/sts4.s
Normal file
35
mach/powerpc/libem/sts4.s
Normal file
|
@ -0,0 +1,35 @@
|
|||
.sect .text
|
||||
|
||||
! Stores a variable-sized block from the stack into memory.
|
||||
!
|
||||
! On entry: r3 = size in bytes (1, 2, or a multiple of 4); r3 is not modified
|
||||
! Stack: ( block address -- ) the address is on top, the block lies just above it
|
||||
! Preserves r10 for .sar4; uses r4, r5 and (for sizes above 2) ctr as scratch
|
||||
|
||||
.define .sts4
|
||||
.sts4:
|
||||
lwz r4, 0(sp) ! r4 = address
|
||||
|
||||
! Sizes 1 and 2 are handled specially: the value occupies one stack word.
|
||||
cmplwi r3, 1 ! unsigned compare, so size 0 also takes the byte path
|
||||
ble 1f
|
||||
|
||||
cmplwi r3, 2 ! only size 2 can reach here and be <= 2
|
||||
ble 2f
|
||||
|
||||
|
||||
! Else the size must be a multiple of 4 (the shift below drops any remainder).
|
||||
srwi r5, r3, 2 ! r5 = size / 4
|
||||
mtspr ctr, r5 ! ctr = number of words
|
||||
addi r4, r4, -4 ! adjust address to before block
|
||||
4: lwzu r5, 4(sp) ! step sp up one word, then load it (first pass skips the address)
|
||||
stwu r5, 4(r4) ! step r4 up one word, then store there
|
||||
bdnz 4b ! decrement ctr, jump if non-zero
|
||||
addi sp, sp, 4 ! sp still points at the last word copied; pop it
|
||||
blr
|
||||
|
||||
1: lwz r5, 4(sp) ! r5 = value word above the address
|
||||
stb r5, 0(r4) ! store its low byte
|
||||
b 3f
|
||||
2: lwz r5, 4(sp) ! r5 = value word above the address
|
||||
sth r5, 0(r4) ! store its low halfword
|
||||
3: addi sp, sp, 8 ! pop the address and the value word
|
||||
blr
|
|
@ -1042,13 +1042,13 @@ PATTERNS
|
|||
pat loi /* Load arbitrary size */
|
||||
leaving
|
||||
loc $1
|
||||
los INT32
|
||||
los 4
|
||||
|
||||
pat los $1==INT32 /* Load arbitrary size */
|
||||
with GPR3 GPR4 STACK
|
||||
pat los $1==4 /* Load arbitrary size */
|
||||
with GPR3 STACK
|
||||
kills ALL
|
||||
gen
|
||||
bl {LABEL, ".los"}
|
||||
bl {LABEL, ".los4"}
|
||||
|
||||
pat sti $1==INT8 /* Store byte indirect */
|
||||
with REG REG
|
||||
|
@ -1141,13 +1141,13 @@ PATTERNS
|
|||
pat sti /* Store arbitrary size */
|
||||
leaving
|
||||
loc $1
|
||||
sts INT32
|
||||
sts 4
|
||||
|
||||
pat sts $1==INT32 /* Store arbitrary size */
|
||||
with GPR3 GPR4 STACK
|
||||
pat sts $1==4 /* Store arbitrary size */
|
||||
with GPR3 STACK
|
||||
kills ALL
|
||||
gen
|
||||
bl {LABEL, ".sts"}
|
||||
bl {LABEL, ".sts4"}
|
||||
|
||||
|
||||
/* Arithmetic wrappers */
|
||||
|
@ -1459,40 +1459,40 @@ PATTERNS
|
|||
yields %a
|
||||
|
||||
|
||||
|
||||
/* Arrays */
|
||||
|
||||
pat aar $1==INT32 /* Index array */
|
||||
with GPR3 GPR4 GPR5
|
||||
pat aar $1==4 /* Address of array element */
|
||||
leaving
|
||||
cal ".aar4"
|
||||
|
||||
pat lar $1==4 /* Load from array */
|
||||
with STACK
|
||||
kills ALL
|
||||
gen
|
||||
bl {LABEL, ".aar4"}
|
||||
yields R3
|
||||
/* pass r3 = size from .aar4 to .los4 */
|
||||
bl {LABEL, ".los4"}
|
||||
|
||||
pat lae lar $2==INT32 && nicesize(rom($1, 3)) /* Load array */
|
||||
pat lae lar $2==4 && nicesize(rom($1, 3))
|
||||
leaving
|
||||
lae $1
|
||||
aar INT32
|
||||
aar 4
|
||||
loi rom($1, 3)
|
||||
|
||||
pat lar $1==INT32 /* Load array */
|
||||
with GPR3 GPR4 GPR5 STACK
|
||||
pat sar $1==4 /* Store to array */
|
||||
with STACK
|
||||
kills ALL
|
||||
gen
|
||||
bl {LABEL, ".lar4"}
|
||||
bl {LABEL, ".aar4"}
|
||||
/* pass r3 = size from .aar4 to .sts4 */
|
||||
bl {LABEL, ".sts4"}
|
||||
|
||||
pat lae sar $2==INT32 && nicesize(rom($1, 3)) /* Store array */
|
||||
pat lae sar $2==4 && nicesize(rom($1, 3))
|
||||
leaving
|
||||
lae $1
|
||||
aar INT32
|
||||
aar 4
|
||||
sti rom($1, 3)
|
||||
|
||||
pat sar $1==INT32 /* Store array */
|
||||
with GPR3 GPR4 GPR5 STACK
|
||||
kills ALL
|
||||
gen
|
||||
bl {LABEL, ".sar4"}
|
||||
|
||||
|
||||
/* Sets */
|
||||
|
||||
|
|
Loading…
Reference in a new issue