Change .fef8 and .fif8 to pass values on the stack.
Reorder the code in .fef8 and .fif8 so that, in the usual case, we fall through to the blr without taking any branches. The usual case, by my guess, is .fef8 with normalized numbers or .fif8 with small integers. Also change .fef8 and .fif8 to pass values on the real stack, not in specific registers. This simplifies the ncg table, and might help me experiment with changes to the ncg table. This change might or might not help mcg. It seems that mcg always uses the stack to pass values to libem, but I have not tested .fef8 or .fif8 with mcg.
This commit is contained in:
parent
1de1e8f7f0
commit
54949f713f
|
@ -3,35 +3,48 @@
|
||||||
.sect .text
|
.sect .text
|
||||||
|
|
||||||
! Split a double-precision float into fraction and exponent, like
|
! Split a double-precision float into fraction and exponent, like
|
||||||
! frexp(3) in C. On entry:
|
! frexp(3) in C.
|
||||||
! r3 = float, high word (bits 0..31)
|
!
|
||||||
! r4 = float, low word (bits 32..63)
|
! Stack: ( double -- fraction exponent )
|
||||||
! Yields:
|
|
||||||
! r3 = fraction, high word (bits 0..31)
|
|
||||||
! r4 = fraction, low word (bits 32..63)
|
|
||||||
! r5 = exponent
|
|
||||||
|
|
||||||
.define .fef8
|
.define .fef8
|
||||||
.fef8:
|
.fef8:
|
||||||
|
lwz r3, 0(sp) ! r3 = high word (bits 0..31)
|
||||||
|
lwz r4, 4(sp) ! r4 = low word (bits 32..63)
|
||||||
|
|
||||||
! IEEE double-precision format:
|
! IEEE double-precision format:
|
||||||
! sign exponent fraction
|
! sign exponent fraction
|
||||||
! 0 1..11 12..63
|
! 0 1..11 12..63
|
||||||
extrwi r6, r3, 11, 1 ! r6 = IEEE exponent
|
!
|
||||||
addi r5, r6, -1022 ! r5 = true exponent
|
! To get fraction in [0.5, 1) or (-1, -0.5], we subtract 1022
|
||||||
|
! from the IEEE exponent.
|
||||||
|
|
||||||
|
extrwi. r6, r3, 11, 1 ! r6 = IEEE exponent
|
||||||
|
addi r5, r6, -1022 ! r5 = our exponent
|
||||||
|
beq 2f ! jump if zero or denormalized
|
||||||
cmpwi r6, 2047
|
cmpwi r6, 2047
|
||||||
beqlr ! return if infinity or NaN
|
beq 1f ! jump if infinity or NaN
|
||||||
cmpwi r6, 0
|
! fall through if normalized
|
||||||
bne 1f ! jump if normalized number
|
|
||||||
|
|
||||||
! Got denormalized number or zero, probably zero.
|
! Put fraction in [0.5, 1) or (-1, -0.5] by setting its
|
||||||
|
! IEEE exponent to 1022.
|
||||||
|
rlwinm r3, r3, 0, 12, 0 ! clear old exponent
|
||||||
|
oris r3, r3, 1022 << 4 ! set new exponent
|
||||||
|
! fall through
|
||||||
|
|
||||||
|
1: stw r3, 0(sp)
|
||||||
|
stw r4, 4(sp) ! push fraction
|
||||||
|
stwu r5, -4(sp) ! push exponent
|
||||||
|
blr
|
||||||
|
|
||||||
|
2: ! Got denormalized number or zero, probably zero.
|
||||||
extrwi r6, r3, 22, 12
|
extrwi r6, r3, 22, 12
|
||||||
addi r5, r0, 0 ! r5 = true exponent = 0
|
|
||||||
or. r6, r6, r4 ! r6 = high|low fraction
|
or. r6, r6, r4 ! r6 = high|low fraction
|
||||||
beqlr ! return if zero
|
bne 3f ! jump if not zero
|
||||||
|
li r5, 0 ! exponent = 0
|
||||||
|
b 1b
|
||||||
|
|
||||||
! Got denormalized number, not zero.
|
3: ! Got denormalized number, not zero.
|
||||||
stwu r4, -4(sp)
|
|
||||||
stwu r3, -4(sp)
|
|
||||||
lfd f0, 0(sp)
|
lfd f0, 0(sp)
|
||||||
lis r6, ha16[_2_64]
|
lis r6, ha16[_2_64]
|
||||||
lfd f1, lo16[_2_64](r6)
|
lfd f1, lo16[_2_64](r6)
|
||||||
|
@ -40,14 +53,8 @@
|
||||||
lwz r3, 0(sp)
|
lwz r3, 0(sp)
|
||||||
lwz r4, 4(sp)
|
lwz r4, 4(sp)
|
||||||
extrwi r6, r3, 11, 1 ! r6 = IEEE exponent
|
extrwi r6, r3, 11, 1 ! r6 = IEEE exponent
|
||||||
addi sp, sp, 8
|
addi r5, r6, -1022 - 64 ! r5 = our exponent
|
||||||
addi r5, r6, -1022 - 64 ! r5 = true exponent
|
b 1b
|
||||||
1:
|
|
||||||
! Put fraction in [0.5, 1) or (-1, -0.5] by setting its
|
|
||||||
! exponent to true 0, IEEE 1022.
|
|
||||||
rlwinm r3, r3, 0, 12, 0 ! clear old exponent
|
|
||||||
oris r3, r3, 1022 << 4 ! set new exponent
|
|
||||||
blr
|
|
||||||
|
|
||||||
.sect .rom
|
.sect .rom
|
||||||
_2_64:
|
_2_64:
|
||||||
|
|
|
@ -1,45 +1,37 @@
|
||||||
.sect .text
|
.sect .text
|
||||||
|
|
||||||
! Multiplies two double-precision floats, then splits the product into
|
! Multiplies two double-precision floats, then splits the product into
|
||||||
! integer and fraction, like modf(3) in C. On entry:
|
! fraction and integer, like modf(3) in C. On entry:
|
||||||
! f1 = float
|
!
|
||||||
! f2 = other float
|
! Stack: ( a b -- fraction integer )
|
||||||
! Yields:
|
|
||||||
! f1 = fraction
|
|
||||||
! f2 = integer
|
|
||||||
|
|
||||||
.define .fif8
|
.define .fif8
|
||||||
.fif8:
|
.fif8:
|
||||||
fmul f1, f1, f2
|
lfd f1, 8(sp)
|
||||||
stfdu f1, -8(sp) ! push f1 = product
|
lfd f2, 0(sp)
|
||||||
|
fmul f1, f1, f2 ! f1 = a * b
|
||||||
|
stfd f1, 0(sp)
|
||||||
lwz r3, 0(sp) ! r3 = high word
|
lwz r3, 0(sp) ! r3 = high word
|
||||||
lwz r4, 4(sp) ! r4 = low word
|
lwz r4, 4(sp) ! r4 = low word
|
||||||
|
|
||||||
! IEEE double-precision format:
|
! IEEE double-precision format:
|
||||||
! sign exponent fraction
|
! sign exponent fraction
|
||||||
! 0 1..11 12..63
|
! 0 1..11 12..63
|
||||||
|
!
|
||||||
! Subtract 1023 from the IEEE exponent. If the result is from
|
! Subtract 1023 from the IEEE exponent. If the result is from
|
||||||
! 0 to 51, then the IEEE fraction has that many integer bits.
|
! 0 to 51, then the IEEE fraction has that many integer bits.
|
||||||
! (IEEE has an implicit 1 before its fraction. If the IEEE
|
! (IEEE has an implicit 1 before its fraction. If the IEEE
|
||||||
! fraction has 0 integer bits, we still have an integer.)
|
! fraction has 0 integer bits, we still have an integer.)
|
||||||
|
|
||||||
extrwi r5, r3, 11, 1 ! r5 = IEEE exponent
|
extrwi r5, r3, 11, 1 ! r5 = IEEE exponent
|
||||||
addic. r5, r5, -1023 ! r5 = nr of integer bits
|
addic. r5, r5, -1023 ! r5 = nr of integer bits
|
||||||
blt no_int
|
blt 4f ! branch if no integer
|
||||||
cmpwi r5, 21
|
|
||||||
blt small_int
|
|
||||||
cmpwi r5, 52
|
cmpwi r5, 52
|
||||||
blt big_int
|
bge 5f ! branch if no fraction
|
||||||
|
cmpwi r5, 21
|
||||||
|
bge 6f ! branch if large integer
|
||||||
|
! fall through if small integer
|
||||||
|
|
||||||
! f1 is an integer without fraction (or infinity or NaN).
|
|
||||||
fmr f2, f1 ! integer = f1
|
|
||||||
b subtract
|
|
||||||
|
|
||||||
no_int:
|
|
||||||
! f1 is a fraction without integer.
|
|
||||||
fsub f2, f1, f1 ! integer = zero
|
|
||||||
b done
|
|
||||||
|
|
||||||
small_int:
|
|
||||||
! f1 has r5 = 0 to 20 integer bits in the IEEE fraction.
|
! f1 has r5 = 0 to 20 integer bits in the IEEE fraction.
|
||||||
! High word has 20 - r5 fraction bits.
|
! High word has 20 - r5 fraction bits.
|
||||||
li r6, 20
|
li r6, 20
|
||||||
|
@ -47,21 +39,28 @@ small_int:
|
||||||
srw r3, r3, r6
|
srw r3, r3, r6
|
||||||
li r4, 0 ! clear low word
|
li r4, 0 ! clear low word
|
||||||
slw r3, r3, r6 ! clear fraction in high word
|
slw r3, r3, r6 ! clear fraction in high word
|
||||||
b move_int
|
! fall through
|
||||||
|
|
||||||
big_int:
|
1: stw r3, 0(sp)
|
||||||
! f1 has r5 = 21 to 51 integer bits.
|
stw r4, 4(sp)
|
||||||
|
lfd f2, 0(sp) ! integer = high word, low word
|
||||||
|
2: fsub f1, f1, f2 ! fraction = value - integer
|
||||||
|
3: stfd f1, 8(sp) ! push fraction
|
||||||
|
stfd f2, 0(sp) ! push integer
|
||||||
|
blr
|
||||||
|
|
||||||
|
4: ! f1 is a fraction without integer.
|
||||||
|
fsub f2, f1, f1 ! integer = zero
|
||||||
|
b 3b
|
||||||
|
|
||||||
|
5: ! f1 is an integer without fraction (or infinity or NaN).
|
||||||
|
fmr f2, f1 ! integer = f1
|
||||||
|
b 2b
|
||||||
|
|
||||||
|
6: ! f1 has r5 = 21 to 51 integer bits.
|
||||||
! Low word has 52 - r5 fraction bits.
|
! Low word has 52 - r5 fraction bits.
|
||||||
li r6, 52
|
li r6, 52
|
||||||
subf r6, r5, r6
|
subf r6, r5, r6
|
||||||
srw r4, r4, r6
|
srw r4, r4, r6
|
||||||
slw r4, r4, r6 ! clear fraction in low word
|
slw r4, r4, r6 ! clear fraction in low word
|
||||||
move_int:
|
b 1b
|
||||||
stw r3, 0(sp)
|
|
||||||
stw r4, 4(sp)
|
|
||||||
lfd f2, 0(sp) ! f2 = integer
|
|
||||||
subtract:
|
|
||||||
fsub f1, f1, f2 ! fraction = value - integer
|
|
||||||
done:
|
|
||||||
addi sp, sp, 8 ! restore stack pointer
|
|
||||||
blr
|
|
||||||
|
|
|
@ -2402,16 +2402,11 @@ PATTERNS
|
||||||
addi SP, SP, {CONST, 8}
|
addi SP, SP, {CONST, 8}
|
||||||
yields %c
|
yields %c
|
||||||
|
|
||||||
pat fef $1==INT64 /* Split exponent, fraction */
|
pat fef $1==8 /* Split fraction, exponent */
|
||||||
with GPR3 GPR4
|
leaving
|
||||||
kills ALL
|
cal ".fef8"
|
||||||
gen
|
|
||||||
bl {LABEL, ".fef8"}
|
|
||||||
yields R4 R3 R5
|
|
||||||
|
|
||||||
pat fif $1==INT64 /* Multiply then split integer, fraction */
|
/* Multiply two doubles, then split fraction, integer */
|
||||||
with FPR1 FPR2
|
pat fif $1==8
|
||||||
kills ALL
|
leaving
|
||||||
gen
|
cal ".fif8"
|
||||||
bl {LABEL, ".fif8"}
|
|
||||||
yields F1 F2
|
|
||||||
|
|
Loading…
Reference in a new issue