Add fef 4, fif 4. Improve fef 8, fif 8. Other float changes.

When I wrote fef 8, I forgot to test denormalized numbers.  Oops.  Now
fix two of my mistakes:

 - When checking for zero, `extrwi r6, r3, 22, 12` needs to be
   `extrwi r6, r3, 20, 12`.  There are only 20 bits to extract.

 - After the multiplication by 2**64, I forgot to put the fraction in
   [0.5, 1) or (-1, -0.5] by setting IEEE exponent = 1022.

Teach fif 8 about signed zero and NaN.

In ncg/table, change cmf so NaN is not equal to any value, and comment
why ordered comparisons don't work with NaN.  Also add cost for
fctiwz, remove extra `uses REG`.

Edit comment in cfu8.s because the conditional branch might be before
or after fctiwz.
This commit is contained in:
George Koehler 2018-01-22 14:04:15 -05:00
parent f1304e1a3c
commit 66f93f08c5
7 changed files with 194 additions and 49 deletions

View file

@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do
acklibrary { acklibrary {
name = "lib_"..plat, name = "lib_"..plat,
srcs = { srcs = {
"./*.s", -- cfu8.s "./*.s", -- fif4.s
}, },
vars = { plat = plat }, vars = { plat = plat },
deps = { deps = {

View file

@ -42,6 +42,8 @@
! 1: yields r3 = the converted value. ! 1: yields r3 = the converted value.
! !
! Debian's clang 3.5.0-10 and gcc 4.9.2-10 don't clamp the value ! Debian's clang 3.5.0-10 and gcc 4.9.2-10 don't clamp the value
! before conversion. They avoid fsel and put the conditional branch ! before conversion. They avoid fsel and use the conditional branch
! before fctwiz. PowerPC 601 lacks fsel (but kernel might trap and ! to pick between 2 fctwiz instructions.
! emulate fsel). PowerPC 603, 604, G3, G4, G5 have fsel. !
! PowerPC 601 lacks fsel (but kernel might trap and emulate fsel).
! PowerPC 603, 604, G3, G4, G5 have fsel.

48
mach/powerpc/libem/fef4.s Normal file
View file

@ -0,0 +1,48 @@
.sect .text
! Split a single-precision float into fraction and exponent, like
! frexpf(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp
!
! Stack: ( single -- fraction exponent )
!
! The fraction overwrites the argument's stack slot; the exponent is
! then pushed below it as one word, so sp ends up 4 bytes lower.
! Writes r3, r5, r6 and cr0.
!
! Special cases, as coded below:
!  - zero: fraction = the same zero (sign kept), exponent = 0.
!  - infinity or NaN: fraction = the argument unchanged,
!    exponent = 255 - 126 = 129.
!  - denormalized: the fraction is normalized first, so the result
!    is still in [0.5, 1) or (-1, -0.5].
.define .fef4
.fef4:
lwz r3, 0(sp) ! r3 = word of float bits
! IEEE single = sign * 1.fraction * 2**(exponent - 127)
! sign exponent fraction
! 0 1..8 9..31
!
! IEEE exponent = 126 in [0.5, 1) or (-1, -0.5].
extrwi. r6, r3, 8, 1 ! r6 = IEEE exponent
beq 3f ! jump if zero or denormalized
cmpwi r6, 255
! addi does not touch cr0, so the beq below still tests the cmpwi.
addi r5, r6, -126 ! r5 = our exponent
beq 2f ! jump if infinity or NaN
! fall through if normalized
! Put fraction in [0.5, 1) or (-1, -0.5].
! The sign bit and the fraction bits of r3 are kept; only the
! exponent field is replaced.
1: li r6, 126
insrwi r3, r6, 8, 1 ! IEEE exponent = 126
! fall through
2: stw r3, 0(sp) ! push fraction
stwu r5, -4(sp) ! push exponent
blr
! Got denormalized number or zero, probably zero.
! If zero, then exponent must also be zero.
3: extrwi. r6, r3, 23, 9 ! r6 = fraction
bne 4f ! jump if not zero
li r5, 0 ! exponent = 0
b 2b
! Got denormalized number = 0.fraction * 2**-126
!
! r6 holds only 23 bits, so cntlzw yields at least 9.  Shifting
! left by (leading zeros - 8) moves the highest set bit to bit 8,
! the place of the implicit 1 just above the fraction field.
! insrwi then copies only bits 9..31, dropping that leading 1.
4: cntlzw r5, r6
addi r5, r5, -8
slw r6, r6, r5 ! shift left to make 1.fraction
insrwi r3, r6, 23, 9 ! set new fraction
! value = 1.fraction * 2**(-126 - shift)
!       = 0.1fraction * 2**(-126 + 1 - shift)
li r6, -126 + 1
subf r5, r5, r6 ! r5 = our exponent
b 1b

View file

@ -3,7 +3,7 @@
.sect .text .sect .text
! Split a double-precision float into fraction and exponent, like ! Split a double-precision float into fraction and exponent, like
! frexp(3) in C. ! frexp(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp
! !
! Stack: ( double -- fraction exponent ) ! Stack: ( double -- fraction exponent )
@ -12,42 +12,41 @@
lwz r3, 0(sp) ! r3 = high word (bits 0..31) lwz r3, 0(sp) ! r3 = high word (bits 0..31)
lwz r4, 4(sp) ! r4 = low word (bits 32..63) lwz r4, 4(sp) ! r4 = low word (bits 32..63)
! IEEE double-precision format: ! IEEE double = sign * 1.fraction * 2**(exponent - 1023)
! sign exponent fraction ! sign exponent fraction
! 0 1..11 12..63 ! 0 1..11 12..63
! !
! To get fraction in [0.5, 1) or (-1, -0.5], we subtract 1022 ! IEEE exponent = 1022 in [0.5, 1) or (-1, -0.5].
! from the IEEE exponent.
extrwi. r6, r3, 11, 1 ! r6 = IEEE exponent extrwi. r6, r3, 11, 1 ! r6 = IEEE exponent
addi r5, r6, -1022 ! r5 = our exponent beq 3f ! jump if zero or denormalized
beq 2f ! jump if zero or denormalized
cmpwi r6, 2047 cmpwi r6, 2047
beq 1f ! jump if infinity or NaN addi r5, r6, -1022 ! r5 = our exponent
beq 2f ! jump if infinity or NaN
! fall through if normalized ! fall through if normalized
! Put fraction in [0.5, 1) or (-1, -0.5] by setting its ! Put fraction in [0.5, 1) or (-1, -0.5].
! IEEE exponent to 1022. 1: li r6, 1022
rlwinm r3, r3, 0, 12, 0 ! clear old exponent insrwi r3, r6, 11, 1 ! IEEE exponent = 1022
oris r3, r3, 1022 << 4 ! set new exponent
! fall through ! fall through
1: stw r3, 0(sp) 2: stw r3, 0(sp)
stw r4, 4(sp) ! push fraction stw r4, 4(sp) ! push fraction
stwu r5, -4(sp) ! push exponent stwu r5, -4(sp) ! push exponent
blr blr
2: ! Got denormalized number or zero, probably zero. ! Got denormalized number or zero, probably zero.
extrwi r6, r3, 22, 12 ! If zero, then exponent must also be zero.
3: extrwi r6, r3, 20, 12
or. r6, r6, r4 ! r6 = high|low fraction or. r6, r6, r4 ! r6 = high|low fraction
bne 3f ! jump if not zero bne 4f ! jump if not zero
li r5, 0 ! exponent = 0 li r5, 0 ! exponent = 0
b 1b b 2b
3: ! Got denormalized number, not zero. ! Got denormalized number = 0.fraction * 2**-1022
lfd f0, 0(sp) 4: lfd f0, 0(sp)
lis r6, ha16[_2_64] lis r6, ha16[.fs_2_64]
lfd f1, lo16[_2_64](r6) lfs f1, lo16[.fs_2_64](r6)
fmul f0, f0, f1 ! multiply it by 2**64 fmul f0, f0, f1 ! multiply it by 2**64
stfd f0, 0(sp) stfd f0, 0(sp)
lwz r3, 0(sp) lwz r3, 0(sp)
@ -57,7 +56,6 @@
b 1b b 1b
.sect .rom .sect .rom
_2_64: .fs_2_64:
! (double) 2**64 !float 1.84467440737095516e+19 sz 4
.data4 0x43f00000 .data1 0137,0200,00,00
.data4 0x00000000

64
mach/powerpc/libem/fif4.s Normal file
View file

@ -0,0 +1,64 @@
.sect .text

! Multiplies two single-precision floats, then splits the product into
! fraction and integer, both as floats, like modff(3) in C,
! http://en.cppreference.com/w/c/numeric/math/modf
!
! Stack: ( a b -- fraction integer )
!
! The integer replaces b at 0(sp) and the fraction replaces a at
! 4(sp); sp itself does not move.
!
! Both results carry the sign of the product, also when a result is
! zero.  If the product is NaN, both results are that NaN.  If the
! product is infinite, integer = infinity and fraction = zero.
!
! Writes r3, r5, r6, f1, f2 and cr0.

.define .fif4
.fif4:
	lfs f1, 4(sp)
	lfs f2, 0(sp)
	fmuls f1, f1, f2		! f1 = a * b
	stfs f1, 0(sp)
	lwz r3, 0(sp)			! r3 = word of float bits

	! IEEE single = sign * 1.fraction * 2**(exponent - 127)
	!   sign  exponent  fraction
	!   0     1..8      9..31
	!
	! Subtract 127 from the IEEE exponent.  If the result is from
	! 0 to 23, then the IEEE fraction has that many integer bits.
	! At 23 or more, every bit of the IEEE fraction is an integer
	! bit, so the value has no fraction.

	extrwi r5, r3, 8, 1		! r5 = IEEE exponent
	addic. r5, r5, -127		! r5 = nr of integer bits
	blt 3f				! branch if no integer
	cmpwi r5, 23
	bge 4f				! branch if no fraction
	! fall through if integer with fraction bits

	! f1 has r5 = 0 to 22 integer bits in the IEEE fraction.
	! There are 23 - r5 fraction bits.
	li r6, 23
	subf r6, r5, r6			! r6 = nr of fraction bits, 1 to 23
	srw r3, r3, r6
	slw r3, r3, r6			! clear fraction in word, keep sign
	stw r3, 0(sp)			! push integer
	lfs f2, 0(sp)			! f2 = integer
	fsubs f1, f1, f2		! fraction = value - integer
	stfs f1, 4(sp)

	! If the value is a whole number like -2.0, then the
	! subtraction gave +0 (or -0 when rounding toward minus
	! infinity), whatever the sign of the value.  A nonzero
	! fraction already has the sign of the value.  So copying the
	! sign bit of the value is always right.
	lwz r5, 4(sp)			! r5 = word of fraction bits
	extrwi r6, r3, 1, 0		! r6 = sign bit of value
	insrwi r5, r6, 1, 0		! fraction gets sign of value
	stw r5, 4(sp)			! push fraction
	blr

	! f1 is a fraction without integer (or zero).
	! Then integer is zero with same sign.
3:	extlwi r3, r3, 1, 0		! extract sign bit
	stfs f1, 4(sp)			! push fraction
	stw r3, 0(sp)			! push integer = zero with sign
	blr

	! f1 is an integer without fraction (or infinity or NaN).
	! Unless NaN, then fraction is zero with same sign.
4:	fcmpu cr0, f1, f1		! unordered with itself only if NaN
	bun cr0, 5f
	extlwi r3, r3, 1, 0		! extract sign bit
	stw r3, 4(sp)			! push fraction = zero with sign
	stfs f1, 0(sp)			! push integer
	blr

	! f1 is NaN, so both fraction and integer are NaN.
5:	stfs f1, 4(sp)			! push fraction = NaN
	stfs f1, 0(sp)			! push integer = NaN
	blr

View file

@ -1,7 +1,8 @@
.sect .text .sect .text
! Multiplies two double-precision floats, then splits the product into ! Multiplies two double-precision floats, then splits the product into
! fraction and integer, like modf(3) in C. On entry: ! fraction and integer, both as floats, like modf(3) in C,
! http://en.cppreference.com/w/c/numeric/math/modf
! !
! Stack: ( a b -- fraction integer ) ! Stack: ( a b -- fraction integer )
@ -14,20 +15,18 @@
lwz r3, 0(sp) ! r3 = high word lwz r3, 0(sp) ! r3 = high word
lwz r4, 4(sp) ! r4 = low word lwz r4, 4(sp) ! r4 = low word
! IEEE double-precision format: ! IEEE double = sign * 1.fraction * 2**(exponent - 1023)
! sign exponent fraction ! sign exponent fraction
! 0 1..11 12..63 ! 0 1..11 12..63
! !
! Subtract 1023 from the IEEE exponent. If the result is from ! Subtract 1023 from the IEEE exponent. If the result is from
! 0 to 51, then the IEEE fraction has that many integer bits. ! 0 to 51, then the IEEE fraction has that many integer bits.
! (IEEE has an implicit 1 before its fraction. If the IEEE
! fraction has 0 integer bits, we still have an integer.)
extrwi r5, r3, 11, 1 ! r5 = IEEE exponent extrwi r5, r3, 11, 1 ! r5 = IEEE exponent
addic. r5, r5, -1023 ! r5 = nr of integer bits addic. r5, r5, -1023 ! r5 = nr of integer bits
blt 4f ! branch if no integer blt 3f ! branch if no integer
cmpwi r5, 52 cmpwi r5, 52
bge 5f ! branch if no fraction bge 4f ! branch if no fraction
cmpwi r5, 21 cmpwi r5, 21
bge 6f ! branch if large integer bge 6f ! branch if large integer
! fall through if small integer ! fall through if small integer
@ -44,22 +43,38 @@
1: stw r3, 0(sp) 1: stw r3, 0(sp)
stw r4, 4(sp) stw r4, 4(sp)
lfd f2, 0(sp) ! integer = high word, low word lfd f2, 0(sp) ! integer = high word, low word
2: fsub f1, f1, f2 ! fraction = value - integer fsub f1, f1, f2 ! fraction = value - integer
3: stfd f1, 8(sp) ! push fraction 2: stfd f1, 8(sp) ! push fraction
stfd f2, 0(sp) ! push integer stfd f2, 0(sp) ! push integer
blr blr
4: ! f1 is a fraction without integer. ! f1 is a fraction without integer (or zero).
fsub f2, f1, f1 ! integer = zero ! Then integer is zero with same sign.
b 3b 3: extlwi r3, r3, 1, 0 ! extract sign bit
li r4, 0
stfd f1, 8(sp) ! push fraction
stw r4, 4(sp)
stw r3, 0(sp) ! push integer = zero with sign
blr
5: ! f1 is an integer without fraction (or infinity or NaN). ! f1 is an integer without fraction (or infinity or NaN).
fmr f2, f1 ! integer = f1 ! Unless NaN, then fraction is zero with same sign.
4: fcmpu cr0, f1, f1 ! integer = f1
bun cr0, 5f
extlwi r3, r3, 1, 0 ! extract sign bit
li r4, 0
stw r4, 12(sp)
stw r3, 8(sp) ! push fraction = zero with sign
stfd f1, 0(sp) ! push integer
blr
! f1 is NaN, so both fraction and integer are NaN.
5: fmr f2, f1
b 2b b 2b
6: ! f1 has r5 = 21 to 51 to integer bits. ! f1 has r5 = 21 to 51 to integer bits.
! Low word has 52 - r5 fraction bits. ! Low word has 52 - r5 fraction bits.
li r6, 52 6: li r6, 52
subf r6, r5, r6 subf r6, r5, r6
srw r4, r4, r6 srw r4, r4, r6
slw r4, r4, r6 ! clear fraction in low word slw r4, r4, r6 ! clear fraction in low word

View file

@ -310,7 +310,7 @@ INSTRUCTIONS
fadds FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5). fadds FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
fcmpo CR:wo, FREG:ro, FREG:ro cost(4, 5). fcmpo CR:wo, FREG:ro, FREG:ro cost(4, 5).
fcmpo CR:wo, FSREG:ro, FSREG:ro cost(4, 5). fcmpo CR:wo, FSREG:ro, FSREG:ro cost(4, 5).
fctiwz FREG:wo, FREG:ro. fctiwz FREG:wo, FREG:ro cost(4, 5).
fdiv FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 35). fdiv FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 35).
fdivs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 21). fdivs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 21).
fmr FPR:wo, FPR:ro cost(4, 5). fmr FPR:wo, FPR:ro cost(4, 5).
@ -2329,10 +2329,20 @@ PATTERNS
with FSREG with FSREG
gen fneg {LOCAL, $2}, %1 gen fneg {LOCAL, $2}, %1
/* When a or b is NaN, then a < b, a <= b, a > b, a >= b
* should all be false. We can't make them false, because
* - EM's _cmf_ is only for ordered comparisons.
* - The peephole optimizer assumes (a < b) == !(a >= b).
*
* We do make a == b false and a != b true, by checking the
* eq (equal) bit or un (unordered) bit in cr0.
*/
pat cmf $1==4 /* Compare single */ pat cmf $1==4 /* Compare single */
with FSREG FSREG with FSREG FSREG
uses REG={COND_FS, %2, %1} uses REG={COND_FS, %2, %1}
gen extlwi %a, %a, {C, 2}, {C, 0} /* Extract lt, gt, un; put lt in sign bit. */
gen andisX %a, %a, {C, 0xd000}
yields %a yields %a
pat cmf teq $1==4 /* Single second == top */ pat cmf teq $1==4 /* Single second == top */
@ -2367,7 +2377,6 @@ PATTERNS
proc cmf4zxx example cmf zeq proc cmf4zxx example cmf zeq
with FSREG FSREG STACK with FSREG FSREG STACK
uses REG
gen gen
fcmpo cr0, %2, %1 fcmpo cr0, %2, %1
bxx* {LABEL, $2} bxx* {LABEL, $2}
@ -2420,6 +2429,13 @@ PATTERNS
loc 4 loc 4
cff cff
pat fef $1==4 /* Split fraction, exponent */
leaving cal ".fef4"
/* Multiply two singles, then split fraction, integer */
pat fif $1==4
leaving cal ".fif4"
/* Double-precision floating-point */ /* Double-precision floating-point */
@ -2471,10 +2487,13 @@ PATTERNS
with FREG with FREG
gen fneg {DLOCAL, $2}, %1 gen fneg {DLOCAL, $2}, %1
/* To compare NaN, see comment above pat cmf $1==4 */
pat cmf $1==8 /* Compare double */ pat cmf $1==8 /* Compare double */
with FREG FREG with FREG FREG
uses REG={COND_FD, %2, %1} uses REG={COND_FD, %2, %1}
gen extlwi %a, %a, {C, 2}, {C, 0} /* Extract lt, gt, un; put lt in sign bit. */
gen andisX %a, %a, {C, 0xd000}
yields %a yields %a
pat cmf teq $1==8 /* Double second == top */ pat cmf teq $1==8 /* Double second == top */
@ -2482,7 +2501,7 @@ PATTERNS
uses REG={COND_FD, %2, %1} uses REG={COND_FD, %2, %1}
yields {XEQ, %a} yields {XEQ, %a}
pat cmf tne $1==8 /* Single second == top */ pat cmf tne $1==8 /* Double second == top */
with FREG FREG with FREG FREG
uses REG={COND_FD, %2, %1} uses REG={COND_FD, %2, %1}
yields {XNE, %a} yields {XNE, %a}
@ -2509,7 +2528,6 @@ PATTERNS
proc cmf8zxx example cmf zeq proc cmf8zxx example cmf zeq
with FREG FREG STACK with FREG FREG STACK
uses REG
gen gen
fcmpo cr0, %2, %1 fcmpo cr0, %2, %1
bxx* {LABEL, $2} bxx* {LABEL, $2}