Add fef 4, fif 4. Improve fef 8, fif 8. Other float changes.

When I wrote fef 8, I forgot to test denormalized numbers.  Oops.  Now
fix two of my mistakes:

 - When checking for zero, `extrwi r6, r3, 22, 12` needs to be
   `extrwi r6, r3, 20, 12`.  There are only 20 bits to extract.

 - After the multiplication by 2**64, I forgot to put the fraction in
   [0.5, 1) or (-1, -0.5] by setting IEEE exponent = 1022.

Teach fif 8 about signed zero and NaN.

In ncg/table, change cmf so NaN is not equal to any value, and comment
why ordered comparisons don't work with NaN.  Also add cost for
fctiwz, remove extra `uses REG`.

Edit comment in cfu8.s because the conditional branch might be before
or after fctiwz.
This commit is contained in:
George Koehler 2018-01-22 14:04:15 -05:00
parent f1304e1a3c
commit 66f93f08c5
7 changed files with 194 additions and 49 deletions

View file

@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do
acklibrary {
name = "lib_"..plat,
srcs = {
"./*.s", -- cfu8.s
"./*.s", -- fif4.s
},
vars = { plat = plat },
deps = {

View file

@ -42,6 +42,8 @@
! 1: yields r3 = the converted value.
!
! Debian's clang 3.5.0-10 and gcc 4.9.2-10 don't clamp the value
! before conversion. They avoid fsel and put the conditional branch
! before fctwiz. PowerPC 601 lacks fsel (but kernel might trap and
! emulate fsel). PowerPC 603, 604, G3, G4, G5 have fsel.
! before conversion. They avoid fsel and use the conditional branch
! to pick between 2 fctiwz instructions.
!
! PowerPC 601 lacks fsel (but kernel might trap and emulate fsel).
! PowerPC 603, 604, G3, G4, G5 have fsel.

48
mach/powerpc/libem/fef4.s Normal file
View file

@ -0,0 +1,48 @@
.sect .text
! Split a single-precision float into fraction and exponent, like
! frexpf(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp
!
! Stack: ( single -- fraction exponent )
!
! The float at 0(sp) is replaced in place by the fraction, then the
! exponent is pushed as one word on top of it.
! Registers written: r3, r5, r6, cr0.
.define .fef4
.fef4:
lwz r3, 0(sp) ! r3 = word of float bits
! IEEE single = sign * 1.fraction * 2**(exponent - 127)
! sign exponent fraction
! 0 1..8 9..31
!
! IEEE exponent = 126 in [0.5, 1) or (-1, -0.5].
extrwi. r6, r3, 8, 1 ! r6 = IEEE exponent; record form sets cr0
beq 3f ! jump if zero or denormalized
cmpwi r6, 255
addi r5, r6, -126 ! r5 = our exponent; addi leaves cr0 from cmpwi intact
beq 2f ! jump if infinity or NaN
! (infinity or NaN is pushed back unchanged, with exponent
! 255 - 126 = 129)
! fall through if normalized
! Put fraction in [0.5, 1) or (-1, -0.5].
! Sign and fraction bits of r3 are kept; only the exponent
! field is overwritten. The denormalized case at 4 jumps back
! here with r3 and r5 already prepared.
1: li r6, 126
insrwi r3, r6, 8, 1 ! IEEE exponent = 126
! fall through
2: stw r3, 0(sp) ! push fraction
stwu r5, -4(sp) ! push exponent
blr
! Got denormalized number or zero, probably zero.
! If zero, then exponent must also be zero.
3: extrwi. r6, r3, 23, 9 ! r6 = fraction
bne 4f ! jump if not zero
li r5, 0 ! exponent = 0
b 2b ! push the zero unchanged, keeping its sign bit
! Got denormalized number = 0.fraction * 2**-126
! Normalize: shift the fraction left until its leading 1 sits
! in bit 8, where the implicit 1 of a normalized float belongs.
4: cntlzw r5, r6 ! r5 = leading zero bits in r6, at least 9
addi r5, r5, -8 ! r5 = shift count, at least 1
slw r6, r6, r5 ! shift left to make 1.fraction
insrwi r3, r6, 23, 9 ! set new fraction
! (insrwi copies only the low 23 bits, dropping the leading 1)
li r6, -126 + 1
subf r5, r5, r6 ! r5 = our exponent
! = (-126 + 1) - shift count
b 1b

View file

@ -3,7 +3,7 @@
.sect .text
! Split a double-precision float into fraction and exponent, like
! frexp(3) in C.
! frexp(3) in C, http://en.cppreference.com/w/c/numeric/math/frexp
!
! Stack: ( double -- fraction exponent )
@ -12,42 +12,41 @@
lwz r3, 0(sp) ! r3 = high word (bits 0..31)
lwz r4, 4(sp) ! r4 = low word (bits 32..63)
! IEEE double-precision format:
! IEEE double = sign * 1.fraction * 2**(exponent - 1023)
! sign exponent fraction
! 0 1..11 12..63
!
! To get fraction in [0.5, 1) or (-1, -0.5], we subtract 1022
! from the IEEE exponent.
! IEEE exponent = 1022 in [0.5, 1) or (-1, -0.5].
extrwi. r6, r3, 11, 1 ! r6 = IEEE exponent
addi r5, r6, -1022 ! r5 = our exponent
beq 2f ! jump if zero or denormalized
beq 3f ! jump if zero or denormalized
cmpwi r6, 2047
beq 1f ! jump if infinity or NaN
addi r5, r6, -1022 ! r5 = our exponent
beq 2f ! jump if infinity or NaN
! fall through if normalized
! Put fraction in [0.5, 1) or (-1, -0.5] by setting its
! IEEE exponent to 1022.
rlwinm r3, r3, 0, 12, 0 ! clear old exponent
oris r3, r3, 1022 << 4 ! set new exponent
! Put fraction in [0.5, 1) or (-1, -0.5].
1: li r6, 1022
insrwi r3, r6, 11, 1 ! IEEE exponent = 1022
! fall through
1: stw r3, 0(sp)
2: stw r3, 0(sp)
stw r4, 4(sp) ! push fraction
stwu r5, -4(sp) ! push exponent
blr
2: ! Got denormalized number or zero, probably zero.
extrwi r6, r3, 22, 12
! Got denormalized number or zero, probably zero.
! If zero, then exponent must also be zero.
3: extrwi r6, r3, 20, 12
or. r6, r6, r4 ! r6 = high|low fraction
bne 3f ! jump if not zero
bne 4f ! jump if not zero
li r5, 0 ! exponent = 0
b 1b
b 2b
3: ! Got denormalized number, not zero.
lfd f0, 0(sp)
lis r6, ha16[_2_64]
lfd f1, lo16[_2_64](r6)
! Got denormalized number = 0.fraction * 2**-1022
4: lfd f0, 0(sp)
lis r6, ha16[.fs_2_64]
lfs f1, lo16[.fs_2_64](r6)
fmul f0, f0, f1 ! multiply it by 2**64
stfd f0, 0(sp)
lwz r3, 0(sp)
@ -57,7 +56,6 @@
b 1b
.sect .rom
_2_64:
! (double) 2**64
.data4 0x43f00000
.data4 0x00000000
.fs_2_64:
!float 1.84467440737095516e+19 sz 4
.data1 0137,0200,00,00

64
mach/powerpc/libem/fif4.s Normal file
View file

@ -0,0 +1,64 @@
.sect .text
! Multiplies two single-precision floats, then splits the product into
! fraction and integer, both as floats, like modff(3) in C,
! http://en.cppreference.com/w/c/numeric/math/modf
!
! Stack: ( a b -- fraction integer )
!
! The two operands at 4(sp) and 0(sp) are overwritten in place:
! the fraction goes to 4(sp), the integer to 0(sp). sp itself is
! not moved. Registers written: f1, f2, r3, r5, r6, cr0.
.define .fif4
.fif4:
lfs f1, 4(sp)
lfs f2, 0(sp)
fmuls f1, f1, f2 ! f1 = a * b
stfs f1, 0(sp) ! spill product so its bits can be read as a word
lwz r3, 0(sp) ! r3 = word of float bits
! IEEE single = sign * 1.fraction * 2**(exponent - 127)
! sign exponent fraction
! 0 1..8 9..31
!
! Subtract 127 from the IEEE exponent. If the result is from
! 0 to 23, then the IEEE fraction has that many integer bits.
extrwi r5, r3, 8, 1 ! r5 = IEEE exponent
addic. r5, r5, -127 ! r5 = nr of integer bits; record form sets cr0
blt 3f ! branch if no integer
cmpwi r5, 24
bge 4f ! branch if no fraction
! NOTE(review): r5 = 23 also falls through here. Then the
! shift count below is 0, r3 is unchanged, and the fraction is
! computed by fsubs as f1 - f1 instead of taking the signed
! zero built at 4 -- confirm this boundary is intended.
! fall through if integer with fraction
! f1 has r5 = 0 to 23 integer bits in the IEEE fraction.
! There are 23 - r5 fraction bits.
li r6, 23
subf r6, r5, r6 ! r6 = 23 - r5 = nr of fraction bits
srw r3, r3, r6 ! shift the fraction bits out ...
slw r3, r3, r6 ! clear fraction in word
! fall through
1: stw r3, 0(sp)
lfs f2, 0(sp) ! f2 = integer = product with fraction bits cleared
fsubs f1, f1, f2 ! fraction = value - integer
2: stfs f1, 4(sp) ! push fraction
stfs f2, 0(sp) ! push integer
blr
! f1 is a fraction without integer (or zero).
! Then integer is zero with same sign.
! (A zero or denormalized product has IEEE exponent 0, so r5 is
! negative and it also lands here.)
3: extlwi r3, r3, 1, 0 ! extract sign bit
! r3 = sign bit only = bits of +0.0 or -0.0
stfs f1, 4(sp) ! push fraction
stw r3, 0(sp) ! push integer = zero with sign
blr
! f1 is an integer without fraction (or infinity or NaN).
! Unless NaN, then fraction is zero with same sign.
4: fcmpu cr0, f1, f1 ! unordered only if f1 is NaN
bun cr0, 5f
extlwi r3, r3, 1, 0 ! extract sign bit
stw r3, 4(sp) ! push fraction = zero with sign
stfs f1, 0(sp) ! push integer
blr
! f1 is NaN, so both fraction and integer are NaN.
5: fmr f2, f1
b 2b ! store f1 as fraction and f2 as integer

View file

@ -1,7 +1,8 @@
.sect .text
! Multiplies two double-precision floats, then splits the product into
! fraction and integer, like modf(3) in C. On entry:
! fraction and integer, both as floats, like modf(3) in C,
! http://en.cppreference.com/w/c/numeric/math/modf
!
! Stack: ( a b -- fraction integer )
@ -14,20 +15,18 @@
lwz r3, 0(sp) ! r3 = high word
lwz r4, 4(sp) ! r4 = low word
! IEEE double-precision format:
! IEEE double = sign * 1.fraction * 2**(exponent - 1023)
! sign exponent fraction
! 0 1..11 12..63
!
! Subtract 1023 from the IEEE exponent. If the result is from
! 0 to 51, then the IEEE fraction has that many integer bits.
! (IEEE has an implicit 1 before its fraction. If the IEEE
! fraction has 0 integer bits, we still have an integer.)
extrwi r5, r3, 11, 1 ! r5 = IEEE exponent
addic. r5, r5, -1023 ! r5 = nr of integer bits
blt 4f ! branch if no integer
blt 3f ! branch if no integer
cmpwi r5, 52
bge 5f ! branch if no fraction
bge 4f ! branch if no fraction
cmpwi r5, 21
bge 6f ! branch if large integer
! fall through if small integer
@ -44,22 +43,38 @@
1: stw r3, 0(sp)
stw r4, 4(sp)
lfd f2, 0(sp) ! integer = high word, low word
2: fsub f1, f1, f2 ! fraction = value - integer
3: stfd f1, 8(sp) ! push fraction
fsub f1, f1, f2 ! fraction = value - integer
2: stfd f1, 8(sp) ! push fraction
stfd f2, 0(sp) ! push integer
blr
4: ! f1 is a fraction without integer.
fsub f2, f1, f1 ! integer = zero
b 3b
! f1 is a fraction without integer (or zero).
! Then integer is zero with same sign.
3: extlwi r3, r3, 1, 0 ! extract sign bit
li r4, 0
stfd f1, 8(sp) ! push fraction
stw r4, 4(sp)
stw r3, 0(sp) ! push integer = zero with sign
blr
5: ! f1 is an integer without fraction (or infinity or NaN).
fmr f2, f1 ! integer = f1
! f1 is an integer without fraction (or infinity or NaN).
! Unless NaN, then fraction is zero with same sign.
4: fcmpu cr0, f1, f1 ! integer = f1
bun cr0, 5f
extlwi r3, r3, 1, 0 ! extract sign bit
li r4, 0
stw r4, 12(sp)
stw r3, 8(sp) ! push fraction = zero with sign
stfd f1, 0(sp) ! push integer
blr
! f1 is NaN, so both fraction and integer are NaN.
5: fmr f2, f1
b 2b
6: ! f1 has r5 = 21 to 51 to integer bits.
! f1 has r5 = 21 to 51 integer bits.
! Low word has 52 - r5 fraction bits.
li r6, 52
6: li r6, 52
subf r6, r5, r6
srw r4, r4, r6
slw r4, r4, r6 ! clear fraction in low word

View file

@ -310,7 +310,7 @@ INSTRUCTIONS
fadds FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
fcmpo CR:wo, FREG:ro, FREG:ro cost(4, 5).
fcmpo CR:wo, FSREG:ro, FSREG:ro cost(4, 5).
fctiwz FREG:wo, FREG:ro.
fctiwz FREG:wo, FREG:ro cost(4, 5).
fdiv FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 35).
fdivs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 21).
fmr FPR:wo, FPR:ro cost(4, 5).
@ -2329,10 +2329,20 @@ PATTERNS
with FSREG
gen fneg {LOCAL, $2}, %1
/* When a or b is NaN, then a < b, a <= b, a > b, a >= b
* should all be false. We can't make them false, because
* - EM's _cmf_ is only for ordered comparisons.
* - The peephole optimizer assumes (a < b) == !(a >= b).
*
* We do make a == b false and a != b true, by checking the
* eq (equal) bit or un (unordered) bit in cr0.
*/
pat cmf $1==4 /* Compare single */
with FSREG FSREG
uses REG={COND_FS, %2, %1}
gen extlwi %a, %a, {C, 2}, {C, 0}
/* Extract lt, gt, un; put lt in sign bit. */
gen andisX %a, %a, {C, 0xd000}
yields %a
pat cmf teq $1==4 /* Single second == top */
@ -2367,7 +2377,6 @@ PATTERNS
proc cmf4zxx example cmf zeq
with FSREG FSREG STACK
uses REG
gen
fcmpo cr0, %2, %1
bxx* {LABEL, $2}
@ -2420,6 +2429,13 @@ PATTERNS
loc 4
cff
pat fef $1==4 /* Split fraction, exponent */
leaving cal ".fef4"
/* Multiply two singles, then split fraction, integer */
pat fif $1==4
leaving cal ".fif4"
/* Double-precision floating-point */
@ -2471,10 +2487,13 @@ PATTERNS
with FREG
gen fneg {DLOCAL, $2}, %1
/* To compare NaN, see comment above pat cmf $1==4 */
pat cmf $1==8 /* Compare double */
with FREG FREG
uses REG={COND_FD, %2, %1}
gen extlwi %a, %a, {C, 2}, {C, 0}
/* Extract lt, gt, un; put lt in sign bit. */
gen andisX %a, %a, {C, 0xd000}
yields %a
pat cmf teq $1==8 /* Double second == top */
@ -2482,7 +2501,7 @@ PATTERNS
uses REG={COND_FD, %2, %1}
yields {XEQ, %a}
pat cmf tne $1==8 /* Single second == top */
pat cmf tne $1==8 /* Double second == top */
with FREG FREG
uses REG={COND_FD, %2, %1}
yields {XNE, %a}
@ -2509,7 +2528,6 @@ PATTERNS
proc cmf8zxx example cmf zeq
with FREG FREG STACK
uses REG
gen
fcmpo cr0, %2, %1
bxx* {LABEL, $2}