Rewrite fef 8 in powerpc assembly.
In EM, fef splits a float into exponent and fraction. The old C code, given an infinite float, got stuck in an infinite loop. The new assembly code doesn't loop; it extracts the IEEE exponent.
This commit is contained in:
parent
a71eee3914
commit
6ae415d48b
4 changed files with 62 additions and 53 deletions
|
@ -3,7 +3,6 @@ for _, plat in ipairs(vars.plats) do
|
|||
name = "lib_"..plat,
|
||||
srcs = {
|
||||
"./*.s",
|
||||
"./*.c"
|
||||
},
|
||||
vars = { plat = plat },
|
||||
deps = {
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
/*
|
||||
* $Source$
|
||||
* $State$
|
||||
* $Revision$
|
||||
*/
|
||||
|
||||
/* no headers allowed! */
|
||||
|
||||
/* Given a double, calculates the mantissa and exponent.
 *
 * On return, *fp holds the fraction (magnitude in [0.5, 1), sign preserved)
 * and the return value is the power of two such that
 *     original value == fraction * 2**exponent.
 *
 * Special cases, all of which leave *fp untouched and return 0:
 *   - zero;
 *   - infinity (it cannot be normalised; halving it never makes progress);
 *   - NaN (every comparison below is false for it).
 *
 * This function is intended to be called internally by the code generator,
 * so the calling convention is odd.
 */

int __fef8(double* fp)
{
	double f = *fp;
	int exponent = 0;
	int negative = 0;

	if (f == 0.0)
		return 0;

	/* Work on the magnitude; the sign is restored at the end. */
	if (f < 0.0)
	{
		negative = 1;
		f = -f;
	}

	/* Infinity is the only nonzero value that is unchanged by halving.
	 * Without this guard the scaling loop below would never terminate.
	 * (No headers are allowed here, so isinf() is not available.) */
	if (f / 2.0 == f)
		return 0;

	/* Scale down until the magnitude is below 1. */
	while (f >= 1.0)
	{
		f /= 2.0;
		exponent++;
	}

	/* Scale up until the magnitude is at least 0.5. */
	while (f < 0.5)
	{
		f *= 2.0;
		exponent--;
	}

	*fp = negative ? -f : f;
	return exponent;
}
|
58
mach/powerpc/libem/fef8.s
Normal file
58
mach/powerpc/libem/fef8.s
Normal file
|
@ -0,0 +1,58 @@
|
|||
#include "powerpc.h"
|
||||
|
||||
.sect .text
|
||||
|
||||
! .fef8: break a double-precision float into a fraction and an
! exponent, in the manner of frexp(3) in C.
!
! Input:
!   r3 = high word of the float (bits 0..31)
!   r4 = low word of the float (bits 32..63)
! Output:
!   r3 = high word of the fraction (bits 0..31)
!   r4 = low word of the fraction (bits 32..63)
!   r5 = exponent
! Clobbers: cr0 f0 f1 r6 r7

.define .fef8
.fef8:
	! Layout of an IEEE double:
	!   bit 0 = sign, bits 1..11 = exponent, bits 12..63 = fraction
	addis r7, r0, 0x7ff0		! r7 = mask covering the exponent field
	rlwinm r6, r3, 12, 21, 31	! r6 = exponent field (IEEE, biased)
	addi r5, r6, -1022		! r5 = true exponent
	cmpi cr0, 0, r6, 0x7ff
	bclr IFTRUE, EQ, 0		! all-ones exponent: infinity or NaN,
					! return with r3:r4 untouched
	cmpi cr0, 0, r6, 0
	bc IFFALSE, EQ, 2f		! nonzero exponent field: normalized

	! Exponent field is zero, so this is either zero itself or a
	! denormalized number.  Zero is the likely case.
	addi r5, r0, 0			! r5 = true exponent = 0
	rlwinm r6, r3, 0, 12, 31	! r6 = fraction bits of the high word
	or. r6, r6, r4			! EQ iff every fraction bit is clear
	bclr IFTRUE, EQ, 0		! it was zero: return it, exponent 0

	! Nonzero denormalized number.  Bounce it through the stack into
	! the FPU, scale it by 2**64 so that it becomes normalized, and
	! take the 64 back off the exponent afterwards.
	li32 r6, _2_64
	stwu r4, -4(sp)			! low word goes at the higher address
	stwu r3, -4(sp)			! high word goes at 0(sp)
	lfd f1, 0(r6)			! f1 = 2**64
	lfd f0, 0(sp)			! f0 = the denormalized input
	fmul f0, f0, f1
	stfd f0, 0(sp)
	lwz r4, 4(sp)
	lwz r3, 0(sp)
	addi sp, sp, 8			! drop the 8-byte scratch slot
	rlwinm r6, r3, 12, 21, 31	! r6 = exponent field after scaling
	addi r5, r6, -1086		! r5 = true exponent (-1022 - 64)
2:
	! Force the exponent field to IEEE 1022 (true exponent 0), which
	! places the fraction in [0.5, 1) or (-1, -0.5].
	andc r3, r3, r7			! wipe the old exponent field
	oris r3, r3, 0x3fe0		! insert 1022 << 4 into the high half
	bclr ALWAYS, 0, 0

.sect .rom
_2_64:
	! (double) 2**64, high word first
	.data4 0x43f00000
	.data4 0x00000000
|
|
@ -2180,13 +2180,11 @@ PATTERNS
|
|||
bl {LABEL, ".cuf8"}
|
||||
|
||||
/* Split double: delegate to the .fef8 assembly helper.
 * Per that helper's header, the double arrives in r3 (high word) and
 * r4 (low word); the fraction comes back in r3:r4 and the exponent in
 * r5, and the helper kills f0, f1, r6 and r7 (plus cr0). */
pat fef $1==INT64
	with GPR3 GPR4
	kills FPR0, FPR1, GPR6, GPR7
	gen
		bl {LABEL, ".fef8"}
	yields R4 R3 R5
|
||||
|
||||
pat fif $1==INT64 /* Multiply and split double (?) */
|
||||
with STACK
|
||||
|
|
Loading…
Reference in a new issue