Rewrite fef 8 in powerpc assembly.

In EM, fef splits a float into exponent and fraction. The old C code, given an infinite float, got stuck in an infinite loop. The new assembly code doesn't loop; it extracts the IEEE exponent.
2016-09-29 15:52:54 -04:00 · 2016-09-29 15:52:54 -04:00 · 6ae415d48b
commit 6ae415d48b
parent a71eee3914
4 changed files with 62 additions and 53 deletions
--- a/mach/powerpc/libem/build.lua
+++ b/mach/powerpc/libem/build.lua
@@ -3,7 +3,6 @@ for _, plat in ipairs(vars.plats) do
 		name = "lib_"..plat,
 		srcs = {
 			"./*.s",
-			"./*.c"
 		},
 		vars = { plat = plat },
 		deps = {
--- a/mach/powerpc/libem/fef8.c
+++ b/mach/powerpc/libem/fef8.c
@@ -1,46 +0,0 @@
-/*
- * $Source$
- * $State$
- * $Revision$
- */
-
-/* no headers allowed! */
-
-/* Given a double, calculates the mantissa and exponent.
- * 
- * This function is intended to be called internally by the code generator,
- * so the calling convention is odd.
- */
-
-int __fef8(double* fp)
-{
-	double f = *fp;
-	int exponent, sign;
-
-	if (f == 0.0)
-		return 0;
-		
-	if (f < 0.0)
-	{
-		sign = -1;
-		f = -f;
-	}
-	else
-		sign = 0;
-	
-	exponent = 0;
-	while (f >= 1.0)
-	{
-		f /= 2.0;
-		exponent++;
-	}
-	
-	while (f < 0.5)
-	{
-		f *= 2.0;
-		exponent--;
-	}
-	
-	*fp = (sign) ? -f : f;
-	return exponent;
-}
--- a/mach/powerpc/libem/fef8.s
+++ b/mach/powerpc/libem/fef8.s
@@ -0,0 +1,58 @@
+#include "powerpc.h"
+
+.sect .text
+
+! Split a double-precision float into fraction and exponent, like
+! frexp(3) in C.  On entry:
+!  r3 = float, high word (bits 0..31)
+!  r4 = float, low word (bits 32..63)
+! Yields:
+!  r3 = fraction, high word (bits 0..31)
+!  r4 = fraction, low word (bits 32..63)
+!  r5 = exponent
+! Kills: cr0 f0 f1 r6 r7
+
+.define .fef8
+.fef8:
+	! IEEE double-precision format, bit 0 = most significant bit of r3:
+	!   sign  exponent  fraction
+	!   0     1..11     12..63
+	rlwinm r6, r3, 12, 21, 31	! r6 = biased IEEE exponent, bits 1..11 of r3
+	addis r7, r0, 0x7ff0		! r7 = 0x7ff00000, exponent mask for high word
+	addi r5, r6, -1022		! r5 = true exponent for a fraction in [0.5, 1)
+	cmpi cr0, 0, r6, 2047		! biased exponent all ones?
+	bclr IFTRUE, EQ, 0		! return if infinity or NaN: r3, r4 unchanged, r5 = 1025
+	cmpi cr0, 0, r6, 0		! biased exponent zero?
+	bc IFFALSE, EQ, 1f		! jump if normalized number
+
+	! Got denormalized number or zero, probably zero.
+	rlwinm r6, r3, 0, 12, 31	! r6 = high fraction bits, sign and exponent dropped
+	addi r5, r0, 0			! r5 = true exponent = 0
+	or. r6, r6, r4			! r6 = high|low fraction, result recorded in cr0
+	bclr IFTRUE, EQ, 0		! return if zero: r3, r4 unchanged (sign kept), r5 = 0
+
+	! Got denormalized number, not zero.  Scale it by 2**64 so it becomes normalized.
+	stwu r4, -4(sp)			! push low word
+	stwu r3, -4(sp)			! push high word; the double is now at 0(sp)
+	li32 r6, _2_64			! r6 = address of _2_64
+	lfd f0, 0(sp)			! f0 = the denormalized number
+	lfd f1, 0(r6)			! f1 = 2**64
+	fmul f0, f0, f1			! multiply it by 2**64
+	stfd f0, 0(sp)			! store the product over the pushed words
+	lwz r3, 0(sp)			! r3 = product, high word
+	lwz r4, 4(sp)			! r4 = product, low word
+	rlwinm r6, r3, 12, 21, 31	! r6 = IEEE exponent of the product
+	addi sp, sp, 8			! pop the 8 bytes pushed above
+	addi r5, r6, -1022 - 64		! r5 = true exponent, undoing the 2**64 scaling
+1:
+	! Put fraction in [0.5, 1) or (-1, -0.5] by setting its
+	! exponent to true 0, IEEE 1022.  Sign and fraction bits are kept.
+	andc r3, r3, r7			! clear old exponent
+	oris r3, r3, 1022 << 4		! set new exponent: or 0x3fe0 into the upper halfword
+	bclr ALWAYS, 0, 0		! return
+
+.sect .rom
+_2_64:
+	! (double) 2**64, used by .fef8 to scale denormalized numbers
+	.data4 0x43f00000		! high word: biased exponent 0x43f = 1023 + 64
+	.data4 0x00000000		! low word: fraction bits all zero
--- a/mach/powerpc/ncg/table
+++ b/mach/powerpc/ncg/table
@@ -2180,13 +2180,11 @@ PATTERNS
 				bl {LABEL, ".cuf8"}
 				
 	pat fef $1==INT64                  /* Split double */
-		with FREG
+		with GPR3 GPR4
+			kills FPR0, FPR1, GPR6, GPR7
 			gen
-				addi SP, SP, {CONST, 0-8}
-				stfd %1, {GPRINDIRECT, SP, 0}
-				stwu SP, {GPRINDIRECT, SP, 0-4}
-				bl {LABEL, "___fef8"}
-				stw R3, {GPRINDIRECT, SP, 0}
+				bl {LABEL, ".fef8"}
+			yields R4 R3 R5
 				
 	pat fif $1==INT64                  /* Multiply and split double (?) */
 		with STACK