some modifications to improve speed (sorry, I mean, make a bit less slow)
This commit is contained in:
parent
56a070ad99
commit
2a9fe2a774
7 changed files with 76 additions and 69 deletions
|
@ -34,10 +34,10 @@ g/_sub_ext/s//.sub_ext/g
|
|||
g/_zrf_ext/s//.zrf_ext/g
|
||||
g/_compact/s//.compact/g
|
||||
g/_extend/s//.extend/g
|
||||
g/_load4/s//.load4/g
|
||||
g/_store4/s//.store4/g
|
||||
g/_b32_add/s//.b32_add/g
|
||||
g/_b64_add/s//.b64_add/g
|
||||
g/_b64_sft/s//.b64_sft/g
|
||||
g/_b64_rsft/s//.b64_rsft/g
|
||||
g/_b64_lsft/s//.b64_lsft/g
|
||||
w
|
||||
q
|
||||
|
|
|
@ -15,7 +15,7 @@ add_ext(e1,e2)
|
|||
register EXTEND *e1,*e2;
|
||||
{
|
||||
if (b64_add(&e1->m1,&e2->m1)) { /* addition carry */
|
||||
b64_sft(&e1->m1,1); /* shift mantissa one bit RIGHT */
|
||||
b64_rsft(&e1->m1); /* shift mantissa one bit RIGHT */
|
||||
e1->m1 |= 0x80000000L; /* set max bit */
|
||||
e1->exp++; /* increase the exponent */
|
||||
}
|
||||
|
|
|
@ -55,34 +55,8 @@ register B64 *e1,*e2;
|
|||
b32_add(e1,e2)
|
||||
register unsigned long *e1,*e2;
|
||||
{
|
||||
register int carry;
|
||||
|
||||
if (*e1 & *e2 & MAXBIT) /* both max_bits are set */
|
||||
carry = TRUE; /* so there is a carry */
|
||||
else
|
||||
carry = ((*e1 | *e2) & MAXBIT)
|
||||
/* only one is set - might be a carry */
|
||||
? UNKNOWN
|
||||
/* both are clear - no carry */
|
||||
: FALSE;
|
||||
# ifdef EXT_DEBUG
|
||||
fflush(stdout);
|
||||
printf("\t\t\t\t\tb32_add: overflow before add(%d) test(%d)\n",
|
||||
carry,(*e1&MAXBIT)?FALSE:TRUE);
|
||||
printf("%08X\n%08X\n",*e1,*e2);
|
||||
# endif
|
||||
int carry = ((unsigned long) 0xFFFFFFFF - *e1 < *e2);
|
||||
|
||||
*e1 += *e2;
|
||||
# ifdef EXT_DEBUG
|
||||
printf("%08X\n",*e1);
|
||||
fflush(stdout);
|
||||
# endif
|
||||
if (carry != UNKNOWN)
|
||||
return(carry);
|
||||
else
|
||||
/*
|
||||
* if maxbit in answer is set there is no carry
|
||||
* return the NAND of this bit
|
||||
*/
|
||||
return((*e1&MAXBIT)?FALSE:TRUE);
|
||||
return carry;
|
||||
}
|
||||
|
|
|
@ -40,8 +40,8 @@ EXTEND *e1,*e2;
|
|||
* that m1 is guaranteed to be larger if its
|
||||
* maximum bit is set
|
||||
*/
|
||||
b64_sft(&e1->m1,1); /* 64 bit shift right */
|
||||
b64_sft(&e2->m1,1); /* 64 bit shift right */
|
||||
b64_rsft(&e1->m1); /* 64 bit shift right */
|
||||
b64_rsft(&e2->m1); /* 64 bit shift right */
|
||||
e1->exp++;
|
||||
e2->exp++;
|
||||
/* check for underflow, divide by zero, etc */
|
||||
|
@ -92,7 +92,7 @@ EXTEND *e1,*e2;
|
|||
/* first left shift result 1 bit */
|
||||
/* this is ALWAYS done */
|
||||
|
||||
b64_sft(result,-1);
|
||||
b64_lsft(result);
|
||||
|
||||
/* compare dividend and divisor */
|
||||
/* if dividend >= divisor add a bit */
|
||||
|
@ -124,7 +124,7 @@ EXTEND *e1,*e2;
|
|||
error = ((*lp | *(lp+1)) != 0L) ? 1 : 0;
|
||||
if (error) { /* more work */
|
||||
/* assume max bit == 0 (see above) */
|
||||
b64_sft(&e1->m1,-1);
|
||||
b64_lsft(&e1->m1);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
|
|
|
@ -12,29 +12,33 @@
|
|||
#include "FP_types.h"
|
||||
#include "FP_shift.h"
|
||||
|
||||
_float mlf4();
|
||||
_float sbf4();
|
||||
|
||||
fif4(x,y)
|
||||
_float x,y;
|
||||
{
|
||||
EXTEND e;
|
||||
|
||||
y = mlf4(x,y);
|
||||
extend((_double *)&y,&e,sizeof(SINGLE));
|
||||
e.exp--; /* additional bias correction */
|
||||
if (e.exp < 1) {
|
||||
EXTEND e1,e2;
|
||||
|
||||
extend((_double *)&y,&e1,sizeof(_float));
|
||||
extend((_double *)&x,&e2,sizeof(_float));
|
||||
/* do a multiply */
|
||||
mul_ext(&e1,&e2);
|
||||
e2 = e1;
|
||||
compact(&e2, (_double *)&y, sizeof(_float));
|
||||
e1.exp--; /* additional bias correction */
|
||||
if (e1.exp < 1) {
|
||||
x = 0;
|
||||
return;
|
||||
}
|
||||
if (e.exp > 31 - SGL_M1LEFT) {
|
||||
if (e1.exp > 31 - SGL_M1LEFT) {
|
||||
x = y;
|
||||
y = 0;
|
||||
return;
|
||||
}
|
||||
b64_sft(&e.m1, 64 - e.exp);
|
||||
b64_sft(&e.m1, e.exp - 64); /* "loose" low order bits */
|
||||
e.exp++;
|
||||
compact(&e,(_double *) &x, sizeof(SINGLE));
|
||||
b64_sft(&e1.m1, 64 - e1.exp);
|
||||
b64_sft(&e1.m1, e1.exp - 64); /* "loose" low order bits */
|
||||
e1.exp++;
|
||||
compact(&e1,(_double *) &x, sizeof(SINGLE));
|
||||
y = sbf4(x, y);
|
||||
}
|
||||
|
|
|
@ -12,32 +12,36 @@
|
|||
#include "FP_types.h"
|
||||
#include "FP_shift.h"
|
||||
|
||||
_double mlf8();
|
||||
_double sbf8();
|
||||
|
||||
fif8(x,y)
|
||||
_double x,y;
|
||||
{
|
||||
EXTEND e;
|
||||
|
||||
y = mlf8(x,y);
|
||||
extend((_double *)&y,&e,sizeof(DOUBLE));
|
||||
e.exp--; /* additional bias correction */
|
||||
if (e.exp < 1) {
|
||||
EXTEND e1,e2;
|
||||
|
||||
extend(&y,&e1,sizeof(_double));
|
||||
extend(&x,&e2,sizeof(_double));
|
||||
/* do a multiply */
|
||||
mul_ext(&e1,&e2);
|
||||
e2 = e1;
|
||||
compact(&e2, &y, sizeof(_double));
|
||||
e1.exp--; /* additional bias correction */
|
||||
if (e1.exp < 1) {
|
||||
x.__double[0] = 0;
|
||||
x.__double[1] = 0;
|
||||
return;
|
||||
}
|
||||
if (e.exp > 63 - DBL_M1LEFT) {
|
||||
if (e1.exp > 63 - DBL_M1LEFT) {
|
||||
x.__double[0] = y.__double[0];
|
||||
x.__double[1] = y.__double[1];
|
||||
y.__double[0] = 0;
|
||||
y.__double[1] = 0;
|
||||
return;
|
||||
}
|
||||
b64_sft(&e.m1, 64 - e.exp);
|
||||
b64_sft(&e.m1, e.exp - 64); /* "loose" low order bits */
|
||||
e.exp++;
|
||||
compact(&e, &x, sizeof(DOUBLE));
|
||||
b64_sft(&e1.m1, 64 - e1.exp);
|
||||
b64_sft(&e1.m1, e1.exp - 64); /* "loose" low order bits */
|
||||
e1.exp++;
|
||||
compact(&e1, &x, sizeof(DOUBLE));
|
||||
y = sbf8(x, y);
|
||||
}
|
||||
|
|
|
@ -11,17 +11,42 @@ b64_sft(e1,n)
|
|||
B64 *e1;
|
||||
int n;
|
||||
{
|
||||
if (n>0) do { /* RIGHT shift n bits */
|
||||
e1->l_32 >>= 1; /* shift 64 bits */
|
||||
if (e1->h_32 & 1)
|
||||
e1->l_32 |= 0x80000000L;
|
||||
e1->h_32 >>= 1;
|
||||
} while (--n);
|
||||
else /* LEFT shift n bits */
|
||||
while (n++) {
|
||||
e1->h_32 <<= 1; /* shift 64 bits */
|
||||
if (e1->l_32 & 0x80000000L)
|
||||
e1->h_32 |= 1;
|
||||
if (n >= 32) {
|
||||
e1->l_32 = e1->h_32;
|
||||
e1->h_32 = 0;
|
||||
n -= 32;
|
||||
}
|
||||
if (n > 0) {
|
||||
e1->l_32 = (e1->l_32 >> n) | (e1->h_32 << (32 - n));
|
||||
e1->h_32 >>= n;
|
||||
return;
|
||||
}
|
||||
n = -n;
|
||||
if (n >= 32) {
|
||||
e1->h_32 = e1->l_32;
|
||||
e1->l_32 = 0;
|
||||
n -= 32;
|
||||
}
|
||||
if (n > 0) {
|
||||
e1->h_32 = (e1->h_32 << n) | (e1->l_32 >> (32 - n));
|
||||
e1->l_32 <<= n;
|
||||
}
|
||||
}
|
||||
|
||||
b64_lsft(e1)
|
||||
B64 *e1;
|
||||
{
|
||||
/* shift left 1 bit */
|
||||
e1->h_32 <<= 1;
|
||||
if (e1->l_32 & 0x80000000L) e1->h_32 |= 1;
|
||||
e1->l_32 <<= 1;
|
||||
}
|
||||
|
||||
b64_rsft(e1)
|
||||
B64 *e1;
|
||||
{
|
||||
/* shift right 1 bit */
|
||||
e1->l_32 >>= 1;
|
||||
if (e1->h_32 & 1) e1->l_32 |= 0x80000000L;
|
||||
e1->h_32 >>= 1;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue