speeded up again :-)
This commit is contained in:
parent
ebfc4a15a4
commit
5aa128ea62
|
@ -34,7 +34,6 @@ g/_sub_ext/s//.sub_ext/g
|
|||
g/_zrf_ext/s//.zrf_ext/g
|
||||
g/_compact/s//.compact/g
|
||||
g/_extend/s//.extend/g
|
||||
g/_b32_add/s//.b32_add/g
|
||||
g/_b64_add/s//.b64_add/g
|
||||
g/_b64_sft/s//.b64_sft/g
|
||||
g/_b64_rsft/s//.b64_rsft/g
|
||||
|
|
|
@ -32,10 +32,12 @@ register B64 *e1,*e2;
|
|||
int carry;
|
||||
|
||||
/* add higher pair of 32 bits */
|
||||
overflow = b32_add(&e1->h_32,&e2->h_32);
|
||||
overflow = ((unsigned long) 0xFFFFFFFF - e1->h_32 < e2->h_32);
|
||||
e1->h_32 += e2->h_32;
|
||||
|
||||
/* add lower pair of 32 bits */
|
||||
carry = b32_add(&e1->l_32,&e2->l_32);
|
||||
carry = ((unsigned long) 0xFFFFFFFF - e1->l_32 < e2->l_32);
|
||||
e1->l_32 += e2->l_32;
|
||||
# ifdef EXT_DEBUG
|
||||
printf("\t\t\t\t\tb64_add: overflow (%d); internal carry(%d)\n",
|
||||
overflow,carry);
|
||||
|
@ -46,17 +48,3 @@ register B64 *e1,*e2;
|
|||
else
|
||||
return(overflow); /* return status from higher add */
|
||||
}
|
||||
|
||||
/*
 * b32_add: add the 32-bit unsigned quantity *e2 into *e1.
 *
 * e1	points to the accumulator; it receives the low 32 bits of the sum.
 * e2	points to the addend; it is only read.
 *
 * Returns 1 when the sum does not fit in 32 bits (carry out), else 0.
 *
 * Both operands and the result are masked to 32 bits, so the routine
 * behaves the same whether "unsigned long" is 32 bits wide or wider.
 * The return type is spelled out because implicit int is no longer
 * accepted by current C compilers.
 */
int
b32_add(e1,e2)
register unsigned long *e1,*e2;
{
	unsigned long mask = (unsigned long) 0xFFFFFFFF;
	unsigned long a = *e1 & mask;
	unsigned long b = *e2 & mask;
	/* carry out of bit 31 happens exactly when b exceeds the room left above a */
	int carry = (mask - a < b);

	/* keep only the low 32 bits; the lost bit is reported through carry */
	*e1 = (a + b) & mask;
	return carry;
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
e
|
||||
/*
|
||||
(c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands.
|
||||
See the copyright notice in the ACK home directory, in the file "Copyright".
|
||||
*/
|
||||
|
|
|
@ -38,6 +38,7 @@ EXTEND *e1,*e2;
|
|||
unsigned short u[9], v[5];
|
||||
register int j;
|
||||
register unsigned short *u_p = u;
|
||||
int maxv = 4;
|
||||
#endif
|
||||
|
||||
if ((e2->m1 | e2->m2) == 0) {
|
||||
|
@ -169,6 +170,7 @@ EXTEND *e1,*e2;
|
|||
v[2] = e2->m1;
|
||||
v[3] = e2->m2 >> 16;
|
||||
v[4] = e2->m2;
|
||||
while (! v[maxv]) maxv--;
|
||||
result[0] = 0;
|
||||
result[1] = 0;
|
||||
lp = result;
|
||||
|
@ -204,7 +206,7 @@ EXTEND *e1,*e2;
|
|||
unsigned long k = 0;
|
||||
int borrow = 0;
|
||||
|
||||
for (i = 4; i > 0; i--) {
|
||||
for (i = maxv; i > 0; i--) {
|
||||
unsigned long tmp = q_est * v[i] + k + borrow;
|
||||
unsigned short md = tmp;
|
||||
|
||||
|
@ -222,7 +224,7 @@ EXTEND *e1,*e2;
|
|||
*/
|
||||
*lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16);
|
||||
borrow = 0;
|
||||
for (i = 4; i > 0; i--) {
|
||||
for (i = maxv; i > 0; i--) {
|
||||
unsigned long tmp
|
||||
= v[i]+(unsigned long)u_p[i]+borrow;
|
||||
|
||||
|
|
|
@ -17,12 +17,12 @@
|
|||
mul_ext(e1,e2)
|
||||
EXTEND *e1,*e2;
|
||||
{
|
||||
register int k,i,j; /* loop control */
|
||||
long unsigned *reg[7];
|
||||
long unsigned tmp[4];
|
||||
register int i,j; /* loop control */
|
||||
short unsigned mp[4]; /* multiplier */
|
||||
short unsigned mc[4]; /* multiplicand */
|
||||
B64 low64,tmp64; /* 64 bit storage */
|
||||
short unsigned result[8]; /* result */
|
||||
B64 tmp64;
|
||||
register unsigned short *pres;
|
||||
|
||||
/* first save the sign (XOR) */
|
||||
|
||||
|
@ -88,48 +88,33 @@ infinity: e1->m1 = e1->m2 =0L;
|
|||
mc[1] = (unsigned short) e2->m1;
|
||||
mc[2] = e2->m2 >> 16;
|
||||
mc[3] = (unsigned short) e2->m2;
|
||||
/*
|
||||
* assign pointers
|
||||
*/
|
||||
reg[0] = &e1->m1; /* the answer goes here */
|
||||
reg[1] = &tmp[1];
|
||||
reg[2] = &e1->m2; /* and here */
|
||||
reg[3] = &tmp[2];
|
||||
reg[4] = &low64.h_32;
|
||||
reg[5] = &tmp[3];
|
||||
reg[6] = &low64.l_32;
|
||||
|
||||
/*
|
||||
* zero registers
|
||||
*/
|
||||
for(i=7;i--;)
|
||||
*reg[i] = 0;
|
||||
|
||||
for (i = 8; i--;) {
|
||||
result[i] = 0;
|
||||
}
|
||||
/*
|
||||
* fill registers with their components
|
||||
*/
|
||||
for(i=4;i--;) if (mp[i])
|
||||
for(j=4;j--;) if (mc[j]) {
|
||||
k = i+j;
|
||||
tmp[0] = (long)mp[i] * (long)mc[j];
|
||||
if (b32_add(reg[k],tmp)) {
|
||||
for(tmp[0] = 0x10000L;k>0;)
|
||||
if (b32_add(reg[--k],tmp) == 0)
|
||||
break;
|
||||
}
|
||||
for(i=4, pres = &result[4];i--;pres--) if (mp[i]) {
|
||||
unsigned short k = 0;
|
||||
unsigned long mpi = mp[i];
|
||||
for(j=4;j--;) {
|
||||
unsigned long tmp = (unsigned long)pres[j] + k;
|
||||
if (mc[j]) tmp += mpi * mc[j];
|
||||
pres[j] = tmp;
|
||||
k = tmp >> 16;
|
||||
}
|
||||
pres[-1] = k;
|
||||
}
|
||||
|
||||
/*
|
||||
* combine the registers to a total
|
||||
*/
|
||||
tmp64.h_32 = (*reg[1]>>16);
|
||||
tmp64.l_32 = (*reg[1]<<16) + (*reg[3]>>16);
|
||||
b64_add((B64 *)&e1->m1,&tmp64);
|
||||
tmp64.l_32 = *reg[5]<<16;
|
||||
tmp64.h_32 = (*reg[5]>>16) + (*reg[3]<<16);
|
||||
if (b64_add(&low64,&tmp64))
|
||||
e1->m1 = ((unsigned long)(result[0]) << 16) + result[1];
|
||||
e1->m2 = ((unsigned long)(result[2]) << 16) + result[3];
|
||||
if (result[4] & 0x8000) {
|
||||
if (++e1->m2 == 0)
|
||||
e1->m1++;
|
||||
}
|
||||
|
||||
nrm_ext(e1);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue