speeded up again :-)
This commit is contained in:
parent
ebfc4a15a4
commit
5aa128ea62
5 changed files with 30 additions and 56 deletions
|
@ -34,7 +34,6 @@ g/_sub_ext/s//.sub_ext/g
|
||||||
g/_zrf_ext/s//.zrf_ext/g
|
g/_zrf_ext/s//.zrf_ext/g
|
||||||
g/_compact/s//.compact/g
|
g/_compact/s//.compact/g
|
||||||
g/_extend/s//.extend/g
|
g/_extend/s//.extend/g
|
||||||
g/_b32_add/s//.b32_add/g
|
|
||||||
g/_b64_add/s//.b64_add/g
|
g/_b64_add/s//.b64_add/g
|
||||||
g/_b64_sft/s//.b64_sft/g
|
g/_b64_sft/s//.b64_sft/g
|
||||||
g/_b64_rsft/s//.b64_rsft/g
|
g/_b64_rsft/s//.b64_rsft/g
|
||||||
|
|
|
@ -32,10 +32,12 @@ register B64 *e1,*e2;
|
||||||
int carry;
|
int carry;
|
||||||
|
|
||||||
/* add higher pair of 32 bits */
|
/* add higher pair of 32 bits */
|
||||||
overflow = b32_add(&e1->h_32,&e2->h_32);
|
overflow = ((unsigned long) 0xFFFFFFFF - e1->h_32 < e2->h_32);
|
||||||
|
e1->h_32 += e2->h_32;
|
||||||
|
|
||||||
/* add lower pair of 32 bits */
|
/* add lower pair of 32 bits */
|
||||||
carry = b32_add(&e1->l_32,&e2->l_32);
|
carry = ((unsigned long) 0xFFFFFFFF - e1->l_32 < e2->l_32);
|
||||||
|
e1->l_32 += e2->l_32;
|
||||||
# ifdef EXT_DEBUG
|
# ifdef EXT_DEBUG
|
||||||
printf("\t\t\t\t\tb64_add: overflow (%d); internal carry(%d)\n",
|
printf("\t\t\t\t\tb64_add: overflow (%d); internal carry(%d)\n",
|
||||||
overflow,carry);
|
overflow,carry);
|
||||||
|
@ -46,17 +48,3 @@ register B64 *e1,*e2;
|
||||||
else
|
else
|
||||||
return(overflow); /* return status from higher add */
|
return(overflow); /* return status from higher add */
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* add 32 bits (unsigned longs)
|
|
||||||
* and return the carry status
|
|
||||||
*/
|
|
||||||
|
|
||||||
b32_add(e1,e2)
|
|
||||||
register unsigned long *e1,*e2;
|
|
||||||
{
|
|
||||||
int carry = ((unsigned long) 0xFFFFFFFF - *e1 < *e2);
|
|
||||||
|
|
||||||
*e1 += *e2;
|
|
||||||
return carry;
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
e
|
/*
|
||||||
(c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands.
|
(c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands.
|
||||||
See the copyright notice in the ACK home directory, in the file "Copyright".
|
See the copyright notice in the ACK home directory, in the file "Copyright".
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -38,6 +38,7 @@ EXTEND *e1,*e2;
|
||||||
unsigned short u[9], v[5];
|
unsigned short u[9], v[5];
|
||||||
register int j;
|
register int j;
|
||||||
register unsigned short *u_p = u;
|
register unsigned short *u_p = u;
|
||||||
|
int maxv = 4;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if ((e2->m1 | e2->m2) == 0) {
|
if ((e2->m1 | e2->m2) == 0) {
|
||||||
|
@ -169,6 +170,7 @@ EXTEND *e1,*e2;
|
||||||
v[2] = e2->m1;
|
v[2] = e2->m1;
|
||||||
v[3] = e2->m2 >> 16;
|
v[3] = e2->m2 >> 16;
|
||||||
v[4] = e2->m2;
|
v[4] = e2->m2;
|
||||||
|
while (! v[maxv]) maxv--;
|
||||||
result[0] = 0;
|
result[0] = 0;
|
||||||
result[1] = 0;
|
result[1] = 0;
|
||||||
lp = result;
|
lp = result;
|
||||||
|
@ -204,7 +206,7 @@ EXTEND *e1,*e2;
|
||||||
unsigned long k = 0;
|
unsigned long k = 0;
|
||||||
int borrow = 0;
|
int borrow = 0;
|
||||||
|
|
||||||
for (i = 4; i > 0; i--) {
|
for (i = maxv; i > 0; i--) {
|
||||||
unsigned long tmp = q_est * v[i] + k + borrow;
|
unsigned long tmp = q_est * v[i] + k + borrow;
|
||||||
unsigned short md = tmp;
|
unsigned short md = tmp;
|
||||||
|
|
||||||
|
@ -222,7 +224,7 @@ EXTEND *e1,*e2;
|
||||||
*/
|
*/
|
||||||
*lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16);
|
*lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16);
|
||||||
borrow = 0;
|
borrow = 0;
|
||||||
for (i = 4; i > 0; i--) {
|
for (i = maxv; i > 0; i--) {
|
||||||
unsigned long tmp
|
unsigned long tmp
|
||||||
= v[i]+(unsigned long)u_p[i]+borrow;
|
= v[i]+(unsigned long)u_p[i]+borrow;
|
||||||
|
|
||||||
|
|
|
@ -17,12 +17,12 @@
|
||||||
mul_ext(e1,e2)
|
mul_ext(e1,e2)
|
||||||
EXTEND *e1,*e2;
|
EXTEND *e1,*e2;
|
||||||
{
|
{
|
||||||
register int k,i,j; /* loop control */
|
register int i,j; /* loop control */
|
||||||
long unsigned *reg[7];
|
|
||||||
long unsigned tmp[4];
|
|
||||||
short unsigned mp[4]; /* multiplier */
|
short unsigned mp[4]; /* multiplier */
|
||||||
short unsigned mc[4]; /* multiplicand */
|
short unsigned mc[4]; /* multiplicand */
|
||||||
B64 low64,tmp64; /* 64 bit storage */
|
short unsigned result[8]; /* result */
|
||||||
|
B64 tmp64;
|
||||||
|
register unsigned short *pres;
|
||||||
|
|
||||||
/* first save the sign (XOR) */
|
/* first save the sign (XOR) */
|
||||||
|
|
||||||
|
@ -88,48 +88,33 @@ infinity: e1->m1 = e1->m2 =0L;
|
||||||
mc[1] = (unsigned short) e2->m1;
|
mc[1] = (unsigned short) e2->m1;
|
||||||
mc[2] = e2->m2 >> 16;
|
mc[2] = e2->m2 >> 16;
|
||||||
mc[3] = (unsigned short) e2->m2;
|
mc[3] = (unsigned short) e2->m2;
|
||||||
/*
|
for (i = 8; i--;) {
|
||||||
* assign pointers
|
result[i] = 0;
|
||||||
*/
|
}
|
||||||
reg[0] = &e1->m1; /* the answer goes here */
|
|
||||||
reg[1] = &tmp[1];
|
|
||||||
reg[2] = &e1->m2; /* and here */
|
|
||||||
reg[3] = &tmp[2];
|
|
||||||
reg[4] = &low64.h_32;
|
|
||||||
reg[5] = &tmp[3];
|
|
||||||
reg[6] = &low64.l_32;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* zero registers
|
|
||||||
*/
|
|
||||||
for(i=7;i--;)
|
|
||||||
*reg[i] = 0;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* fill registers with their components
|
* fill registers with their components
|
||||||
*/
|
*/
|
||||||
for(i=4;i--;) if (mp[i])
|
for(i=4, pres = &result[4];i--;pres--) if (mp[i]) {
|
||||||
for(j=4;j--;) if (mc[j]) {
|
unsigned short k = 0;
|
||||||
k = i+j;
|
unsigned long mpi = mp[i];
|
||||||
tmp[0] = (long)mp[i] * (long)mc[j];
|
for(j=4;j--;) {
|
||||||
if (b32_add(reg[k],tmp)) {
|
unsigned long tmp = (unsigned long)pres[j] + k;
|
||||||
for(tmp[0] = 0x10000L;k>0;)
|
if (mc[j]) tmp += mpi * mc[j];
|
||||||
if (b32_add(reg[--k],tmp) == 0)
|
pres[j] = tmp;
|
||||||
break;
|
k = tmp >> 16;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
pres[-1] = k;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* combine the registers to a total
|
* combine the registers to a total
|
||||||
*/
|
*/
|
||||||
tmp64.h_32 = (*reg[1]>>16);
|
e1->m1 = ((unsigned long)(result[0]) << 16) + result[1];
|
||||||
tmp64.l_32 = (*reg[1]<<16) + (*reg[3]>>16);
|
e1->m2 = ((unsigned long)(result[2]) << 16) + result[3];
|
||||||
b64_add((B64 *)&e1->m1,&tmp64);
|
if (result[4] & 0x8000) {
|
||||||
tmp64.l_32 = *reg[5]<<16;
|
|
||||||
tmp64.h_32 = (*reg[5]>>16) + (*reg[3]<<16);
|
|
||||||
if (b64_add(&low64,&tmp64))
|
|
||||||
if (++e1->m2 == 0)
|
if (++e1->m2 == 0)
|
||||||
e1->m1++;
|
e1->m1++;
|
||||||
|
}
|
||||||
|
|
||||||
nrm_ext(e1);
|
nrm_ext(e1);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue