speeded up again :-)

This commit is contained in:
ceriel 1988-08-11 14:50:18 +00:00
parent ebfc4a15a4
commit 5aa128ea62
5 changed files with 30 additions and 56 deletions

View file

@ -34,7 +34,6 @@ g/_sub_ext/s//.sub_ext/g
g/_zrf_ext/s//.zrf_ext/g g/_zrf_ext/s//.zrf_ext/g
g/_compact/s//.compact/g g/_compact/s//.compact/g
g/_extend/s//.extend/g g/_extend/s//.extend/g
g/_b32_add/s//.b32_add/g
g/_b64_add/s//.b64_add/g g/_b64_add/s//.b64_add/g
g/_b64_sft/s//.b64_sft/g g/_b64_sft/s//.b64_sft/g
g/_b64_rsft/s//.b64_rsft/g g/_b64_rsft/s//.b64_rsft/g

View file

@ -32,10 +32,12 @@ register B64 *e1,*e2;
int carry; int carry;
/* add higher pair of 32 bits */ /* add higher pair of 32 bits */
overflow = b32_add(&e1->h_32,&e2->h_32); overflow = ((unsigned long) 0xFFFFFFFF - e1->h_32 < e2->h_32);
e1->h_32 += e2->h_32;
/* add lower pair of 32 bits */ /* add lower pair of 32 bits */
carry = b32_add(&e1->l_32,&e2->l_32); carry = ((unsigned long) 0xFFFFFFFF - e1->l_32 < e2->l_32);
e1->l_32 += e2->l_32;
# ifdef EXT_DEBUG # ifdef EXT_DEBUG
printf("\t\t\t\t\tb64_add: overflow (%d); internal carry(%d)\n", printf("\t\t\t\t\tb64_add: overflow (%d); internal carry(%d)\n",
overflow,carry); overflow,carry);
@ -46,17 +48,3 @@ register B64 *e1,*e2;
else else
return(overflow); /* return status from higher add */ return(overflow); /* return status from higher add */
} }
/*
 * add 32 bits (unsigned longs)
 * and return the carry status
 *
 * e1: points to the first operand; it is overwritten with *e1 + *e2.
 * e2: points to the second operand; it is only read.
 *
 * Returns the value of the test (0xFFFFFFFF - *e1 < *e2), evaluated
 * on the operands as they were before the addition: 1 when the sum
 * exceeds 0xFFFFFFFF, 0 otherwise.
 *
 * NOTE(review): the carry test and the wrap-around of the addition
 * only agree when unsigned long is exactly 32 bits wide -- confirm
 * for the target before reusing this on a wider long.
 */
b32_add(e1,e2)
register unsigned long *e1,*e2;
{
/* the carry has to be computed first: the next statement clobbers *e1 */
int carry = ((unsigned long) 0xFFFFFFFF - *e1 < *e2);
*e1 += *e2;
return carry;
}

View file

@ -1,4 +1,4 @@
e /*
(c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands. (c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands.
See the copyright notice in the ACK home directory, in the file "Copyright". See the copyright notice in the ACK home directory, in the file "Copyright".
*/ */

View file

@ -38,6 +38,7 @@ EXTEND *e1,*e2;
unsigned short u[9], v[5]; unsigned short u[9], v[5];
register int j; register int j;
register unsigned short *u_p = u; register unsigned short *u_p = u;
int maxv = 4;
#endif #endif
if ((e2->m1 | e2->m2) == 0) { if ((e2->m1 | e2->m2) == 0) {
@ -169,6 +170,7 @@ EXTEND *e1,*e2;
v[2] = e2->m1; v[2] = e2->m1;
v[3] = e2->m2 >> 16; v[3] = e2->m2 >> 16;
v[4] = e2->m2; v[4] = e2->m2;
while (! v[maxv]) maxv--;
result[0] = 0; result[0] = 0;
result[1] = 0; result[1] = 0;
lp = result; lp = result;
@ -204,7 +206,7 @@ EXTEND *e1,*e2;
unsigned long k = 0; unsigned long k = 0;
int borrow = 0; int borrow = 0;
for (i = 4; i > 0; i--) { for (i = maxv; i > 0; i--) {
unsigned long tmp = q_est * v[i] + k + borrow; unsigned long tmp = q_est * v[i] + k + borrow;
unsigned short md = tmp; unsigned short md = tmp;
@ -222,7 +224,7 @@ EXTEND *e1,*e2;
*/ */
*lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16); *lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16);
borrow = 0; borrow = 0;
for (i = 4; i > 0; i--) { for (i = maxv; i > 0; i--) {
unsigned long tmp unsigned long tmp
= v[i]+(unsigned long)u_p[i]+borrow; = v[i]+(unsigned long)u_p[i]+borrow;

View file

@ -17,12 +17,12 @@
mul_ext(e1,e2) mul_ext(e1,e2)
EXTEND *e1,*e2; EXTEND *e1,*e2;
{ {
register int k,i,j; /* loop control */ register int i,j; /* loop control */
long unsigned *reg[7];
long unsigned tmp[4];
short unsigned mp[4]; /* multiplier */ short unsigned mp[4]; /* multiplier */
short unsigned mc[4]; /* multiplicand */ short unsigned mc[4]; /* multiplicand */
B64 low64,tmp64; /* 64 bit storage */ short unsigned result[8]; /* result */
B64 tmp64;
register unsigned short *pres;
/* first save the sign (XOR) */ /* first save the sign (XOR) */
@ -88,48 +88,33 @@ infinity: e1->m1 = e1->m2 =0L;
mc[1] = (unsigned short) e2->m1; mc[1] = (unsigned short) e2->m1;
mc[2] = e2->m2 >> 16; mc[2] = e2->m2 >> 16;
mc[3] = (unsigned short) e2->m2; mc[3] = (unsigned short) e2->m2;
/* for (i = 8; i--;) {
* assign pointers result[i] = 0;
*/ }
reg[0] = &e1->m1; /* the answer goes here */
reg[1] = &tmp[1];
reg[2] = &e1->m2; /* and here */
reg[3] = &tmp[2];
reg[4] = &low64.h_32;
reg[5] = &tmp[3];
reg[6] = &low64.l_32;
/*
* zero registers
*/
for(i=7;i--;)
*reg[i] = 0;
/* /*
* fill registers with their components * fill registers with their components
*/ */
for(i=4;i--;) if (mp[i]) for(i=4, pres = &result[4];i--;pres--) if (mp[i]) {
for(j=4;j--;) if (mc[j]) { unsigned short k = 0;
k = i+j; unsigned long mpi = mp[i];
tmp[0] = (long)mp[i] * (long)mc[j]; for(j=4;j--;) {
if (b32_add(reg[k],tmp)) { unsigned long tmp = (unsigned long)pres[j] + k;
for(tmp[0] = 0x10000L;k>0;) if (mc[j]) tmp += mpi * mc[j];
if (b32_add(reg[--k],tmp) == 0) pres[j] = tmp;
break; k = tmp >> 16;
}
} }
pres[-1] = k;
}
/* /*
* combine the registers to a total * combine the registers to a total
*/ */
tmp64.h_32 = (*reg[1]>>16); e1->m1 = ((unsigned long)(result[0]) << 16) + result[1];
tmp64.l_32 = (*reg[1]<<16) + (*reg[3]>>16); e1->m2 = ((unsigned long)(result[2]) << 16) + result[3];
b64_add((B64 *)&e1->m1,&tmp64); if (result[4] & 0x8000) {
tmp64.l_32 = *reg[5]<<16;
tmp64.h_32 = (*reg[5]>>16) + (*reg[3]<<16);
if (b64_add(&low64,&tmp64))
if (++e1->m2 == 0) if (++e1->m2 == 0)
e1->m1++; e1->m1++;
}
nrm_ext(e1); nrm_ext(e1);
} }