speeded up again :-)

This commit is contained in:
ceriel 1988-08-11 14:50:18 +00:00
parent ebfc4a15a4
commit 5aa128ea62
5 changed files with 30 additions and 56 deletions

View file

@ -34,7 +34,6 @@ g/_sub_ext/s//.sub_ext/g
g/_zrf_ext/s//.zrf_ext/g
g/_compact/s//.compact/g
g/_extend/s//.extend/g
g/_b32_add/s//.b32_add/g
g/_b64_add/s//.b64_add/g
g/_b64_sft/s//.b64_sft/g
g/_b64_rsft/s//.b64_rsft/g

View file

@ -32,10 +32,12 @@ register B64 *e1,*e2;
int carry;
/* add higher pair of 32 bits */
overflow = b32_add(&e1->h_32,&e2->h_32);
overflow = ((unsigned long) 0xFFFFFFFF - e1->h_32 < e2->h_32);
e1->h_32 += e2->h_32;
/* add lower pair of 32 bits */
carry = b32_add(&e1->l_32,&e2->l_32);
carry = ((unsigned long) 0xFFFFFFFF - e1->l_32 < e2->l_32);
e1->l_32 += e2->l_32;
# ifdef EXT_DEBUG
printf("\t\t\t\t\tb64_add: overflow (%d); internal carry(%d)\n",
overflow,carry);
@ -46,17 +48,3 @@ register B64 *e1,*e2;
else
return(overflow); /* return status from higher add */
}
/*
 * b32_add: add the 32-bit quantity *e2 to the 32-bit quantity *e1.
 *
 * The sum, truncated to 32 bits, is stored back in *e1; *e2 is only read.
 * Returns 1 when the addition carries out of bit 31, 0 otherwise.
 *
 * Operands and result are explicitly reduced to their low 32 bits, so
 * the outcome is the same whether unsigned long is exactly 32 bits wide
 * or wider (without the mask a wider unsigned long would keep the carry
 * in bit 32 of *e1 instead of discarding it).
 */
int
b32_add(e1,e2)
register unsigned long *e1,*e2;
{
	unsigned long mask = (unsigned long) 0xFFFFFFFF;
	unsigned long first = *e1 & mask;
	unsigned long sum = (first + (*e2 & mask)) & mask;

	*e1 = sum;
	/* a truncated sum smaller than an operand means the add wrapped */
	return sum < first;
}

View file

@ -1,4 +1,4 @@
e
/*
(c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands.
See the copyright notice in the ACK home directory, in the file "Copyright".
*/

View file

@ -38,6 +38,7 @@ EXTEND *e1,*e2;
unsigned short u[9], v[5];
register int j;
register unsigned short *u_p = u;
int maxv = 4;
#endif
if ((e2->m1 | e2->m2) == 0) {
@ -169,6 +170,7 @@ EXTEND *e1,*e2;
v[2] = e2->m1;
v[3] = e2->m2 >> 16;
v[4] = e2->m2;
while (! v[maxv]) maxv--;
result[0] = 0;
result[1] = 0;
lp = result;
@ -204,7 +206,7 @@ EXTEND *e1,*e2;
unsigned long k = 0;
int borrow = 0;
for (i = 4; i > 0; i--) {
for (i = maxv; i > 0; i--) {
unsigned long tmp = q_est * v[i] + k + borrow;
unsigned short md = tmp;
@ -222,7 +224,7 @@ EXTEND *e1,*e2;
*/
*lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16);
borrow = 0;
for (i = 4; i > 0; i--) {
for (i = maxv; i > 0; i--) {
unsigned long tmp
= v[i]+(unsigned long)u_p[i]+borrow;

View file

@ -17,12 +17,12 @@
mul_ext(e1,e2)
EXTEND *e1,*e2;
{
register int k,i,j; /* loop control */
long unsigned *reg[7];
long unsigned tmp[4];
register int i,j; /* loop control */
short unsigned mp[4]; /* multiplier */
short unsigned mc[4]; /* multiplicand */
B64 low64,tmp64; /* 64 bit storage */
short unsigned result[8]; /* result */
B64 tmp64;
register unsigned short *pres;
/* first save the sign (XOR) */
@ -88,48 +88,33 @@ infinity: e1->m1 = e1->m2 =0L;
mc[1] = (unsigned short) e2->m1;
mc[2] = e2->m2 >> 16;
mc[3] = (unsigned short) e2->m2;
/*
* assign pointers
*/
reg[0] = &e1->m1; /* the answer goes here */
reg[1] = &tmp[1];
reg[2] = &e1->m2; /* and here */
reg[3] = &tmp[2];
reg[4] = &low64.h_32;
reg[5] = &tmp[3];
reg[6] = &low64.l_32;
/*
* zero registers
*/
for(i=7;i--;)
*reg[i] = 0;
for (i = 8; i--;) {
result[i] = 0;
}
/*
* fill registers with their components
*/
for(i=4;i--;) if (mp[i])
for(j=4;j--;) if (mc[j]) {
k = i+j;
tmp[0] = (long)mp[i] * (long)mc[j];
if (b32_add(reg[k],tmp)) {
for(tmp[0] = 0x10000L;k>0;)
if (b32_add(reg[--k],tmp) == 0)
break;
}
for(i=4, pres = &result[4];i--;pres--) if (mp[i]) {
unsigned short k = 0;
unsigned long mpi = mp[i];
for(j=4;j--;) {
unsigned long tmp = (unsigned long)pres[j] + k;
if (mc[j]) tmp += mpi * mc[j];
pres[j] = tmp;
k = tmp >> 16;
}
pres[-1] = k;
}
/*
* combine the registers to a total
*/
tmp64.h_32 = (*reg[1]>>16);
tmp64.l_32 = (*reg[1]<<16) + (*reg[3]>>16);
b64_add((B64 *)&e1->m1,&tmp64);
tmp64.l_32 = *reg[5]<<16;
tmp64.h_32 = (*reg[5]>>16) + (*reg[3]<<16);
if (b64_add(&low64,&tmp64))
e1->m1 = ((unsigned long)(result[0]) << 16) + result[1];
e1->m2 = ((unsigned long)(result[2]) << 16) + result[3];
if (result[4] & 0x8000) {
if (++e1->m2 == 0)
e1->m1++;
}
nrm_ext(e1);
}