speeded up again :-)
This commit is contained in:
		
							parent
							
								
									ebfc4a15a4
								
							
						
					
					
						commit
						5aa128ea62
					
				
					 5 changed files with 30 additions and 56 deletions
				
			
		|  | @ -34,7 +34,6 @@ g/_sub_ext/s//.sub_ext/g | |||
| g/_zrf_ext/s//.zrf_ext/g | ||||
| g/_compact/s//.compact/g | ||||
| g/_extend/s//.extend/g | ||||
| g/_b32_add/s//.b32_add/g | ||||
| g/_b64_add/s//.b64_add/g | ||||
| g/_b64_sft/s//.b64_sft/g | ||||
| g/_b64_rsft/s//.b64_rsft/g | ||||
|  |  | |||
|  | @ -32,10 +32,12 @@ register	B64	*e1,*e2; | |||
| 				int	carry; | ||||
| 
 | ||||
| 			/* add higher pair of 32 bits */ | ||||
| 	overflow = b32_add(&e1->h_32,&e2->h_32); | ||||
| 	overflow = ((unsigned long) 0xFFFFFFFF - e1->h_32 < e2->h_32); | ||||
| 	e1->h_32 += e2->h_32; | ||||
| 
 | ||||
| 			/* add lower pair of 32 bits */ | ||||
| 	carry =	   b32_add(&e1->l_32,&e2->l_32); | ||||
| 	carry = ((unsigned long) 0xFFFFFFFF - e1->l_32 < e2->l_32); | ||||
| 	e1->l_32 += e2->l_32; | ||||
| # ifdef	EXT_DEBUG | ||||
| 	printf("\t\t\t\t\tb64_add: overflow (%d); internal carry(%d)\n", | ||||
| 					overflow,carry); | ||||
|  | @ -46,17 +48,3 @@ register	B64	*e1,*e2; | |||
| 	else | ||||
| 		return(overflow);	/* return status from higher add */ | ||||
| } | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 *	add 32 bits (unsigned longs) | ||||
| 	 *	and return the carry status | ||||
| 	 */ | ||||
| 
 | ||||
| b32_add(e1,e2) | ||||
| register	unsigned long	*e1,*e2; | ||||
| { | ||||
| 	int	carry = ((unsigned long) 0xFFFFFFFF - *e1 < *e2); | ||||
| 
 | ||||
| 	*e1 += *e2; | ||||
| 	return carry; | ||||
| } | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| e | ||||
| /*
 | ||||
|   (c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands. | ||||
|   See the copyright notice in the ACK home directory, in the file "Copyright". | ||||
| */ | ||||
|  |  | |||
|  | @ -38,6 +38,7 @@ EXTEND	*e1,*e2; | |||
| 			unsigned short u[9], v[5]; | ||||
| 			register int j; | ||||
| 			register unsigned short *u_p = u; | ||||
| 			int maxv = 4; | ||||
| #endif | ||||
| 
 | ||||
| 	if ((e2->m1 | e2->m2) == 0) { | ||||
|  | @ -169,6 +170,7 @@ EXTEND	*e1,*e2; | |||
| 	v[2] = e2->m1; | ||||
| 	v[3] = e2->m2 >> 16; | ||||
| 	v[4] = e2->m2; | ||||
| 	while (! v[maxv]) maxv--; | ||||
| 	result[0] = 0; | ||||
| 	result[1] = 0; | ||||
| 	lp = result; | ||||
|  | @ -204,7 +206,7 @@ EXTEND	*e1,*e2; | |||
| 			unsigned long k = 0; | ||||
| 			int borrow = 0; | ||||
| 
 | ||||
| 			for (i = 4; i > 0; i--) { | ||||
| 			for (i = maxv; i > 0; i--) { | ||||
| 				unsigned long tmp = q_est * v[i] + k + borrow; | ||||
| 				unsigned short md = tmp; | ||||
| 
 | ||||
|  | @ -222,7 +224,7 @@ EXTEND	*e1,*e2; | |||
| 				*/ | ||||
| 				*lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16); | ||||
| 				borrow = 0; | ||||
| 				for (i = 4; i > 0; i--) { | ||||
| 				for (i = maxv; i > 0; i--) { | ||||
| 					unsigned long tmp  | ||||
| 					    = v[i]+(unsigned long)u_p[i]+borrow; | ||||
| 					 | ||||
|  |  | |||
|  | @ -17,12 +17,12 @@ | |||
| mul_ext(e1,e2) | ||||
| EXTEND	*e1,*e2; | ||||
| { | ||||
| 	register int	k,i,j;		/* loop control	*/ | ||||
| 	long  unsigned	*reg[7]; | ||||
| 	long  unsigned	tmp[4]; | ||||
| 	register int	i,j;		/* loop control	*/ | ||||
| 	short unsigned	mp[4];	/* multiplier */ | ||||
| 	short unsigned	mc[4];	/* multipcand */ | ||||
| 	B64	low64,tmp64;	/* 64 bit storage	*/ | ||||
| 	short unsigned	result[8];	/* result */ | ||||
| 	B64		tmp64; | ||||
| 	register unsigned short *pres; | ||||
| 
 | ||||
| 	/* first save the sign (XOR)			*/ | ||||
| 
 | ||||
|  | @ -88,48 +88,33 @@ infinity:	e1->m1 = e1->m2 =0L; | |||
| 	mc[1] = (unsigned short) e2->m1; | ||||
| 	mc[2] = e2->m2 >> 16; | ||||
| 	mc[3] = (unsigned short) e2->m2; | ||||
| 	/*
 | ||||
| 	 *	assign pointers | ||||
| 	 */ | ||||
| 	reg[0] = &e1->m1;	/* the answer goes here */ | ||||
| 	reg[1] = &tmp[1]; | ||||
| 	reg[2] = &e1->m2;	/* and here	*/ | ||||
| 	reg[3] = &tmp[2]; | ||||
| 	reg[4] = &low64.h_32; | ||||
| 	reg[5] = &tmp[3]; | ||||
| 	reg[6] = &low64.l_32; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 *	zero registers | ||||
| 	 */ | ||||
| 	for(i=7;i--;) | ||||
| 		*reg[i] = 0; | ||||
| 
 | ||||
| 	for (i = 8; i--;) { | ||||
| 		result[i] = 0; | ||||
| 	} | ||||
| 	/*
 | ||||
| 	 *	fill registers with their components | ||||
| 	 */ | ||||
| 	for(i=4;i--;) if (mp[i]) | ||||
| 		for(j=4;j--;) if (mc[j]) { | ||||
| 			k = i+j; | ||||
| 			tmp[0] = (long)mp[i] * (long)mc[j]; | ||||
| 			if (b32_add(reg[k],tmp))	{ | ||||
| 				for(tmp[0] = 0x10000L;k>0;) | ||||
| 					if (b32_add(reg[--k],tmp) == 0) | ||||
| 						break; | ||||
| 			} | ||||
| 	for(i=4, pres = &result[4];i--;pres--) if (mp[i]) { | ||||
| 		unsigned short k = 0; | ||||
| 		unsigned long mpi = mp[i]; | ||||
| 		for(j=4;j--;) { | ||||
| 			unsigned long tmp = (unsigned long)pres[j] + k; | ||||
| 			if (mc[j]) tmp += mpi * mc[j]; | ||||
| 			pres[j] = tmp; | ||||
| 			k = tmp >> 16; | ||||
| 		} | ||||
| 		pres[-1] = k; | ||||
| 	} | ||||
| 	 | ||||
| 	/*
 | ||||
| 	 *	combine the registers to a total | ||||
| 	 */ | ||||
| 	tmp64.h_32 = (*reg[1]>>16); | ||||
| 	tmp64.l_32 = (*reg[1]<<16) + (*reg[3]>>16); | ||||
| 	b64_add((B64 *)&e1->m1,&tmp64); | ||||
| 	tmp64.l_32 = *reg[5]<<16; | ||||
| 	tmp64.h_32 = (*reg[5]>>16) + (*reg[3]<<16); | ||||
| 	if (b64_add(&low64,&tmp64)) | ||||
| 	e1->m1 = ((unsigned long)(result[0]) << 16) + result[1]; | ||||
| 	e1->m2 = ((unsigned long)(result[2]) << 16) + result[3]; | ||||
| 	if (result[4] & 0x8000) { | ||||
| 		if (++e1->m2 == 0) | ||||
| 			e1->m1++; | ||||
| 	} | ||||
| 
 | ||||
| 	nrm_ext(e1); | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue