speeded up again :-)
This commit is contained in:
		
							parent
							
								
									ebfc4a15a4
								
							
						
					
					
						commit
						5aa128ea62
					
				
					 5 changed files with 30 additions and 56 deletions
				
			
		|  | @ -34,7 +34,6 @@ g/_sub_ext/s//.sub_ext/g | ||||||
| g/_zrf_ext/s//.zrf_ext/g | g/_zrf_ext/s//.zrf_ext/g | ||||||
| g/_compact/s//.compact/g | g/_compact/s//.compact/g | ||||||
| g/_extend/s//.extend/g | g/_extend/s//.extend/g | ||||||
| g/_b32_add/s//.b32_add/g |  | ||||||
| g/_b64_add/s//.b64_add/g | g/_b64_add/s//.b64_add/g | ||||||
| g/_b64_sft/s//.b64_sft/g | g/_b64_sft/s//.b64_sft/g | ||||||
| g/_b64_rsft/s//.b64_rsft/g | g/_b64_rsft/s//.b64_rsft/g | ||||||
|  |  | ||||||
|  | @ -32,10 +32,12 @@ register	B64	*e1,*e2; | ||||||
| 				int	carry; | 				int	carry; | ||||||
| 
 | 
 | ||||||
| 			/* add higher pair of 32 bits */ | 			/* add higher pair of 32 bits */ | ||||||
| 	overflow = b32_add(&e1->h_32,&e2->h_32); | 	overflow = ((unsigned long) 0xFFFFFFFF - e1->h_32 < e2->h_32); | ||||||
|  | 	e1->h_32 += e2->h_32; | ||||||
| 
 | 
 | ||||||
| 			/* add lower pair of 32 bits */ | 			/* add lower pair of 32 bits */ | ||||||
| 	carry =	   b32_add(&e1->l_32,&e2->l_32); | 	carry = ((unsigned long) 0xFFFFFFFF - e1->l_32 < e2->l_32); | ||||||
|  | 	e1->l_32 += e2->l_32; | ||||||
| # ifdef	EXT_DEBUG | # ifdef	EXT_DEBUG | ||||||
| 	printf("\t\t\t\t\tb64_add: overflow (%d); internal carry(%d)\n", | 	printf("\t\t\t\t\tb64_add: overflow (%d); internal carry(%d)\n", | ||||||
| 					overflow,carry); | 					overflow,carry); | ||||||
|  | @ -46,17 +48,3 @@ register	B64	*e1,*e2; | ||||||
| 	else | 	else | ||||||
| 		return(overflow);	/* return status from higher add */ | 		return(overflow);	/* return status from higher add */ | ||||||
| } | } | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 *	add 32 bits (unsigned longs) |  | ||||||
| 	 *	and return the carry status |  | ||||||
| 	 */ |  | ||||||
| 
 |  | ||||||
| b32_add(e1,e2) |  | ||||||
| register	unsigned long	*e1,*e2; |  | ||||||
| { |  | ||||||
| 	int	carry = ((unsigned long) 0xFFFFFFFF - *e1 < *e2); |  | ||||||
| 
 |  | ||||||
| 	*e1 += *e2; |  | ||||||
| 	return carry; |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| e | /*
 | ||||||
|   (c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands. |   (c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands. | ||||||
|   See the copyright notice in the ACK home directory, in the file "Copyright". |   See the copyright notice in the ACK home directory, in the file "Copyright". | ||||||
| */ | */ | ||||||
|  |  | ||||||
|  | @ -38,6 +38,7 @@ EXTEND	*e1,*e2; | ||||||
| 			unsigned short u[9], v[5]; | 			unsigned short u[9], v[5]; | ||||||
| 			register int j; | 			register int j; | ||||||
| 			register unsigned short *u_p = u; | 			register unsigned short *u_p = u; | ||||||
|  | 			int maxv = 4; | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| 	if ((e2->m1 | e2->m2) == 0) { | 	if ((e2->m1 | e2->m2) == 0) { | ||||||
|  | @ -169,6 +170,7 @@ EXTEND	*e1,*e2; | ||||||
| 	v[2] = e2->m1; | 	v[2] = e2->m1; | ||||||
| 	v[3] = e2->m2 >> 16; | 	v[3] = e2->m2 >> 16; | ||||||
| 	v[4] = e2->m2; | 	v[4] = e2->m2; | ||||||
|  | 	while (! v[maxv]) maxv--; | ||||||
| 	result[0] = 0; | 	result[0] = 0; | ||||||
| 	result[1] = 0; | 	result[1] = 0; | ||||||
| 	lp = result; | 	lp = result; | ||||||
|  | @ -204,7 +206,7 @@ EXTEND	*e1,*e2; | ||||||
| 			unsigned long k = 0; | 			unsigned long k = 0; | ||||||
| 			int borrow = 0; | 			int borrow = 0; | ||||||
| 
 | 
 | ||||||
| 			for (i = 4; i > 0; i--) { | 			for (i = maxv; i > 0; i--) { | ||||||
| 				unsigned long tmp = q_est * v[i] + k + borrow; | 				unsigned long tmp = q_est * v[i] + k + borrow; | ||||||
| 				unsigned short md = tmp; | 				unsigned short md = tmp; | ||||||
| 
 | 
 | ||||||
|  | @ -222,7 +224,7 @@ EXTEND	*e1,*e2; | ||||||
| 				*/ | 				*/ | ||||||
| 				*lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16); | 				*lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16); | ||||||
| 				borrow = 0; | 				borrow = 0; | ||||||
| 				for (i = 4; i > 0; i--) { | 				for (i = maxv; i > 0; i--) { | ||||||
| 					unsigned long tmp  | 					unsigned long tmp  | ||||||
| 					    = v[i]+(unsigned long)u_p[i]+borrow; | 					    = v[i]+(unsigned long)u_p[i]+borrow; | ||||||
| 					 | 					 | ||||||
|  |  | ||||||
|  | @ -17,12 +17,12 @@ | ||||||
| mul_ext(e1,e2) | mul_ext(e1,e2) | ||||||
| EXTEND	*e1,*e2; | EXTEND	*e1,*e2; | ||||||
| { | { | ||||||
| 	register int	k,i,j;		/* loop control	*/ | 	register int	i,j;		/* loop control	*/ | ||||||
| 	long  unsigned	*reg[7]; |  | ||||||
| 	long  unsigned	tmp[4]; |  | ||||||
| 	short unsigned	mp[4];	/* multiplier */ | 	short unsigned	mp[4];	/* multiplier */ | ||||||
| 	short unsigned	mc[4];	/* multipcand */ | 	short unsigned	mc[4];	/* multipcand */ | ||||||
| 	B64	low64,tmp64;	/* 64 bit storage	*/ | 	short unsigned	result[8];	/* result */ | ||||||
|  | 	B64		tmp64; | ||||||
|  | 	register unsigned short *pres; | ||||||
| 
 | 
 | ||||||
| 	/* first save the sign (XOR)			*/ | 	/* first save the sign (XOR)			*/ | ||||||
| 
 | 
 | ||||||
|  | @ -88,48 +88,33 @@ infinity:	e1->m1 = e1->m2 =0L; | ||||||
| 	mc[1] = (unsigned short) e2->m1; | 	mc[1] = (unsigned short) e2->m1; | ||||||
| 	mc[2] = e2->m2 >> 16; | 	mc[2] = e2->m2 >> 16; | ||||||
| 	mc[3] = (unsigned short) e2->m2; | 	mc[3] = (unsigned short) e2->m2; | ||||||
| 	/*
 | 	for (i = 8; i--;) { | ||||||
| 	 *	assign pointers | 		result[i] = 0; | ||||||
| 	 */ | 	} | ||||||
| 	reg[0] = &e1->m1;	/* the answer goes here */ |  | ||||||
| 	reg[1] = &tmp[1]; |  | ||||||
| 	reg[2] = &e1->m2;	/* and here	*/ |  | ||||||
| 	reg[3] = &tmp[2]; |  | ||||||
| 	reg[4] = &low64.h_32; |  | ||||||
| 	reg[5] = &tmp[3]; |  | ||||||
| 	reg[6] = &low64.l_32; |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 *	zero registers |  | ||||||
| 	 */ |  | ||||||
| 	for(i=7;i--;) |  | ||||||
| 		*reg[i] = 0; |  | ||||||
| 
 |  | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 *	fill registers with their components | 	 *	fill registers with their components | ||||||
| 	 */ | 	 */ | ||||||
| 	for(i=4;i--;) if (mp[i]) | 	for(i=4, pres = &result[4];i--;pres--) if (mp[i]) { | ||||||
| 		for(j=4;j--;) if (mc[j]) { | 		unsigned short k = 0; | ||||||
| 			k = i+j; | 		unsigned long mpi = mp[i]; | ||||||
| 			tmp[0] = (long)mp[i] * (long)mc[j]; | 		for(j=4;j--;) { | ||||||
| 			if (b32_add(reg[k],tmp))	{ | 			unsigned long tmp = (unsigned long)pres[j] + k; | ||||||
| 				for(tmp[0] = 0x10000L;k>0;) | 			if (mc[j]) tmp += mpi * mc[j]; | ||||||
| 					if (b32_add(reg[--k],tmp) == 0) | 			pres[j] = tmp; | ||||||
| 						break; | 			k = tmp >> 16; | ||||||
| 			} |  | ||||||
| 		} | 		} | ||||||
|  | 		pres[-1] = k; | ||||||
|  | 	} | ||||||
| 	 | 	 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 *	combine the registers to a total | 	 *	combine the registers to a total | ||||||
| 	 */ | 	 */ | ||||||
| 	tmp64.h_32 = (*reg[1]>>16); | 	e1->m1 = ((unsigned long)(result[0]) << 16) + result[1]; | ||||||
| 	tmp64.l_32 = (*reg[1]<<16) + (*reg[3]>>16); | 	e1->m2 = ((unsigned long)(result[2]) << 16) + result[3]; | ||||||
| 	b64_add((B64 *)&e1->m1,&tmp64); | 	if (result[4] & 0x8000) { | ||||||
| 	tmp64.l_32 = *reg[5]<<16; |  | ||||||
| 	tmp64.h_32 = (*reg[5]>>16) + (*reg[3]<<16); |  | ||||||
| 	if (b64_add(&low64,&tmp64)) |  | ||||||
| 		if (++e1->m2 == 0) | 		if (++e1->m2 == 0) | ||||||
| 			e1->m1++; | 			e1->m1++; | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	nrm_ext(e1); | 	nrm_ext(e1); | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue