ack/mach/proto/fp/div_ext.c

/*
  (c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands.
  See the copyright notice in the ACK home directory, in the file "Copyright".
*/

/* $Header$ */

/*
	DIVIDE EXTENDED FORMAT
*/

#include "FP_bias.h"
#include "FP_trap.h"
#include "FP_types.h"

/*
	November 15, 1984

	This is a routine to do the work.
	There are two versions: 
	One is based on the partial products method
	and makes no use possible machine instructions
	to divide (hardware dividers).
	The other is used when USE_DIVIDE is defined. It is much faster on
	machines with fast 4 byte operations.
*/
/********************************************************/

div_ext(e1,e2)
EXTEND	*e1,*e2;
{
			short	error = 0;
			unsigned long	result[2];
	register	unsigned long	*lp;
#ifndef USE_DIVIDE
			short	count;
#else
			unsigned short u[9], v[5];
			register int j;
			register unsigned short *u_p = u;
			int maxv = 4;
#endif

	if ((e2->m1 | e2->m2) == 0) {
                /*
                 * Exception 8.2 - Divide by zero
                 */
		trap(EFDIVZ);
		e1->m1 = e1->m2 = 0L;
		e1->exp = EXT_MAX;
		return;
	}
	if ((e1->m1 | e1->m2) == 0) {	/* 0 / anything == 0 */
		e1->exp = 0;	/* make sure */
		return;
	}
#ifndef USE_DIVIDE
	/*
	 * numbers are right shifted one bit to make sure
	 * that m1 is quaranteed to be larger if its
	 * maximum bit is set
	 */
	b64_rsft(&e1->m1);	/* 64 bit shift right */
	b64_rsft(&e2->m1);	/* 64 bit shift right */
	e1->exp++;
	e2->exp++;
#endif
	/*	check for underflow, divide by zero, etc	*/
	e1->sign ^= e2->sign;
	e1->exp -= e2->exp;
	e1->exp += 2;		/* bias correction	*/
	if (e1->exp < EXT_MIN)	{
		/*
		 * Exception 8.4 - Underflow
		 */
		trap(EFUNFL);	/* underflow */
		e1->exp = EXT_MIN;
		e1->m1 = e1->m2 = 0L;
		return;
	}
	if (e1->exp >= EXT_MAX) {
                /*
                 * Exception 8.3 - Overflow
                 */
                trap(EFOVFL);   /* overflow */
                e1->exp = EXT_MAX;
                e1->m1 = e1->m2 = 0L;
                return;
        }

#ifndef USE_DIVIDE
		/* do division of mantissas	*/
		/* uses partial product method	*/
		/* init control variables	*/

	count = 64;
	result[0] = 0L;
	result[1] = 0L;

		/* partial product division loop */

	while (count--)	{
		/* first left shift result 1 bit	*/
		/* this is ALWAYS done			*/

		b64_lsft(result);

		/* compare dividend and divisor		*/
		/* if dividend >= divisor add a bit	*/
		/* and subtract divisior from dividend	*/

		if ( (e1->m1 < e2->m1) ||
			((e1->m1 == e2->m1) && (e1->m2 < e2->m2) ))
			;	/* null statement */
				/* i.e., don't add or subtract */
		else	{
			result[1]++;	/* ADD	*/
			if (e2->m2 > e1->m2)
				e1->m1 -= 1;	/* carry in */
			e1->m1 -= e2->m1;	/* do SUBTRACTION */
			e1->m2 -= e2->m2;	/*    SUBTRACTION */
		}

		/*	shift dividend left one bit OR	*/
		/*	IF it equals ZERO we can break out	*/
		/*	of the loop, but still must shift	*/
		/*	the quotient the remaining count bits	*/
		/* NB	save the results of this test in error	*/
		/*	if not zero, then the result is inexact. */
		/* 	this would be reported in IEEE standard	*/

		/*	lp points to dividend			*/
		lp = &e1->m1;

		error = ((*lp | *(lp+1)) != 0L) ? 1 : 0;
		if (error)	{	/* more work */
			/*	assume max bit == 0 (see above)	*/
			b64_lsft(&e1->m1);
			continue;
		}
		else
			break;	/* leave loop	*/
	}	/* end of divide by subtraction loop	*/

	if (count > 0)	{
		lp = result;
		if (count > 31) {	/* move to higher word */
			*lp = *(lp+1);
			count -= 32;
			*(lp+1) = 0L;	/* clear low word	*/
		}
		if (*lp)
			*lp <<= count;	/* shift rest of way	*/
		lp++;	/*  == &result[1]	*/
		if (*lp) {
			result[0] |= (*lp >> 32-count);
			*lp <<= count;
		}
	}
#else USE_DIVIDE

	u[4] = (e1->m2 & 1) << 15;
	b64_rsft(&(e1->m1));
	u[0] = e1->m1 >> 16;
	u[1] = e1->m1;
	u[2] = e1->m2 >> 16;
	u[3] = e1->m2;
	u[5] = 0; u[6] = 0; u[7] = 0;
	v[1] = e2->m1 >> 16;
	v[2] = e2->m1;
	v[3] = e2->m2 >> 16;
	v[4] = e2->m2;
	while (! v[maxv]) maxv--;
	result[0] = 0;
	result[1] = 0;
	lp = result;

	/*
	 * Use an algorithm of Knuth (The art of programming, Seminumerical
	 * algorithms), to divide u by v. u and v are both seen as numbers
	 * with base 65536. 
	 */
	for (j = 0; j <= 3; j++, u_p++) {
		unsigned long q_est, temp;

		if (j == 2) lp++;
		if (u_p[0] == 0 && u_p[1] < v[1]) continue;
		temp = ((unsigned long)u_p[0] << 16) + u_p[1];
		if (u_p[0] >= v[1]) {
			q_est = 0x0000FFFFL;
		}
		else {
			q_est = temp / v[1];
		}
		temp -= q_est * v[1];
		while (temp < 0x10000 && v[2]*q_est > ((temp<<16)+u_p[2])) {
			q_est--;
			temp += v[1];
		}
		/*	Now, according to Knuth, we have an estimate of the
			quotient, that is either correct or one too big, but
			almost always correct.
		*/
		if (q_est != 0)  {
			int i;
			unsigned long k = 0;
			int borrow = 0;

			for (i = maxv; i > 0; i--) {
				unsigned long tmp = q_est * v[i] + k + borrow;
				unsigned short md = tmp;

				borrow = (md > u_p[i]);
				u_p[i] -= md;
				k = tmp >> 16;
			}
			k += borrow;
			borrow = u_p[0] < k;
			u_p[0] -= k;

			if (borrow) {
				/* So, this does not happen often; the estimate
				   was one too big; correct this
				*/
				*lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16);
				borrow = 0;
				for (i = maxv; i > 0; i--) {
					unsigned long tmp 
					    = v[i]+(unsigned long)u_p[i]+borrow;
					
					u_p[i] = tmp;
					borrow = tmp >> 16;
				}
				u_p[0] += borrow;
			}
			else *lp |= (j & 1) ? q_est : (q_est<<16);
		}
	}
#ifdef	EXCEPTION_INEXACT
	u_p = &u[0];
	for (j = 7; j >= 0; j--) {
		if (*u_p++) {
			error = 1;
			break;
		}
	}
#endif
#endif

#ifdef  EXCEPTION_INEXACT
        if (error)      {
                /*
                 * report here exception 8.5 - Inexact
                 * from Draft 8.0 of IEEE P754:
                 * In the absence of an invalid operation exception,
                 * if the rounded result of an operation is not exact or if
                 * it overflows without a trap, then the inexact exception
                 * shall be assigned. The rounded or overflowed result
                 * shall be delivered to the destination.
                 */
                INEXACT();
#endif
	e1->m1 = result[0];
	e1->m2 = result[1];

	nrm_ext(e1);
}
Added copyright notices 1988-04-07 11:40:46 +00:00			`/*`
			`(c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands.`
			`See the copyright notice in the ACK home directory, in the file "Copyright".`
			`*/`

			`/* $Header$ */`

Initial revision 1988-04-07 10:57:49 +00:00			`/*`
			`DIVIDE EXTENDED FORMAT`
			`*/`

			`#include "FP_bias.h"`
			`#include "FP_trap.h"`
			`#include "FP_types.h"`

			`/*`
			`November 15, 1984`

			`This is a routine to do the work.`
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00			`There are two versions:`
			`One is based on the partial products method`
Initial revision 1988-04-07 10:57:49 +00:00			`and makes no use possible machine instructions`
merged with Michael Felts latest version 1988-07-25 10:46:15 +00:00			`to divide (hardware dividers).`
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00			`The other is used when USE_DIVIDE is defined. It is much faster on`
			`machines with fast 4 byte operations.`
Initial revision 1988-04-07 10:57:49 +00:00			`*/`
			`/********************************************************/`

			`div_ext(e1,e2)`
			`EXTEND e1,e2;`
			`{`
			`short error = 0;`
			`unsigned long result[2];`
			`register unsigned long *lp;`
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00			`#ifndef USE_DIVIDE`
			`short count;`
			`#else`
			`unsigned short u[9], v[5];`
			`register int j;`
			`register unsigned short *u_p = u;`
speeded up again :-) 1988-08-11 14:50:18 +00:00			`int maxv = 4;`
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00			`#endif`
Initial revision 1988-04-07 10:57:49 +00:00
also detect 0/0 1988-08-05 11:40:29 +00:00			`if ((e2->m1 \| e2->m2) == 0) {`
			`/*`
			`* Exception 8.2 - Divide by zero`
			`*/`
			`trap(EFDIVZ);`
			`e1->m1 = e1->m2 = 0L;`
			`e1->exp = EXT_MAX;`
			`return;`
			`}`
Initial revision 1988-04-07 10:57:49 +00:00			`if ((e1->m1 \| e1->m2) == 0) { /* 0 / anything == 0 */`
			`e1->exp = 0; /* make sure */`
			`return;`
			`}`
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00			`#ifndef USE_DIVIDE`
Initial revision 1988-04-07 10:57:49 +00:00			`/*`
			`* numbers are right shifted one bit to make sure`
			`* that m1 is quaranteed to be larger if its`
			`* maximum bit is set`
			`*/`
some modifications to improve speed (sorry, I mean, make a bit less slow) 1988-08-03 23:31:40 +00:00			`b64_rsft(&e1->m1); /* 64 bit shift right */`
			`b64_rsft(&e2->m1); /* 64 bit shift right */`
Initial revision 1988-04-07 10:57:49 +00:00			`e1->exp++;`
			`e2->exp++;`
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00			`#endif`
Initial revision 1988-04-07 10:57:49 +00:00			`/* check for underflow, divide by zero, etc */`
			`e1->sign ^= e2->sign;`
			`e1->exp -= e2->exp;`
			`e1->exp += 2; /* bias correction */`
			`if (e1->exp < EXT_MIN) {`
merged with Michael Felts latest version 1988-07-25 10:46:15 +00:00			`/*`
			`* Exception 8.4 - Underflow`
			`*/`
Initial revision 1988-04-07 10:57:49 +00:00			`trap(EFUNFL); /* underflow */`
			`e1->exp = EXT_MIN;`
			`e1->m1 = e1->m2 = 0L;`
merged with Michael Felts latest version 1988-07-25 10:46:15 +00:00			`return;`
Initial revision 1988-04-07 10:57:49 +00:00			`}`
merged with Michael Felts latest version 1988-07-25 10:46:15 +00:00			`if (e1->exp >= EXT_MAX) {`
			`/*`
			`* Exception 8.3 - Overflow`
			`*/`
			`trap(EFOVFL); /* overflow */`
			`e1->exp = EXT_MAX;`
			`e1->m1 = e1->m2 = 0L;`
			`return;`
			`}`
Initial revision 1988-04-07 10:57:49 +00:00
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00			`#ifndef USE_DIVIDE`
Initial revision 1988-04-07 10:57:49 +00:00			`/* do division of mantissas */`
			`/* uses partial product method */`
			`/* init control variables */`

			`count = 64;`
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00			`result[0] = 0L;`
			`result[1] = 0L;`
Initial revision 1988-04-07 10:57:49 +00:00
			`/* partial product division loop */`

			`while (count--) {`
			`/* first left shift result 1 bit */`
			`/* this is ALWAYS done */`

some modifications to improve speed (sorry, I mean, make a bit less slow) 1988-08-03 23:31:40 +00:00			`b64_lsft(result);`
Initial revision 1988-04-07 10:57:49 +00:00
			`/* compare dividend and divisor */`
			`/* if dividend >= divisor add a bit */`
			`/* and subtract divisior from dividend */`

			`if ( (e1->m1 < e2->m1) \|\|`
			`((e1->m1 == e2->m1) && (e1->m2 < e2->m2) ))`
			`; /* null statement */`
			`/* i.e., don't add or subtract */`
			`else {`
			`result[1]++; /* ADD */`
			`if (e2->m2 > e1->m2)`
			`e1->m1 -= 1; /* carry in */`
			`e1->m1 -= e2->m1; /* do SUBTRACTION */`
			`e1->m2 -= e2->m2; /* SUBTRACTION */`
			`}`

			`/* shift dividend left one bit OR */`
			`/* IF it equals ZERO we can break out */`
			`/* of the loop, but still must shift */`
			`/* the quotient the remaining count bits */`
			`/* NB save the results of this test in error */`
			`/* if not zero, then the result is inexact. */`
			`/* this would be reported in IEEE standard */`

			`/* lp points to dividend */`
			`lp = &e1->m1;`

			`error = ((lp \| (lp+1)) != 0L) ? 1 : 0;`
			`if (error) { /* more work */`
			`/* assume max bit == 0 (see above) */`
some modifications to improve speed (sorry, I mean, make a bit less slow) 1988-08-03 23:31:40 +00:00			`b64_lsft(&e1->m1);`
Initial revision 1988-04-07 10:57:49 +00:00			`continue;`
			`}`
			`else`
			`break; /* leave loop */`
			`} /* end of divide by subtraction loop */`

			`if (count > 0) {`
			`lp = result;`
			`if (count > 31) { /* move to higher word */`
			`lp = (lp+1);`
			`count -= 32;`
			`(lp+1) = 0L; / clear low word */`
			`}`
			`if (*lp)`
			`lp <<= count; / shift rest of way */`
			`lp++; /* == &result[1] */`
			`if (*lp) {`
			`result[0] \|= (*lp >> 32-count);`
			`*lp <<= count;`
			`}`
			`}`
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00			`#else USE_DIVIDE`

			`u[4] = (e1->m2 & 1) << 15;`
			`b64_rsft(&(e1->m1));`
			`u[0] = e1->m1 >> 16;`
			`u[1] = e1->m1;`
			`u[2] = e1->m2 >> 16;`
			`u[3] = e1->m2;`
			`u[5] = 0; u[6] = 0; u[7] = 0;`
			`v[1] = e2->m1 >> 16;`
			`v[2] = e2->m1;`
			`v[3] = e2->m2 >> 16;`
			`v[4] = e2->m2;`
speeded up again :-) 1988-08-11 14:50:18 +00:00			`while (! v[maxv]) maxv--;`
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00			`result[0] = 0;`
			`result[1] = 0;`
			`lp = result;`

			`/*`
			`* Use an algorithm of Knuth (The art of programming, Seminumerical`
			`* algorithms), to divide u by v. u and v are both seen as numbers`
			`* with base 65536.`
			`*/`
			`for (j = 0; j <= 3; j++, u_p++) {`
			`unsigned long q_est, temp;`

			`if (j == 2) lp++;`
			`if (u_p[0] == 0 && u_p[1] < v[1]) continue;`
			`temp = ((unsigned long)u_p[0] << 16) + u_p[1];`
			`if (u_p[0] >= v[1]) {`
			`q_est = 0x0000FFFFL;`
			`}`
			`else {`
			`q_est = temp / v[1];`
			`}`
			`temp -= q_est * v[1];`
			`while (temp < 0x10000 && v[2]*q_est > ((temp<<16)+u_p[2])) {`
			`q_est--;`
			`temp += v[1];`
			`}`
			`/* Now, according to Knuth, we have an estimate of the`
			`quotient, that is either correct or one too big, but`
			`almost always correct.`
			`*/`
			`if (q_est != 0) {`
			`int i;`
			`unsigned long k = 0;`
			`int borrow = 0;`

speeded up again :-) 1988-08-11 14:50:18 +00:00			`for (i = maxv; i > 0; i--) {`
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00			`unsigned long tmp = q_est * v[i] + k + borrow;`
			`unsigned short md = tmp;`

			`borrow = (md > u_p[i]);`
			`u_p[i] -= md;`
			`k = tmp >> 16;`
			`}`
			`k += borrow;`
			`borrow = u_p[0] < k;`
			`u_p[0] -= k;`

			`if (borrow) {`
			`/* So, this does not happen often; the estimate`
			`was one too big; correct this`
			`*/`
			`*lp \|= (j & 1) ? (q_est - 1) : ((q_est-1)<<16);`
			`borrow = 0;`
speeded up again :-) 1988-08-11 14:50:18 +00:00			`for (i = maxv; i > 0; i--) {`
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00			`unsigned long tmp`
			`= v[i]+(unsigned long)u_p[i]+borrow;`

			`u_p[i] = tmp;`
			`borrow = tmp >> 16;`
			`}`
			`u_p[0] += borrow;`
			`}`
			`else *lp \|= (j & 1) ? q_est : (q_est<<16);`
			`}`
			`}`
			`#ifdef EXCEPTION_INEXACT`
			`u_p = &u[0];`
			`for (j = 7; j >= 0; j--) {`
			`if (*u_p++) {`
			`error = 1;`
			`break;`
			`}`
			`}`
			`#endif`
			`#endif`

merged with Michael Felts latest version 1988-07-25 10:46:15 +00:00			`#ifdef EXCEPTION_INEXACT`
			`if (error) {`
			`/*`
			`* report here exception 8.5 - Inexact`
			`* from Draft 8.0 of IEEE P754:`
			`* In the absence of an invalid operation exception,`
			`* if the rounded result of an operation is not exact or if`
			`* it overflows without a trap, then the inexact exception`
			`* shall be assigned. The rounded or overflowed result`
			`* shall be delivered to the destination.`
			`*/`
			`INEXACT();`
			`#endif`
Initial revision 1988-04-07 10:57:49 +00:00			`e1->m1 = result[0];`
			`e1->m2 = result[1];`
Added version for machines with proper 4-byte operations 1988-08-10 10:07:53 +00:00
Initial revision 1988-04-07 10:57:49 +00:00			`nrm_ext(e1);`
			`}`