.TH FLT_ARITH 3ACK " Fri June 30 1989" .ad .SH NAME flt_arith \- high precision floating point arithmetic .SH SYNOPSIS .nf .B #include <flt_arith.h> .PP .if t .ta 3m 13m 22m .if n .ta 5m 25m 40m struct flt_mantissa { long flt_h_32; /* high order 32 bits of mantissa */ long flt_l_32; /* low order 32 bits of mantissa */ }; typedef struct { short flt_sign; /* 0 for positive, 1 for negative */ short flt_exp; /* between -16384 and 16384 */ struct flt_mantissa flt_mantissa; /* normalized, in [1,2). */ } flt_arith; extern int flt_status; #define FLT_OVFL 001 #define FLT_UNFL 002 #define FLT_DIV0 004 #define FLT_NOFLT 010 #define FLT_BTSM 020 .PP .B flt_add(e1, e2, e3) .B flt_arith *e1, *e2, *e3; .PP .B flt_mul(e1, e2, e3) .B flt_arith *e1, *e2, *e3; .PP .B flt_sub(e1, e2, e3) .B flt_arith *e1, *e2, *e3; .PP .B flt_div(e1, e2, e3) .B flt_arith *e1, *e2, *e3; .PP .B flt_modf(e1, intpart, fractpart) .B flt_arith *e1, *intpart, *fractpart; .PP .B int flt_cmp(e1, e2) .B flt_arith *e1, *e2; .PP .B int flt_str2flt(s, e) .B char *s; .B flt_arith *e; .PP .B flt_flt2str(e, buf, bufsize) .B flt_arith *e; .B char *buf; .B int bufsize; .PP .B int flt_status; .PP .B #include <em_arith.h> .B flt_arith2flt(n, e) .B arith n; .B flt_arith *e; .PP .B arith flt_flt2arith(e) .B flt_arith *e; .PP .B flt_b64_sft(m, n) .B struct flt_mantissa *m; .B int n; .PP .B flt_b64_rsft(m) .B struct flt_mantissa *m; .PP .B flt_b64_lsft(m) .B struct flt_mantissa *m; .SH DESCRIPTION This set of routines emulates floating point arithmetic in high precision. It is intended primarily for compilers that need to evaluate floating point expressions at compile-time. It could be argued that this should be done in the floating point arithmetic of the target machine, but EM does not define its floating point arithmetic. .PP .B flt_add adds the numbers indicated by .I e1 and .I e2 and stores the result indirectly through .IR e3 .
.PP .B flt_mul multiplies the numbers indicated by .I e1 and .I e2 and stores the result indirectly through .IR e3 . .PP .B flt_sub subtracts the number indicated by .I e2 from the one indicated by .I e1 and stores the result indirectly through .IR e3 . .PP .B flt_div divides the number indicated by .I e1 by the one indicated by .I e2 and stores the result indirectly through .IR e3 . .PP .B flt_modf splits the number indicated by .I e1 into an integer part and a fraction part, and stores the integer part through .I intpart and the fraction part through .IR fractpart . So, adding the numbers indicated by .I intpart and .I fractpart results (in the absence of rounding error) in the number indicated by .IR e1 . Also, the absolute value of the number indicated by .I intpart is less than or equal to the absolute value of the number indicated by .IR e1 . The absolute value of the number indicated by .I fractpart is less than 1. .PP .B flt_cmp compares the numbers indicated by .I e1 and .I e2 and returns -1 if .I e1 < .IR e2 , 0 if .I e1 = .IR e2 , and 1 if .I e1 > .IR e2 . .PP .B flt_str2flt converts the string indicated by .I s to a floating point number, and stores this number through .IR e . The string should contain a floating point constant, which consists of an integer part, a decimal point, a fraction part, an \f(CWe\fP or an \f(CWE\fP, and an optionally signed integer exponent. The integer and fraction parts both consist of a sequence of digits. They may not both be missing. The decimal point, the \f(CWe\fP and the exponent may be missing. .PP .B flt_flt2str converts the number indicated by .I e into a string, in a scientific notation acceptable for EM. The result is stored in .IR buf . At most .I bufsize characters are stored. .PP .B flt_arith2flt converts the number .I n to the floating point format used in this package and returns the result in .IR e .
.PP .B flt_flt2arith truncates the number indicated by .I e to the largest integer value smaller than or equal to the number indicated by .IR e . It returns this value. .PP Before each operation, the .I flt_status variable is reset to 0. After an operation, it can be checked for one of the following values: .IP FLT_OVFL .br an overflow occurred. The result is a large value with the correct sign. This can occur with the routines .IR flt_add , .IR flt_sub , .IR flt_div , .IR flt_mul , .IR flt_flt2arith , and .IR flt_str2flt . .IP FLT_UNFL .br an underflow occurred. The result is 0. This can occur with the routines .IR flt_div , .IR flt_mul , .IR flt_sub , .IR flt_add , and .IR flt_str2flt . .IP FLT_DIV0 .br divide by 0. The result is a large value with the sign of the dividend. This can only occur with the routine .IR flt_div . .IP FLT_NOFLT .br indicates that the string did not represent a floating point number. The result is 0. This can only occur with the routine .IR flt_str2flt . .IP FLT_BTSM .br indicates that the buffer is too small. The contents of the buffer are undefined. This can only occur with the routine .IR flt_flt2str . .PP The routine .I flt_b64_sft shifts the mantissa .I m .I |n| bits left or right, depending on the sign of .IR n . If .I n is negative, it is a left shift; if .I n is positive, it is a right shift. .PP The routine .I flt_b64_rsft shifts the mantissa .I m 1 bit right. .PP The routine .I flt_b64_lsft shifts the mantissa .I m 1 bit left. .SH FILES ~em/modules/h/flt_arith.h .br ~em/modules/h/em_arith.h .br ~em/modules/lib/libflt.a