It seems that someone wanted to build flt_arith with a compiler that had long but not unsigned long. This required extra code to accomplish unsigned right shift, unsigned division, and unsigned comparison using the signed operations. Now that we use uint32_t, we can simply use the unsigned operations and remove the ucmp() function. We have similar code in mach/proto/fp/ and in lang/cem/libcc.ansi/stdlib/ext_comp.c where we use the unsigned operations. Some long variables become uint32_t, and some masks with 0xFFFFFFFF disappear because uint32_t has only 32 bits. Update flt_arith.3 to show that mantissa uses uint32_t. Provide a target to install modules/src/flt_arith/test.c as flt_test so I can run the tests.
263 lines
5.5 KiB
Groff
263 lines
5.5 KiB
Groff
.TH FLT_ARITH 3 "$Revision$"
|
|
.ad
|
|
.SH NAME
|
|
flt_arith \- high precision floating point arithmetic
|
|
.SH SYNOPSIS
|
|
.nf
|
|
.B #include <flt_arith.h>
|
|
.PP
|
|
.if t .ta 3m 13m 22m
|
|
.if n .ta 5m 25m 40m
|
|
struct flt_mantissa {
|
|
uint32_t flt_h_32; /* high order 32 bits of mantissa */
|
|
uint32_t flt_l_32; /* low order 32 bits of mantissa */
|
|
};
|
|
|
|
typedef struct {
|
|
short flt_sign; /* 0 for positive, 1 for negative */
|
|
short flt_exp; /* between -16384 and 16384 */
|
|
struct flt_mantissa flt_mantissa; /* normalized, in [1,2). */
|
|
} flt_arith;
|
|
|
|
extern int flt_status;
|
|
#define FLT_OVFL 001
|
|
#define FLT_UNFL 002
|
|
#define FLT_DIV0 004
|
|
#define FLT_NOFLT 010
|
|
#define FLT_BTSM 020
|
|
|
|
#define FLT_STRLEN 32
|
|
.PP
|
|
.B void flt_add(e1, e2, e3)
|
|
.B flt_arith *e1, *e2, *e3;
|
|
.PP
|
|
.B void flt_mul(e1, e2, e3)
|
|
.B flt_arith *e1, *e2, *e3;
|
|
.PP
|
|
.B void flt_sub(e1, e2, e3)
|
|
.B flt_arith *e1, *e2, *e3;
|
|
.PP
|
|
.B void flt_div(e1, e2, e3)
|
|
.B flt_arith *e1, *e2, *e3;
|
|
.PP
|
|
.B void flt_umin(e)
|
|
.B flt_arith *e;
|
|
.PP
|
|
.B void flt_modf(e, intpart, fractpart)
|
|
.B flt_arith *e, *intpart, *fractpart;
|
|
.PP
|
|
.B int flt_cmp(e1, e2)
|
|
.B flt_arith *e1, *e2;
|
|
.PP
|
|
.B void flt_str2flt(s, e)
|
|
.B char *s;
|
|
.B flt_arith *e;
|
|
.PP
|
|
.B void flt_flt2str(e, buf, bufsize)
|
|
.B flt_arith *e;
|
|
.B char *buf;
|
|
.B int bufsize;
|
|
.PP
|
|
.B int flt_status;
|
|
.PP
|
|
.B #include <em_arith.h>
|
|
.B void flt_arith2flt(n, e, uns)
|
|
.B arith n;
|
|
.B flt_arith *e;
|
|
.B int uns;
|
|
.PP
|
|
.B arith flt_flt2arith(e, uns)
|
|
.B flt_arith *e;
|
|
.B int uns;
|
|
.PP
|
|
.B void flt_b64_sft(m, n)
|
|
.B struct flt_mantissa *m;
|
|
.B int n;
|
|
.SH DESCRIPTION
|
|
This set of routines emulates floating point arithmetic with high
|
|
precision. It is intended primarily for compilers that need to evaluate
|
|
floating point expressions at compile-time. It could be argued that this
|
|
should be done in the floating point arithmetic of the target machine,
|
|
but EM does not define its floating point arithmetic.
|
|
.PP
|
|
.B flt_add
|
|
adds the numbers indicated by
|
|
.I e1
|
|
and
|
|
.I e2
|
|
and stores the result indirectly through
|
|
.IR e3 .
|
|
.PP
|
|
.B flt_mul
|
|
multiplies the numbers indicated by
|
|
.I e1
|
|
and
|
|
.I e2
|
|
and stores the result indirectly through
|
|
.IR e3 .
|
|
.PP
|
|
.B flt_sub
|
|
subtracts the number indicated by
|
|
.I e2
|
|
from the one indicated by
|
|
.I e1
|
|
and stores the result indirectly through
|
|
.IR e3 .
|
|
.PP
|
|
.B flt_div
|
|
divides the number indicated by
|
|
.I e1
|
|
by the one indicated by
|
|
.I e2
|
|
and stores the result indirectly through
|
|
.IR e3 .
|
|
.PP
|
|
.B flt_umin
|
|
negates the number indicated by
|
|
.I e
|
|
and stores the result indirectly through
|
|
.IR e .
|
|
.PP
|
|
.B flt_modf
|
|
splits the number indicated by
|
|
.I e
|
|
into an integer and a fraction part, and stores the integer part through
|
|
.I intpart
|
|
and the fraction part through
|
|
.IR fractpart .
|
|
So, adding the numbers indicated by
|
|
.I intpart
|
|
and
|
|
.I fractpart
|
|
results (in the absence of rounding error) in the number
|
|
indicated by
|
|
.IR e .
|
|
Also, the absolute value of the number indicated by
|
|
.I intpart
|
|
is less than or equal to the absolute value of the number indicated by
|
|
.IR e .
|
|
The absolute value of the number indicated by
|
|
.I fractpart
|
|
is less than 1.
|
|
.PP
|
|
.B flt_cmp
|
|
compares the numbers indicated by
|
|
.I e1
|
|
and
|
|
.I e2
|
|
and returns -1 if
|
|
.I e1
|
|
<
|
|
.IR e2 ,
|
|
0 if
|
|
.I e1
|
|
=
|
|
.IR e2 ,
|
|
and 1 if
|
|
.I e1
|
|
>
|
|
.IR e2 .
|
|
.PP
|
|
.B flt_str2flt
|
|
converts the string indicated by
|
|
.I s
|
|
to a floating point number, and stores this number through
|
|
.IR e .
|
|
The string should contain a floating point constant, which consists of
|
|
an integer part, a decimal point, a fraction part, an \f(CWe\fP or an
|
|
\f(CWE\fP, and an optionally signed integer exponent. The integer and
|
|
fraction parts both consist of a sequence of digits. They may not both be
|
|
missing. The decimal point, the \f(CWe\fP and the exponent may be
|
|
missing.
|
|
.PP
|
|
.B flt_flt2str
|
|
converts the number indicated by
|
|
.I e
|
|
into a string, in a scientific notation acceptable for EM. The result is
|
|
stored in
|
|
.IR buf .
|
|
At most
|
|
.I bufsize
|
|
characters are stored.
|
|
The maximum length needed is available in the constant FLT_STRLEN.
|
|
.PP
|
|
.B flt_arith2flt
|
|
converts the number
|
|
.I n
|
|
to the floating point format used in this package and returns the result
|
|
in
|
|
.IR e .
If the
|
|
.I uns
|
|
flag is set, the number
|
|
.I n
|
|
is regarded as unsigned.
|
|
.PP
|
|
.B flt_flt2arith
|
|
truncates the number indicated by
|
|
.I e
|
|
to the largest integer value smaller than or equal to the number indicated by
|
|
.IR e .
|
|
It returns this value. If the
|
|
.I uns
|
|
flag is set, the result is to be regarded as unsigned.
|
|
.PP
|
|
Before each operation, the
|
|
.I flt_status
|
|
variable is reset to 0. After an operation, it can be checked for one
|
|
of the following values:
|
|
.IP FLT_OVFL
|
|
.br
|
|
an overflow occurred. The result is a large value with the correct sign.
|
|
This can occur with the routines
|
|
.IR flt_add ,
|
|
.IR flt_sub ,
|
|
.IR flt_div ,
|
|
.IR flt_mul ,
|
|
.IR flt_flt2arith ,
|
|
and
|
|
.IR flt_str2flt .
|
|
.IP FLT_UNFL
|
|
.br
|
|
an underflow occurred. The result is 0.
|
|
This can occur with the routines
|
|
.IR flt_div ,
|
|
.IR flt_mul ,
|
|
.IR flt_sub ,
|
|
.IR flt_add ,
|
|
and
|
|
.IR flt_str2flt .
|
|
.IP FLT_DIV0
|
|
.br
|
|
divide by 0. The result is a large value with the sign of the dividend.
|
|
This can only occur with the routine
|
|
.IR flt_div .
|
|
.IP FLT_NOFLT
|
|
.br
|
|
indicates that the string did not represent a floating point number. The
|
|
result is 0.
|
|
This can only occur with the routine
|
|
.IR flt_str2flt .
|
|
.IP FLT_BTSM
|
|
.br
|
|
indicates that the buffer is too small. The contents of the buffer are
|
|
undefined. This can only occur with the routine
|
|
.IR flt_flt2str .
|
|
.PP
|
|
The routine
|
|
.I flt_b64_sft
|
|
shifts the mantissa
|
|
.I m
|
|
.I |n|
|
|
bits left or right, depending on the sign of
|
|
.IR n .
|
|
If
|
|
.I n
|
|
is negative, it is a left shift; if
|
|
.I n
|
|
is positive, it is a right shift.
|
|
.SH FILES
|
|
~em/modules/h/flt_arith.h
|
|
.br
|
|
~em/modules/h/em_arith.h
|
|
.br
|
|
~em/modules/lib/libflt.a
|