Test long long division and remainder; fix i386.

My i386 code from 893df4b gave the wrong sign to some 8-byte
remainders.  Fix by splitting .dvi8 and .rmi8 so each has its own code
to pick the sign.  They and .dvu8 and .rmu8 share a private sub
.divrem8 for unsigned division.

Improve the i386 code by using instructions like _bsr_ and _shrd_.
Change the helpers to yield a quotient in ebx:eax or a remainder in
ecx:edx; this seems more convenient, because _div_ puts its quotient
in eax and remainder in edx.
This commit is contained in:
George Koehler 2019-09-16 20:19:36 -04:00
parent 12457f6385
commit f6a1e08218
8 changed files with 223 additions and 111 deletions

View file

@ -1,7 +1,7 @@
for _, plat in ipairs(vars.plats) do for _, plat in ipairs(vars.plats) do
acklibrary { acklibrary {
name = "lib_"..plat, name = "lib_"..plat,
srcs = { "./*.s" }, -- dvi8.s srcs = { "./*.s" }, -- divrem8.s
vars = { plat = plat }, vars = { plat = plat },
} }
end end

63
mach/i386/libem/divrem8.s Normal file
View file

@ -0,0 +1,63 @@
.sect .text; .sect .rom; .sect .data; .sect .bss
.sect .text
.define .divrem8
yl=12
yh=16
xl=20
xh=24
! .divrem8 is a private sub for .dvi8, .dvu8, .rmi8, .rmu8.
! It does unsigned division of x = xh:xl by y = yh:yl and
! yields ebx:eax = quotient, ecx:edx = remainder.
!
! The offsets yl, yh, xl, xh count the return address of .divrem8
! plus one extra word that the caller pushes before calling here.
.divrem8:
	! Caller must set eax, edx, flag z like so:
	!   mov edx,yh(esp)
	!   test edx,edx
	!   mov eax,xh(esp)
	jnz 1f			! jump if y >= 2**32
	! y = yl, so x / y = xh:xl / yl = qh:ql
	! where qh, rh are quotient, remainder from xh / yl
	! and ql, rl are quotient, remainder from rh:xl / yl.
	mov ecx,yl(esp)
	xor edx,edx		! edx:eax = xh
	div ecx			! eax = qh, edx = rh
	mov ebx,eax
	mov eax,xl(esp)		! edx:eax = rh:xl
	div ecx			! ebx:eax = qh:ql = quotient
	xor ecx,ecx		! ecx:edx = 0:rl = remainder
	ret
1:	! Here y >= 2**32.  Let s = cl + 1, where cl is the index of
	! the highest set bit in yh, so y >> s is in [2**31, 2**32).
	! The processor masks a shift count in cl to 5 bits, so one
	! shift by s would do nothing when s = 32 (y >= 2**63).
	! Shift by cl bits and then by 1 more bit instead.
	mov ebx,yl(esp)		! edx:ebx = y
	bsr ecx,edx		! cl = 0..31, highest set bit of yh
	shrd ebx,edx,cl
	shr edx,cl		! edx:ebx = y >> cl, so edx = 1
	shr edx,1
	rcr ebx,1		! ebx = y >> s
	! Estimate x / y as q = (x / (y >> s)) >> s.
	xor edx,edx		! edx:eax = xh
	div ebx			! eax = xh / (y >> s)
	push eax
	mov eax,xl+4(esp)	! push moved xl to xl+4
	div ebx
	pop edx			! edx:eax = x / (y >> s)
	shrd eax,edx,cl
	shr edx,cl		! edx:eax = (x / (y >> s)) >> cl
	shr edx,1
	rcr eax,1		! eax = q
	! The estimate q is the correct quotient or is 1 too big.
	! Calculate y * q with unsigned multiplies, so that a
	! product >= 2**64 is noticed instead of wrapping around.
	mov ecx,eax		! ecx = q
2:	mov eax,ecx
	mul yh(esp)		! edx:eax = yh * q
	jc 3f			! jump if yh * q >= 2**32
	mov ebx,eax		! ebx = yh * q
	mov eax,ecx
	mul yl(esp)		! edx:eax = yl * q
	add ebx,edx		! ebx:eax = y * q
	jc 3f			! jump if y * q >= 2**64
	! Calculate the remainder x - y * q.  If the subtraction
	! overflows, then the correct quotient is q - 1, else it is q.
	push ecx		! save q
	mov edx,xl+4(esp)
	mov ecx,xh+4(esp)	! ecx:edx = x
	sub edx,eax
	sbb ecx,ebx		! ecx:edx = remainder
	pop eax			! eax = q
	jnc 1f			! jump unless subtraction overflowed
	dec eax			! fix quotient
	add edx,yl(esp)
	adc ecx,yh(esp)		! fix remainder
1:	xor ebx,ebx		! ebx:eax = quotient
	ret
3:	! y * q >= 2**64 > x, so q is 1 too big.  Retry with q - 1,
	! which is the correct quotient, so y * (q - 1) <= x fits.
	dec ecx
	jmp 2b

View file

@ -1,115 +1,37 @@
.sect .text; .sect .rom; .sect .data; .sect .bss .sect .text; .sect .rom; .sect .data; .sect .bss
.sect .text .sect .text
.define .dvi8, .dvu8 .define .dvi8
yl=8 yl=4
yh=12 yh=8
xl=16 xl=12
xh=20 xh=16
! .dvi8 and .dvu8 divide x = xh:xl by y = yh:yl, ! .dvi8 yields ebx:eax = quotient from x / y
! yield edx:eax = quotient, ecx:ebx = remainder.
.dvu8:
! Unsigned division: set di = 0 for non-negative quotient.
push edi
xor di,di
mov eax,xh(esp)
mov edx,yh(esp)
and edx,edx
jmp 7f
.dvi8: .dvi8:
! Signed division: replace x and y with their absolute values. xorb cl,cl ! cl = 0, non-negative result
! Set di = 1 for negative quotient, 0 for non-negative. mov eax,xh(esp) ! eax for .divrem8
push edi test eax,eax
xor di,di ! di = 0 jge 1f ! jump unless x < 0
mov eax,xh(esp) incb cl ! cl = 1, negative result
and eax,eax
jns 1f
inc di ! di = 1
neg eax neg eax
neg xl(esp) neg xl(esp)
sbb eax,0 ! eax:xl = absolute value of x sbb eax,0
1: mov edx,yh(esp) mov xh(esp),eax ! x = absolute value
and edx,edx 1: mov edx,yh(esp) ! edx for .divrem8
jns 7f test edx,edx ! flag z for .divrem8 when y >= 0
xor di,1 ! flip di jge 1f ! jump unless y < 0
xorb cl,1 ! flip sign of result
neg edx neg edx
neg yl(esp) neg yl(esp)
sbb edx,0 ! edx:yl = absolute value of y sbb edx,0 ! flag z for .divrem8 when y < 0
mov yh(esp),edx ! y = absolute value
7: ! Here .dvu8 joins .dvi8, eax = xh, edx = yh, flags test edx, 1: push ecx
! the values in xh(esp) and yh(esp) are garbage. call .divrem8
jnz 8f ! jump if y >= 2**32 pop ecx
testb cl,cl
! x / y = x / yl = xh / yl + xl / yl = qh + (xl + rh) / yl jz 1f ! jump unless result < 0
! where qh and rh are quotient, remainder from xh / yl.
mov ebx,yl(esp)
xor edx,edx ! edx:eax = xh
div ebx ! eax = qh, edx = rh
mov ecx,eax
mov eax,xl(esp)
div ebx ! eax = ql, edx = remainder
mov ebx,edx
mov edx,ecx ! edx:eax = quotient qh:ql
xor ecx,ecx ! ecx:ebx = remainder
9: ! Finally, if di != 0 then negate quotient, remainder.
and di,di
jz 1f
neg edx
neg eax
sbb edx,0 ! negate quotient edx:eax
neg ecx
neg ebx neg ebx
sbb ecx,0 ! negate remainder ecx:ebx neg eax
1: pop edi ! caller's edi sbb ebx,0 ! negate quotient ebx:eax
ret 16 1: ret 16
8: ! We come here if y >= 2**32.
mov xh(esp),eax
mov yh(esp),edx
mov ebx,yl(esp) ! edx:ebx = y
! Estimate x / y as q = (x / (y >> cl)) >> cl,
! where 2**31 <= (y >> cl) < 2**32.
xor cx,cx
1: inc cx
shr edx,1
rcr ebx,1 ! edx:ebx = y >> cl
and edx,edx
jnz 1b ! loop until y >> cl fits in ebx
! x / (y >> cl) = qh + (x + rh) / (y >> cl)
push edi
xor edx,edx ! edx:eax = xh
div ebx ! eax = qh, edx = rh
mov edi,eax
mov eax,xl+4(esp) ! push edi moved xl to xl+4
div ebx ! edi:eax = x / (y >> cl)
! q = (x / (y >> cl)) >> cl = esi:eax >> cl
shr eax,cl
neg cx ! cl = (32 - cl) modulo 32
shl edi,cl
or eax,edi ! eax = q
! Calculate the remainder x - q * y. If the subtraction
! overflows, then the correct quotient is q - 1, else it is q.
mov ecx,yh+4(esp)
imul ecx,eax ! ecx = q * yh
mov edi,eax
mul yl+4(esp) ! edx:eax = q * yl
add edx,ecx ! edx:eax = q * y
mov ebx,xl+4(esp)
mov ecx,xh+4(esp) ! ecx:ebx = x
sub ebx,eax
sbb ecx,edx ! ecx:ebx = remainder
jnc 1f
dec edi ! fix quotient
add ebx,yl+4(esp)
adc ebx,yh+4(esp) ! fix remainder
1: mov eax,edi
xor edx,edx ! edx:eax = quotient
pop edi ! negative flag
jmp 9b

20
mach/i386/libem/dvu8.s Normal file
View file

@ -0,0 +1,20 @@
.sect .text; .sect .rom; .sect .data; .sect .bss
.sect .text
.define .dvu8, .rmu8
yl=4
yh=8
xl=12
xh=16
! Unsigned 8-byte division and remainder share one entry point,
! because .divrem8 computes both results in one call:
!   .dvu8 callers take ebx:eax = quotient from x / y
!   .rmu8 callers take ecx:edx = remainder from x / y
.dvu8:
.rmu8:
	! Load the registers and flag z that .divrem8 expects.
	mov eax,xh(esp)		! eax = xh
	mov edx,yh(esp)		! edx = yh
	test edx,edx		! flag z is set when yh = 0
	! Push one word so that the stack offsets in .divrem8
	! line up; push and call leave the flags alone.
	push ebp
	call .divrem8
	pop ebp			! drop the extra word
	ret 16			! also remove x and y, 16 bytes

36
mach/i386/libem/rmi8.s Normal file
View file

@ -0,0 +1,36 @@
.sect .text; .sect .rom; .sect .data; .sect .bss
.sect .text
.define .rmi8
yl=4
yh=8
xl=12
xh=16
! .rmi8 yields ecx:edx = remainder from signed x / y.
! It replaces x and y on the stack with their absolute values,
! lets .divrem8 divide them as unsigned numbers, then negates
! the remainder if x was negative.  The sign of y never changes
! the sign of the remainder.
.rmi8:
	xorb cl,cl		! cl = 0: keep remainder as is
	mov eax,xh(esp)		! .divrem8 wants eax = xh
	test eax,eax
	jns 1f			! x >= 0: nothing to negate
	incb cl			! cl = 1: negate remainder later
	neg eax
	neg xl(esp)		! carry is set if xl was not 0
	sbb eax,0		! eax:xl = -x
	mov xh(esp),eax		! stack now holds abs(x)
1:	mov edx,yh(esp)		! .divrem8 wants edx = yh
	test edx,edx		! y >= 0: this sets flag z for .divrem8
	jns 2f
	neg edx
	neg yl(esp)		! carry is set if yl was not 0
	sbb edx,0		! y < 0: this sets flag z for .divrem8
	mov yh(esp),edx		! stack now holds abs(y)
2:	push ecx		! save cl; mov and push keep the flags
	call .divrem8
	pop eax			! al = saved cl
	testb al,al
	jz 3f			! al = 0: remainder is already right
	neg ecx
	neg edx
	sbb ecx,0		! ecx:edx = -remainder
3:	ret 16			! also remove x and y, 16 bytes

View file

@ -1038,7 +1038,7 @@ with noacc ACC
pat dvi $1==8 pat dvi $1==8
kills ALL kills ALL
gen proccall {label,".dvi8"} yields edx eax gen proccall {label,".dvi8"} yields ebx eax
/* /*
pat dvi !defined($1) pat dvi !defined($1)
@ -1055,7 +1055,7 @@ with noacc ACC
pat rmi $1==8 pat rmi $1==8
kills ALL kills ALL
gen proccall {label,".dvi8"} yields ecx ebx gen proccall {label,".rmi8"} yields ecx edx
/* /*
pat rmi !defined($1) pat rmi !defined($1)
@ -1202,7 +1202,7 @@ gen div %1 yields eax
pat dvu $1==8 pat dvu $1==8
kills ALL kills ALL
gen proccall {label,".dvu8"} yields edx eax gen proccall {label,".dvu8"} yields ebx eax
/* /*
pat dvu !defined($1) pat dvu !defined($1)
@ -1218,7 +1218,7 @@ gen div %1 yields edx
pat rmu $1==8 pat rmu $1==8
kills ALL kills ALL
gen proccall {label,".dvu8"} yields ecx ebx gen proccall {label,".rmu8"} yields ecx edx
/* /*
pat rmu !defined($1) pat rmu !defined($1)

View file

@ -4,7 +4,7 @@ definerule("plat_testsuite",
{ {
plat = { type="string" }, plat = { type="string" },
method = { type="string" }, method = { type="string" },
-- added long-long/llbitset_e.c -- added long-long/lldivrem_e.c
sets = { type="table", default={"core", "b", "bugs", "m2", "floats", "long-long"}}, sets = { type="table", default={"core", "b", "bugs", "m2", "floats", "long-long"}},
skipsets = { type="table", default={}}, skipsets = { type="table", default={}},
tests = { type="targets", default={} }, tests = { type="targets", default={} },

View file

@ -0,0 +1,71 @@
#include "test.h"
/*
* Test division and remainder. Failure code will look like
* - 0x3d = id 0x3, 'd' for division
* - 0x3e = id 0x3, 'e' for remainder
*/
/*
 * Signed test cases.  Each row holds two operands and the expected
 * results of a / b and a % b.  In the rows below, the expected
 * quotient is rounded toward zero and the expected remainder has
 * the sign of a (see ids 0x1 to 0x4).
 */
struct s_divrem {
unsigned int id; /* identifies the row in the failure code */
long long a; /* dividend */
long long b; /* divisor */
long long a_div_b; /* a / b */
long long a_rem_b; /* a % b */
} s_cases[] = {
/* Both operands fit in 32 bits; all four sign combinations. */
{0x1, 310LL, 100LL, 3LL, 10LL},
{0x2, 310LL, -100LL, -3LL, 10LL},
{0x3, -310LL, 100LL, -3LL, -10LL},
{0x4, -310LL, -100LL, 3LL, -10LL},
/* Dividend needs more than 32 bits, divisor fits in 32 bits. */
{0x5, 3000000000000010LL, 100LL, 30000000000000LL, 10LL},
{0x6, 3000000000000010LL, -100LL, -30000000000000LL, 10LL},
{0x7, -3000000000000010LL, 100LL, -30000000000000LL, -10LL},
{0x8, -3000000000000010LL, -100LL, 30000000000000LL, -10LL},
/* Both dividend and divisor need more than 32 bits. */
{0x9, 3000000000000010LL, 1000000000000LL, 3000LL, 10LL},
{0xa, 3000000000000010LL, -1000000000000LL, -3000LL, 10LL},
{0xb, -3000000000000010LL, 1000000000000LL, -3000LL, -10LL},
{0xc, -3000000000000010LL, -1000000000000LL, 3000LL, -10LL},
/*
 * In next 3 cases, i386 tries (a / (b >> 13)) >> 13 = 8,
 * may need to correct the quotient from 8 to 7.
 * Id 0x11 divides exactly (8 * b == a); ids 0x12 and 0x13 use a
 * slightly larger b, so the expected quotient is 7.
 */
{0x11, 0x864200000000LL, 0x10c840000000LL, 8LL, 0LL},
{0x12, 0x864200000000LL, 0x10c840000001LL, 7LL, 0x10c83ffffff9LL},
{0x13, 0x864200000000LL, 0x10c840001fffLL, 7LL, 0x10c83fff2007LL},
};
/*
 * Unsigned test cases, laid out like the signed ones: two operands
 * and the expected results of a / b and a % b.
 */
struct u_divrem {
unsigned int id; /* identifies the row in the failure code */
unsigned long long a; /* dividend */
unsigned long long b; /* divisor */
unsigned long long a_div_b; /* a / b */
unsigned long long a_rem_b; /* a % b */
} u_cases[] = {
/* Same magnitudes as signed ids 0x1, 0x5, 0x9. */
{0x81, 310ULL, 100ULL, 3ULL, 10ULL},
{0x82, 3000000000000010ULL, 100ULL, 30000000000000ULL, 10ULL},
{0x83, 3000000000000010ULL, 1000000000000ULL, 3000ULL, 10ULL},
/* Dividends with the top bit set, too big for a signed long long. */
{0x91, 0x8000000000000000ULL, 3ULL, 0x2aaaaaaaaaaaaaaaULL, 2ULL},
{0x92, 0xffffffffffffffffULL, 3ULL, 0x5555555555555555ULL, 0ULL},
};
#define LEN(ary) (sizeof(ary) / sizeof(ary[0]))
void _m_a_i_n(void) {
int i;
for (i = 0; i < LEN(s_cases); i++) {
struct s_divrem *s = &s_cases[i];
if (s->a / s->b != s->a_div_b)
fail((s->id << 4) | 0xd);
if (s->a % s->b != s->a_rem_b)
fail((s->id << 4) | 0xe);
}
for (i = 0; i < LEN(u_cases); i++) {
struct u_divrem *u = &u_cases[i];
if (u->a / u->b != u->a_div_b)
fail((u->id << 4) | 0xd);
if (u->a % u->b != u->a_rem_b)
fail((u->id << 4) | 0xe);
}
finished();
}