Test long long division and remainder; fix i386.
My i386 code from 893df4b gave the wrong sign to some 8-byte
remainders. Fix by splitting .dvi8 and .rmi8 so that each has its own
code to pick the sign. These two, along with .dvu8 and .rmu8, share a
private subroutine, .divrem8, for unsigned division.
Improve the i386 code by using instructions like _bsr_ and _shrd_.
Change the helpers to yield a quotient in ebx:eax or a remainder in
ecx:edx; this seems more convenient, because _div_ puts its quotient
in eax and remainder in edx.
This commit is contained in:
parent
12457f6385
commit
f6a1e08218
|
@ -1,7 +1,7 @@
|
||||||
for _, plat in ipairs(vars.plats) do
|
for _, plat in ipairs(vars.plats) do
|
||||||
acklibrary {
|
acklibrary {
|
||||||
name = "lib_"..plat,
|
name = "lib_"..plat,
|
||||||
srcs = { "./*.s" }, -- dvi8.s
|
srcs = { "./*.s" }, -- divrem8.s
|
||||||
vars = { plat = plat },
|
vars = { plat = plat },
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
63
mach/i386/libem/divrem8.s
Normal file
63
mach/i386/libem/divrem8.s
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
.sect .text; .sect .rom; .sect .data; .sect .bss
|
||||||
|
.sect .text
|
||||||
|
.define .divrem8
|
||||||
|
|
||||||
|
yl=12
|
||||||
|
yh=16
|
||||||
|
xl=20
|
||||||
|
xh=24
|
||||||
|
! This private sub for .dvi8, .dvu8, .rmi8, .rmu8
|
||||||
|
! does unsigned division of x = xh:xl by y = yh:yl,
|
||||||
|
! yields ebx:eax = quotient, ecx:edx = remainder.
|
||||||
|
|
||||||
|
.divrem8:
	! Unsigned 64-bit division of x = xh:xl by y = yh:yl.
	! Yields ebx:eax = quotient, ecx:edx = remainder.
	!
	! The offsets yl, yh, xl, xh count this sub's return address,
	! one dword pushed by the caller, and the caller's own return
	! address.  Caller must set eax, edx, flag z like so:
	!   mov edx,yh(esp)
	!   test edx,edx
	!   mov eax,xh(esp)
	jnz 1f			! jump if y >= 2**32

	! Here y = yl, so do schoolbook division by a single dword:
	!   qh, rh = quotient, remainder from xh / yl
	!   ql, rl = quotient, remainder from rh:xl / yl
	! The second div cannot overflow, because rh < yl.
	mov ecx,yl(esp)
	xor edx,edx		! edx:eax = xh
	div ecx			! eax = qh, edx = rh
	mov ebx,eax
	mov eax,xl(esp)		! edx:eax = rh:xl
	div ecx			! ebx:eax = qh:ql = quotient
	xor ecx,ecx		! ecx:edx = 0:rl = remainder
	ret

1:	! Here y >= 2**32.  Let s = cl + 1, where cl is the index of
	! the highest set bit in yh, so that y >> s is in [2**31, 2**32).
	! Because s can be 32, and shift counts are taken modulo 32,
	! shift by cl (0 to 31) and then by 1, never by s at once.
	mov ebx,yl(esp)		! edx:ebx = y
	bsr ecx,edx		! cl = index of highest set bit in yh
	shrd ebx,edx,cl
	shr edx,cl		! edx:ebx = y >> cl, now edx = 1
	shr edx,1
	rcr ebx,1		! ebx = y >> s, edx = 0

	! Estimate x / y as q = (x / (y >> s)) >> s.  The estimate
	! is either the correct quotient or 1 too high.
	xor edx,edx		! edx:eax = xh
	div ebx			! eax = xh / (y >> s)
	push eax
	mov eax,xl+4(esp)	! push moved xl to xl+4
	div ebx
	pop edx			! edx:eax = x / (y >> s)
	shrd eax,edx,cl
	shr edx,cl		! edx:eax = (x / (y >> s)) >> cl
	shr edx,1
	rcr eax,1		! eax = q

	! If q is 1 too high, then y * q may be 2**64 or more, and
	! would not fit in 64 bits.  So multiply by q0 = q - 1 (or
	! q0 = 0 if q = 0); q0 is never above the correct quotient,
	! so y * q0 <= x and nothing below can overflow.
	sub eax,1
	adc eax,0		! eax = q0
	mov ebx,yh(esp)
	imul ebx,eax		! ebx = yh * q0
	push eax
	mul yl+4(esp)		! edx:eax = yl * q0
	add ebx,edx		! ebx:eax = y * q0
	mov edx,xl+4(esp)
	mov ecx,xh+4(esp)
	sub edx,eax
	sbb ecx,ebx		! ecx:edx = x - y * q0
	pop eax			! eax = q0

	! Try the quotient q0 + 1 by subtracting y once more.  If
	! that borrows, then the quotient is q0; undo the subtraction.
	inc eax			! eax = q0 + 1
	sub edx,yl(esp)
	sbb ecx,yh(esp)		! ecx:edx = x - y * (q0 + 1)
	jnc 1f			! jump unless subtraction overflowed
	dec eax			! fix quotient
	add edx,yl(esp)
	adc ecx,yh(esp)		! fix remainder
1:	xor ebx,ebx		! ebx:eax = quotient
	ret
|
|
@ -1,115 +1,37 @@
|
||||||
.sect .text; .sect .rom; .sect .data; .sect .bss
|
.sect .text; .sect .rom; .sect .data; .sect .bss
|
||||||
.sect .text
|
.sect .text
|
||||||
.define .dvi8, .dvu8
|
.define .dvi8
|
||||||
|
|
||||||
yl=8
|
yl=4
|
||||||
yh=12
|
yh=8
|
||||||
xl=16
|
xl=12
|
||||||
xh=20
|
xh=16
|
||||||
! .dvi8 and .dvu8 divide x = xh:xl by y = yh:yl,
|
! .dvi8 yields ebx:eax = quotient from x / y
|
||||||
! yield edx:eax = quotient, ecx:ebx = remainder.
|
|
||||||
|
|
||||||
.dvu8:
|
|
||||||
! Unsigned division: set di = 0 for non-negative quotient.
|
|
||||||
push edi
|
|
||||||
xor di,di
|
|
||||||
mov eax,xh(esp)
|
|
||||||
mov edx,yh(esp)
|
|
||||||
and edx,edx
|
|
||||||
jmp 7f
|
|
||||||
|
|
||||||
.dvi8:
|
.dvi8:
|
||||||
! Signed division: replace x and y with their absolute values.
|
xorb cl,cl ! cl = 0, non-negative result
|
||||||
! Set di = 1 for negative quotient, 0 for non-negative.
|
mov eax,xh(esp) ! eax for .divrem8
|
||||||
push edi
|
test eax,eax
|
||||||
xor di,di ! di = 0
|
jge 1f ! jump unless x < 0
|
||||||
mov eax,xh(esp)
|
incb cl ! cl = 1, negative result
|
||||||
and eax,eax
|
|
||||||
jns 1f
|
|
||||||
inc di ! di = 1
|
|
||||||
neg eax
|
neg eax
|
||||||
neg xl(esp)
|
neg xl(esp)
|
||||||
sbb eax,0 ! eax:xl = absolute value of x
|
sbb eax,0
|
||||||
1: mov edx,yh(esp)
|
mov xh(esp),eax ! x = absolute value
|
||||||
and edx,edx
|
1: mov edx,yh(esp) ! edx for .divrem8
|
||||||
jns 7f
|
test edx,edx ! flag z for .divrem8 when y >= 0
|
||||||
xor di,1 ! flip di
|
jge 1f ! jump unless y < 0
|
||||||
|
xorb cl,1 ! flip sign of result
|
||||||
neg edx
|
neg edx
|
||||||
neg yl(esp)
|
neg yl(esp)
|
||||||
sbb edx,0 ! edx:yl = absolute value of y
|
sbb edx,0 ! flag z for .divrem8 when y < 0
|
||||||
|
mov yh(esp),edx ! y = absolute value
|
||||||
7: ! Here .dvu8 joins .dvi8, eax = xh, edx = yh, flags test edx,
|
1: push ecx
|
||||||
! the values in xh(esp) and yh(esp) are garbage.
|
call .divrem8
|
||||||
jnz 8f ! jump if y >= 2**32
|
pop ecx
|
||||||
|
testb cl,cl
|
||||||
! x / y = x / yl = xh / yl + xl / yl = qh + (xl + rh) / yl
|
jz 1f ! jump unless result < 0
|
||||||
! where qh and rh are quotient, remainder from xh / yl.
|
|
||||||
mov ebx,yl(esp)
|
|
||||||
xor edx,edx ! edx:eax = xh
|
|
||||||
div ebx ! eax = qh, edx = rh
|
|
||||||
mov ecx,eax
|
|
||||||
mov eax,xl(esp)
|
|
||||||
div ebx ! eax = ql, edx = remainder
|
|
||||||
mov ebx,edx
|
|
||||||
mov edx,ecx ! edx:eax = quotient qh:ql
|
|
||||||
xor ecx,ecx ! ecx:ebx = remainder
|
|
||||||
|
|
||||||
9: ! Finally, if di != 0 then negate quotient, remainder.
|
|
||||||
and di,di
|
|
||||||
jz 1f
|
|
||||||
neg edx
|
|
||||||
neg eax
|
|
||||||
sbb edx,0 ! negate quotient edx:eax
|
|
||||||
neg ecx
|
|
||||||
neg ebx
|
neg ebx
|
||||||
sbb ecx,0 ! negate remainder ecx:ebx
|
neg eax
|
||||||
1: pop edi ! caller's edi
|
sbb ebx,0 ! negate quotient ebx:eax
|
||||||
ret 16
|
1: ret 16
|
||||||
|
|
||||||
8: ! We come here if y >= 2**32.
|
|
||||||
mov xh(esp),eax
|
|
||||||
mov yh(esp),edx
|
|
||||||
mov ebx,yl(esp) ! edx:ebx = y
|
|
||||||
|
|
||||||
! Estimate x / y as q = (x / (y >> cl)) >> cl,
|
|
||||||
! where 2**31 <= (y >> cl) < 2**32.
|
|
||||||
xor cx,cx
|
|
||||||
1: inc cx
|
|
||||||
shr edx,1
|
|
||||||
rcr ebx,1 ! edx:ebx = y >> cl
|
|
||||||
and edx,edx
|
|
||||||
jnz 1b ! loop until y >> cl fits in ebx
|
|
||||||
|
|
||||||
! x / (y >> cl) = qh + (x + rh) / (y >> cl)
|
|
||||||
push edi
|
|
||||||
xor edx,edx ! edx:eax = xh
|
|
||||||
div ebx ! eax = qh, edx = rh
|
|
||||||
mov edi,eax
|
|
||||||
mov eax,xl+4(esp) ! push edi moved xl to xl+4
|
|
||||||
div ebx ! edi:eax = x / (y >> cl)
|
|
||||||
|
|
||||||
! q = (x / (y >> cl)) >> cl = esi:eax >> cl
|
|
||||||
shr eax,cl
|
|
||||||
neg cx ! cl = (32 - cl) modulo 32
|
|
||||||
shl edi,cl
|
|
||||||
or eax,edi ! eax = q
|
|
||||||
|
|
||||||
! Calculate the remainder x - q * y. If the subtraction
|
|
||||||
! overflows, then the correct quotient is q - 1, else it is q.
|
|
||||||
mov ecx,yh+4(esp)
|
|
||||||
imul ecx,eax ! ecx = q * yh
|
|
||||||
mov edi,eax
|
|
||||||
mul yl+4(esp) ! edx:eax = q * yl
|
|
||||||
add edx,ecx ! edx:eax = q * y
|
|
||||||
mov ebx,xl+4(esp)
|
|
||||||
mov ecx,xh+4(esp) ! ecx:ebx = x
|
|
||||||
sub ebx,eax
|
|
||||||
sbb ecx,edx ! ecx:ebx = remainder
|
|
||||||
jnc 1f
|
|
||||||
dec edi ! fix quotient
|
|
||||||
add ebx,yl+4(esp)
|
|
||||||
adc ebx,yh+4(esp) ! fix remainder
|
|
||||||
1: mov eax,edi
|
|
||||||
xor edx,edx ! edx:eax = quotient
|
|
||||||
pop edi ! negative flag
|
|
||||||
jmp 9b
|
|
||||||
|
|
20
mach/i386/libem/dvu8.s
Normal file
20
mach/i386/libem/dvu8.s
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
.sect .text; .sect .rom; .sect .data; .sect .bss
|
||||||
|
.sect .text
|
||||||
|
.define .dvu8, .rmu8
|
||||||
|
|
||||||
|
yl=4
|
||||||
|
yh=8
|
||||||
|
xl=12
|
||||||
|
xh=16
|
||||||
|
! .dvu8 yields ebx:eax = quotient from x / y
|
||||||
|
! .rmu8 yields ecx:edx = remainder from x / y
|
||||||
|
|
||||||
|
.dvu8:
.rmu8:
	! One body serves both entry points: .divrem8 leaves the
	! quotient in ebx:eax and the remainder in ecx:edx, and the
	! caller of .dvu8 or .rmu8 takes the pair that it wants.
	mov eax,xh(esp)		! eax = xh, as .divrem8 expects
	mov edx,yh(esp)		! edx = yh, as .divrem8 expects
	test edx,edx		! flag z is set when y < 2**32
	push ebp		! extra dword, so x and y sit where
				! .divrem8 looks for them
	call .divrem8
	pop ebp			! drop the extra dword
	ret 16			! return, removing x and y
|
36
mach/i386/libem/rmi8.s
Normal file
36
mach/i386/libem/rmi8.s
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
.sect .text; .sect .rom; .sect .data; .sect .bss
|
||||||
|
.sect .text
|
||||||
|
.define .rmi8
|
||||||
|
|
||||||
|
yl=4
|
||||||
|
yh=8
|
||||||
|
xl=12
|
||||||
|
xh=16
|
||||||
|
! .rmi8 yields ecx:edx = remainder from x / y
|
||||||
|
|
||||||
|
.rmi8:
	! Signed remainder.  Divide the absolute values with
	! .divrem8, then give the remainder the sign of x; the
	! sign of y does not change the sign of the remainder.
	mov eax,xh(esp)		! eax = xh for .divrem8
	xorb cl,cl		! cl = 0, keep remainder as it is
	test eax,eax
	jns 1f			! skip if x >= 0
	incb cl			! cl = 1, negate remainder later
	neg eax
	neg xl(esp)
	sbb eax,0		! eax:xl = -x
	mov xh(esp),eax		! store absolute value of x

1:	mov edx,yh(esp)		! edx = yh for .divrem8
	test edx,edx		! sets flag z for .divrem8 if y >= 0
	jns 1f			! skip if y >= 0
	neg edx
	neg yl(esp)
	sbb edx,0		! sets flag z for .divrem8 if y < 0
	mov yh(esp),edx		! store absolute value of y

1:	push ecx		! save sign flag; also the extra dword
				! that .divrem8 expects on the stack
	call .divrem8
	pop eax			! al = sign flag; quotient is discarded
	testb al,al
	jz 1f			! done if remainder stays non-negative
	neg ecx
	neg edx
	sbb ecx,0		! ecx:edx = -remainder
1:	ret 16
|
|
@ -1038,7 +1038,7 @@ with noacc ACC
|
||||||
|
|
||||||
pat dvi $1==8
|
pat dvi $1==8
|
||||||
kills ALL
|
kills ALL
|
||||||
gen proccall {label,".dvi8"} yields edx eax
|
gen proccall {label,".dvi8"} yields ebx eax
|
||||||
|
|
||||||
/*
|
/*
|
||||||
pat dvi !defined($1)
|
pat dvi !defined($1)
|
||||||
|
@ -1055,7 +1055,7 @@ with noacc ACC
|
||||||
|
|
||||||
pat rmi $1==8
|
pat rmi $1==8
|
||||||
kills ALL
|
kills ALL
|
||||||
gen proccall {label,".dvi8"} yields ecx ebx
|
gen proccall {label,".rmi8"} yields ecx edx
|
||||||
|
|
||||||
/*
|
/*
|
||||||
pat rmi !defined($1)
|
pat rmi !defined($1)
|
||||||
|
@ -1202,7 +1202,7 @@ gen div %1 yields eax
|
||||||
|
|
||||||
pat dvu $1==8
|
pat dvu $1==8
|
||||||
kills ALL
|
kills ALL
|
||||||
gen proccall {label,".dvu8"} yields edx eax
|
gen proccall {label,".dvu8"} yields ebx eax
|
||||||
|
|
||||||
/*
|
/*
|
||||||
pat dvu !defined($1)
|
pat dvu !defined($1)
|
||||||
|
@ -1218,7 +1218,7 @@ gen div %1 yields edx
|
||||||
|
|
||||||
pat rmu $1==8
|
pat rmu $1==8
|
||||||
kills ALL
|
kills ALL
|
||||||
gen proccall {label,".dvu8"} yields ecx ebx
|
gen proccall {label,".rmu8"} yields ecx edx
|
||||||
|
|
||||||
/*
|
/*
|
||||||
pat rmu !defined($1)
|
pat rmu !defined($1)
|
||||||
|
|
|
@ -4,7 +4,7 @@ definerule("plat_testsuite",
|
||||||
{
|
{
|
||||||
plat = { type="string" },
|
plat = { type="string" },
|
||||||
method = { type="string" },
|
method = { type="string" },
|
||||||
-- added long-long/llbitset_e.c
|
-- added long-long/lldivrem_e.c
|
||||||
sets = { type="table", default={"core", "b", "bugs", "m2", "floats", "long-long"}},
|
sets = { type="table", default={"core", "b", "bugs", "m2", "floats", "long-long"}},
|
||||||
skipsets = { type="table", default={}},
|
skipsets = { type="table", default={}},
|
||||||
tests = { type="targets", default={} },
|
tests = { type="targets", default={} },
|
||||||
|
|
71
tests/plat/long-long/lldivrem_e.c
Normal file
71
tests/plat/long-long/lldivrem_e.c
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
#include "test.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test division and remainder. Failure code will look like
|
||||||
|
* - 0x3d = id 0x3, 'd' for division
|
||||||
|
* - 0x3e = id 0x3, 'e' for remainder
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Signed cases: a / b must truncate toward zero, a % b takes the sign of a. */
struct s_divrem {
	unsigned int id;	/* goes into the failure code */
	long long a;		/* dividend */
	long long b;		/* divisor */
	long long a_div_b;	/* expected a / b */
	long long a_rem_b;	/* expected a % b */
} s_cases[] = {
	/* Small operands, every combination of signs. */
	{0x1, 310LL, 100LL, 3LL, 10LL},
	{0x2, 310LL, -100LL, -3LL, 10LL},
	{0x3, -310LL, 100LL, -3LL, -10LL},
	{0x4, -310LL, -100LL, 3LL, -10LL},

	/* Dividend wider than 32 bits, divisor within 32 bits. */
	{0x5, 3000000000000010LL, 100LL, 30000000000000LL, 10LL},
	{0x6, 3000000000000010LL, -100LL, -30000000000000LL, 10LL},
	{0x7, -3000000000000010LL, 100LL, -30000000000000LL, -10LL},
	{0x8, -3000000000000010LL, -100LL, 30000000000000LL, -10LL},

	/* Dividend and divisor both wider than 32 bits. */
	{0x9, 3000000000000010LL, 1000000000000LL, 3000LL, 10LL},
	{0xa, 3000000000000010LL, -1000000000000LL, -3000LL, 10LL},
	{0xb, -3000000000000010LL, 1000000000000LL, -3000LL, -10LL},
	{0xc, -3000000000000010LL, -1000000000000LL, 3000LL, -10LL},

	/*
	 * For the next 3 cases, i386 first estimates the quotient as
	 * (a / (b >> 13)) >> 13 = 8, and may then have to correct
	 * that estimate from 8 down to 7.
	 */
	{0x11, 0x864200000000LL, 0x10c840000000LL, 8LL, 0LL},
	{0x12, 0x864200000000LL, 0x10c840000001LL, 7LL, 0x10c83ffffff9LL},
	{0x13, 0x864200000000LL, 0x10c840001fffLL, 7LL, 0x10c83fff2007LL},
};
|
||||||
|
|
||||||
|
/* Unsigned cases: same layout as the signed table. */
struct u_divrem {
	unsigned int id;		/* goes into the failure code */
	unsigned long long a;		/* dividend */
	unsigned long long b;		/* divisor */
	unsigned long long a_div_b;	/* expected a / b */
	unsigned long long a_rem_b;	/* expected a % b */
} u_cases[] = {
	{0x81, 310ULL, 100ULL, 3ULL, 10ULL},
	{0x82, 3000000000000010ULL, 100ULL, 30000000000000ULL, 10ULL},
	{0x83, 3000000000000010ULL, 1000000000000ULL, 3000ULL, 10ULL},

	/* Dividends that would be negative if taken as signed. */
	{0x91, 0x8000000000000000ULL, 3ULL, 0x2aaaaaaaaaaaaaaaULL, 2ULL},
	{0x92, 0xffffffffffffffffULL, 3ULL, 0x5555555555555555ULL, 0ULL},

	/*
	 * Divisor with its highest bit set.  Code that normalizes
	 * the divisor by shifting must shift by a full 32 bits here,
	 * but i386 shift instructions take the count modulo 32.
	 */
	{0x93, 0xffffffffffffffffULL, 0x8000000000000000ULL,
	       1ULL, 0x7fffffffffffffffULL},

	/*
	 * Here an estimated quotient of 0xfffffffe is 1 too high,
	 * and 0xfffffffe * b does not fit in 64 bits, so a check
	 * of a - estimate * b must not lose the overflow.
	 */
	{0x94, 0xffffffffffffffffULL, 0x100000003ULL, 0xfffffffdULL, 8ULL},
};
|
||||||
|
|
||||||
|
#define LEN(ary) (sizeof(ary) / sizeof(ary[0]))
|
||||||
|
|
||||||
|
void _m_a_i_n(void) {
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < LEN(s_cases); i++) {
|
||||||
|
struct s_divrem *s = &s_cases[i];
|
||||||
|
if (s->a / s->b != s->a_div_b)
|
||||||
|
fail((s->id << 4) | 0xd);
|
||||||
|
if (s->a % s->b != s->a_rem_b)
|
||||||
|
fail((s->id << 4) | 0xe);
|
||||||
|
}
|
||||||
|
for (i = 0; i < LEN(u_cases); i++) {
|
||||||
|
struct u_divrem *u = &u_cases[i];
|
||||||
|
if (u->a / u->b != u->a_div_b)
|
||||||
|
fail((u->id << 4) | 0xd);
|
||||||
|
if (u->a % u->b != u->a_rem_b)
|
||||||
|
fail((u->id << 4) | 0xe);
|
||||||
|
}
|
||||||
|
finished();
|
||||||
|
}
|
Loading…
Reference in a new issue