Test long long division and remainder; fix i386.
My i386 code from 893df4b gave the wrong sign to some 8-byte
remainders. Fix by splitting .dvi8 and .rmi8 so each has its own code
to pick the sign. Both of them, along with .dvu8 and .rmu8, share a
private subroutine .divrem8 that does the unsigned division.
Improve the i386 code by using instructions like _bsr_ and _shrd_.
Change the helpers to yield a quotient in ebx:eax or a remainder in
ecx:edx; this seems more convenient, because _div_ puts its quotient
in eax and remainder in edx.
This commit is contained in:
parent
12457f6385
commit
f6a1e08218
8 changed files with 223 additions and 111 deletions
|
@ -1,7 +1,7 @@
|
|||
for _, plat in ipairs(vars.plats) do
|
||||
acklibrary {
|
||||
name = "lib_"..plat,
|
||||
srcs = { "./*.s" }, -- dvi8.s
|
||||
srcs = { "./*.s" }, -- divrem8.s
|
||||
vars = { plat = plat },
|
||||
}
|
||||
end
|
||||
|
|
63
mach/i386/libem/divrem8.s
Normal file
63
mach/i386/libem/divrem8.s
Normal file
|
@ -0,0 +1,63 @@
|
|||
.sect .text; .sect .rom; .sect .data; .sect .bss
.sect .text
.define .divrem8

yl=12
yh=16
xl=20
xh=24
! This private sub for .dvi8, .dvu8, .rmi8, .rmu8
! does unsigned division of x = xh:xl by y = yh:yl,
! yields ebx:eax = quotient, ecx:edx = remainder.
!
! The offsets above count from esp at entry to .divrem8, so the
! caller must have exactly one extra dword on the stack between its
! own return address and the one pushed by "call .divrem8".
! Clobbers eax, ebx, ecx, edx and the flags; x and y on the stack
! are only read.  Division by y = 0 traps in the first _div_.

.divrem8:
	! Caller must set eax, edx, flag z like so:
	!	mov edx,yh(esp)
	!	test edx,edx
	!	mov eax,xh(esp)
	jnz 1f			! jump if y >= 2**32

	! y = yl, so x / y = xh:xl / yl = qh:0 + rh:xl / yl
	! where qh, rh are quotient, remainder from xh / yl.
	mov ecx,yl(esp)
	xor edx,edx		! edx:eax = xh
	div ecx			! eax = qh, edx = rh
	mov ebx,eax
	mov eax,xl(esp)		! edx:eax = rh:xl
	div ecx			! ebx:eax = qh:ql = quotient
	xor ecx,ecx		! ecx:edx = 0:rl = remainder
	ret

1:	! Here y >= 2**32.  Let s = 1 + index of the highest set bit
	! of yh, so that y >> s is in [2**31, 2**32).  s can be 32,
	! but _shrd_ and _shr_ take their count modulo 32, so shift
	! by cl = s - 1 first and then by 1 more bit.
	mov ebx,yl(esp)		! edx:ebx = y
	bsr ecx,edx		! cl = s - 1, from 0 to 31
	shrd ebx,edx,cl
	shr edx,cl		! edx:ebx = y >> cl, edx = 1
	shr edx,1
	rcr ebx,1		! ebx = y >> s

	! Estimate x / y as q = (x / (y >> s)) >> s.
	xor edx,edx		! edx:eax = xh
	div ebx			! eax = xh / (y >> s)
	push eax
	mov eax,xl+4(esp)	! push moved xl to xl+4
	div ebx
	pop edx			! edx:eax = x / (y >> s)
	shrd eax,edx,cl
	shr edx,cl
	shr edx,1
	rcr eax,1		! eax = q

	! Calculate the remainder x - y * q.  The estimate q is either
	! correct or 1 too big.  It is too big when y * q > x, which
	! shows as a carry out of y * q (y * q >= 2**64) or as a
	! borrow from x - y * q.  Then the correct quotient is q - 1.
	mov ebx,yh(esp)
	imul ebx,eax		! ebx = yh * q
	push eax
	mul yl+4(esp)		! edx:eax = yl * q
	add ebx,edx		! ebx:eax = y * q modulo 2**64
	mov edx,xl+4(esp)
	mov ecx,xh+4(esp)	! _mov_ keeps the carry from _add_
	jc 2f			! jump if y * q >= 2**64
	sub edx,eax
	sbb ecx,ebx		! ecx:edx = remainder
	pop eax			! eax = q
	jnc 1f			! jump unless subtraction overflowed
3:	dec eax			! fix quotient
	add edx,yl(esp)
	adc ecx,yh(esp)		! fix remainder
1:	xor ebx,ebx		! ebx:eax = quotient
	ret

2:	! y * q >= 2**64 > x, so q is 1 too big.  The subtraction
	! still gives x - y * q modulo 2**64, which the fix at 3
	! turns into the remainder.
	sub edx,eax
	sbb ecx,ebx
	pop eax			! eax = q
	jmp 3b
|
|
@ -1,115 +1,37 @@
|
|||
.sect .text; .sect .rom; .sect .data; .sect .bss
|
||||
.sect .text
|
||||
.define .dvi8, .dvu8
|
||||
.define .dvi8
|
||||
|
||||
yl=8
|
||||
yh=12
|
||||
xl=16
|
||||
xh=20
|
||||
! .dvi8 and .dvu8 divide x = xh:xl by y = yh:yl,
|
||||
! yield edx:eax = quotient, ecx:ebx = remainder.
|
||||
|
||||
.dvu8:
|
||||
! Unsigned division: set di = 0 for non-negative quotient.
|
||||
push edi
|
||||
xor di,di
|
||||
mov eax,xh(esp)
|
||||
mov edx,yh(esp)
|
||||
and edx,edx
|
||||
jmp 7f
|
||||
yl=4
|
||||
yh=8
|
||||
xl=12
|
||||
xh=16
|
||||
! .dvi8 yields ebx:eax = quotient from x / y
|
||||
|
||||
.dvi8:
|
||||
! Signed division: replace x and y with their absolute values.
|
||||
! Set di = 1 for negative quotient, 0 for non-negative.
|
||||
push edi
|
||||
xor di,di ! di = 0
|
||||
mov eax,xh(esp)
|
||||
and eax,eax
|
||||
jns 1f
|
||||
inc di ! di = 1
|
||||
xorb cl,cl ! cl = 0, non-negative result
|
||||
mov eax,xh(esp) ! eax for .divrem8
|
||||
test eax,eax
|
||||
jge 1f ! jump unless x < 0
|
||||
incb cl ! cl = 1, negative result
|
||||
neg eax
|
||||
neg xl(esp)
|
||||
sbb eax,0 ! eax:xl = absolute value of x
|
||||
1: mov edx,yh(esp)
|
||||
and edx,edx
|
||||
jns 7f
|
||||
xor di,1 ! flip di
|
||||
sbb eax,0
|
||||
mov xh(esp),eax ! x = absolute value
|
||||
1: mov edx,yh(esp) ! edx for .divrem8
|
||||
test edx,edx ! flag z for .divrem8 when y >= 0
|
||||
jge 1f ! jump unless y < 0
|
||||
xorb cl,1 ! flip sign of result
|
||||
neg edx
|
||||
neg yl(esp)
|
||||
sbb edx,0 ! edx:yl = absolute value of y
|
||||
|
||||
7: ! Here .dvu8 joins .dvi8, eax = xh, edx = yh, flags test edx,
|
||||
! the values in xh(esp) and yh(esp) are garbage.
|
||||
jnz 8f ! jump if y >= 2**32
|
||||
|
||||
! x / y = x / yl = xh / yl + xl / yl = qh + (xl + rh) / yl
|
||||
! where qh and rh are quotient, remainder from xh / yl.
|
||||
mov ebx,yl(esp)
|
||||
xor edx,edx ! edx:eax = xh
|
||||
div ebx ! eax = qh, edx = rh
|
||||
mov ecx,eax
|
||||
mov eax,xl(esp)
|
||||
div ebx ! eax = ql, edx = remainder
|
||||
mov ebx,edx
|
||||
mov edx,ecx ! edx:eax = quotient qh:ql
|
||||
xor ecx,ecx ! ecx:ebx = remainder
|
||||
|
||||
9: ! Finally, if di != 0 then negate quotient, remainder.
|
||||
and di,di
|
||||
jz 1f
|
||||
neg edx
|
||||
neg eax
|
||||
sbb edx,0 ! negate quotient edx:eax
|
||||
neg ecx
|
||||
sbb edx,0 ! flag z for .divrem8 when y < 0
|
||||
mov yh(esp),edx ! y = absolute value
|
||||
1: push ecx
|
||||
call .divrem8
|
||||
pop ecx
|
||||
testb cl,cl
|
||||
jz 1f ! jump unless result < 0
|
||||
neg ebx
|
||||
sbb ecx,0 ! negate remainder ecx:ebx
|
||||
1: pop edi ! caller's edi
|
||||
ret 16
|
||||
|
||||
8: ! We come here if y >= 2**32.
|
||||
mov xh(esp),eax
|
||||
mov yh(esp),edx
|
||||
mov ebx,yl(esp) ! edx:ebx = y
|
||||
|
||||
! Estimate x / y as q = (x / (y >> cl)) >> cl,
|
||||
! where 2**31 <= (y >> cl) < 2**32.
|
||||
xor cx,cx
|
||||
1: inc cx
|
||||
shr edx,1
|
||||
rcr ebx,1 ! edx:ebx = y >> cl
|
||||
and edx,edx
|
||||
jnz 1b ! loop until y >> cl fits in ebx
|
||||
|
||||
! x / (y >> cl) = qh + (x + rh) / (y >> cl)
|
||||
push edi
|
||||
xor edx,edx ! edx:eax = xh
|
||||
div ebx ! eax = qh, edx = rh
|
||||
mov edi,eax
|
||||
mov eax,xl+4(esp) ! push edi moved xl to xl+4
|
||||
div ebx ! edi:eax = x / (y >> cl)
|
||||
|
||||
! q = (x / (y >> cl)) >> cl = esi:eax >> cl
|
||||
shr eax,cl
|
||||
neg cx ! cl = (32 - cl) modulo 32
|
||||
shl edi,cl
|
||||
or eax,edi ! eax = q
|
||||
|
||||
! Calculate the remainder x - q * y. If the subtraction
|
||||
! overflows, then the correct quotient is q - 1, else it is q.
|
||||
mov ecx,yh+4(esp)
|
||||
imul ecx,eax ! ecx = q * yh
|
||||
mov edi,eax
|
||||
mul yl+4(esp) ! edx:eax = q * yl
|
||||
add edx,ecx ! edx:eax = q * y
|
||||
mov ebx,xl+4(esp)
|
||||
mov ecx,xh+4(esp) ! ecx:ebx = x
|
||||
sub ebx,eax
|
||||
sbb ecx,edx ! ecx:ebx = remainder
|
||||
jnc 1f
|
||||
dec edi ! fix quotient
|
||||
add ebx,yl+4(esp)
|
||||
adc ebx,yh+4(esp) ! fix remainder
|
||||
1: mov eax,edi
|
||||
xor edx,edx ! edx:eax = quotient
|
||||
pop edi ! negative flag
|
||||
jmp 9b
|
||||
neg eax
|
||||
sbb ebx,0 ! negate quotient ebx:eax
|
||||
1: ret 16
|
||||
|
|
20
mach/i386/libem/dvu8.s
Normal file
20
mach/i386/libem/dvu8.s
Normal file
|
@ -0,0 +1,20 @@
|
|||
.sect .text; .sect .rom; .sect .data; .sect .bss
.sect .text
.define .dvu8, .rmu8

yl=4
yh=8
xl=12
xh=16
! Unsigned division of x = xh:xl by y = yh:yl.  Both names
! label the same entry point, because .divrem8 yields the
! quotient and the remainder together:
!	.dvu8 yields ebx:eax = quotient from x / y
!	.rmu8 yields ecx:edx = remainder from x / y

.dvu8:
.rmu8:
	! Set up what .divrem8 wants: eax = xh, edx = yh, and
	! flag z from testing yh.  The _push_ and _call_ after
	! the _test_ leave the flags alone.
	mov eax,xh(esp)
	mov edx,yh(esp)
	test edx,edx
	push ebp		! dummy dword, so x and y sit at the
				! stack offsets that .divrem8 uses
	call .divrem8
	pop ebp			! drop the dummy dword
	ret 16			! return and pop x and y
|
36
mach/i386/libem/rmi8.s
Normal file
36
mach/i386/libem/rmi8.s
Normal file
|
@ -0,0 +1,36 @@
|
|||
.sect .text; .sect .rom; .sect .data; .sect .bss
.sect .text
.define .rmi8

yl=4
yh=8
xl=12
xh=16
! .rmi8 yields ecx:edx = remainder from x / y
!
! It replaces x and y on the stack with their absolute values,
! has .divrem8 divide them as unsigned numbers, and then gives
! the remainder the sign of x.  The sign of y plays no part.

.rmi8:
	mov eax,xh(esp)		! .divrem8 wants eax = xh
	xorb cl,cl		! cl = 0: keep remainder as is
	test eax,eax
	jns 1f			! skip if x >= 0
	incb cl			! cl = 1: negate remainder later
	neg eax
	neg xl(esp)		! carry is set if xl != 0
	sbb eax,0		! eax:xl = -x
	mov xh(esp),eax		! store high half of absolute x
1:	mov edx,yh(esp)		! .divrem8 wants edx = yh
	test edx,edx		! sets flag z for .divrem8 if y >= 0
	jns 1f			! skip if y >= 0
	neg edx
	neg yl(esp)		! carry is set if yl != 0
	sbb edx,0		! edx:yl = -y; sets flag z for .divrem8
	mov yh(esp),edx		! store high half of absolute y
1:	push ecx		! save cl; .divrem8 returns ecx:edx
	call .divrem8
	pop eax			! al = saved cl
	testb al,al
	jz 1f			! done if x was not negative
	neg ecx
	neg edx
	sbb ecx,0		! ecx:edx = -remainder
1:	ret 16			! return and pop x and y
|
|
@ -1038,7 +1038,7 @@ with noacc ACC
|
|||
|
||||
pat dvi $1==8
|
||||
kills ALL
|
||||
gen proccall {label,".dvi8"} yields edx eax
|
||||
gen proccall {label,".dvi8"} yields ebx eax
|
||||
|
||||
/*
|
||||
pat dvi !defined($1)
|
||||
|
@ -1055,7 +1055,7 @@ with noacc ACC
|
|||
|
||||
pat rmi $1==8
|
||||
kills ALL
|
||||
gen proccall {label,".dvi8"} yields ecx ebx
|
||||
gen proccall {label,".rmi8"} yields ecx edx
|
||||
|
||||
/*
|
||||
pat rmi !defined($1)
|
||||
|
@ -1202,7 +1202,7 @@ gen div %1 yields eax
|
|||
|
||||
pat dvu $1==8
|
||||
kills ALL
|
||||
gen proccall {label,".dvu8"} yields edx eax
|
||||
gen proccall {label,".dvu8"} yields ebx eax
|
||||
|
||||
/*
|
||||
pat dvu !defined($1)
|
||||
|
@ -1218,7 +1218,7 @@ gen div %1 yields edx
|
|||
|
||||
pat rmu $1==8
|
||||
kills ALL
|
||||
gen proccall {label,".dvu8"} yields ecx ebx
|
||||
gen proccall {label,".rmu8"} yields ecx edx
|
||||
|
||||
/*
|
||||
pat rmu !defined($1)
|
||||
|
|
|
@ -4,7 +4,7 @@ definerule("plat_testsuite",
|
|||
{
|
||||
plat = { type="string" },
|
||||
method = { type="string" },
|
||||
-- added long-long/llbitset_e.c
|
||||
-- added long-long/lldivrem_e.c
|
||||
sets = { type="table", default={"core", "b", "bugs", "m2", "floats", "long-long"}},
|
||||
skipsets = { type="table", default={}},
|
||||
tests = { type="targets", default={} },
|
||||
|
|
71
tests/plat/long-long/lldivrem_e.c
Normal file
71
tests/plat/long-long/lldivrem_e.c
Normal file
|
@ -0,0 +1,71 @@
|
|||
#include "test.h"
|
||||
|
||||
/*
|
||||
* Test division and remainder. Failure code will look like
|
||||
* - 0x3d = id 0x3, 'd' for division
|
||||
* - 0x3e = id 0x3, 'e' for remainder
|
||||
*/
|
||||
|
||||
/*
 * Signed cases.  For each one, a / b must equal a_div_b and
 * a % b must equal a_rem_b.  In every case here the quotient is
 * truncated toward zero and the remainder has the sign of a.
 */
struct s_divrem {
	unsigned int id;	/* identifies the case in a failure code */
	long long a;		/* dividend */
	long long b;		/* divisor */
	long long a_div_b;	/* expected a / b */
	long long a_rem_b;	/* expected a % b */
} s_cases[] = {
	/* Dividend and divisor both fit in 32 bits. */
	{ 0x1,  310LL,  100LL,  3LL,  10LL },
	{ 0x2,  310LL, -100LL, -3LL,  10LL },
	{ 0x3, -310LL,  100LL, -3LL, -10LL },
	{ 0x4, -310LL, -100LL,  3LL, -10LL },

	/* Dividend needs more than 32 bits, divisor does not. */
	{ 0x5,  3000000000000010LL,  100LL,  30000000000000LL,  10LL },
	{ 0x6,  3000000000000010LL, -100LL, -30000000000000LL,  10LL },
	{ 0x7, -3000000000000010LL,  100LL, -30000000000000LL, -10LL },
	{ 0x8, -3000000000000010LL, -100LL,  30000000000000LL, -10LL },

	/* Dividend and divisor both need more than 32 bits. */
	{ 0x9,  3000000000000010LL,  1000000000000LL,  3000LL,  10LL },
	{ 0xa,  3000000000000010LL, -1000000000000LL, -3000LL,  10LL },
	{ 0xb, -3000000000000010LL,  1000000000000LL, -3000LL, -10LL },
	{ 0xc, -3000000000000010LL, -1000000000000LL,  3000LL, -10LL },

	/*
	 * In next 3 cases, i386 tries (a / (b >> 13)) >> 13 = 8,
	 * may need to correct the quotient from 8 to 7.
	 */
	{ 0x11, 0x864200000000LL, 0x10c840000000LL, 8LL, 0LL },
	{ 0x12, 0x864200000000LL, 0x10c840000001LL, 7LL, 0x10c83ffffff9LL },
	{ 0x13, 0x864200000000LL, 0x10c840001fffLL, 7LL, 0x10c83fff2007LL },
};
|
||||
|
||||
/*
 * Unsigned cases.  For each one, a / b must equal a_div_b and
 * a % b must equal a_rem_b.
 */
struct u_divrem {
	unsigned int id;		/* identifies the case in a failure code */
	unsigned long long a;		/* dividend */
	unsigned long long b;		/* divisor */
	unsigned long long a_div_b;	/* expected a / b */
	unsigned long long a_rem_b;	/* expected a % b */
} u_cases[] = {
	{0x81, 310ULL, 100ULL, 3ULL, 10ULL},
	{0x82, 3000000000000010ULL, 100ULL, 30000000000000ULL, 10ULL},
	{0x83, 3000000000000010ULL, 1000000000000ULL, 3000ULL, 10ULL},
	/* Dividends with the highest bit set. */
	{0x91, 0x8000000000000000ULL, 3ULL, 0x2aaaaaaaaaaaaaaaULL, 2ULL},
	{0x92, 0xffffffffffffffffULL, 3ULL, 0x5555555555555555ULL, 0ULL},
	/*
	 * Divisor with the highest bit set.  A routine that
	 * normalizes the divisor by shifting it right must then
	 * shift by a whole 32 bits.
	 */
	{0xa1, 0xffffffffffffffffULL, 0x8000000000000000ULL,
	       1ULL, 0x7fffffffffffffffULL},
	/*
	 * Here (a / (b >> 1)) >> 1 = 0xfffffffe is 1 too big, and
	 * b * 0xfffffffe does not fit in 64 bits, so a routine that
	 * checks the estimate by multiplying must notice the
	 * overflow of the product.
	 */
	{0xa2, 0xffffffffffffffffULL, 0x100000003ULL,
	       0xfffffffdULL, 8ULL},
};
|
||||
|
||||
#define LEN(ary) (sizeof(ary) / sizeof(ary[0]))
|
||||
|
||||
void _m_a_i_n(void) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < LEN(s_cases); i++) {
|
||||
struct s_divrem *s = &s_cases[i];
|
||||
if (s->a / s->b != s->a_div_b)
|
||||
fail((s->id << 4) | 0xd);
|
||||
if (s->a % s->b != s->a_rem_b)
|
||||
fail((s->id << 4) | 0xe);
|
||||
}
|
||||
for (i = 0; i < LEN(u_cases); i++) {
|
||||
struct u_divrem *u = &u_cases[i];
|
||||
if (u->a / u->b != u->a_div_b)
|
||||
fail((u->id << 4) | 0xd);
|
||||
if (u->a % u->b != u->a_rem_b)
|
||||
fail((u->id << 4) | 0xe);
|
||||
}
|
||||
finished();
|
||||
}
|
Loading…
Reference in a new issue