Test long long division and remainder; fix i386.

My i386 code from 893df4b gave the wrong sign to some 8-byte remainders. Fix by splitting .dvi8 and .rmi8 so each has its own code to pick the sign. They and .dvu8 and .rmu8 share a private sub .divrem8 for unsigned division. Improve the i386 code by using instructions like _bsr_ and _shrd_. Change the helpers to yield a quotient in ebx:eax or a remainder in ecx:edx; this seems more convenient, because _div_ puts its quotient in eax and remainder in edx.
2019-09-16 20:19:36 -04:00 · 2019-09-16 20:19:36 -04:00 · f6a1e08218
commit f6a1e08218
parent 12457f6385
8 changed files with 223 additions and 111 deletions
--- a/mach/i386/libem/build.lua
+++ b/mach/i386/libem/build.lua
@ -1,7 +1,7 @@
 for _, plat in ipairs(vars.plats) do
 	acklibrary {
 		name = "lib_"..plat,
-		srcs = { "./*.s" }, -- dvi8.s
+		srcs = { "./*.s" }, -- divrem8.s
 		vars = { plat = plat },
 	}
 end
--- a/mach/i386/libem/divrem8.s
+++ b/mach/i386/libem/divrem8.s
@ -0,0 +1,63 @@
 .sect .text; .sect .rom; .sect .data; .sect .bss
 .sect .text
 .define .divrem8
 	! Offsets from esp at entry to .divrem8.  They skip 3 words:
 	! our return address, the one word that our caller pushed
 	! before its call, and the return address of our caller.
 yl=12
 yh=16
 xl=20
 xh=24
 	! This private sub for .dvi8, .dvu8, .rmi8, .rmu8
 	! does unsigned division of x = xh:xl by y = yh:yl,
 	! yields ebx:eax = quotient, ecx:edx = remainder.
 	! It leaves x and y on the stack for the caller to pop.
 .divrem8:
 	! Caller must set eax, edx, flag z like so:
 	! mov	edx,yh(esp)
 	! test	edx,edx
 	! mov	eax,xh(esp)
 	jnz	1f		! jump if y >= 2**32
 	! y = yl, so x / y = xh:xl / yl = qh:0 + rh:xl / yl
 	! where qh, rh are quotient, remainder from xh / yl.
 	! Because rh < yl, the second quotient fits in 32 bits.
 	mov	ecx,yl(esp)
 	xor	edx,edx		! edx:eax = xh
 	div	ecx		! eax = qh, edx = rh
 	mov	ebx,eax
 	mov	eax,xl(esp)	! edx:eax = rh:xl
 	div	ecx		! ebx:eax = qh:ql = quotient
 	xor	ecx,ecx		! ecx:edx =  0:rl = remainder
 	ret
 1:	! Here y >= 2**32.
 	test	edx,edx
 	js	5f		! jump if y >= 2**63
 	! Find y >> cl in [2**31, 2**32).
 	mov	ebx,yl(esp)	! edx:ebx = y
 	bsr	ecx,edx		! ecx = index of highest set bit in yh
 	incb	cl		! cl = 1 to 31 = width of yh in bits
 	shrd	ebx,edx,cl	! ebx = y >> cl
 	! Estimate x / y as q = (x / (y >> cl)) >> cl.
 	! The estimate is either correct or too big by 1.
 	xor	edx,edx		! edx:eax = xh
 	div	ebx		! eax = xh / (y >> cl)
 	push	eax
 	mov	eax,xl+4(esp)	! push moved xl to xl+4
 	div	ebx
 	pop	edx		! edx:eax = x / (y >> cl)
 	shrd	eax,edx,cl	! eax = q
 2:	! Calculate the remainder x - y * q.  If y * q overflows
 	! 64 bits, or if the subtraction overflows, then the
 	! correct quotient is q - 1, else it is q.
 	mov	ebx,yh(esp)
 	imul	ebx,eax		! ebx = yh * q
 	push	eax
 	mul	yl+4(esp)	! edx:eax = yl * q
 	add	ebx,edx		! ebx:eax = y * q modulo 2**64
 	mov	edx,xl+4(esp)
 	mov	ecx,xh+4(esp)	! ecx:edx = x, mov keeps carry from add
 	jc	3f		! jump if y * q >= 2**64 > x
 	sub	edx,eax
 	sbb	ecx,ebx		! ecx:edx = remainder
 	pop	eax		! eax = q, pop keeps carry from sbb
 	jnc	1f		! jump unless subtraction overflowed
 	jmp	4f
 3:	sub	edx,eax
 	sbb	ecx,ebx		! ecx:edx = x - y * q modulo 2**64
 	pop	eax		! eax = q
 4:	dec	eax		! fix quotient
 	add	edx,yl(esp)
 	adc	ecx,yh(esp)	! fix remainder
 1:	xor	ebx,ebx		! ebx:eax = quotient
 	ret
 5:	! Here y >= 2**63, so the quotient is 0 or 1.  We can't
 	! shift by cl = 32, because shrd would mask the count to 0.
 	! Estimate q = 1 and let the code above correct it.
 	mov	eax,1
 	jmp	2b
--- a/mach/i386/libem/dvi8.s
+++ b/mach/i386/libem/dvi8.s
@ -1,115 +1,37 @@
 .sect .text; .sect .rom; .sect .data; .sect .bss
 .sect .text
-.define .dvi8, .dvu8
+.define .dvi8
-yl=8
+yl=4
-yh=12
+yh=8
-xl=16
+xl=12
-xh=20
+xh=16
-	! .dvi8 and .dvu8 divide x = xh:xl by y = yh:yl,
+	! .dvi8 yields ebx:eax = quotient from x / y
 	! yield edx:eax = quotient, ecx:ebx = remainder.
 .dvu8:
 	! Unsigned division: set di = 0 for non-negative quotient.
 	push	edi
 	xor	di,di
 	mov	eax,xh(esp)
 	mov	edx,yh(esp)
 	and	edx,edx
 	jmp	7f
 .dvi8:
-	! Signed division: replace x and y with their absolute values.
+	xorb	cl,cl		! cl = 0, non-negative result
-	! Set di = 1 for negative quotient, 0 for non-negative.
+	mov	eax,xh(esp)	! eax for .divrem8
-	push	edi
+	test	eax,eax
-	xor	di,di		! di = 0
+	jge	1f		! jump unless x < 0
-	mov	eax,xh(esp)
+	incb	cl		! cl = 1, negative result
 	and	eax,eax
 	jns	1f
 	inc	di		! di = 1
 	neg	eax
 	neg	xl(esp)
-	sbb	eax,0		! eax:xl = absolute value of x
+	sbb	eax,0
-1:	mov	edx,yh(esp)
+	mov	xh(esp),eax	! x = absolute value
-	and	edx,edx
+1:	mov	edx,yh(esp)	! edx for .divrem8
-	jns	7f
+	test	edx,edx		! flag z for .divrem8 when y >= 0
-	xor	di,1		! flip di
+	jge	1f		! jump unless y < 0
 	xorb	cl,1		! flip sign of result
 	neg	edx
 	neg	yl(esp)
-	sbb	edx,0		! edx:yl = absolute value of y
+	sbb	edx,0		! flag z for .divrem8 when y < 0
-
+	mov	yh(esp),edx	! y = absolute value
-7:	! Here .dvu8 joins .dvi8, eax = xh, edx = yh, flags test edx,
+1:	push	ecx
-	! the values in xh(esp) and yh(esp) are garbage.
+	call	.divrem8
-	jnz	8f		! jump if y >= 2**32
+	pop	ecx
-
+	testb	cl,cl
-	! x / y = x / yl = xh / yl + xl / yl = qh + (xl + rh) / yl
+	jz	1f		! jump unless result < 0
 	! where qh and rh are quotient, remainder from xh / yl.
 	mov	ebx,yl(esp)
 	xor	edx,edx		! edx:eax = xh
 	div	ebx		! eax = qh, edx = rh
 	mov	ecx,eax
 	mov	eax,xl(esp)
 	div	ebx		! eax = ql, edx = remainder
 	mov	ebx,edx
 	mov	edx,ecx		! edx:eax = quotient qh:ql
 	xor	ecx,ecx		! ecx:ebx = remainder
 9:	! Finally, if di != 0 then negate quotient, remainder.
 	and	di,di
 	jz	1f
 	neg	edx
 	neg	eax
 	sbb	edx,0		! negate quotient edx:eax
 	neg	ecx
 	neg	ebx
-	sbb	ecx,0		! negate remainder ecx:ebx
+	neg	eax
-1:	pop	edi		! caller's edi
+	sbb	ebx,0		! negate quotient ebx:eax
-	ret	16
+1:	ret	16
 8:	! We come here if y >= 2**32.
 	mov	xh(esp),eax
 	mov	yh(esp),edx
 	mov	ebx,yl(esp)	! edx:ebx = y
 	! Estimate x / y as q = (x / (y >> cl)) >> cl,
 	! where 2**31 <= (y >> cl) < 2**32.
 	xor	cx,cx
 1:	inc	cx
 	shr	edx,1
 	rcr	ebx,1		! edx:ebx = y >> cl
 	and	edx,edx
 	jnz	1b		! loop until y >> cl fits in ebx
 	! x / (y >> cl) = qh + (x + rh) / (y >> cl)
 	push	edi
 	xor	edx,edx		! edx:eax = xh
 	div	ebx		! eax = qh, edx = rh
 	mov	edi,eax
 	mov	eax,xl+4(esp)	! push edi moved xl to xl+4
 	div	ebx		! edi:eax = x / (y >> cl)
 	! q = (x / (y >> cl)) >> cl = esi:eax >> cl
 	shr	eax,cl
 	neg	cx		! cl = (32 - cl) modulo 32
 	shl	edi,cl
 	or	eax,edi		! eax = q
 	! Calculate the remainder x - q * y.  If the subtraction
 	! overflows, then the correct quotient is q - 1, else it is q.
 	mov	ecx,yh+4(esp)
 	imul	ecx,eax		! ecx = q * yh
 	mov	edi,eax
 	mul	yl+4(esp)	! edx:eax = q * yl
 	add	edx,ecx		! edx:eax = q * y
 	mov	ebx,xl+4(esp)
 	mov	ecx,xh+4(esp)	! ecx:ebx = x
 	sub	ebx,eax
 	sbb	ecx,edx		! ecx:ebx = remainder
 	jnc	1f
 	dec	edi		! fix quotient
 	add	ebx,yl+4(esp)
 	adc	ebx,yh+4(esp)	! fix remainder
 1:	mov	eax,edi
 	xor	edx,edx		! edx:eax = quotient
 	pop	edi		! negative flag
 	jmp	9b
--- a/mach/i386/libem/dvu8.s
+++ b/mach/i386/libem/dvu8.s
@ -0,0 +1,20 @@
 .sect .text; .sect .rom; .sect .data; .sect .bss
 .sect .text
 .define .dvu8, .rmu8
 	! Offsets of the operands from esp at entry.
 yl=4
 yh=8
 xl=12
 xh=16
 	! Unsigned 8-byte division of x = xh:xl by y = yh:yl.
 	! .dvu8 and .rmu8 are one entry point, because .divrem8
 	! yields both results: the caller of .dvu8 takes the
 	! quotient from ebx:eax, the caller of .rmu8 takes the
 	! remainder from ecx:edx.
 .dvu8:
 .rmu8:
 	mov	eax,xh(esp)	! eax = xh for .divrem8
 	mov	edx,yh(esp)	! edx = yh for .divrem8
 	test	edx,edx		! flag z for .divrem8
 	! Push one word so x and y sit at the offsets .divrem8
 	! expects.  Neither push nor call changes flag z.
 	push	ebp
 	call	.divrem8
 	pop	ebp		! drop the extra word, ebp as before
 	ret	16		! pop x and y
--- a/mach/i386/libem/rmi8.s
+++ b/mach/i386/libem/rmi8.s
@ -0,0 +1,36 @@
 .sect .text; .sect .rom; .sect .data; .sect .bss
 .sect .text
 .define .rmi8
 	! Offsets of the operands from esp at entry.
 yl=4
 yh=8
 xl=12
 xh=16
 	! .rmi8 yields ecx:edx = signed remainder from x / y,
 	! where x = xh:xl and y = yh:yl.  It divides the absolute
 	! values with .divrem8, then gives the remainder the same
 	! sign as x.  The sign of y has no effect on the result.
 .rmi8:
 	xorb	cl,cl		! cl = 0: keep remainder as is
 	mov	eax,xh(esp)	! eax = xh for .divrem8
 	test	eax,eax
 	jns	1f		! skip if x >= 0
 	incb	cl		! cl = 1: negate remainder later
 	neg	eax
 	neg	xl(esp)		! carry set if xl was not 0
 	sbb	eax,0		! eax:xl = -x
 	mov	xh(esp),eax	! x on stack is now |x|
 1:	mov	edx,yh(esp)	! edx = yh for .divrem8
 	test	edx,edx		! sets flag z for .divrem8 if y >= 0
 	jns	2f		! skip if y >= 0
 	neg	edx
 	neg	yl(esp)		! carry set if yl was not 0
 	sbb	edx,0		! edx:yl = -y, sets flag z for .divrem8
 	mov	yh(esp),edx	! y on stack is now |y|
 2:	push	ecx		! save sign in cl across the call
 	call	.divrem8
 	pop	eax		! al = saved sign, quotient not needed
 	testb	al,al
 	jz	3f		! done if x was not negative
 	neg	ecx
 	neg	edx
 	sbb	ecx,0		! ecx:edx = -remainder
 3:	ret	16		! pop x and y
--- a/mach/i386/ncg/table
+++ b/mach/i386/ncg/table
@ -1038,7 +1038,7 @@ with noacc ACC
 pat dvi $1==8
  kills ALL
-  gen proccall {label,".dvi8"}	yields edx eax
+  gen proccall {label,".dvi8"}	yields ebx eax
 /*
 pat dvi !defined($1)
@ -1055,7 +1055,7 @@ with noacc ACC
 pat rmi $1==8
  kills ALL
-  gen proccall {label,".dvi8"}	yields ecx ebx
+  gen proccall {label,".rmi8"}	yields ecx edx
 /*
 pat rmi !defined($1)
@ -1202,7 +1202,7 @@ gen div %1			yields eax
 pat dvu $1==8
  kills ALL
-  gen proccall {label,".dvu8"}	yields edx eax
+  gen proccall {label,".dvu8"}	yields ebx eax
 /*
 pat dvu !defined($1)
@ -1218,7 +1218,7 @@ gen div %1			yields edx
 pat rmu $1==8
  kills ALL
-  gen proccall {label,".dvu8"}	yields ecx ebx
+  gen proccall {label,".rmu8"}	yields ecx edx
 /*
 pat rmu !defined($1)
--- a/tests/plat/build.lua
+++ b/tests/plat/build.lua
@ -4,7 +4,7 @@ definerule("plat_testsuite",
 	{
 		plat = { type="string" },
 		method = { type="string" },
-		-- added long-long/llbitset_e.c
+		-- added long-long/lldivrem_e.c
 		sets = { type="table", default={"core", "b", "bugs", "m2", "floats", "long-long"}},
 		skipsets = { type="table", default={}},
 		tests = { type="targets", default={} },
--- a/tests/plat/long-long/lldivrem_e.c
+++ b/tests/plat/long-long/lldivrem_e.c
@ -0,0 +1,71 @@
 #include "test.h"
 /*
 * Test division and remainder.  Failure code will look like
 *  - 0x3d = id 0x3, 'd' for division
 *  - 0x3e = id 0x3, 'e' for remainder
 */
 /*
  * Signed cases.  Each row gives the operands a, b and the expected
  * results of a / b and a % b.  The expected values truncate the
  * quotient toward zero and give the remainder the sign of a; compare
  * cases 0x2 and 0x3.  The id goes into the failure code.
  */
 struct s_divrem {
 	unsigned int id;
 	long long a;
 	long long b;
 	long long a_div_b; /* a / b */
 	long long a_rem_b; /* a % b */
 } s_cases[] = {
 	/* Cases 0x1 to 0x4: both operands fit in 32 bits. */
 	{0x1,  310LL,  100LL,  3LL,  10LL},
 	{0x2,  310LL, -100LL, -3LL,  10LL},
 	{0x3, -310LL,  100LL, -3LL, -10LL},
 	{0x4, -310LL, -100LL,  3LL, -10LL},
 	/* Cases 0x5 to 0x8: only a needs more than 32 bits. */
 	{0x5,  3000000000000010LL,  100LL,  30000000000000LL,  10LL},
 	{0x6,  3000000000000010LL, -100LL, -30000000000000LL,  10LL},
 	{0x7, -3000000000000010LL,  100LL, -30000000000000LL, -10LL},
 	{0x8, -3000000000000010LL, -100LL,  30000000000000LL, -10LL},
 	/* Cases 0x9 to 0xc: both a and b need more than 32 bits. */
 	{0x9,  3000000000000010LL,  1000000000000LL,  3000LL,  10LL},
 	{0xa,  3000000000000010LL, -1000000000000LL, -3000LL,  10LL},
 	{0xb, -3000000000000010LL,  1000000000000LL, -3000LL, -10LL},
 	{0xc, -3000000000000010LL, -1000000000000LL,  3000LL, -10LL},
 	/*
 	 * In next 3 cases, i386 tries (a / (b >> 13)) >> 13 = 8,
 	 * may need to correct the quotient from 8 to 7.
 	 */
 	{0x11, 0x864200000000LL, 0x10c840000000LL, 8LL, 0LL},
 	{0x12, 0x864200000000LL, 0x10c840000001LL, 7LL, 0x10c83ffffff9LL},
 	{0x13, 0x864200000000LL, 0x10c840001fffLL, 7LL, 0x10c83fff2007LL},
 };
 /*
  * Unsigned cases, in the same layout as the signed cases: operands
  * a, b and the expected results of a / b and a % b.  The id goes
  * into the failure code.
  */
 struct u_divrem {
 	unsigned int id;
 	unsigned long long a;
 	unsigned long long b;
 	unsigned long long a_div_b;
 	unsigned long long a_rem_b;
 } u_cases[] = {
 	/* Cases 0x81 to 0x83 repeat the operands of signed cases 0x1, 0x5, 0x9. */
 	{0x81, 310ULL, 100ULL, 3ULL, 10ULL},
 	{0x82, 3000000000000010ULL, 100ULL, 30000000000000ULL, 10ULL},
 	{0x83, 3000000000000010ULL, 1000000000000ULL, 3000ULL, 10ULL},
 	/* Cases 0x91, 0x92: a has its highest bit set, so a signed division would give different results. */
 	{0x91, 0x8000000000000000ULL, 3ULL, 0x2aaaaaaaaaaaaaaaULL, 2ULL},
 	{0x92, 0xffffffffffffffffULL, 3ULL, 0x5555555555555555ULL, 0ULL},
 };
 #define LEN(ary) (sizeof(ary) / sizeof(ary[0]))
 void _m_a_i_n(void) {
 	int i;
 	for (i = 0; i < LEN(s_cases); i++) {
 		struct s_divrem *s = &s_cases[i];
 		if (s->a / s->b != s->a_div_b)
 			fail((s->id << 4) | 0xd);
 		if (s->a % s->b != s->a_rem_b)
 			fail((s->id << 4) | 0xe);
 	}
 	for (i = 0; i < LEN(u_cases); i++) {
 		struct u_divrem *u = &u_cases[i];
 		if (u->a / u->b != u->a_div_b)
 			fail((u->id << 4) | 0xd);
 		if (u->a % u->b != u->a_rem_b)
 			fail((u->id << 4) | 0xe);
 	}
 	finished();
 }