ack/mach/m68020/libem/divrem8.s

.define .divrem8
.sect .text
.sect .rom
.sect .data
.sect .bss

! Byte offsets from sp, as used inside .divrem8, of the 32-bit halves
! of the two 64-bit operands (h = high half, l = low half).
! NOTE(review): the offsets presumably skip the return addresses and
! whatever the callers pushed before calling -- confirm against
! .dvi8, .dvu8, .rmi8 and .rmu8.
yh=16
yl=20
xh=24
xl=28
	! This private sub for .dvi8, .dvu8, .rmi8, .rmu8
	! does unsigned division of x = xh:xl by y = yh:yl,
	! yields d0:d1 = quotient, d2:d3 = remainder.
	! xh and yh are passed in d0 and d1; xl and yl are read
	! from the stack through the offsets above.

.sect .text
.divrem8:
	! Unsigned 64-bit division of x = xh:xl by y = yh:yl.
	!
	! In:   d0 = xh and d1 = yh; the caller must set them like so:
	!         move.l (xh, sp), d0
	!         move.l (yh, sp), d1
	!       xl and yl are read from the stack.
	! Out:  d0:d1 = quotient, d2:d3 = remainder.
	! Uses: a0, a1, a2 as scratch in the y >= 2**32 path;
	!       d7 is used there too, but saved and restored.
	tst.l	d1
	bne	1f		! branch if y >= 2**32

	! y = yl, so x / y = xh:xl / yl = qh:0 + rh:xl / yl
	! where qh, rh are quotient, remainder from xh / yl.
	move.l	(xl, sp), d1
	move.l	(yl, sp), d2
	clr.l	d3		! d3:d0 = xh
	divu.l	d2, d3:d0	! d0 =  0:xh / yl, d3 = rh
	divu.l	d2, d3:d1	! d1 = rh:xl / yl, so d0:d1 = x / y
	clr.l	d2		! remainder in d2:d3
	rts

1:	! Here y >= 2**32.
	move.l	d0, a0		! save xh
	move.l	d1, a1		! save yh
	move.l	d7, a2		! save caller's d7

	! Find y >> right in [2**31, 2**32).
	move.l	(yl, sp), d2
	bfffo	d1[0:32], d3	! left = leading zero bits in yh, 0..31
	lsl.l	d3, d1		! shift yh left
	bset	#5, d3		! left + 32 (bit 5 was clear)
	neg.l	d3		! right = (32 - left) modulo 64, 1..32
	lsr.l	d3, d2		! shift yl right
	or.l	d1, d2		! d2 = y >> right

	! Estimate x / y as q = (x / (y >> right)) >> right.
	move.l	(xl, sp), d1
	clr.l	d7
	divu.l	d2, d7:d0
	divu.l	d2, d7:d1	! d0:d1 = x / (y >> right)
	lsr.l	d3, d1
	! Turn the count back into left = (32 - right) modulo 64.
	! Bit 5 must be toggled, not set: when left = 0 the count is
	! 32, bit 5 is already set, and setting it again would end
	! in a shift by 32 that throws away d0 instead of keeping it.
	bchg	#5, d3
	neg.l	d3
	lsl.l	d3, d0
	or.l	d0, d1		! d1 = q

	! Calculate the remainder x - y * q.  The estimate q is
	! either correct or one too large.
	move.l	a1, d3		! yh
	mulu.l	d1, d3		! yh * q
	move.l	(yl, sp), d7
	mulu.l	d1, d0:d7	! yl * q
	add.l	d3, d0		! d0:d7 = y * q modulo 2**64
	move.l	(xl, sp), d3
	move.l	a0, d2		! d2:d3 = x
	sub.l	d7, d3
	subx.l	d0, d2		! d2:d3 = x - y * q modulo 2**64
	bcs	2f		! borrow, so q is one too large

	! No borrow.  If q is one too large, then y * q wrapped
	! past 2**64 and d2:d3 >= 2**64 - y > y.  A correct q
	! always leaves d2:d3 < y, so compare d2:d3 with y.
	cmp.l	a1, d2
	bhi	2f		! high half above yh, so q too large
	bcs	3f		! high half below yh, so q is correct
	cmp.l	(yl, sp), d3
	bcs	3f		! low half below yl, so q is correct

2:	sub.l	#1, d1		! fix quotient
	move.l	a1, d7		! yh
	add.l	(yl, sp), d3
	addx.l	d7, d2		! fix remainder
3:	clr.l	d0		! d0:d1 = quotient
	move.l	a2, d7		! restore caller's d7
	rts
Add 8-byte long long for linux68k. Add rules for 8-byte integers to m68020 ncg. Add 8-byte long long to ACK C on linux68k. Enable long-long tests for linux68k. The tests pass in our emulator using Musashi; I don't have a real 68k processor and haven't tried other emulators. Still missing are conversions between 8-byte integers and any size of floats. The long-long tests don't cover these conversions, and our emulator can't do floating-point. Our build always enables TBL68020 and uses word size 4. Without TBL68020, 8-byte multiply and divide are missing. With word size 2, some conversions between 2-byte and 8-byte integers are missing. Fix .cii in libem, which didn't work when converting from 1-byte or 2-byte integers. Now .cii and .cuu work, but also add some rules to skip .cii and .cuu when converting 8-byte integers. The new rule for loc 4 loc 8 cii `with test_set4` exposes a bug: the table may believe that the condition codes test a 4-byte register when they only test a word or byte, and this incorrect test may describe an unsigned word or byte as negative. Another rule `with exact test_set1+test_set2` works around the bug by ignoring the negative flag, because a zero-extended word or byte is never negative. The old rules for comparison and logic do work with 8-byte integers and bitsets, but add some specific 8-byte rules to skip libem calls or loops. There were no rules for 8-byte arithmetic, shift, or rotate; so add some. There is a register shortage, because the table requires preserving d3 to d7, leaving only 3 data registers (d0, d1, d2) for 8-byte operations. Because of the shortage, the code may move data to an address register, or read a memory location more than once. The multiplication and division code are translations of the i386 code. They pass the tests, but might not give the best performance on a real 68k processor. 2019-09-24 17:32:17 +00:00			`.define .divrem8`
			`.sect .text`
			`.sect .rom`
			`.sect .data`
			`.sect .bss`

			`yh=16`
			`yl=20`
			`xh=24`
			`xl=28`
			`! This private sub for .dvi8, .dvu8, .rmi8, .rmu8`
			`! does unsigned division of x = xh:xl by y = yh:yl,`
			`! yields d0:d1 = quotient, d2:d3 = remainder.`

			`.sect .text`
			`.divrem8:`
			`! Caller must set d0, d1 like so:`
			`! mov.l (xh, sp), d0`
			`! mov.l (yh, sp), d1`
			`tst.l d1`
			`bne 1f ! branch if y >= 2**32`

			`! y = yl, so x / y = xh:xl / yl = qh:0 + rh:xl / yl`
			`! where qh, rh are quotient, remainder from xh / yl.`
			`move.l (xl, sp), d1`
			`move.l (yl, sp), d2`
			`clr.l d3 ! d3:d0 = xh`
			`divu.l d2, d3:d0 ! d0 = 0:xh / yl, d3 = rh`
			`divu.l d2, d3:d1 ! d1 = rh:xl / yl, so d0:d1 = x / y`
			`clr.l d2 ! remainder in d2:d3`
			`rts`

			`1: ! Here y >= 2**32.`
			`move.l d0, a0 ! save xh`
			`move.l d1, a1 ! save yh`
			`move.l d7, a2 ! save caller's d7`

			`! Find y >> right in [2**31, 2**32).`
			`move.l (yl, sp), d2`
			`bfffo d1[0:32], d3 ! find highest set bit in yh`
			`lsl.l d3, d1 ! shift yh left`
			`bset #5, d3`
			`neg.l d3 ! right = (32 - left) modulo 64`
			`lsr.l d3, d2 ! shift yl right`
			`or.l d1, d2 ! d2 = y >> right`

			`! Estimate x / y as q = (x / (y >> right)) >> right.`
			`move.l (xl, sp), d1`
			`clr.l d7`
			`divu.l d2, d7:d0`
			`divu.l d2, d7:d1 ! d0:d1 = x / (y >> right)`
			`lsr.l d3, d1`
			`bset #5, d3`
			`neg.l d3`
			`lsl.l d3, d0`
			`or.l d0, d1 ! d1 = q`

			`! Calculate the remainder x - y * q. If the subtraction`
			`! overflows, then the correct quotient is q - 1, else it is q.`
			`move.l a1, d3 ! yh`
			`mulu.l d1, d3 ! yh * q`
			`move.l (yl, sp), d7`
			`mulu.l d1, d0:d7 ! yl * q`
			`add.l d3, d0 ! d0:d7 = y * q`
			`move.l (xl, sp), d3`
			`move.l a0, d2 ! d2:d3 = x`
			`sub.l d7, d3`
			`subx.l d0, d2 ! d2:d3 = x - y * q`
			`bcc 1f ! branch unless subtraction overflowed`
			`sub.l #1, d1 ! fix quotient`
			`move.l a1, d7 ! yh`
			`add.l (yl, sp), d3`
			`addx.l d7, d2 ! fix remainder`
			`1: clr.l d0 ! d0:d1 = quotient`
			`move.l a2, d7 ! restore caller's d7`
			`rts`