Add 8-byte long long for linux68k.

Add rules for 8-byte integers to m68020 ncg. Add 8-byte long long to ACK C on linux68k. Enable long-long tests for linux68k. The tests pass in our emulator using Musashi; I don't have a real 68k processor and haven't tried other emulators. Still missing are conversions between 8-byte integers and any size of floats. The long-long tests don't cover these conversions, and our emulator can't do floating-point. Our build always enables TBL68020 and uses word size 4. Without TBL68020, 8-byte multiply and divide are missing. With word size 2, some conversions between 2-byte and 8-byte integers are missing. Fix .cii in libem, which didn't work when converting from 1-byte or 2-byte integers. Now .cii and .cuu work, but also add some rules to skip .cii and .cuu when converting 8-byte integers. The new rule for loc 4 loc 8 cii `with test_set4` exposes a bug: the table may believe that the condition codes test a 4-byte register when they only test a word or byte, and this incorrect test may describe an unsigned word or byte as negative. Another rule `with exact test_set1+test_set2` works around the bug by ignoring the negative flag, because a zero-extended word or byte is never negative. The old rules for comparison and logic do work with 8-byte integers and bitsets, but add some specific 8-byte rules to skip libem calls or loops. There were no rules for 8-byte arithmetic, shift, or rotate; so add some. There is a register shortage, because the table requires preserving d3 to d7, leaving only 3 data registers (d0, d1, d2) for 8-byte operations. Because of the shortage, the code may move data to an address register, or read a memory location more than once. The multiplication and division routines are translations of the i386 code. They pass the tests, but might not give the best performance on a real 68k processor.
2019-09-24 13:32:17 -04:00 · 2019-09-24 13:32:17 -04:00 · e867861f6d
commit e867861f6d
parent fd27acb487
10 changed files with 608 additions and 12 deletions
--- a/mach/m68020/libem/build.lua
+++ b/mach/m68020/libem/build.lua
@ -2,7 +2,7 @@ for _, plat in ipairs(vars.plats) do
 	acklibrary {
 		name = "lib_"..plat,
 		srcs = {
-			"./*.s",
+			"./*.s", -- added divrem8.s
 			"./*.c"
 		},
 		vars = { plat = plat },
--- a/mach/m68020/libem/cii.s
+++ b/mach/m68020/libem/cii.s
@ -13,12 +13,19 @@
 	sub.l	d0, sp		! pop extra bytes
 	jmp	(a0)
 1:
-	clr.l	d1
+	move.l	(sp), d1
-	tst.l	(sp)
+	lsr.l	#1, d0
-	bne	4f
+	bcs	1f		! branch if source size == 1
-	not.l	d1		! d1 contains sign of source
+	lsr.l	#1, d0
-4:
+	bcs	2f		! branch if source size == 2
-	asr.l	#2, d0
+	tst.l	d1
 	bra	4f
 1:	lsr.l	#1, d0		! size difference / 4
 	ext.w	d1
 2:	ext.l	d1
 	move.l	d1, (sp)
 4:	slt	d1
 	extb.l	d1		! d1 contains sign of source
 	sub.l	#1, d0
 2:
 	move.l	d1, -(sp)
--- a/mach/m68020/libem/divrem8.s
+++ b/mach/m68020/libem/divrem8.s
@ -0,0 +1,76 @@
 .define .divrem8
 .sect .text
 .sect .rom
 .sect .data
 .sect .bss
 yh=16
 yl=20
 xh=24
 xl=28
 	! This private sub for .dvi8, .dvu8, .rmi8, .rmu8
 	! does unsigned division of x = xh:xl by y = yh:yl,
 	! yields d0:d1 = quotient, d2:d3 = remainder.
 	! The offsets above are relative to sp on entry here; callers
 	! push two longs plus the return address before their operands.
 .sect .text
 .divrem8:
 	! Caller must set d0, d1 like so:
 	! mov.l (xh, sp), d0
 	! mov.l (yh, sp), d1
 	tst.l	d1
 	bne	1f		! branch if y >= 2**32
 	! y = yl, so x / y = xh:xl / yl = qh:0 + rh:xl / yl
 	! where qh, rh are quotient, remainder from xh / yl.
 	move.l	(xl, sp), d1
 	move.l	(yl, sp), d2
 	clr.l	d3		! d3:d0 = xh
 	divu.l	d2, d3:d0	! d0 =  0:xh / yl, d3 = rh
 	divu.l	d2, d3:d1	! d1 = rh:xl / yl, so d0:d1 = x / y
 	clr.l	d2		! remainder in d2:d3
 	rts
 1:	! Here y >= 2**32.
 	move.l	d0, a0		! save xh
 	move.l	d1, a1		! save yh
 	move.l	d7, a2		! save caller's d7
 	! Find y >> right in [2**31, 2**32).
 	move.l	(yl, sp), d2
 	bfffo	d1[0:32], d3	! find highest set bit in yh
 	lsl.l	d3, d1		! shift yh left
 	bset	#5, d3
 	neg.l	d3		! right = (32 - left) modulo 64
 	lsr.l	d3, d2		! shift yl right
 	or.l	d1, d2		! d2 = y >> right
 	! Estimate x / y as q = (x / (y >> right)) >> right.
 	move.l	(xl, sp), d1
 	clr.l	d7
 	divu.l	d2, d7:d0
 	divu.l	d2, d7:d1	! d0:d1 = x / (y >> right)
 	lsr.l	d3, d1
 	! Recover left = (32 - right) modulo 64 by flipping bit 5.
 	! When left = 0 (right = 32), bit 5 of d3 is already set, so
 	! it must be cleared to get a shift count of 0; bset here
 	! would give a count of 32 and throw away d0.
 	bchg	#5, d3
 	neg.l	d3
 	lsl.l	d3, d0
 	or.l	d0, d1		! d1 = q
 	! Calculate the remainder x - y * q.  If the subtraction
 	! overflows, then the correct quotient is q - 1, else it is q.
 	move.l	a1, d3		! yh
 	mulu.l	d1, d3		! yh * q
 	move.l	(yl, sp), d7
 	mulu.l	d1, d0:d7	! yl * q
 	add.l	d3, d0		! d0:d7 = y * q
 	move.l	(xl, sp), d3
 	move.l	a0, d2		! d2:d3 = x
 	sub.l	d7, d3
 	subx.l	d0, d2		! d2:d3 = x - y * q
 	bcc	1f		! branch unless subtraction overflowed
 	sub.l	#1, d1		! fix quotient
 	move.l	a1, d7		! yh
 	add.l	(yl, sp), d3
 	addx.l	d7, d2		! fix remainder
 1:	clr.l	d0		! d0:d1 = quotient
 	move.l	a2, d7		! restore caller's d7
 	rts
--- a/mach/m68020/libem/dvi8.s
+++ b/mach/m68020/libem/dvi8.s
@ -0,0 +1,34 @@
 .define .dvi8
 .sect .text
 .sect .rom
 .sect .data
 .sect .bss
 yh=8
 yl=12
 xh=16
 xl=20
 	! .dvi8 yields d0:d1 = quotient from x / y
 .sect .text
 .dvi8:
 	move.l	d3, -(sp)	! preserve caller's d3
 	clr.l	d2		! d2 = 0, non-negative result
 	move.l	(xh, sp), d0	! d0 for .divrem8
 	bge	1f
 	move.l	#1, d2		! d2 = 1, negative result
 	neg.l	(xl, sp)
 	negx.l	d0		! x = absolute value
 1:	move.l	(yh, sp), d1	! d1 for .divrem8
 	bge	1f
 	bchg	#0, d2		! flip sign of result
 	neg.l	(yl, sp)
 	negx.l	d1		! y = absolute value
 1:	move.l	d2, -(sp)
 	jsr	(.divrem8)
 	move.l	(sp)+, d2
 	beq	1f		! branch unless result < 0
 	neg.l	d1
 	negx.l	d0		! negate quotient d0:d1
 1:	move.l	(sp)+, d3
 	rtd	#16
--- a/mach/m68020/libem/dvu8.s
+++ b/mach/m68020/libem/dvu8.s
@ -0,0 +1,20 @@
 .define .dvu8
 .sect .text
 .sect .rom
 .sect .data
 .sect .bss
 yh=8
 xh=16
 	! .dvu8 yields d0:d1 = quotient from x / y
 .sect .text
 .dvu8:
 	move.l	d3, -(sp)	! preserve caller's d3
 	move.l	(xh, sp), d0
 	move.l	(yh, sp), d1
 	sub.l	#4, sp
 	jsr	(.divrem8)
 	add.l	#4, sp
 	move.l	(sp)+, d3
 	rtd	#16
--- a/mach/m68020/libem/rmi8.s
+++ b/mach/m68020/libem/rmi8.s
@ -0,0 +1,35 @@
 .define .rmi8
 .sect .text
 .sect .rom
 .sect .data
 .sect .bss
 yh=8
 yl=12
 xh=16
 xl=20
 	! .rmi8 yields d0:d1 = remainder from x / y
 	! Offsets are relative to sp after d3 has been pushed below.
 .sect .text
 .rmi8:
 	move.l	d3, -(sp)	! preserve caller's d3
 	clr.l	d2		! d2 = 0, non-negative result
 	move.l	(xh, sp), d0	! d0 for .divrem8
 	bge	1f
 	move.l	#1, d2		! d2 = 1, negative result
 	neg.l	(xl, sp)
 	negx.l	d0		! x = absolute value
 1:	move.l	(yh, sp), d1	! d1 for .divrem8
 	bge	1f
 	! The sign flag in d2 is not flipped here: only the sign
 	! of x decides whether the remainder is negated below.
 	neg.l	(yl, sp)
 	negx.l	d1		! y = absolute value
 1:	move.l	d2, -(sp)
 	jsr	(.divrem8)
 	move.l	(sp)+, d0
 	beq	1f		! branch unless result < 0
 	neg.l	d3
 	negx.l	d2		! negate remainder d2:d3
 1:	move.l	d3, d1
 	move.l	d2, d0
 	move.l	(sp)+, d3
 	rtd	#16
--- a/mach/m68020/libem/rmu8.s
+++ b/mach/m68020/libem/rmu8.s
@ -0,0 +1,22 @@
 .define .rmu8
 .sect .text
 .sect .rom
 .sect .data
 .sect .bss
 yh=8
 xh=16
 	! .rmu8 yields d0:d1 = remainder from x / y
 .sect .text
 .rmu8:
 	move.l	d3, -(sp)	! preserve caller's d3
 	move.l	(xh, sp), d0
 	move.l	(yh, sp), d1
 	sub.l	#4, sp
 	jsr	(.divrem8)
 	add.l	#4, sp
 	move.l	d3, d1
 	move.l	d2, d0
 	move.l	(sp)+, d3
 	rtd	#16
--- a/mach/m68020/ncg/table
+++ b/mach/m68020/ncg/table
@ -612,6 +612,7 @@ add_l  "add.l"	conreg4:ro,	alterable4:rw:cc	cost(2,6).
 add_w  "add.w"	any2:ro,	D_REG+LOCAL:rw:cc	cost(2,3).
 add_w  "add.w"	conreg2:ro,	alterable2:rw:cc	cost(2,6).
 #endif
 addx_l "addx.l" D_REG4:ro,	D_REG4:rw kills :cc	cost(2,3).
 and_l  "and.l"	data4:ro,	D_REG4:rw:cc		cost(2,3).
 and_l  "and.l"	D_REG4:ro,	memalt4:rw:cc		cost(2,6).
 and_l  "and.l"	consts4:ro,	datalt4:rw:cc		cost(2,6).
@ -628,6 +629,7 @@ asr   "asr #1,"	memalt2:rw:cc				cost(2,4).
 asl_w  "asl.w"	shconreg:ro,	D_REG:rw:cc		cost(2,5).
 asr_w  "asr.w"	shconreg:ro,	D_REG:rw:cc		cost(2,4).
 #endif
 bchg		const:ro,	D_REG:rw kills:cc	cost(2,4).
 bclr		const:ro,	D_REG:rw kills:cc	cost(2,4).
 bra		label					cost(2,5).
 bcc		label					cost(2,5).
@ -671,14 +673,15 @@ eor_l  "eor.l"	conreg4:ro,	datalt4:rw:cc		cost(2,6).
 #if WORD_SIZE==2
 eor_w  "eor.w"	conreg2:ro,	datalt2:rw:cc		cost(2,4).
 #endif
 exg		genreg4:rw,	genreg4:rw		cost(2,3).
 /* in the next two instructions: LOCAL only allowed if register var */
 ext_l  "ext.l"	D_REG+LOCAL+D_REG4:rw:cc		cost(2,2).
 ext_w  "ext.w"	D_REG+LOCAL+D_REG4:rw:cc		cost(2,2).
 jmp		address+control4			cost(2,0).
 jsr		address+control4 kills :cc d0 d1 d2 a0 a1 cost(2,3).
 lea		address+control4:ro, A_REG+areg:wo	cost(2,0).
 lsl_l  "lsl.l"	shconreg:ro,	D_REG4:rw:cc		cost(2,4).
 /*
 lsl_l  "lsl.l"	shconreg:ro,	D_REG:rw:cc		cost(2,4).
 lsl   "lsl #1,"	memalt2:rw:cc				cost(2,4).
 */
 lsr_l  "lsr.l"	shconreg:ro,	D_REG4:rw:cc		cost(2,4).
@ -709,6 +712,8 @@ neg_l  "neg.l"	memory4:rw:cc				cost(2,6).
 #if WORD_SIZE==2
 neg_w  "neg.w"	memory2:rw:cc				cost(2,6).
 #endif
 negx_l "negx.l" D_REG4:rw:cc				cost(2,3).
 negx_l "negx.l" memory4:rw:cc				cost(2,6).
 not_l  "not.l"	D_REG4:rw:cc				cost(2,3).
 not_l  "not.l"	memory4:rw:cc				cost(2,6).
 #if WORD_SIZE==2
@ -733,6 +738,7 @@ ror_w  "ror.w"	shconreg:ro,	D_REG:rw:cc		cost(2,4).
 #endif
 roxl "roxl #1,"	memalt2:rw:cc				cost(2,4).
 roxr "roxr #1,"	memalt2:rw:cc				cost(2,4).
 slt		datalt1:rw				cost(2,3).
 sne		datalt1:rw				cost(2,3).
 sub_l  "sub.l"	any4:ro,	D_REG4:rw:cc		cost(2,3).
 sub_l  "sub.l"	any4+areg:ro,	A_REG+areg:rw		cost(2,3).
@ -740,6 +746,9 @@ sub_l  "sub.l"	conreg4:ro,	alterable4:rw:cc	cost(2,6).
 #if WORD_SIZE==2
 sub_w  "sub.w"	any2:ro,	D_REG+LOCAL:rw:cc	cost(2,3).
 sub_w  "sub.w"	conreg2:ro,	alterable2:rw:cc	cost(2,6).
 #endif
 subx_l "subx.l" D_REG4:ro,	D_REG4:rw kills :cc	cost(2,3).
 #if WORD_SIZE==2
 /* On a swap, we only want the lower part of D_REG, so don't set cc */
 swap		D_REG:rw kills :cc			cost(2,2).
 #endif
@ -773,6 +782,7 @@ divs_l "divs.l" data4:ro,	D_REG4:rw:cc		cost(2,90).
 divu_l "divu.l" data4:ro,	D_REG4:rw:cc		cost(2,78).
 divsl_l "divsl.l" data4:ro,	DREG_pair:rw kills :cc	cost(2,90).
 divul_l "divul.l" data4:ro,	DREG_pair:rw kills :cc	cost(2,78).
 mulu_l "mulu.l" data4:ro,	DREG_pair:rw kills :cc	cost(2,44).
 pea		address+control4+regX			cost(2,4).
 #if WORD_SIZE==2
 cmp2_w "cmp2.w" address+control2:ro, genreg2:ro kills :cc cost(2,18).
@ -3796,6 +3806,18 @@ with exact any4 STACK
    gen add_l {post_inc4, sp}, %a
 			yields	%a
 pat adi $1==8
 with exact any4 any4 DD_REG4 DD_REG4
    uses reusing %1, DD_REG4 = %1
    gen add_l %2, %4
 	addx_l %a, %3	yields	%4 %3
 with DD_REG4 DD_REG4 D_REG4 any4
    gen add_l %4, %2
 	addx_l %3, %1	yields	%2 %1
 with DD_REG4 DD_REG4 D_REG4 STACK
    gen add_l {post_inc4, sp}, %2
 	addx_l %3, %1	yields	%2 %1
 #if WORD_SIZE==2
 pat sbi $1==2
 with any2-bconst DD_REG
@ -3822,6 +3844,12 @@ with exact any4 STACK
 with any4-bconst4 AA_REG
    gen sub_l %1, %2	yields	%2
 pat sbi $1==8
 with D_REG4 any4-D_REG4 DD_REG4 DD_REG4
    /* only 3 of DD_REG4; may unstack %2 into AA_REG */
    gen sub_l %2, %4
        subx_l %1, %3	yields	%4 %3
 #if WORD_SIZE==2
 pat loc loc cii ldc mli $1==2 && $2==4 && highw($4)==0 && loww($4)>0 && $5==4
 with any2-pre_post
@ -3847,6 +3875,34 @@ with STACK
 			yields	dl1
 #endif
 #ifdef TBL68020
 pat mli $1==8
 /* 8-byte multiply, low 64 bits of the product:
  *   low(x*y) = xl*yl + ((xl*yh + xh*yl) << 32)
  * Each rule needs one extra destroyable 4-byte data register (%a)
  * for one of the cross products.
  */
 with exact data4 data4 DD_REG4 DD_REG4	/* yh yl xh xl */
    uses DD_REG4 = %4
    gen mulu_l %1, %a			/* xl * yh */
 	mulu_l %2, %3			/* xh * yl */
 	add_l %3, %a
 	mulu_l %2, {DREG_pair, %3, %4}	/* xl * yl */
 	add_l %a, %3
 			yields	%4 %3
 with DD_REG4 DD_REG4 data4 data4	/* yh yl xh xl */
    /* %a holds the 4-byte yl, so it must be a DD_REG4 like the
     * scratch register in the other two rules.
     */
    uses DD_REG4 = %2
    gen mulu_l %3, %a			/* yl * xh */
 	mulu_l %4, %1			/* yh * xl */
 	add_l %1, %a
 	mulu_l %4, {DREG_pair, %1, %2}	/* yl * xl */
 	add_l %a, %1
 			yields	%2 %1
 with DD_REG4 DD_REG4 STACK		/* yh yl xh xl */
    uses DD_REG4 = %2
    gen mulu_l {post_inc4, sp}, %a	/* yl * xh */
 	mulu_l {indirect4, sp}, %1	/* yh * xl */
 	add_l %1, %a
 	mulu_l {post_inc4, sp}, {DREG_pair, %1, %2} /* yl * xl */
 	add_l %a, %1
 			yields	%2 %1
 #endif /* TBL68020 */
 #if WORD_SIZE==2
 pat dvi $1==2
 with data2-sconsts DD_REG
@ -3866,6 +3922,14 @@ with STACK
 			yields	dl1
 #endif /* TBL68020 */
 #ifdef TBL68020
 pat dvi $1==8
 with STACK
    kills ALL
    gen jsr {absolute4, ".dvi8"}
 			yields	dl1 dl0
 #endif /* TBL68020 */
 #if WORD_SIZE==2
 pat rmi $1==2
 with data2-sconsts DD_REG
@ -3891,6 +3955,14 @@ with STACK
 			yields	dl2
 #endif /* TBL68020 */
 #ifdef TBL68020
 pat rmi $1==8
 with STACK
    kills ALL
    gen jsr {absolute4, ".rmi8"}
 			yields	dl1 dl0
 #endif /* TBL68020 */
 #if WORD_SIZE==2
 pat ngi $1==2
 with DD_REG
@ -3901,6 +3973,11 @@ pat ngi $1==4
 with DD_REG4
    gen neg_l %1	yields	%1
 pat ngi $1==8
 with DD_REG4 DD_REG4
    gen neg_l %2
 	negx_l %1	yields	%2 %1
 #if WORD_SIZE==2
 pat sli $1==2
 with shconreg DD_REG
@ -3911,6 +3988,43 @@ pat sli $1==4
 with shconreg DD_REG4
    gen asl_l %1, %2	yields	%2
 pat sli $1==8
 with DD_REG4 DD_REG4 DD_REG4
    uses AA_REG = %3		/* no 4th DD_REG */
    gen lsl_l %1, %3
 	lsl_l %1, %2		/* shift by %1 modulo 64 */
 	bchg {const, 5}, %1
 	bne {slabel, 1f}	/* jump if shift >= 32 */
 	neg_l %1
 	exg %a, %3
 	lsr_l %1, %3		/* (32 - shift) modulo 64 */
 	or_l %3, %2		/* shift bits from %3 to %2 */
 	move %a, %3
 	bra {slabel, 2f}
 	1:
 	move %a, %2
 	lsl_l %1, %2		/* (shift - 32) modulo 64 */
 	2:		yields	%3 %2
 pat loc sli ($1&32)==0 && $2==8
 with DD_REG4 DD_REG4
    uses AA_REG = %2, DD_REG = {bconst, $1&31}
    gen lsl_l %b, %2
 	lsl_l %b, %1
 	bset {const, 5}, %b
 	neg_l %b
 	exg %a, %2
 	lsr_l %b, %2
 	or_l %2, %1
 	move %a, %2
 			yields	%2 %1
 pat loc sli ($1&63)==32 && $2==8
 with any4 any4		yields	{zero_const, 0} %2
 pat loc sli ($1&32)!=0 && $2==8
 with any4 DD_REG4
    uses reusing %1, DD_REG = {bconst, $1&31}
    gen lsl_l %a, %2	yields	{zero_const, 0} %2
 #if WORD_SIZE==2
 pat sri $1==2
 with shconreg DD_REG
@ -3921,6 +4035,43 @@ pat sri $1==4
 with shconreg DD_REG4
    gen asr_l %1, %2	yields	%2
 pat sri $1==8
 with DD_REG4 DD_REG4 DD_REG4
    uses AA_REG = %2		/* no 4th DD_REG */
    gen asr_l %1, %2
 	lsr_l %1, %3		/* shift by %1 modulo 64 */
 	bchg {const, 5}, %1
 	bne {slabel, 1f}	/* jump if shift >= 32 */
 	neg_l %1
 	exg %a, %2
 	lsl_l %1, %2		/* (32 - shift) modulo 64 */
 	or_l %2, %3		/* shift bits from %2 to %3 */
 	move %a, %2
 	bra {slabel, 2f}
 	1:
 	move %a, %3
 	asr_l %1, %3		/* (shift - 32) modulo 64 */
 	2:		yields	%3 %2
 pat loc sri ($1&32)==0 && $2==8
 with DD_REG4 DD_REG4
    uses AA_REG = %1, DD_REG = {bconst, $1&31}
    gen asr_l %b, %1
 	lsr_l %b, %2
 	bset {const, 5}, %b
 	neg_l %b
 	exg %a, %1
 	lsl_l %b, %1
 	or_l %1, %2
 	move %a, %1
 			yields	%2 %1
 pat loc sri ($1&63)==32 && $2==8
 with DD_REG4 any4	yields	%1	leaving loc 4 loc 8 cii
 pat loc sri ($1&32)!=0 && $2==8
 with DD_REG4 any4
    uses reusing %2, DD_REG = {bconst, $1&31}
    gen asr_l %a, %1	yields	%1	leaving loc 4 loc 8 cii
 /************************************************
 * Group 4: unsigned arithmetic.		*
 ************************************************/
@ -3947,6 +4098,8 @@ with STACK
 			yields	dl1
 #endif /* TBL68020 */
 pat mlu $1==8			leaving mli 8
 #if WORD_SIZE==2
 pat dvu $1==2
 with data2-sconsts data2
@ -3966,6 +4119,14 @@ with STACK
 			yields	dl1
 #endif /* TBL68020 */
 #ifdef TBL68020
 pat dvu $1==8
 with STACK
    kills ALL
    gen jsr {absolute4, ".dvu8"}
 			yields	dl1 dl0
 #endif /* TBL68020 */
 #if WORD_SIZE==2
 pat rmu $1==2
 with data2-sconsts data2
@ -3992,8 +4153,18 @@ with STACK
 			yields	dl2
 #endif /* TBL68020 */
 #ifdef TBL68020
 pat rmu $1==8
 with STACK
    kills ALL
    gen jsr {absolute4, ".rmu8"}
 			yields	dl1 dl0
 #endif /* TBL68020 */
 pat slu				leaving sli $1
 pat loc slu $2==8		leaving loc $1 sli 8
 #if WORD_SIZE==2
 pat sru $1==2
 with shconreg DD_REG
@ -4004,6 +4175,43 @@ pat sru $1==4
 with shconreg DD_REG4
    gen lsr_l %1, %2	yields	%2
 pat sru $1==8
 with DD_REG4 DD_REG4 DD_REG4
    uses AA_REG = %2		/* no 4th DD_REG */
    gen lsr_l %1, %2
 	lsr_l %1, %3		/* shift by %1 modulo 64 */
 	bchg {const, 5}, %1
 	bne {slabel, 1f}	/* jump if shift >= 32 */
 	neg_l %1
 	exg %a, %2
 	lsl_l %1, %2		/* (32 - shift) modulo 64 */
 	or_l %2, %3		/* shift bits from %2 to %3 */
 	move %a, %2
 	bra {slabel, 2f}
 	1:
 	move %a, %3
 	lsr_l %1, %3		/* (shift - 32) modulo 64 */
 	2:		yields	%3 %2
 pat loc sru ($1&32)==0 && $2==8
 /* Logical right shift of %1:%2 (high:low) by a constant < 32.
  * %a must keep a copy of the ORIGINAL HIGH word (%1), because its
  * low bits are shifted into the top of the low word afterwards;
  * this mirrors the loc sri rule.
  */
 with DD_REG4 DD_REG4
    uses AA_REG = %1, DD_REG = {bconst, $1&31}
    gen lsr_l %b, %1
 	lsr_l %b, %2
 	bset {const, 5}, %b
 	neg_l %b		/* (32 - shift) modulo 64 */
 	exg %a, %1		/* %1 = original high, %a = shifted high */
 	lsl_l %b, %1
 	or_l %1, %2		/* shift bits from high into low */
 	move %a, %1
 			yields	%2 %1
 pat loc sru ($1&63)==32 && $2==8
 with any4 any4		yields	%1 {zero_const, 0}
 pat loc sru ($1&32)!=0 && $2==8
 with DD_REG4 any4
    uses reusing %2, DD_REG = {bconst, $1&31}
    gen lsr_l %a, %1	yields	%1 {zero_const, 0}
 /************************************************
 * Group 5: floating point arithmetic		*
 ************************************************/
@ -4753,6 +4961,17 @@ with exact any_int STACK
    uses reusing %1,DD_REG=%1
    gen xxx* {post_inc_int, sp}, %a	yields %a
 proc log8 example and
 with exact data4 data4 DD_REG4 DD_REG4
    gen xxx* %1, %3
 	xxx* %2, %4			yields	%4 %3
 with DD_REG4 DD_REG4 data4 data4
    gen xxx* %3, %1
 	xxx* %4, %2			yields	%2 %1
 with DD_REG4 DD_REG4 STACK
    gen xxx* {post_inc4, sp}, %1
 	xxx* {post_inc4, sp}, %2	yields	%2 %1
 proc logdef example and
 with STACK
    uses DD_REG4 = {const, $1/WORD_SIZE -1},
@ -4813,6 +5032,7 @@ pat and $1==WORD_SIZE			call logw(AND_I)
 #if WORD_SIZE==2
 pat and $1==2*WORD_SIZE			call log2w("and.l")
 #endif
 pat and $1==8				call log8("and.l")
 pat and $1>4 && $1/WORD_SIZE<=65536	call logdef(AND_I)
 pat and defined($1)			call logbdef(AND_I)
 pat and !defined($1)			call logndef(AND_I)
@ -4821,6 +5041,7 @@ pat ior $1==WORD_SIZE			call logw(OR_I)
 #if WORD_SIZE==2
 pat ior $1==2*WORD_SIZE			call log2w("or.l")
 #endif
 pat ior $1==8				call log8("or.l")
 pat ior $1>2 && $1/WORD_SIZE<=65536	call logdef(OR_I)
 pat ior defined($1)			call logbdef(OR_I)
 pat ior !defined($1)			call logndef(OR_I)
@ -4835,6 +5056,21 @@ pat xor $1==4
 with DD_REG4 conreg4-bconst4
    gen eor_l %2, %1	yields	%1
 pat xor $1==8
 /* 8-byte exclusive or.  eor_l needs a register (or constant) source,
  * so each rule funnels the second operand's words through a
  * destroyable data register.
  */
 with exact any4 any4 DD_REG4 DD_REG4
    uses reusing %1, DD_REG4 = %1
    gen eor_l %a, %3
 	move %2, %a
 	eor_l %a, %4	yields	%4 %3
 with DD_REG4 DD_REG4 DD_REG4 any4
    /* %3 is overwritten by the move below, so it must be a
     * destroyable DD_REG4, not just any D_REG4.
     */
    gen eor_l %3, %1
 	move %4, %3
 	eor_l %3, %2	yields	%2 %1
 with DD_REG4 DD_REG4 DD_REG4 STACK
    gen eor_l %3, %1
        move_l {post_inc4, sp}, %3
 	eor_l %3, %2	yields	%2 %1
 pat xor $1>4 && $1/WORD_SIZE<=65536		call logdef(EOR_I)
 pat xor defined($1)			call logbdef(EOR_I)
 pat xor !defined($1)			call logndef(EOR_I)
@ -4907,6 +5143,50 @@ pat rol $1==4
 with shconreg DD_REG4
    gen rol_l %1, %2	yields	%2
 pat rol $1==8
 with DD_REG4 DD_REG4 DD_REG4
    uses AA_REG, AA_REG		/* no 4th DD_REG */
    gen bclr {const, 5}, %1
 	beq {slabel, 1f}
 	exg %2, %3		/* rotate left 32 */
 	1:
 	move %2, %a
 	move %3, %b
 	lsl_l %1, %2
 	lsl_l %1, %3
 	bset {const, 5}, %1
 	neg_l %1		/* (32 - shift) modulo 64 */
 	exg %a, %2
 	lsr_l %1, %2
 	or_l %2, %3		/* rotate bits from %2 to %3 */
 	move %a, %2
 	exg %b, %3
 	lsr_l %1, %3
 	or_l %3, %2		/* rotate bits from %3 to %2 */
 	move %b, %3
 			yields	%3 %2
 pat loc rol ($1&32)==0 && $2==8
 with DD_REG4 DD_REG4
    uses AA_REG, AA_REG, DD_REG = {bconst, $1&31}
    gen move %1, %a
 	move %2, %b
 	lsl_l %c, %1
 	lsl_l %c, %2
 	bset {const, 5}, %c
 	neg_l %c
 	exg %a, %1
 	lsr_l %c, %1
 	or_l %1, %2
 	move %a, %1
 	exg %b, %2
 	lsr_l %c, %2
 	or_l %2, %1
 	move %b, %2
 			yields	%2 %1
 pat loc rol ($1&63)==32 && $2==8	leaving exg 4
 pat loc rol ($1&32)!=0 && $2==8		leaving loc (0-$1)&31 ror 8
 #if WORD_SIZE==2
 pat ror $1==2
 with shconreg DD_REG
@ -4917,6 +5197,50 @@ pat ror $1==4
 with shconreg DD_REG4
    gen ror_l %1, %2	yields	%2
 pat ror $1==8
 with DD_REG4 DD_REG4 DD_REG4
    uses AA_REG, AA_REG		/* no 4th DD_REG */
    gen bclr {const, 5}, %1
 	beq {slabel, 1f}
 	exg %2, %3		/* rotate right 32 */
 	1:
 	move %2, %a
 	move %3, %b
 	lsr_l %1, %2
 	lsr_l %1, %3
 	bset {const, 5}, %1
 	neg_l %1		/* (32 - shift) modulo 64 */
 	exg %a, %2
 	lsl_l %1, %2
 	or_l %2, %3		/* rotate bits from %2 to %3 */
 	move %a, %2
 	exg %b, %3
 	lsl_l %1, %3
 	or_l %3, %2		/* rotate bits from %3 to %2 */
 	move %b, %3
 			yields	%3 %2
 pat loc ror ($1&32)==0 && $2==8
 with DD_REG4 DD_REG4
    uses AA_REG, AA_REG, DD_REG = {bconst, $1&31}
    gen move %1, %a
 	move %2, %b
 	lsr_l %c, %1
 	lsr_l %c, %2
 	bset {const, 5}, %c
 	neg_l %c
 	exg %a, %1
 	lsl_l %c, %1
 	or_l %1, %2
 	move %a, %1
 	exg %b, %2
 	lsl_l %c, %2
 	or_l %2, %1
 	move %b, %2
 			yields	%2 %1
 pat loc ror ($1&63)==32 && $2==8	leaving exg 4
 pat loc ror ($1&32)!=0 && $2==8		leaving loc (0-$1)&31 rol 8
@ -6391,6 +6715,55 @@ pat cmu zge $1==WORD_SIZE		call cmuzxx("bcc","bls")
 pat cmu zgt $1==WORD_SIZE		call cmuzxx("bhi","bcs")
 proc cmx8txx example cmi tlt
 with exact DD_REG4 DD_REG4 any4 any4
    uses reusing %3, DD_REG4 = %3
    gen sub_l %4, %2
 	subx_l %a, %1	/* keep overflow flag */
 	sxx[2] %1
 	neg_b %1	yields	{dreg1, %1}
 with D_REG4 any4-D_REG4 DD_REG4 DD_REG4
    /* only 3 of DD_REG4; may unstack %2 into AA_REG */
    gen sub_l %2, %4
 	subx_l %1, %3
 	sxx[1] %3
 	neg_b %3	yields	{dreg1, %3}
 pat cmi tlt $1==8			call cmx8txx("slt","sgt")
 pat cmi tle $1==8			call cmx8txx("sle","sge")
 pat cmi tge $1==8			call cmx8txx("sge","sle")
 pat cmi tgt $1==8			call cmx8txx("sgt","slt")
 pat cms teq $1==8			call cmx8txx("seq","seq")
 pat cms tne $1==8			call cmx8txx("sne","sne")
 pat cmu tlt $1==8			call cmx8txx("scs","shi")
 pat cmu tle $1==8			call cmx8txx("sls","scc")
 pat cmu tge $1==8			call cmx8txx("scc","sls")
 pat cmu tgt $1==8			call cmx8txx("shi","scs")
 proc cmx8zxx example cmi zlt
 with exact DD_REG4 DD_REG4 any4 any4
    kills ALL
    uses reusing %3, DD_REG4 = %3
    gen sub_l %4, %2
 	subx_l %a, %1
 	bxx[2] {llabel, $2}
 with D_REG4 any4-D_REG4 DD_REG4 DD_REG4 STACK
    gen sub_l %2, %4
 	subx_l %1, %3
 	bxx[1] {llabel, $2}
 pat cmi zlt $1==8			call cmx8zxx("blt","bgt")
 pat cmi zle $1==8			call cmx8zxx("ble","bge")
 pat cmi zge $1==8			call cmx8zxx("bge","ble")
 pat cmi zgt $1==8			call cmx8zxx("bgt","blt")
 pat cms zeq $1==8			call cmx8zxx("beq","beq")
 pat cms zne $1==8			call cmx8zxx("bne","bne")
 pat cmu zlt $1==8			call cmx8zxx("bcs","bhi")
 pat cmu zle $1==8			call cmx8zxx("bls","bcc")
 pat cmu zge $1==8			call cmx8zxx("bcc","bls")
 pat cmu zgt $1==8			call cmx8zxx("bhi","bcs")
 #if TBL68881
 proc cmf4zxx example cmf zlt
 with FS_REG FS_REG
@ -6630,6 +7003,33 @@ uses reusing %1,DD_REG4
 pat loc loc ciu $1==$2	/* skip this */
 pat loc loc cui $1==$2	/* skip this */
 pat loc loc cii $1==4 && $2==8
 with exact test_set1+test_set2
 			yields	%1 {zero_const, 0}
 with test_set4
    uses DD_REG4
    gen test %1
 	slt {dreg1, %a}
 #ifdef TBL68020
 	extb_l %a
 #else
 	ext_w %a
 	ext_l %a
 #endif
 			yields	%1 %a
 pat loc loc cii $1<4 && $2==8
 			leaving loc $1 loc 4 cii loc 4 loc 8 cii
 pat loc loc ciu $1==4 && $2==8		yields {zero_const, 0}
 pat loc loc cui $1==4 && $2==8		yields {zero_const, 0}
 pat loc loc cuu $1==4 && $2==8		yields {zero_const, 0}
 pat loc loc cii $1==8 && $2==4		leaving asp 4
 pat loc loc ciu $1==8 && $2==4		leaving asp 4
 pat loc loc cui $1==8 && $2==4		leaving asp 4
 pat loc loc cuu $1==8 && $2==4		leaving asp 4
 /* The following rules should be handled by the peephole optimizer, I think */
--- a/plat/linux68k/descr
+++ b/plat/linux68k/descr
@ -10,6 +10,8 @@ var s=2
 var sa={s}
 var l={w}
 var la={w}
 var q=8
 var qa=4
 var f={w}
 var fa={w}
 var d=8
@ -19,11 +21,12 @@ var xa={x}
 var ARCH=m68020
 var PLATFORM=linux68k
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
-var CPP_F=-D__unix -D__mc68020 -D__m68k -D__mc68000 -D__M68020
+var CPP_F=-D__unix -D__mc68020 -D__m68k -D__mc68000 -D__M68020 -D_EM_LLSIZE={q}
 var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x08000054
 var C_LIB={PLATFORMDIR}/libc-ansi.a
 # bitfields reversed for compatibility with (g)cc.
-var CC_ALIGN=-Vr
+# long long enabled.
 var CC_ALIGN=-Vrq{q}.{qa}
 var OLD_C_LIB={C_LIB}
 var MACHOPT_F=-ml10
 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr
--- a/plat/linux68k/tests/build.lua
+++ b/plat/linux68k/tests/build.lua
@ -6,6 +6,5 @@ plat_testsuite {
    method = "plat/linux68k/emu+emu68k",
    skipsets = {
        "floats", -- FPU instructions not supported by emulator
        "long-long",
    },
 }