Add 8-byte long long for linux68k.

Add rules for 8-byte integers to m68020 ncg.  Add 8-byte long long to
ACK C on linux68k.  Enable long-long tests for linux68k.  The tests
pass in our emulator using Musashi; I don't have a real 68k processor
and haven't tried other emulators.

Still missing are conversions between 8-byte integers and any size of
floats.  The long-long tests don't cover these conversions, and our
emulator can't do floating-point.

Our build always enables TBL68020 and uses word size 4.  Without
TBL68020, 8-byte multiply and divide are missing.  With word size 2,
some conversions between 2-byte and 8-byte integers are missing.

Fix .cii in libem, which didn't work when converting from 1-byte or
2-byte integers.  Now .cii and .cuu work, but also add some rules to
skip .cii and .cuu when converting 8-byte integers.  The new rule for
loc 4 loc 8 cii `with test_set4` exposes a bug: the table may believe
that the condition codes test a 4-byte register when they only test a
word or byte, and this incorrect test may describe an unsigned word or
byte as negative.  Another rule `with exact test_set1+test_set2` works
around the bug by ignoring the negative flag, because a zero-extended
word or byte is never negative.

The old rules for comparison and logic do work with 8-byte integers
and bitsets, but add some specific 8-byte rules to skip libem calls or
loops.  There were no rules for 8-byte arithmetic, shift, or rotate;
so add some.  There is a register shortage, because the table requires
preserving d3 to d7, leaving only 3 data registers (d0, d1, d2) for
8-byte operations.  Because of the shortage, the code may move data to
an address register, or read a memory location more than once.

The multiplication and division code are translations of the i386
code.  They pass the tests, but might not give the best performance on
a real 68k processor.
This commit is contained in:
George Koehler 2019-09-24 13:32:17 -04:00
parent fd27acb487
commit e867861f6d
10 changed files with 608 additions and 12 deletions

View file

@ -2,7 +2,7 @@ for _, plat in ipairs(vars.plats) do
acklibrary { acklibrary {
name = "lib_"..plat, name = "lib_"..plat,
srcs = { srcs = {
"./*.s", "./*.s", -- added divrem8.s
"./*.c" "./*.c"
}, },
vars = { plat = plat }, vars = { plat = plat },

View file

@ -13,12 +13,19 @@
sub.l d0, sp ! pop extra bytes sub.l d0, sp ! pop extra bytes
jmp (a0) jmp (a0)
1: 1:
clr.l d1 move.l (sp), d1
tst.l (sp) lsr.l #1, d0
bne 4f bcs 1f ! branch if source size == 1
not.l d1 ! d1 contains sign of source lsr.l #1, d0
4: bcs 2f ! branch if source size == 2
asr.l #2, d0 tst.l d1
bra 4f
1: lsr.l #1, d0 ! size difference / 4
ext.w d1
2: ext.l d1
move.l d1, (sp)
4: slt d1
extb.l d1 ! d1 contains sign of source
sub.l #1, d0 sub.l #1, d0
2: 2:
move.l d1, -(sp) move.l d1, -(sp)

View file

@ -0,0 +1,76 @@
.define .divrem8
.sect .text
.sect .rom
.sect .data
.sect .bss
yh=16
yl=20
xh=24
xl=28
! This private sub for .dvi8, .dvu8, .rmi8, .rmu8
! does unsigned division of x = xh:xl by y = yh:yl,
! yields d0:d1 = quotient, d2:d3 = remainder.
.sect .text
.divrem8:
! Unsigned 64-bit division of x = xh:xl by y = yh:yl.
! In:  operands on the stack at (yh, sp) .. (xl, sp), plus d0, d1 below.
! Out: d0:d1 = quotient, d2:d3 = remainder.
! Clobbers d3, a0, a1, a2; d7 is used but restored before returning.
! Caller must set d0, d1 like so:
! move.l (xh, sp), d0
! move.l (yh, sp), d1
tst.l d1
bne 1f ! branch if y >= 2**32
! y = yl, so x / y = xh:xl / yl = qh:0 + rh:xl / yl
! where qh, rh are quotient, remainder from xh / yl.
move.l (xl, sp), d1
move.l (yl, sp), d2
clr.l d3 ! d3:d0 = xh
divu.l d2, d3:d0 ! d0 = 0:xh / yl, d3 = rh
divu.l d2, d3:d1 ! d1 = rh:xl / yl, so d0:d1 = x / y
clr.l d2 ! remainder in d2:d3
rts
1: ! Here y >= 2**32.
move.l d0, a0 ! save xh
move.l d1, a1 ! save yh
move.l d7, a2 ! save caller's d7
! Find y >> right in [2**31, 2**32).
move.l (yl, sp), d2
bfffo d1[0:32], d3 ! find highest set bit in yh; d3 = left, in 0..31
lsl.l d3, d1 ! shift yh left
bset #5, d3
neg.l d3 ! right = (32 - left) modulo 64, in 1..32
lsr.l d3, d2 ! shift yl right (a count of 32 yields 0)
or.l d1, d2 ! d2 = y >> right
! Estimate x / y as q = (x / (y >> right)) >> right.
move.l (xl, sp), d1
clr.l d7
divu.l d2, d7:d0
divu.l d2, d7:d1 ! d0:d1 = x / (y >> right)
lsr.l d3, d1
! Recover left = 32 - right as a count in 0..31.  Here d3 = -(left + 32),
! so negating gives left + 32 and clearing bit 5 gives left.  The former
! "bset #5 / neg.l" pair produced 32 instead of 0 when left == 0 (yh has
! bit 31 set), which shifted d0 out entirely and lost the quotient bit.
neg.l d3 ! d3 = left + 32
bclr #5, d3 ! d3 = left
lsl.l d3, d0
or.l d0, d1 ! d1 = q
! Calculate the remainder x - y * q. If the subtraction
! overflows, then the correct quotient is q - 1, else it is q.
move.l a1, d3 ! yh
mulu.l d1, d3 ! yh * q
move.l (yl, sp), d7
mulu.l d1, d0:d7 ! yl * q
add.l d3, d0 ! d0:d7 = y * q
move.l (xl, sp), d3
move.l a0, d2 ! d2:d3 = x
sub.l d7, d3
subx.l d0, d2 ! d2:d3 = x - y * q
bcc 1f ! branch unless subtraction overflowed
sub.l #1, d1 ! fix quotient
move.l a1, d7 ! yh
add.l (yl, sp), d3
addx.l d7, d2 ! fix remainder
1: clr.l d0 ! d0:d1 = quotient
move.l a2, d7 ! restore caller's d7
rts

34
mach/m68020/libem/dvi8.s Normal file
View file

@ -0,0 +1,34 @@
.define .dvi8
.sect .text
.sect .rom
.sect .data
.sect .bss
yh=8
yl=12
xh=16
xl=20
! .dvi8 yields d0:d1 = quotient from x / y
.sect .text
.dvi8:
! Signed 8-byte division.  Both operands are replaced by their absolute
! values, divided unsigned by .divrem8, and the quotient is negated when
! exactly one operand was negative.  d2 carries the sign flag.
! Offsets yh..xl are relative to sp after d3 has been pushed.
move.l d3, -(sp) ! preserve caller's d3
clr.l d2 ! d2 = 0, non-negative result
move.l (xh, sp), d0 ! d0 for .divrem8
bge 1f
move.l #1, d2 ! d2 = 1, negative result
neg.l (xl, sp) ! low half is negated in place on the stack
negx.l d0 ! x = absolute value
1: move.l (yh, sp), d1 ! d1 for .divrem8
bge 1f
bchg #0, d2 ! flip sign of result
neg.l (yl, sp) ! low half is negated in place on the stack
negx.l d1 ! y = absolute value
1: move.l d2, -(sp) ! save sign flag across the call
jsr (.divrem8)
move.l (sp)+, d2 ! reload sign flag; the move sets Z for the beq
beq 1f ! branch unless result < 0
neg.l d1
negx.l d0 ! negate quotient d0:d1
1: move.l (sp)+, d3 ! restore caller's d3
rtd #16 ! return and drop the two 8-byte operands

20
mach/m68020/libem/dvu8.s Normal file
View file

@ -0,0 +1,20 @@
.define .dvu8
.sect .text
.sect .rom
.sect .data
.sect .bss
yh=8
xh=16
! .dvu8 yields d0:d1 = quotient from x / y
.sect .text
.dvu8:
! Unsigned 8-byte division: a thin wrapper around .divrem8 that returns
! the quotient it leaves in d0:d1.
move.l d3, -(sp) ! preserve caller's d3
move.l (xh, sp), d0 ! d0 = xh for .divrem8
move.l (yh, sp), d1 ! d1 = yh for .divrem8
sub.l #4, sp ! pad 4 bytes so the operands sit at the stack offsets .divrem8 expects
jsr (.divrem8)
add.l #4, sp ! drop the padding
move.l (sp)+, d3 ! restore caller's d3
rtd #16 ! return and drop the two 8-byte operands

35
mach/m68020/libem/rmi8.s Normal file
View file

@ -0,0 +1,35 @@
.define .rmi8
.sect .text
.sect .rom
.sect .data
.sect .bss
yh=8
yl=12
xh=16
xl=20
! .rmi8 yields d0:d1 = remainder from x / y
.sect .text
.rmi8:
! Signed 8-byte remainder.  Both operands are replaced by their absolute
! values and divided unsigned by .divrem8; the remainder is negated when
! x was negative.  The sign of y does not affect the sign of the result.
! Offsets yh..xl are relative to sp after d3 has been pushed.
move.l d3, -(sp) ! preserve caller's d3
clr.l d2 ! d2 = 0, non-negative result
move.l (xh, sp), d0 ! d0 for .divrem8
bge 1f
move.l #1, d2 ! d2 = 1, negative result
neg.l (xl, sp) ! low half is negated in place on the stack
negx.l d0 ! x = absolute value
1: move.l (yh, sp), d1 ! d1 for .divrem8
bge 1f
neg.l (yl, sp) ! low half is negated in place on the stack
negx.l d1 ! y = absolute value
1: move.l d2, -(sp) ! save sign flag across the call
jsr (.divrem8)
move.l (sp)+, d0 ! reload sign flag; the move sets Z for the beq
beq 1f ! branch unless result < 0
neg.l d3
negx.l d2 ! negate remainder d2:d3
1: move.l d3, d1
move.l d2, d0 ! move remainder from d2:d3 to d0:d1
move.l (sp)+, d3 ! restore caller's d3
rtd #16 ! return and drop the two 8-byte operands

22
mach/m68020/libem/rmu8.s Normal file
View file

@ -0,0 +1,22 @@
.define .rmu8
.sect .text
.sect .rom
.sect .data
.sect .bss
yh=8
xh=16
! .rmu8 yields d0:d1 = remainder from x / y
.sect .text
.rmu8:
! Unsigned 8-byte remainder: calls .divrem8 and moves the remainder it
! leaves in d2:d3 into d0:d1.
move.l d3, -(sp) ! preserve caller's d3
move.l (xh, sp), d0 ! d0 = xh for .divrem8
move.l (yh, sp), d1 ! d1 = yh for .divrem8
sub.l #4, sp ! pad 4 bytes so the operands sit at the stack offsets .divrem8 expects
jsr (.divrem8)
add.l #4, sp ! drop the padding
move.l d3, d1
move.l d2, d0 ! move remainder from d2:d3 to d0:d1
move.l (sp)+, d3 ! restore caller's d3
rtd #16 ! return and drop the two 8-byte operands

View file

@ -612,6 +612,7 @@ add_l "add.l" conreg4:ro, alterable4:rw:cc cost(2,6).
add_w "add.w" any2:ro, D_REG+LOCAL:rw:cc cost(2,3). add_w "add.w" any2:ro, D_REG+LOCAL:rw:cc cost(2,3).
add_w "add.w" conreg2:ro, alterable2:rw:cc cost(2,6). add_w "add.w" conreg2:ro, alterable2:rw:cc cost(2,6).
#endif #endif
addx_l "addx.l" D_REG4:ro, D_REG4:rw kills :cc cost(2,3).
and_l "and.l" data4:ro, D_REG4:rw:cc cost(2,3). and_l "and.l" data4:ro, D_REG4:rw:cc cost(2,3).
and_l "and.l" D_REG4:ro, memalt4:rw:cc cost(2,6). and_l "and.l" D_REG4:ro, memalt4:rw:cc cost(2,6).
and_l "and.l" consts4:ro, datalt4:rw:cc cost(2,6). and_l "and.l" consts4:ro, datalt4:rw:cc cost(2,6).
@ -628,6 +629,7 @@ asr "asr #1," memalt2:rw:cc cost(2,4).
asl_w "asl.w" shconreg:ro, D_REG:rw:cc cost(2,5). asl_w "asl.w" shconreg:ro, D_REG:rw:cc cost(2,5).
asr_w "asr.w" shconreg:ro, D_REG:rw:cc cost(2,4). asr_w "asr.w" shconreg:ro, D_REG:rw:cc cost(2,4).
#endif #endif
bchg const:ro, D_REG:rw kills:cc cost(2,4).
bclr const:ro, D_REG:rw kills:cc cost(2,4). bclr const:ro, D_REG:rw kills:cc cost(2,4).
bra label cost(2,5). bra label cost(2,5).
bcc label cost(2,5). bcc label cost(2,5).
@ -671,14 +673,15 @@ eor_l "eor.l" conreg4:ro, datalt4:rw:cc cost(2,6).
#if WORD_SIZE==2 #if WORD_SIZE==2
eor_w "eor.w" conreg2:ro, datalt2:rw:cc cost(2,4). eor_w "eor.w" conreg2:ro, datalt2:rw:cc cost(2,4).
#endif #endif
exg genreg4:rw, genreg4:rw cost(2,3).
/* in the next two instructions: LOCAL only allowed if register var */ /* in the next two instructions: LOCAL only allowed if register var */
ext_l "ext.l" D_REG+LOCAL+D_REG4:rw:cc cost(2,2). ext_l "ext.l" D_REG+LOCAL+D_REG4:rw:cc cost(2,2).
ext_w "ext.w" D_REG+LOCAL+D_REG4:rw:cc cost(2,2). ext_w "ext.w" D_REG+LOCAL+D_REG4:rw:cc cost(2,2).
jmp address+control4 cost(2,0). jmp address+control4 cost(2,0).
jsr address+control4 kills :cc d0 d1 d2 a0 a1 cost(2,3). jsr address+control4 kills :cc d0 d1 d2 a0 a1 cost(2,3).
lea address+control4:ro, A_REG+areg:wo cost(2,0). lea address+control4:ro, A_REG+areg:wo cost(2,0).
lsl_l "lsl.l" shconreg:ro, D_REG4:rw:cc cost(2,4).
/* /*
lsl_l "lsl.l" shconreg:ro, D_REG:rw:cc cost(2,4).
lsl "lsl #1," memalt2:rw:cc cost(2,4). lsl "lsl #1," memalt2:rw:cc cost(2,4).
*/ */
lsr_l "lsr.l" shconreg:ro, D_REG4:rw:cc cost(2,4). lsr_l "lsr.l" shconreg:ro, D_REG4:rw:cc cost(2,4).
@ -709,6 +712,8 @@ neg_l "neg.l" memory4:rw:cc cost(2,6).
#if WORD_SIZE==2 #if WORD_SIZE==2
neg_w "neg.w" memory2:rw:cc cost(2,6). neg_w "neg.w" memory2:rw:cc cost(2,6).
#endif #endif
negx_l "negx.l" D_REG4:rw:cc cost(2,3).
negx_l "negx.l" memory4:rw:cc cost(2,6).
not_l "not.l" D_REG4:rw:cc cost(2,3). not_l "not.l" D_REG4:rw:cc cost(2,3).
not_l "not.l" memory4:rw:cc cost(2,6). not_l "not.l" memory4:rw:cc cost(2,6).
#if WORD_SIZE==2 #if WORD_SIZE==2
@ -733,6 +738,7 @@ ror_w "ror.w" shconreg:ro, D_REG:rw:cc cost(2,4).
#endif #endif
roxl "roxl #1," memalt2:rw:cc cost(2,4). roxl "roxl #1," memalt2:rw:cc cost(2,4).
roxr "roxr #1," memalt2:rw:cc cost(2,4). roxr "roxr #1," memalt2:rw:cc cost(2,4).
slt datalt1:rw cost(2,3).
sne datalt1:rw cost(2,3). sne datalt1:rw cost(2,3).
sub_l "sub.l" any4:ro, D_REG4:rw:cc cost(2,3). sub_l "sub.l" any4:ro, D_REG4:rw:cc cost(2,3).
sub_l "sub.l" any4+areg:ro, A_REG+areg:rw cost(2,3). sub_l "sub.l" any4+areg:ro, A_REG+areg:rw cost(2,3).
@ -740,6 +746,9 @@ sub_l "sub.l" conreg4:ro, alterable4:rw:cc cost(2,6).
#if WORD_SIZE==2 #if WORD_SIZE==2
sub_w "sub.w" any2:ro, D_REG+LOCAL:rw:cc cost(2,3). sub_w "sub.w" any2:ro, D_REG+LOCAL:rw:cc cost(2,3).
sub_w "sub.w" conreg2:ro, alterable2:rw:cc cost(2,6). sub_w "sub.w" conreg2:ro, alterable2:rw:cc cost(2,6).
#endif
subx_l "subx.l" D_REG4:ro, D_REG4:rw kills :cc cost(2,3).
#if WORD_SIZE==2
/* On a swap, we only want the lower part of D_REG, so don't set cc */ /* On a swap, we only want the lower part of D_REG, so don't set cc */
swap D_REG:rw kills :cc cost(2,2). swap D_REG:rw kills :cc cost(2,2).
#endif #endif
@ -773,6 +782,7 @@ divs_l "divs.l" data4:ro, D_REG4:rw:cc cost(2,90).
divu_l "divu.l" data4:ro, D_REG4:rw:cc cost(2,78). divu_l "divu.l" data4:ro, D_REG4:rw:cc cost(2,78).
divsl_l "divsl.l" data4:ro, DREG_pair:rw kills :cc cost(2,90). divsl_l "divsl.l" data4:ro, DREG_pair:rw kills :cc cost(2,90).
divul_l "divul.l" data4:ro, DREG_pair:rw kills :cc cost(2,78). divul_l "divul.l" data4:ro, DREG_pair:rw kills :cc cost(2,78).
mulu_l "mulu.l" data4:ro, DREG_pair:rw kills :cc cost(2,44).
pea address+control4+regX cost(2,4). pea address+control4+regX cost(2,4).
#if WORD_SIZE==2 #if WORD_SIZE==2
cmp2_w "cmp2.w" address+control2:ro, genreg2:ro kills :cc cost(2,18). cmp2_w "cmp2.w" address+control2:ro, genreg2:ro kills :cc cost(2,18).
@ -3796,6 +3806,18 @@ with exact any4 STACK
gen add_l {post_inc4, sp}, %a gen add_l {post_inc4, sp}, %a
yields %a yields %a
pat adi $1==8
with exact any4 any4 DD_REG4 DD_REG4
uses reusing %1, DD_REG4 = %1
gen add_l %2, %4
addx_l %a, %3 yields %4 %3
with DD_REG4 DD_REG4 D_REG4 any4
gen add_l %4, %2
addx_l %3, %1 yields %2 %1
with DD_REG4 DD_REG4 D_REG4 STACK
gen add_l {post_inc4, sp}, %2
addx_l %3, %1 yields %2 %1
#if WORD_SIZE==2 #if WORD_SIZE==2
pat sbi $1==2 pat sbi $1==2
with any2-bconst DD_REG with any2-bconst DD_REG
@ -3822,6 +3844,12 @@ with exact any4 STACK
with any4-bconst4 AA_REG with any4-bconst4 AA_REG
gen sub_l %1, %2 yields %2 gen sub_l %1, %2 yields %2
pat sbi $1==8
with D_REG4 any4-D_REG4 DD_REG4 DD_REG4
/* only 3 of DD_REG4; may unstack %2 into AA_REG */
gen sub_l %2, %4
subx_l %1, %3 yields %4 %3
#if WORD_SIZE==2 #if WORD_SIZE==2
pat loc loc cii ldc mli $1==2 && $2==4 && highw($4)==0 && loww($4)>0 && $5==4 pat loc loc cii ldc mli $1==2 && $2==4 && highw($4)==0 && loww($4)>0 && $5==4
with any2-pre_post with any2-pre_post
@ -3847,6 +3875,34 @@ with STACK
yields dl1 yields dl1
#endif #endif
#ifdef TBL68020
/* 8-byte multiply, keeping the low 64 bits of x * y:
 *   (xl * yl) + ((xh * yl + xl * yh) << 32)
 * Operands from the top of the stack are yh yl xh xl.  Each rule keeps
 * one operand pair in scratch registers and uses one extra 4-byte scratch
 * register %a for a cross product.
 */
pat mli $1==8
with exact data4 data4 DD_REG4 DD_REG4	/* yh yl xh xl */
    uses DD_REG4 = %4
    gen mulu_l %1, %a			/* xl * yh */
	mulu_l %2, %3			/* xh * yl */
	add_l %3, %a
	mulu_l %2, {DREG_pair, %3, %4}	/* xl * yl */
	add_l %a, %3
					yields %4 %3
with DD_REG4 DD_REG4 data4 data4	/* yh yl xh xl */
    /* %a holds a 4-byte product, so allocate it as DD_REG4 like the
     * other two rules do.
     */
    uses DD_REG4 = %2
    gen mulu_l %3, %a			/* yl * xh */
	mulu_l %4, %1			/* yh * xl */
	add_l %1, %a
	mulu_l %4, {DREG_pair, %1, %2}	/* yl * xl */
	add_l %a, %1
					yields %2 %1
with DD_REG4 DD_REG4 STACK		/* yh yl xh xl */
    uses DD_REG4 = %2
    gen mulu_l {post_inc4, sp}, %a	/* yl * xh */
	mulu_l {indirect4, sp}, %1	/* yh * xl */
	add_l %1, %a
	mulu_l {post_inc4, sp}, {DREG_pair, %1, %2}	/* yl * xl */
	add_l %a, %1
					yields %2 %1
#endif /* TBL68020 */
#if WORD_SIZE==2 #if WORD_SIZE==2
pat dvi $1==2 pat dvi $1==2
with data2-sconsts DD_REG with data2-sconsts DD_REG
@ -3866,6 +3922,14 @@ with STACK
yields dl1 yields dl1
#endif /* TBL68020 */ #endif /* TBL68020 */
#ifdef TBL68020
pat dvi $1==8
with STACK
kills ALL
gen jsr {absolute4, ".dvi8"}
yields dl1 dl0
#endif /* TBL68020 */
#if WORD_SIZE==2 #if WORD_SIZE==2
pat rmi $1==2 pat rmi $1==2
with data2-sconsts DD_REG with data2-sconsts DD_REG
@ -3891,6 +3955,14 @@ with STACK
yields dl2 yields dl2
#endif /* TBL68020 */ #endif /* TBL68020 */
#ifdef TBL68020
pat rmi $1==8
with STACK
kills ALL
gen jsr {absolute4, ".rmi8"}
yields dl1 dl0
#endif /* TBL68020 */
#if WORD_SIZE==2 #if WORD_SIZE==2
pat ngi $1==2 pat ngi $1==2
with DD_REG with DD_REG
@ -3901,6 +3973,11 @@ pat ngi $1==4
with DD_REG4 with DD_REG4
gen neg_l %1 yields %1 gen neg_l %1 yields %1
pat ngi $1==8
with DD_REG4 DD_REG4
gen neg_l %2
negx_l %1 yields %2 %1
#if WORD_SIZE==2 #if WORD_SIZE==2
pat sli $1==2 pat sli $1==2
with shconreg DD_REG with shconreg DD_REG
@ -3911,6 +3988,43 @@ pat sli $1==4
with shconreg DD_REG4 with shconreg DD_REG4
gen asl_l %1, %2 yields %2 gen asl_l %1, %2 yields %2
pat sli $1==8
with DD_REG4 DD_REG4 DD_REG4
uses AA_REG = %3 /* no 4th DD_REG */
gen lsl_l %1, %3
lsl_l %1, %2 /* shift by %1 modulo 64 */
bchg {const, 5}, %1
bne {slabel, 1f} /* jump if shift >= 32 */
neg_l %1
exg %a, %3
lsr_l %1, %3 /* (32 - shift) modulo 64 */
or_l %3, %2 /* shift bits from %3 to %2 */
move %a, %3
bra {slabel, 2f}
1:
move %a, %2
lsl_l %1, %2 /* (shift - 32) modulo 64 */
2: yields %3 %2
pat loc sli ($1&32)==0 && $2==8
with DD_REG4 DD_REG4
uses AA_REG = %2, DD_REG = {bconst, $1&31}
gen lsl_l %b, %2
lsl_l %b, %1
bset {const, 5}, %b
neg_l %b
exg %a, %2
lsr_l %b, %2
or_l %2, %1
move %a, %2
yields %2 %1
pat loc sli ($1&63)==32 && $2==8
with any4 any4 yields {zero_const, 0} %2
pat loc sli ($1&32)!=0 && $2==8
with any4 DD_REG4
uses reusing %1, DD_REG = {bconst, $1&31}
gen lsl_l %a, %2 yields {zero_const, 0} %2
#if WORD_SIZE==2 #if WORD_SIZE==2
pat sri $1==2 pat sri $1==2
with shconreg DD_REG with shconreg DD_REG
@ -3921,6 +4035,43 @@ pat sri $1==4
with shconreg DD_REG4 with shconreg DD_REG4
gen asr_l %1, %2 yields %2 gen asr_l %1, %2 yields %2
pat sri $1==8
with DD_REG4 DD_REG4 DD_REG4
uses AA_REG = %2 /* no 4th DD_REG */
gen asr_l %1, %2
lsr_l %1, %3 /* shift by %1 modulo 64 */
bchg {const, 5}, %1
bne {slabel, 1f} /* jump if shift >= 32 */
neg_l %1
exg %a, %2
lsl_l %1, %2 /* (32 - shift) modulo 64 */
or_l %2, %3 /* shift bits from %2 to %3 */
move %a, %2
bra {slabel, 2f}
1:
move %a, %3
asr_l %1, %3 /* (shift - 32) modulo 64 */
2: yields %3 %2
pat loc sri ($1&32)==0 && $2==8
with DD_REG4 DD_REG4
uses AA_REG = %1, DD_REG = {bconst, $1&31}
gen asr_l %b, %1
lsr_l %b, %2
bset {const, 5}, %b
neg_l %b
exg %a, %1
lsl_l %b, %1
or_l %1, %2
move %a, %1
yields %2 %1
pat loc sri ($1&63)==32 && $2==8
with DD_REG4 any4 yields %1 leaving loc 4 loc 8 cii
pat loc sri ($1&32)!=0 && $2==8
with DD_REG4 any4
uses reusing %2, DD_REG = {bconst, $1&31}
gen asr_l %a, %1 yields %1 leaving loc 4 loc 8 cii
/************************************************ /************************************************
* Group 4: unsigned arithmetic. * * Group 4: unsigned arithmetic. *
************************************************/ ************************************************/
@ -3947,6 +4098,8 @@ with STACK
yields dl1 yields dl1
#endif /* TBL68020 */ #endif /* TBL68020 */
pat mlu $1==8 leaving mli 8
#if WORD_SIZE==2 #if WORD_SIZE==2
pat dvu $1==2 pat dvu $1==2
with data2-sconsts data2 with data2-sconsts data2
@ -3966,6 +4119,14 @@ with STACK
yields dl1 yields dl1
#endif /* TBL68020 */ #endif /* TBL68020 */
#ifdef TBL68020
pat dvu $1==8
with STACK
kills ALL
gen jsr {absolute4, ".dvu8"}
yields dl1 dl0
#endif /* TBL68020 */
#if WORD_SIZE==2 #if WORD_SIZE==2
pat rmu $1==2 pat rmu $1==2
with data2-sconsts data2 with data2-sconsts data2
@ -3992,8 +4153,18 @@ with STACK
yields dl2 yields dl2
#endif /* TBL68020 */ #endif /* TBL68020 */
#ifdef TBL68020
pat rmu $1==8
with STACK
kills ALL
gen jsr {absolute4, ".rmu8"}
yields dl1 dl0
#endif /* TBL68020 */
pat slu leaving sli $1 pat slu leaving sli $1
pat loc slu $2==8 leaving loc $1 sli 8
#if WORD_SIZE==2 #if WORD_SIZE==2
pat sru $1==2 pat sru $1==2
with shconreg DD_REG with shconreg DD_REG
@ -4004,6 +4175,43 @@ pat sru $1==4
with shconreg DD_REG4 with shconreg DD_REG4
gen lsr_l %1, %2 yields %2 gen lsr_l %1, %2 yields %2
pat sru $1==8
with DD_REG4 DD_REG4 DD_REG4
uses AA_REG = %2 /* no 4th DD_REG */
gen lsr_l %1, %2
lsr_l %1, %3 /* shift by %1 modulo 64 */
bchg {const, 5}, %1
bne {slabel, 1f} /* jump if shift >= 32 */
neg_l %1
exg %a, %2
lsl_l %1, %2 /* (32 - shift) modulo 64 */
or_l %2, %3 /* shift bits from %2 to %3 */
move %a, %2
bra {slabel, 2f}
1:
move %a, %3
lsr_l %1, %3 /* (shift - 32) modulo 64 */
2: yields %3 %2
/* Logical shift right of an 8-byte value by a constant less than 32.
 * %1 = high half, %2 = low half, %b = shift count.
 * %a must keep the ORIGINAL high half (%1): after both halves are
 * shifted, its low bits are shifted left by (32 - count) and merged
 * into the low half.  Saving %2 here would merge bits of the low half
 * into itself and lose the bits that cross the 32-bit boundary.
 */
pat loc sru ($1&32)==0 && $2==8
with DD_REG4 DD_REG4
    uses AA_REG = %1, DD_REG = {bconst, $1&31}
    gen lsr_l %b, %1
	lsr_l %b, %2
	bset {const, 5}, %b
	neg_l %b		/* (32 - shift) modulo 64 */
	exg %a, %1		/* %1 = original high, %a = shifted high */
	lsl_l %b, %1
	or_l %1, %2		/* shift bits from %1 to %2 */
	move %a, %1		/* restore shifted high */
				yields %2 %1
pat loc sru ($1&63)==32 && $2==8
with any4 any4 yields %1 {zero_const, 0}
pat loc sru ($1&32)!=0 && $2==8
with DD_REG4 any4
uses reusing %2, DD_REG = {bconst, $1&31}
gen lsr_l %a, %1 yields %1 {zero_const, 0}
/************************************************ /************************************************
* Group 5: floating point arithmetic * * Group 5: floating point arithmetic *
************************************************/ ************************************************/
@ -4753,6 +4961,17 @@ with exact any_int STACK
uses reusing %1,DD_REG=%1 uses reusing %1,DD_REG=%1
gen xxx* {post_inc_int, sp}, %a yields %a gen xxx* {post_inc_int, sp}, %a yields %a
proc log8 example and
with exact data4 data4 DD_REG4 DD_REG4
gen xxx* %1, %3
xxx* %2, %4 yields %4 %3
with DD_REG4 DD_REG4 data4 data4
gen xxx* %3, %1
xxx* %4, %2 yields %2 %1
with DD_REG4 DD_REG4 STACK
gen xxx* {post_inc4, sp}, %1
xxx* {post_inc4, sp}, %2 yields %2 %1
proc logdef example and proc logdef example and
with STACK with STACK
uses DD_REG4 = {const, $1/WORD_SIZE -1}, uses DD_REG4 = {const, $1/WORD_SIZE -1},
@ -4813,6 +5032,7 @@ pat and $1==WORD_SIZE call logw(AND_I)
#if WORD_SIZE==2 #if WORD_SIZE==2
pat and $1==2*WORD_SIZE call log2w("and.l") pat and $1==2*WORD_SIZE call log2w("and.l")
#endif #endif
pat and $1==8 call log8("and.l")
pat and $1>4 && $1/WORD_SIZE<=65536 call logdef(AND_I) pat and $1>4 && $1/WORD_SIZE<=65536 call logdef(AND_I)
pat and defined($1) call logbdef(AND_I) pat and defined($1) call logbdef(AND_I)
pat and !defined($1) call logndef(AND_I) pat and !defined($1) call logndef(AND_I)
@ -4821,6 +5041,7 @@ pat ior $1==WORD_SIZE call logw(OR_I)
#if WORD_SIZE==2 #if WORD_SIZE==2
pat ior $1==2*WORD_SIZE call log2w("or.l") pat ior $1==2*WORD_SIZE call log2w("or.l")
#endif #endif
pat ior $1==8 call log8("or.l")
pat ior $1>2 && $1/WORD_SIZE<=65536 call logdef(OR_I) pat ior $1>2 && $1/WORD_SIZE<=65536 call logdef(OR_I)
pat ior defined($1) call logbdef(OR_I) pat ior defined($1) call logbdef(OR_I)
pat ior !defined($1) call logndef(OR_I) pat ior !defined($1) call logndef(OR_I)
@ -4835,6 +5056,21 @@ pat xor $1==4
with DD_REG4 conreg4-bconst4 with DD_REG4 conreg4-bconst4
gen eor_l %2, %1 yields %1 gen eor_l %2, %1 yields %1
/* 8-byte exclusive or.  eor.l only takes a register source, so the
 * second half of a non-register operand is first moved into a register
 * that the rule is allowed to destroy.
 */
pat xor $1==8
with exact any4 any4 DD_REG4 DD_REG4
    uses reusing %1, DD_REG4 = %1
    gen eor_l %a, %3
	move %2, %a
	eor_l %a, %4		yields %4 %3
/* %3 is overwritten by the move below, so it must be a destroyable
 * register (DD_REG4); a plain D_REG4 could be a register variable.
 */
with DD_REG4 DD_REG4 DD_REG4 any4
    gen eor_l %3, %1
	move %4, %3
	eor_l %3, %2		yields %2 %1
with DD_REG4 DD_REG4 DD_REG4 STACK
    gen eor_l %3, %1
	move_l {post_inc4, sp}, %3
	eor_l %3, %2		yields %2 %1
pat xor $1>4 && $1/WORD_SIZE<=65536 call logdef(EOR_I) pat xor $1>4 && $1/WORD_SIZE<=65536 call logdef(EOR_I)
pat xor defined($1) call logbdef(EOR_I) pat xor defined($1) call logbdef(EOR_I)
pat xor !defined($1) call logndef(EOR_I) pat xor !defined($1) call logndef(EOR_I)
@ -4907,6 +5143,50 @@ pat rol $1==4
with shconreg DD_REG4 with shconreg DD_REG4
gen rol_l %1, %2 yields %2 gen rol_l %1, %2 yields %2
pat rol $1==8
with DD_REG4 DD_REG4 DD_REG4
uses AA_REG, AA_REG /* no 4th DD_REG */
gen bclr {const, 5}, %1
beq {slabel, 1f}
exg %2, %3 /* rotate left 32 */
1:
move %2, %a
move %3, %b
lsl_l %1, %2
lsl_l %1, %3
bset {const, 5}, %1
neg_l %1 /* (32 - shift) modulo 64 */
exg %a, %2
lsr_l %1, %2
or_l %2, %3 /* rotate bits from %2 to %3 */
move %a, %2
exg %b, %3
lsr_l %1, %3
or_l %3, %2 /* rotate bits from %3 to %2 */
move %b, %3
yields %3 %2
pat loc rol ($1&32)==0 && $2==8
with DD_REG4 DD_REG4
uses AA_REG, AA_REG, DD_REG = {bconst, $1&31}
gen move %1, %a
move %2, %b
lsl_l %c, %1
lsl_l %c, %2
bset {const, 5}, %c
neg_l %c
exg %a, %1
lsr_l %c, %1
or_l %1, %2
move %a, %1
exg %b, %2
lsr_l %c, %2
or_l %2, %1
move %b, %2
yields %2 %1
pat loc rol ($1&63)==32 && $2==8 leaving exg 4
pat loc rol ($1&32)!=0 && $2==8 leaving loc (0-$1)&31 ror 8
#if WORD_SIZE==2 #if WORD_SIZE==2
pat ror $1==2 pat ror $1==2
with shconreg DD_REG with shconreg DD_REG
@ -4917,6 +5197,50 @@ pat ror $1==4
with shconreg DD_REG4 with shconreg DD_REG4
gen ror_l %1, %2 yields %2 gen ror_l %1, %2 yields %2
pat ror $1==8
with DD_REG4 DD_REG4 DD_REG4
uses AA_REG, AA_REG /* no 4th DD_REG */
gen bclr {const, 5}, %1
beq {slabel, 1f}
exg %2, %3 /* rotate right 32 */
1:
move %2, %a
move %3, %b
lsr_l %1, %2
lsr_l %1, %3
bset {const, 5}, %1
neg_l %1 /* (32 - shift) modulo 64 */
exg %a, %2
lsl_l %1, %2
or_l %2, %3 /* rotate bits from %2 to %3 */
move %a, %2
exg %b, %3
lsl_l %1, %3
or_l %3, %2 /* rotate bits from %3 to %2 */
move %b, %3
yields %3 %2
pat loc ror ($1&32)==0 && $2==8
with DD_REG4 DD_REG4
uses AA_REG, AA_REG, DD_REG = {bconst, $1&31}
gen move %1, %a
move %2, %b
lsr_l %c, %1
lsr_l %c, %2
bset {const, 5}, %c
neg_l %c
exg %a, %1
lsl_l %c, %1
or_l %1, %2
move %a, %1
exg %b, %2
lsl_l %c, %2
or_l %2, %1
move %b, %2
yields %2 %1
pat loc ror ($1&63)==32 && $2==8 leaving exg 4
pat loc ror ($1&32)!=0 && $2==8 leaving loc (0-$1)&31 rol 8
@ -6391,6 +6715,55 @@ pat cmu zge $1==WORD_SIZE call cmuzxx("bcc","bls")
pat cmu zgt $1==WORD_SIZE call cmuzxx("bhi","bcs") pat cmu zgt $1==WORD_SIZE call cmuzxx("bhi","bcs")
proc cmx8txx example cmi tlt
with exact DD_REG4 DD_REG4 any4 any4
uses reusing %3, DD_REG4 = %3
gen sub_l %4, %2
subx_l %a, %1 /* keep overflow flag */
sxx[2] %1
neg_b %1 yields {dreg1, %1}
with D_REG4 any4-D_REG4 DD_REG4 DD_REG4
/* only 3 of DD_REG4; may unstack %2 into AA_REG */
gen sub_l %2, %4
subx_l %1, %3
sxx[1] %3
neg_b %3 yields {dreg1, %3}
pat cmi tlt $1==8 call cmx8txx("slt","sgt")
pat cmi tle $1==8 call cmx8txx("sle","sge")
pat cmi tge $1==8 call cmx8txx("sge","sle")
pat cmi tgt $1==8 call cmx8txx("sgt","slt")
pat cms teq $1==8 call cmx8txx("seq","seq")
pat cms tne $1==8 call cmx8txx("sne","sne")
pat cmu tlt $1==8 call cmx8txx("scs","shi")
pat cmu tle $1==8 call cmx8txx("sls","scc")
pat cmu tge $1==8 call cmx8txx("scc","sls")
pat cmu tgt $1==8 call cmx8txx("shi","scs")
proc cmx8zxx example cmi zlt
with exact DD_REG4 DD_REG4 any4 any4
kills ALL
uses reusing %3, DD_REG4 = %3
gen sub_l %4, %2
subx_l %a, %1
bxx[2] {llabel, $2}
with D_REG4 any4-D_REG4 DD_REG4 DD_REG4 STACK
gen sub_l %2, %4
subx_l %1, %3
bxx[1] {llabel, $2}
pat cmi zlt $1==8 call cmx8zxx("blt","bgt")
pat cmi zle $1==8 call cmx8zxx("ble","bge")
pat cmi zge $1==8 call cmx8zxx("bge","ble")
pat cmi zgt $1==8 call cmx8zxx("bgt","blt")
pat cms zeq $1==8 call cmx8zxx("beq","beq")
pat cms zne $1==8 call cmx8zxx("bne","bne")
pat cmu zlt $1==8 call cmx8zxx("bcs","bhi")
pat cmu zle $1==8 call cmx8zxx("bls","bcc")
pat cmu zge $1==8 call cmx8zxx("bcc","bls")
pat cmu zgt $1==8 call cmx8zxx("bhi","bcs")
#if TBL68881 #if TBL68881
proc cmf4zxx example cmf zlt proc cmf4zxx example cmf zlt
with FS_REG FS_REG with FS_REG FS_REG
@ -6630,6 +7003,33 @@ uses reusing %1,DD_REG4
pat loc loc ciu $1==$2 /* skip this */ pat loc loc ciu $1==$2 /* skip this */
pat loc loc cui $1==$2 /* skip this */ pat loc loc cui $1==$2 /* skip this */
pat loc loc cii $1==4 && $2==8
with exact test_set1+test_set2
yields %1 {zero_const, 0}
with test_set4
uses DD_REG4
gen test %1
slt {dreg1, %a}
#ifdef TBL68020
extb_l %a
#else
ext_w %a
ext_l %a
#endif
yields %1 %a
pat loc loc cii $1<4 && $2==8
leaving loc $1 loc 4 cii loc 4 loc 8 cii
pat loc loc ciu $1==4 && $2==8 yields {zero_const, 0}
pat loc loc cui $1==4 && $2==8 yields {zero_const, 0}
pat loc loc cuu $1==4 && $2==8 yields {zero_const, 0}
pat loc loc cii $1==8 && $2==4 leaving asp 4
pat loc loc ciu $1==8 && $2==4 leaving asp 4
pat loc loc cui $1==8 && $2==4 leaving asp 4
pat loc loc cuu $1==8 && $2==4 leaving asp 4
/* The following rules should be handled by the peephole optimizer, I think */ /* The following rules should be handled by the peephole optimizer, I think */

View file

@ -10,6 +10,8 @@ var s=2
var sa={s} var sa={s}
var l={w} var l={w}
var la={w} var la={w}
var q=8
var qa=4
var f={w} var f={w}
var fa={w} var fa={w}
var d=8 var d=8
@ -19,11 +21,12 @@ var xa={x}
var ARCH=m68020 var ARCH=m68020
var PLATFORM=linux68k var PLATFORM=linux68k
var PLATFORMDIR={EM}/share/ack/{PLATFORM} var PLATFORMDIR={EM}/share/ack/{PLATFORM}
var CPP_F=-D__unix -D__mc68020 -D__m68k -D__mc68000 -D__M68020 var CPP_F=-D__unix -D__mc68020 -D__m68k -D__mc68000 -D__M68020 -D_EM_LLSIZE={q}
var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x08000054 var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x08000054
var C_LIB={PLATFORMDIR}/libc-ansi.a var C_LIB={PLATFORMDIR}/libc-ansi.a
# bitfields reversed for compatibility with (g)cc. # bitfields reversed for compatibility with (g)cc.
var CC_ALIGN=-Vr # long long enabled.
var CC_ALIGN=-Vrq{q}.{qa}
var OLD_C_LIB={C_LIB} var OLD_C_LIB={C_LIB}
var MACHOPT_F=-ml10 var MACHOPT_F=-ml10
var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr

View file

@ -6,6 +6,5 @@ plat_testsuite {
method = "plat/linux68k/emu+emu68k", method = "plat/linux68k/emu+emu68k",
skipsets = { skipsets = {
"floats", -- FPU instructions not supported by emulator "floats", -- FPU instructions not supported by emulator
"long-long",
}, },
} }