From e867861f6d84947514a90396d426a6472ffc3131 Mon Sep 17 00:00:00 2001
From: George Koehler
Date: Tue, 24 Sep 2019 13:32:17 -0400
Subject: [PATCH] Add 8-byte long long for linux68k.

Add rules for 8-byte integers to m68020 ncg.  Add 8-byte long long to
ACK C on linux68k.  Enable long-long tests for linux68k.  The tests
pass in our emulator using Musashi; I don't have a real 68k processor
and haven't tried other emulators.

Still missing are conversions between 8-byte integers and floats of
any size.  The long-long tests don't cover these conversions, and our
emulator can't do floating point.

Our build always enables TBL68020 and uses word size 4.  Without
TBL68020, 8-byte multiply and divide are missing.  With word size 2,
some conversions between 2-byte and 8-byte integers are missing.

Fix .cii in libem, which didn't work when converting from 1-byte or
2-byte integers.  Now .cii and .cuu work, but also add some rules to
skip .cii and .cuu when converting 8-byte integers.

The new rule for loc 4 loc 8 cii `with test_set4` exposes a bug: the
table may believe that the condition codes test a 4-byte register
when they only test a word or byte, and this incorrect test may
describe an unsigned word or byte as negative.  Another rule `with
exact test_set1+test_set2` works around the bug by ignoring the
negative flag, because a zero-extended word or byte is never
negative.

The old rules for comparison and logic do work with 8-byte integers
and bitsets, but add some specific 8-byte rules to skip libem calls
or loops.  There were no rules for 8-byte arithmetic, shift, or
rotate, so add some.  There is a register shortage: the table
requires preserving d3 to d7, leaving only 3 data registers (d0, d1,
d2) for 8-byte operations.  Because of the shortage, the code may
move data to an address register, or read a memory location more than
once.

The multiplication and division code is translated from the i386
code.  It passes the tests, but might not give the best performance
on a real 68k processor.
---
 mach/m68020/libem/build.lua   |   2 +-
 mach/m68020/libem/cii.s       |  19 +-
 mach/m68020/libem/divrem8.s   |  76 +++++++
 mach/m68020/libem/dvi8.s      |  34 +++
 mach/m68020/libem/dvu8.s      |  20 ++
 mach/m68020/libem/rmi8.s      |  35 +++
 mach/m68020/libem/rmu8.s      |  22 ++
 mach/m68020/ncg/table         | 404 +++++++++++++++++++++++++++++++++-
 plat/linux68k/descr           |   7 +-
 plat/linux68k/tests/build.lua |   1 -
 10 files changed, 608 insertions(+), 12 deletions(-)
 create mode 100644 mach/m68020/libem/divrem8.s
 create mode 100644 mach/m68020/libem/dvi8.s
 create mode 100644 mach/m68020/libem/dvu8.s
 create mode 100644 mach/m68020/libem/rmi8.s
 create mode 100644 mach/m68020/libem/rmu8.s

diff --git a/mach/m68020/libem/build.lua b/mach/m68020/libem/build.lua
index d17adcd92..d5c9af8ad 100644
--- a/mach/m68020/libem/build.lua
+++ b/mach/m68020/libem/build.lua
@@ -2,7 +2,7 @@ for _, plat in ipairs(vars.plats) do
 	acklibrary {
 		name = "lib_"..plat,
 		srcs = {
-			"./*.s",
+			"./*.s", -- added divrem8.s
 			"./*.c"
 		},
 		vars = { plat = plat },
diff --git a/mach/m68020/libem/cii.s b/mach/m68020/libem/cii.s
index 01757dfc5..b3dd8c0be 100644
--- a/mach/m68020/libem/cii.s
+++ b/mach/m68020/libem/cii.s
@@ -13,12 +13,19 @@
 	sub.l	d0, sp		! pop extra bytes
 	jmp	(a0)
 1:
-	clr.l	d1
-	tst.l	(sp)
-	bne	4f
-	not.l	d1		! d1 contains sign of source
-4:
-	asr.l	#2, d0
+	move.l	(sp), d1
+	lsr.l	#1, d0
+	bcs	1f		! branch if source size == 1
+	lsr.l	#1, d0
+	bcs	2f		! branch if source size == 2
+	tst.l	d1
+	bra	4f
+1:	lsr.l	#1, d0		! size difference / 4
+	ext.w	d1
+2:	ext.l	d1
+	move.l	d1, (sp)
+4:	slt	d1
+	extb.l	d1		! d1 contains sign of source
 	sub.l	#1, d0
 2:
 	move.l	d1, -(sp)
diff --git a/mach/m68020/libem/divrem8.s b/mach/m68020/libem/divrem8.s
new file mode 100644
index 000000000..557924098
--- /dev/null
+++ b/mach/m68020/libem/divrem8.s
@@ -0,0 +1,76 @@
+.define .divrem8
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+
+yh=16
+yl=20
+xh=24
+xl=28
+	! This private sub for .dvi8, .dvu8, .rmi8, .rmu8
+	! does unsigned division of x = xh:xl by y = yh:yl,
+	! yields d0:d1 = quotient, d2:d3 = remainder.
+
+.sect .text
+.divrem8:
+	! Caller must set d0, d1 like so:
+	!	mov.l (xh, sp), d0
+	!	mov.l (yh, sp), d1
+	tst.l	d1
+	bne	1f			! branch if y >= 2**32
+
+	! y = yl, so x / y = xh:xl / yl = qh:0 + rh:xl / yl
+	! where qh, rh are quotient, remainder from xh / yl.
+	move.l	(xl, sp), d1
+	move.l	(yl, sp), d2
+	clr.l	d3			! d3:d0 = xh
+	divu.l	d2, d3:d0		! d0 = 0:xh / yl, d3 = rh
+	divu.l	d2, d3:d1		! d1 = rh:xl / yl, so d0:d1 = x / y
+	clr.l	d2			! remainder in d2:d3
+	rts
+
+1:	! Here y >= 2**32.
+	move.l	d0, a0			! save xh
+	move.l	d1, a1			! save yh
+	move.l	d7, a2			! save caller's d7
+
+	! Find y >> right in [2**31, 2**32).
+	move.l	(yl, sp), d2
+	bfffo	d1[0:32], d3		! find highest set bit in yh
+	lsl.l	d3, d1			! shift yh left
+	bset	#5, d3
+	neg.l	d3			! right = (32 - left) modulo 64
+	lsr.l	d3, d2			! shift yl right
+	or.l	d1, d2			! d2 = y >> right
+
+	! Estimate x / y as q = (x / (y >> right)) >> right.
+	move.l	(xl, sp), d1
+	clr.l	d7
+	divu.l	d2, d7:d0
+	divu.l	d2, d7:d1		! d0:d1 = x / (y >> right)
+	lsr.l	d3, d1
+	bset	#5, d3
+	neg.l	d3
+	lsl.l	d3, d0
+	or.l	d0, d1			! d1 = q
+
+	! Calculate the remainder x - y * q.  If the subtraction
+	! overflows, then the correct quotient is q - 1, else it is q.
+	move.l	a1, d3			! yh
+	mulu.l	d1, d3			! yh * q
+	move.l	(yl, sp), d7
+	mulu.l	d1, d0:d7		! yl * q
+	add.l	d3, d0			! d0:d7 = y * q
+	move.l	(xl, sp), d3
+	move.l	a0, d2			! d2:d3 = x
+	sub.l	d7, d3
+	subx.l	d0, d2			! d2:d3 = x - y * q
+	bcc	1f			! branch unless subtraction overflowed
+	sub.l	#1, d1			! fix quotient
+	move.l	a1, d7			! yh
+	add.l	(yl, sp), d3
+	addx.l	d7, d2			! fix remainder
+1:	clr.l	d0			! d0:d1 = quotient
+	move.l	a2, d7			! restore caller's d7
+	rts
diff --git a/mach/m68020/libem/dvi8.s b/mach/m68020/libem/dvi8.s
new file mode 100644
index 000000000..03fc3e985
--- /dev/null
+++ b/mach/m68020/libem/dvi8.s
@@ -0,0 +1,34 @@
+.define .dvi8
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+
+yh=8
+yl=12
+xh=16
+xl=20
+	! .dvi8 yields d0:d1 = quotient from x / y
+
+.sect .text
+.dvi8:
+	move.l	d3, -(sp)	! preserve caller's d3
+	clr.l	d2		! d2 = 0, non-negative result
+	move.l	(xh, sp), d0	! d0 for .divrem8
+	bge	1f
+	move.l	#1, d2		! d2 = 1, negative result
+	neg.l	(xl, sp)
+	negx.l	d0		! x = absolute value
+1:	move.l	(yh, sp), d1	! d1 for .divrem8
+	bge	1f
+	bchg	#0, d2		! flip sign of result
+	neg.l	(yl, sp)
+	negx.l	d1		! y = absolute value
+1:	move.l	d2, -(sp)
+	jsr	(.divrem8)
+	move.l	(sp)+, d2
+	beq	1f		! branch unless result < 0
+	neg.l	d1
+	negx.l	d0		! negate quotient d0:d1
+1:	move.l	(sp)+, d3
+	rtd	#16
diff --git a/mach/m68020/libem/dvu8.s b/mach/m68020/libem/dvu8.s
new file mode 100644
index 000000000..00ec6b552
--- /dev/null
+++ b/mach/m68020/libem/dvu8.s
@@ -0,0 +1,20 @@
+.define .dvu8
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+
+yh=8
+xh=16
+	! .dvu8 yields d0:d1 = quotient from x / y
+
+.sect .text
+.dvu8:
+	move.l	d3, -(sp)	! preserve caller's d3
+	move.l	(xh, sp), d0
+	move.l	(yh, sp), d1
+	sub.l	#4, sp
+	jsr	(.divrem8)
+	add.l	#4, sp
+	move.l	(sp)+, d3
+	rtd	#16
diff --git a/mach/m68020/libem/rmi8.s b/mach/m68020/libem/rmi8.s
new file mode 100644
index 000000000..ffb672b2c
--- /dev/null
+++ b/mach/m68020/libem/rmi8.s
@@ -0,0 +1,35 @@
+.define .rmi8
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+
+yh=8
+yl=12
+xh=16
+xl=20
+	! .rmi8 yields d0:d1 = remainder from x / y
+
+.sect .text
+.rmi8:
+	move.l	d3, -(sp)	! preserve caller's d3
+	clr.l	d2		! d2 = 0, non-negative result
+	move.l	(xh, sp), d0	! d0 for .divrem8
+	bge	1f
+	move.l	#1, d2		! d2 = 1, negative result
+	neg.l	(xl, sp)
+	negx.l	d0		! x = absolute value
+1:	move.l	(yh, sp), d1	! d1 for .divrem8
+	bge	1f
+	neg.l	(yl, sp)
+	negx.l	d1		! y = absolute value
+1:	move.l	d2, -(sp)
+	jsr	(.divrem8)
+	move.l	(sp)+, d0
+	beq	1f		! branch unless result < 0
+	neg.l	d3
+	negx.l	d2		! negate remainder d2:d3
+1:	move.l	d3, d1
+	move.l	d2, d0
+	move.l	(sp)+, d3
+	rtd	#16
diff --git a/mach/m68020/libem/rmu8.s b/mach/m68020/libem/rmu8.s
new file mode 100644
index 000000000..823a2778c
--- /dev/null
+++ b/mach/m68020/libem/rmu8.s
@@ -0,0 +1,22 @@
+.define .rmu8
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+
+yh=8
+xh=16
+	! .rmu8 yields d0:d1 = remainder from x / y
+
+.sect .text
+.rmu8:
+	move.l	d3, -(sp)	! preserve caller's d3
+	move.l	(xh, sp), d0
+	move.l	(yh, sp), d1
+	sub.l	#4, sp
+	jsr	(.divrem8)
+	add.l	#4, sp
+	move.l	d3, d1
+	move.l	d2, d0
+	move.l	(sp)+, d3
+	rtd	#16
diff --git a/mach/m68020/ncg/table b/mach/m68020/ncg/table
index 9aede9929..fe1099078 100644
--- a/mach/m68020/ncg/table
+++ b/mach/m68020/ncg/table
@@ -612,6 +612,7 @@ add_l "add.l" conreg4:ro, alterable4:rw:cc cost(2,6).
 add_w "add.w" any2:ro, D_REG+LOCAL:rw:cc cost(2,3).
 add_w "add.w" conreg2:ro, alterable2:rw:cc cost(2,6).
 #endif
+addx_l "addx.l" D_REG4:ro, D_REG4:rw kills :cc cost(2,3).
 and_l "and.l" data4:ro, D_REG4:rw:cc cost(2,3).
 and_l "and.l" D_REG4:ro, memalt4:rw:cc cost(2,6).
 and_l "and.l" consts4:ro, datalt4:rw:cc cost(2,6).
@@ -628,6 +629,7 @@ asr "asr #1," memalt2:rw:cc cost(2,4).
 asl_w "asl.w" shconreg:ro, D_REG:rw:cc cost(2,5).
 asr_w "asr.w" shconreg:ro, D_REG:rw:cc cost(2,4).
 #endif
+bchg const:ro, D_REG:rw kills:cc cost(2,4).
 bclr const:ro, D_REG:rw kills:cc cost(2,4).
 bra label cost(2,5).
 bcc label cost(2,5).
@@ -671,14 +673,15 @@ eor_l "eor.l" conreg4:ro, datalt4:rw:cc cost(2,6).
 #if WORD_SIZE==2
 eor_w "eor.w" conreg2:ro, datalt2:rw:cc cost(2,4).
 #endif
+exg genreg4:rw, genreg4:rw cost(2,3).
 /* in the next two instructions: LOCAL only allowed if register var */
 ext_l "ext.l" D_REG+LOCAL+D_REG4:rw:cc cost(2,2).
 ext_w "ext.w" D_REG+LOCAL+D_REG4:rw:cc cost(2,2).
 jmp address+control4 cost(2,0).
 jsr address+control4 kills :cc d0 d1 d2 a0 a1 cost(2,3).
 lea address+control4:ro, A_REG+areg:wo cost(2,0).
+lsl_l "lsl.l" shconreg:ro, D_REG4:rw:cc cost(2,4).
 /*
-lsl_l "lsl.l" shconreg:ro, D_REG:rw:cc cost(2,4).
 lsl "lsl #1," memalt2:rw:cc cost(2,4).
 */
 lsr_l "lsr.l" shconreg:ro, D_REG4:rw:cc cost(2,4).
@@ -709,6 +712,8 @@ neg_l "neg.l" memory4:rw:cc cost(2,6).
 #if WORD_SIZE==2
 neg_w "neg.w" memory2:rw:cc cost(2,6).
 #endif
+negx_l "negx.l" D_REG4:rw:cc cost(2,3).
+negx_l "negx.l" memory4:rw:cc cost(2,6).
 not_l "not.l" D_REG4:rw:cc cost(2,3).
 not_l "not.l" memory4:rw:cc cost(2,6).
 #if WORD_SIZE==2
@@ -733,6 +738,7 @@ ror_w "ror.w" shconreg:ro, D_REG:rw:cc cost(2,4).
 #endif
 roxl "roxl #1," memalt2:rw:cc cost(2,4).
 roxr "roxr #1," memalt2:rw:cc cost(2,4).
+slt datalt1:rw cost(2,3).
 sne datalt1:rw cost(2,3).
 sub_l "sub.l" any4:ro, D_REG4:rw:cc cost(2,3).
 sub_l "sub.l" any4+areg:ro, A_REG+areg:rw cost(2,3).
@@ -740,6 +746,9 @@ sub_l "sub.l" conreg4:ro, alterable4:rw:cc cost(2,6).
 #if WORD_SIZE==2
 sub_w "sub.w" any2:ro, D_REG+LOCAL:rw:cc cost(2,3).
 sub_w "sub.w" conreg2:ro, alterable2:rw:cc cost(2,6).
+#endif
+subx_l "subx.l" D_REG4:ro, D_REG4:rw kills :cc cost(2,3).
+#if WORD_SIZE==2
 /* On a swap, we only want the lower part of D_REG, so don't set cc */
 swap D_REG:rw kills :cc cost(2,2).
 #endif
@@ -773,6 +782,7 @@ divs_l "divs.l" data4:ro, D_REG4:rw:cc cost(2,90).
 divu_l "divu.l" data4:ro, D_REG4:rw:cc cost(2,78).
 divsl_l "divsl.l" data4:ro, DREG_pair:rw kills :cc cost(2,90).
 divul_l "divul.l" data4:ro, DREG_pair:rw kills :cc cost(2,78).
+mulu_l "mulu.l" data4:ro, DREG_pair:rw kills :cc cost(2,44).
 pea address+control4+regX cost(2,4).
 #if WORD_SIZE==2
 cmp2_w "cmp2.w" address+control2:ro, genreg2:ro kills :cc cost(2,18).
@@ -3796,6 +3806,18 @@ with exact any4 STACK
 	gen	add_l {post_inc4, sp}, %a
 		yields %a
 
+pat adi $1==8
+with exact any4 any4 DD_REG4 DD_REG4
+	uses reusing %1, DD_REG4 = %1
+	gen	add_l %2, %4
+		addx_l %a, %3		yields %4 %3
+with DD_REG4 DD_REG4 D_REG4 any4
+	gen	add_l %4, %2
+		addx_l %3, %1		yields %2 %1
+with DD_REG4 DD_REG4 D_REG4 STACK
+	gen	add_l {post_inc4, sp}, %2
+		addx_l %3, %1		yields %2 %1
+
 #if WORD_SIZE==2
 pat sbi $1==2
 with any2-bconst DD_REG
@@ -3822,6 +3844,12 @@ with exact any4 STACK
 with any4-bconst4 AA_REG
 	gen	sub_l %1, %2	yields %2
 
+pat sbi $1==8
+with D_REG4 any4-D_REG4 DD_REG4 DD_REG4
+	/* only 3 of DD_REG4; may unstack %2 into AA_REG */
+	gen	sub_l %2, %4
+		subx_l %1, %3		yields %4 %3
+
 #if WORD_SIZE==2
 pat loc loc cii ldc mli $1==2 && $2==4 && highw($4)==0 && loww($4)>0 && $5==4
 with any2-pre_post
@@ -3847,6 +3875,34 @@ with STACK
 		yields dl1
 #endif
 
+#ifdef TBL68020
+pat mli $1==8
+with exact data4 data4 DD_REG4 DD_REG4	/* yh yl xh xl */
+	uses DD_REG4 = %4
+	gen	mulu_l %1, %a				/* xl * yh */
+		mulu_l %2, %3				/* xh * yl */
+		add_l %3, %a
+		mulu_l %2, {DREG_pair, %3, %4}		/* xl * yl */
+		add_l %a, %3
+		yields %4 %3
+with DD_REG4 DD_REG4 data4 data4	/* yh yl xh xl */
+	uses DD_REG = %2
+	gen	mulu_l %3, %a				/* yl * xh */
+		mulu_l %4, %1				/* yh * xl */
+		add_l %1, %a
+		mulu_l %4, {DREG_pair, %1, %2}		/* yl * xl */
+		add_l %a, %1
+		yields %2 %1
+with DD_REG4 DD_REG4 STACK		/* yh yl xh xl */
+	uses DD_REG4 = %2
+	gen	mulu_l {post_inc4, sp}, %a		/* yl * xh */
+		mulu_l {indirect4, sp}, %1		/* yh * xl */
+		add_l %1, %a
+		mulu_l {post_inc4, sp}, {DREG_pair, %1, %2}	/* yl * xl */
+		add_l %a, %1
+		yields %2 %1
+#endif /* TBL68020 */
+
 #if WORD_SIZE==2
 pat dvi $1==2
 with data2-sconsts DD_REG
@@ -3866,6 +3922,14 @@ with STACK
 		yields dl1
 #endif /* TBL68020 */
 
+#ifdef TBL68020
+pat dvi $1==8
+with STACK
+	kills ALL
+	gen	jsr {absolute4, ".dvi8"}
+		yields dl1 dl0
+#endif /* TBL68020 */
+
 #if WORD_SIZE==2
 pat rmi $1==2
 with data2-sconsts DD_REG
@@ -3891,6 +3955,14 @@ with STACK
 		yields dl2
 #endif /* TBL68020 */
 
+#ifdef TBL68020
+pat rmi $1==8
+with STACK
+	kills ALL
+	gen	jsr {absolute4, ".rmi8"}
+		yields dl1 dl0
+#endif /* TBL68020 */
+
 #if WORD_SIZE==2
 pat ngi $1==2
 with DD_REG
@@ -3901,6 +3973,11 @@ pat ngi $1==4
 with DD_REG4
 	gen	neg_l %1	yields %1
 
+pat ngi $1==8
+with DD_REG4 DD_REG4
+	gen	neg_l %2
+		negx_l %1	yields %2 %1
+
 #if WORD_SIZE==2
 pat sli $1==2
 with shconreg DD_REG
@@ -3911,6 +3988,43 @@ pat sli $1==4
 with shconreg DD_REG4
	gen	asl_l %1, %2	yields %2
 
+pat sli $1==8
+with DD_REG4 DD_REG4 DD_REG4
+	uses AA_REG = %3		/* no 4th DD_REG */
+	gen	lsl_l %1, %3
+		lsl_l %1, %2		/* shift by %1 modulo 64 */
+		bchg {const, 5}, %1
+		bne {slabel, 1f}	/* jump if shift >= 32 */
+		neg_l %1
+		exg %a, %3
+		lsr_l %1, %3		/* (32 - shift) modulo 64 */
+		or_l %3, %2		/* shift bits from %3 to %2 */
+		move %a, %3
+		bra {slabel, 2f}
+		1:
+		move %a, %2
+		lsl_l %1, %2		/* (shift - 32) modulo 64 */
+		2:	yields %3 %2
+
+pat loc sli ($1&32)==0 && $2==8
+with DD_REG4 DD_REG4
+	uses AA_REG = %2, DD_REG = {bconst, $1&31}
+	gen	lsl_l %b, %2
+		lsl_l %b, %1
+		bset {const, 5}, %b
+		neg_l %b
+		exg %a, %2
+		lsr_l %b, %2
+		or_l %2, %1
+		move %a, %2
+		yields %2 %1
+pat loc sli ($1&63)==32 && $2==8
+with any4 any4		yields {zero_const, 0} %2
+pat loc sli ($1&32)!=0 && $2==8
+with any4 DD_REG4
+	uses reusing %1, DD_REG = {bconst, $1&31}
+	gen	lsl_l %a, %2	yields {zero_const, 0} %2
+
 #if WORD_SIZE==2
 pat sri $1==2
 with shconreg DD_REG
@@ -3921,6 +4035,43 @@ pat sri $1==4
 with shconreg DD_REG4
 	gen	asr_l %1, %2	yields %2
 
+pat sri $1==8
+with DD_REG4 DD_REG4 DD_REG4
+	uses AA_REG = %2		/* no 4th DD_REG */
+	gen	asr_l %1, %2
+		lsr_l %1, %3		/* shift by %1 modulo 64 */
+		bchg {const, 5}, %1
+		bne {slabel, 1f}	/* jump if shift >= 32 */
+		neg_l %1
+		exg %a, %2
+		lsl_l %1, %2		/* (32 - shift) modulo 64 */
+		or_l %2, %3		/* shift bits from %2 to %3 */
+		move %a, %2
+		bra {slabel, 2f}
+		1:
+		move %a, %3
+		asr_l %1, %3		/* (shift - 32) modulo 64 */
+		2:	yields %3 %2
+
+pat loc sri ($1&32)==0 && $2==8
+with DD_REG4 DD_REG4
+	uses AA_REG = %1, DD_REG = {bconst, $1&31}
+	gen	asr_l %b, %1
+		lsr_l %b, %2
+		bset {const, 5}, %b
+		neg_l %b
+		exg %a, %1
+		lsl_l %b, %1
+		or_l %1, %2
+		move %a, %1
+		yields %2 %1
+pat loc sri ($1&63)==32 && $2==8
+with DD_REG4 any4	yields %1	leaving loc 4 loc 8 cii
+pat loc sri ($1&32)!=0 && $2==8
+with DD_REG4 any4
+	uses reusing %2, DD_REG = {bconst, $1&31}
+	gen	asr_l %a, %1	yields %1	leaving loc 4 loc 8 cii
+
 /************************************************
  * Group 4: unsigned arithmetic.                *
  ************************************************/
@@ -3947,6 +4098,8 @@ with STACK
 		yields dl1
 #endif /* TBL68020 */
 
+pat mlu $1==8	leaving mli 8
+
 #if WORD_SIZE==2
 pat dvu $1==2
 with data2-sconsts data2
@@ -3966,6 +4119,14 @@ with STACK
 		yields dl1
 #endif /* TBL68020 */
 
+#ifdef TBL68020
+pat dvu $1==8
+with STACK
+	kills ALL
+	gen	jsr {absolute4, ".dvu8"}
+		yields dl1 dl0
+#endif /* TBL68020 */
+
 #if WORD_SIZE==2
 pat rmu $1==2
 with data2-sconsts data2
@@ -3992,8 +4153,18 @@ with STACK
 		yields dl2
 #endif /* TBL68020 */
 
+#ifdef TBL68020
+pat rmu $1==8
+with STACK
+	kills ALL
+	gen	jsr {absolute4, ".rmu8"}
+		yields dl1 dl0
+#endif /* TBL68020 */
+
 pat slu	leaving sli $1
 
+pat loc slu $2==8	leaving loc $1 sli 8
+
 #if WORD_SIZE==2
 pat sru $1==2
 with shconreg DD_REG
@@ -4004,6 +4175,43 @@ pat sru $1==4
 with shconreg DD_REG4
 	gen	lsr_l %1, %2	yields %2
 
+pat sru $1==8
+with DD_REG4 DD_REG4 DD_REG4
+	uses AA_REG = %2		/* no 4th DD_REG */
+	gen	lsr_l %1, %2
+		lsr_l %1, %3		/* shift by %1 modulo 64 */
+		bchg {const, 5}, %1
+		bne {slabel, 1f}	/* jump if shift >= 32 */
+		neg_l %1
+		exg %a, %2
+		lsl_l %1, %2		/* (32 - shift) modulo 64 */
+		or_l %2, %3		/* shift bits from %2 to %3 */
+		move %a, %2
+		bra {slabel, 2f}
+		1:
+		move %a, %3
+		lsr_l %1, %3		/* (shift - 32) modulo 64 */
+		2:	yields %3 %2
+
+pat loc sru ($1&32)==0 && $2==8
+with DD_REG4 DD_REG4
+	uses AA_REG = %2, DD_REG = {bconst, $1&31}
+	gen	lsr_l %b, %1
+		lsr_l %b, %2
+		bset {const, 5}, %b
+		neg_l %b
+		exg %a, %1
+		lsl_l %b, %1
+		or_l %1, %2
+		move %a, %1
+		yields %2 %1
+pat loc sru ($1&63)==32 && $2==8
+with any4 any4		yields %1 {zero_const, 0}
+pat loc sru ($1&32)!=0 && $2==8
+with DD_REG4 any4
+	uses reusing %2, DD_REG = {bconst, $1&31}
+	gen	lsr_l %a, %1	yields %1 {zero_const, 0}
+
 /************************************************
  * Group 5: floating point arithmetic           *
 ************************************************/
@@ -4753,6 +4961,17 @@ with exact any_int STACK
 	uses reusing %1,DD_REG=%1
 	gen	xxx* {post_inc_int, sp}, %a	yields %a
 
+proc log8 example and
+with exact data4 data4 DD_REG4 DD_REG4
+	gen	xxx* %1, %3
+		xxx* %2, %4	yields %4 %3
+with DD_REG4 DD_REG4 data4 data4
+	gen	xxx* %3, %1
+		xxx* %4, %2	yields %2 %1
+with DD_REG4 DD_REG4 STACK
+	gen	xxx* {post_inc4, sp}, %1
+		xxx* {post_inc4, sp}, %2	yields %2 %1
+
 proc logdef example and
 with STACK
 	uses DD_REG4 = {const, $1/WORD_SIZE -1},
@@ -4813,6 +5032,7 @@ pat and $1==WORD_SIZE	call logw(AND_I)
 #if WORD_SIZE==2
 pat and $1==2*WORD_SIZE	call log2w("and.l")
 #endif
+pat and $1==8		call log8("and.l")
 pat and $1>4 && $1/WORD_SIZE<=65536	call logdef(AND_I)
 pat and defined($1)	call logbdef(AND_I)
 pat and !defined($1)	call logndef(AND_I)
@@ -4821,6 +5041,7 @@ pat ior $1==WORD_SIZE	call logw(OR_I)
 #if WORD_SIZE==2
 pat ior $1==2*WORD_SIZE	call log2w("or.l")
 #endif
+pat ior $1==8		call log8("or.l")
 pat ior $1>2 && $1/WORD_SIZE<=65536	call logdef(OR_I)
 pat ior defined($1)	call logbdef(OR_I)
 pat ior !defined($1)	call logndef(OR_I)
@@ -4835,6 +5056,21 @@ pat xor $1==4
 with DD_REG4 conreg4-bconst4
 	gen	eor_l %2, %1	yields %1
 
+pat xor $1==8
+with exact any4 any4 DD_REG4 DD_REG4
+	uses reusing %1, DD_REG4 = %1
+	gen	eor_l %a, %3
+		move %2, %a
+		eor_l %a, %4	yields %4 %3
+with DD_REG4 DD_REG4 D_REG4 any4
+	gen	eor_l %3, %1
+		move %4, %3
+		eor_l %3, %2	yields %2 %1
+with DD_REG4 DD_REG4 DD_REG4 STACK
+	gen	eor_l %3, %1
+		move_l {post_inc4, sp}, %3
+		eor_l %3, %2	yields %2 %1
+
 pat xor $1>4 && $1/WORD_SIZE<=65536	call logdef(EOR_I)
 pat xor defined($1)	call logbdef(EOR_I)
 pat xor !defined($1)	call logndef(EOR_I)
@@ -4907,6 +5143,50 @@ pat rol $1==4
 with shconreg DD_REG4
 	gen	rol_l %1, %2	yields %2
 
+pat rol $1==8
+with DD_REG4 DD_REG4 DD_REG4
+	uses AA_REG, AA_REG		/* no 4th DD_REG */
+	gen	bclr {const, 5}, %1
+		beq {slabel, 1f}
+		exg %2, %3		/* rotate left 32 */
+		1:
+		move %2, %a
+		move %3, %b
+		lsl_l %1, %2
+		lsl_l %1, %3
+		bset {const, 5}, %1
+		neg_l %1		/* (32 - shift) modulo 64 */
+		exg %a, %2
+		lsr_l %1, %2
+		or_l %2, %3		/* rotate bits from %2 to %3 */
+		move %a, %2
+		exg %b, %3
+		lsr_l %1, %3
+		or_l %3, %2		/* rotate bits from %3 to %2 */
+		move %b, %3
+		yields %3 %2
+
+pat loc rol ($1&32)==0 && $2==8
+with DD_REG4 DD_REG4
+	uses AA_REG, AA_REG, DD_REG = {bconst, $1&31}
+	gen	move %1, %a
+		move %2, %b
+		lsl_l %c, %1
+		lsl_l %c, %2
+		bset {const, 5}, %c
+		neg_l %c
+		exg %a, %1
+		lsr_l %c, %1
+		or_l %1, %2
+		move %a, %1
+		exg %b, %2
+		lsr_l %c, %2
+		or_l %2, %1
+		move %b, %2
+		yields %2 %1
+pat loc rol ($1&63)==32 && $2==8	leaving exg 4
+pat loc rol ($1&32)!=0 && $2==8		leaving loc (0-$1)&31 ror 8
+
 #if WORD_SIZE==2
 pat ror $1==2
 with shconreg DD_REG
@@ -4917,7 +5197,51 @@ pat ror $1==4
 with shconreg DD_REG4
 	gen	ror_l %1, %2	yields %2
-
+pat ror $1==8
+with DD_REG4 DD_REG4 DD_REG4
+	uses AA_REG, AA_REG		/* no 4th DD_REG */
+	gen	bclr {const, 5}, %1
+		beq {slabel, 1f}
+		exg %2, %3		/* rotate right 32 */
+		1:
+		move %2, %a
+		move %3, %b
+		lsr_l %1, %2
+		lsr_l %1, %3
+		bset {const, 5}, %1
+		neg_l %1		/* (32 - shift) modulo 64 */
+		exg %a, %2
+		lsl_l %1, %2
+		or_l %2, %3		/* rotate bits from %2 to %3 */
+		move %a, %2
+		exg %b, %3
+		lsl_l %1, %3
+		or_l %3, %2		/* rotate bits from %3 to %2 */
+		move %b, %3
+		yields %3 %2
+
+pat loc ror ($1&32)==0 && $2==8
+with DD_REG4 DD_REG4
+	uses AA_REG, AA_REG, DD_REG = {bconst, $1&31}
+	gen	move %1, %a
+		move %2, %b
+		lsr_l %c, %1
+		lsr_l %c, %2
+		bset {const, 5}, %c
+		neg_l %c
+		exg %a, %1
+		lsl_l %c, %1
+		or_l %1, %2
+		move %a, %1
+		exg %b, %2
+		lsl_l %c, %2
+		or_l %2, %1
+		move %b, %2
+		yields %2 %1
+pat loc ror ($1&63)==32 && $2==8	leaving exg 4
+pat loc ror ($1&32)!=0 && $2==8		leaving loc (0-$1)&31 rol 8
+
+
 /************************************************
@@ -6391,6 +6715,55 @@ pat cmu zge $1==WORD_SIZE	call cmuzxx("bcc","bls")
 pat cmu zgt $1==WORD_SIZE	call cmuzxx("bhi","bcs")
 
+proc cmx8txx example cmi tlt
+with exact DD_REG4 DD_REG4 any4 any4
+	uses reusing %3, DD_REG4 = %3
+	gen	sub_l %4, %2
+		subx_l %a, %1		/* keep overflow flag */
+		sxx[2] %1
+		neg_b %1	yields {dreg1, %1}
+with D_REG4 any4-D_REG4 DD_REG4 DD_REG4
+	/* only 3 of DD_REG4; may unstack %2 into AA_REG */
+	gen	sub_l %2, %4
+		subx_l %1, %3
+		sxx[1] %3
+		neg_b %3	yields {dreg1, %3}
+
+pat cmi tlt $1==8	call cmx8txx("slt","sgt")
+pat cmi tle $1==8	call cmx8txx("sle","sge")
+pat cmi tge $1==8	call cmx8txx("sge","sle")
+pat cmi tgt $1==8	call cmx8txx("sgt","slt")
+pat cms teq $1==8	call cmx8txx("seq","seq")
+pat cms tne $1==8	call cmx8txx("sne","sne")
+pat cmu tlt $1==8	call cmx8txx("scs","shi")
+pat cmu tle $1==8	call cmx8txx("sls","scc")
+pat cmu tge $1==8	call cmx8txx("scc","sls")
+pat cmu tgt $1==8	call cmx8txx("shi","scs")
+
+proc cmx8zxx example cmi zlt
+with exact DD_REG4 DD_REG4 any4 any4
+	kills ALL
+	uses reusing %3, DD_REG4 = %3
+	gen	sub_l %4, %2
+		subx_l %a, %1
+		bxx[2] {llabel, $2}
+with D_REG4 any4-D_REG4 DD_REG4 DD_REG4 STACK
+	gen	sub_l %2, %4
+		subx_l %1, %3
+		bxx[1] {llabel, $2}
+
+pat cmi zlt $1==8	call cmx8zxx("blt","bgt")
+pat cmi zle $1==8	call cmx8zxx("ble","bge")
+pat cmi zge $1==8	call cmx8zxx("bge","ble")
+pat cmi zgt $1==8	call cmx8zxx("bgt","blt")
+pat cms zeq $1==8	call cmx8zxx("beq","beq")
+pat cms zne $1==8	call cmx8zxx("bne","bne")
+pat cmu zlt $1==8	call cmx8zxx("bcs","bhi")
+pat cmu zle $1==8	call cmx8zxx("bls","bcc")
+pat cmu zge $1==8	call cmx8zxx("bcc","bls")
+pat cmu zgt $1==8	call cmx8zxx("bhi","bcs")
+
+
 #if TBL68881
 proc cmf4zxx example cmf zlt
 with FS_REG FS_REG
@@ -6630,6 +7003,33 @@ uses reusing %1,DD_REG4
 pat loc loc ciu $1==$2	/* skip this */
 pat loc loc cui $1==$2	/* skip this */
 
+pat loc loc cii $1==4 && $2==8
+with exact test_set1+test_set2
+	yields %1 {zero_const, 0}
+with test_set4
+	uses DD_REG4
+	gen	test %1
+		slt {dreg1, %a}
+#ifdef TBL68020
+		extb_l %a
+#else
+		ext_w %a
+		ext_l %a
+#endif
+		yields %1 %a
+
+pat loc loc cii $1<4 && $2==8
+	leaving loc $1 loc 4 cii loc 4 loc 8 cii
+
+pat loc loc ciu $1==4 && $2==8	yields {zero_const, 0}
+pat loc loc cui $1==4 && $2==8	yields {zero_const, 0}
+pat loc loc cuu $1==4 && $2==8	yields {zero_const, 0}
+
+pat loc loc cii $1==8 && $2==4	leaving asp 4
+pat loc loc ciu $1==8 && $2==4	leaving asp 4
+pat loc loc cui $1==8 && $2==4	leaving asp 4
+pat loc loc cuu $1==8 && $2==4	leaving asp 4
+
 /* The following rules should be handled by the peephole optimizer, I think */
diff --git a/plat/linux68k/descr b/plat/linux68k/descr
index a530fffdb..d813f61a8 100644
--- a/plat/linux68k/descr
+++ b/plat/linux68k/descr
@@ -10,6 +10,8 @@ var s=2
 var sa={s}
 var l={w}
 var la={w}
+var q=8
+var qa=4
 var f={w}
 var fa={w}
 var d=8
@@ -19,11 +21,12 @@ var xa={x}
 var ARCH=m68020
 var PLATFORM=linux68k
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
-var CPP_F=-D__unix -D__mc68020 -D__m68k -D__mc68000 -D__M68020
+var CPP_F=-D__unix -D__mc68020 -D__m68k -D__mc68000 -D__M68020 -D_EM_LLSIZE={q}
 var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x08000054
 var C_LIB={PLATFORMDIR}/libc-ansi.a
 # bitfields reversed for compatibility with (g)cc.
-var CC_ALIGN=-Vr
+# long long enabled.
+var CC_ALIGN=-Vrq{q}.{qa}
 var OLD_C_LIB={C_LIB}
 var MACHOPT_F=-ml10
 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr
diff --git a/plat/linux68k/tests/build.lua b/plat/linux68k/tests/build.lua
index 221abc8d6..37edfaada 100644
--- a/plat/linux68k/tests/build.lua
+++ b/plat/linux68k/tests/build.lua
@@ -6,6 +6,5 @@ plat_testsuite {
 	method = "plat/linux68k/emu+emu68k",
 	skipsets = {
 		"floats", -- FPU instructions not supported by emulator
-		"long-long",
 	},
 }