Teach mcg to pass our tests.

Tests pass if one edits the top build.lua to uncomment "qemuppc" from
both vars.plats and vars.plats_with_tests, and one leaves mcg in
plat/qemuppc/descr.

Add or correct some EM instructions in treebuilder.c:
 - "lof", "stf": handle negative offsets in load() and store().
 - "cuu": add using IR_FROMUI.
 - "lim", "sim": keep an entire word in ".ignmask", to be compatible
   with mach/powerpc/libem/trp.s and ncg.  We also keep a word in
   ".ignmask" in ncg for both i386 and m68020.
 - "trp": pass trap number in register.  See comment in
   helper_function_with_arg().
 - "sig": push the old value of .trppc on the stack.
 - "and ?", "ior ?", "xor ?", "com ?", "cms ?", "set ?", "inn ?":
   connect to helper functions in libem.
 - "blm", "bls": drop call to memmove() and use new helper ".bls4",
   because tests/plat/structcopy_e.c can't call memmove().
 - "xor s", "cms s": if s is large, fall back on helper function.
 - "rol", "ror": add by decomposing each rotate into 4 IR ops.
 - "rck s", "bls s": make fatal unless s is word size.
 - "loi": push multiple loads in the correct order.
 - "dup s", "exg s": if s is large, fall back on helper.
 - "dus": add using new helper ".dus4".
 - "lxl", "lxa": follow the static chain, not the dynamic chain.
 - "lor 1": materialise the stack before pushing the stack pointer.
 - "lor 2", "str 2": make fatal.
 - "los", "sts": drop calls to memcpy() and use helpers ".los4" and
   ".sts4", so lang/m2/libm2/LtoUset.e starts working.
 - "gto": correctly read descriptor.

Change mach/powerpc/mcg/table:
 - ANY.L: add for "asp -8".
 - LOAD.L: work around register corruption.
 - COMPAREUL.I: add for "cms 8".
This commit is contained in:
George Koehler 2018-01-30 15:53:26 -05:00
parent b3c0a767a5
commit 9077b3a5ab
5 changed files with 223 additions and 183 deletions

19
mach/powerpc/libem/bls4.s Normal file
View file

@ -0,0 +1,19 @@
.sect .text
! Does a block move of words between non-overlapping buffers.
! Stack: ( src dst len -- )
! All three arguments are popped before returning.  len is shifted
! right by 2 to form the loop count, so it is presumably a byte count
! that is a multiple of 4 -- TODO confirm against the callers.
! NOTE(review): a len below 4 leaves the count register at 0; bdnz
! decrements before it tests, so the loop would not stop after one
! pass.  Confirm that callers never pass such a length.
.define .bls4
.bls4:
! Fetch the arguments; len is on top of the stack.
lwz r3, 0(sp) ! len
lwz r4, 4(sp) ! dst
lwz r5, 8(sp) ! src
! Drop the three argument words.
addi sp, sp, 12
! ctr = len >> 2, the number of words to copy.
srwi r3, r3, 2
mtspr ctr, r3
! Bias both pointers by -4, because lwzu/stwu below add 4 to the
! pointer before each access.
addi r5, r5, -4
addi r4, r4, -4
! Copy one word per pass, walking upwards; r3 is reused as scratch
! now that the count lives in ctr.
1: lwzu r3, 4(r5)
stwu r3, 4(r4)
bdnz 1b
blr

View file

@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do
acklibrary {
name = "lib_"..plat,
srcs = {
"./*.s", -- fif4.s
"./*.s", -- dus4.s
},
vars = { plat = plat },
deps = {

16
mach/powerpc/libem/dus4.s Normal file
View file

@ -0,0 +1,16 @@
.sect .text
! Duplicates some words on top of stack.
! Stack: ( a size -- a a )
! size is popped; it is shifted right by 2 to form the loop count, so
! it is presumably a byte count that is a multiple of 4 -- TODO
! confirm against the callers.
! NOTE(review): a size below 4 leaves the count register at 0; bdnz
! decrements before it tests, so the loop would not stop after one
! pass.  Confirm that callers never pass such a size.
.define .dus4
.dus4:
! r3 = size, taken from the top of the stack and then dropped.
lwz r3, 0(sp)
addi sp, sp, 4
! ctr = size >> 2, the number of words to duplicate.
srwi r4, r3, 2
mtspr ctr, r4
! r5 = address just past the last word of a.
add r5, sp, r3
! Walk a from its highest word down to its lowest, pushing each word,
! so the copy ends up in the same order as the original.
1: lwzu r4, -4(r5)
stwu r4, -4(sp)
bdnz 1b
blr

View file

@ -241,6 +241,9 @@ PATTERNS
out:(int)reg = ANY.I
cost 1;
out:(long)reg = ANY.L
cost 1;
out:(int)reg = COPYF.I(in:(float)reg)
emit "stfsu %in, -4(sp)"
emit "lwz %out, 0(sp)"
@ -306,10 +309,21 @@ PATTERNS
emit "lwz %out, %addr"
cost 4;
#if 0
/* FIXME: Doesn't work because %out.0 and %addr might share a
* register, so it corrupts %addr before it loads %out.1. */
out:(long)reg = LOAD.L(addr:address)
emit "lwz %out.0, 4+%addr"
emit "lwz %out.1, 0+%addr"
cost 8;
#else
/* Works, but costs an extra instruction. */
out:(long)reg = LOAD.L(addr:address)
emit "la %out.1, %addr"
emit "lwz %out.0, 4(%out.1)"
emit "lwz %out.1, 0(%out.1)"
cost 12;
#endif
out:(int)ushort0 = LOADH.I(addr:address)
emit "lhz %out, %addr"
@ -566,6 +580,13 @@ PATTERNS
emit "! COMPARESI.I(cr, 0)"
cost 4;
cr:(cr)cr = COMPAREUL.I(left:(long)reg, right:(long)reg)
emit "cmpl %cr, 0, %left.1, %right.1"
emit "bne 1f"
emit "cmpl %cr, 0, %left.0, %right.0"
emit "1:"
cost 12;
/* Booleans */

View file

@ -274,7 +274,7 @@ static struct ir* store(int size, struct ir* address, int offset, struct ir* val
else
opcode = IR_STORE;
if (offset > 0)
if (offset != 0)
address = new_ir2(
IR_ADD, EM_pointersize,
address, new_wordir(offset)
@ -304,7 +304,7 @@ static struct ir* load(int size, struct ir* address, int offset)
else
opcode = IR_LOAD;
if (offset > 0)
if (offset != 0)
address = new_ir2(
IR_ADD, EM_pointersize,
address, new_wordir(offset)
@ -416,6 +416,31 @@ static void helper_function(const char* name)
);
}
/* Emits a call to the helper routine `name`, handing it the single
 * value `arg` through IR_SETRET rather than through the stack.
 *
 * name: label of the helper to call.
 * arg:  IR node holding the argument; its own size is used as the size
 *       of the IR_SETRET node. */
static void helper_function_with_arg(const char* name, struct ir* arg)
{
/* Abuses IR_SETRET to set a register to pass one argument to a
* helper function.
*
* FIXME: As of January 2018, mach/powerpc/libem takes an
* argument in register r3 only for ".los4", ".sts4", ".trp".
* This is an accident. Should the argument be on the stack, or
* should other helpers use a register? */
/* Materialise the stack before any IR for the call is appended. */
materialise_stack();
/* First the argument ... */
appendir(
new_ir1(
IR_SETRET, arg->size,
arg
)
);
/* ... then the call itself. */
appendir(
new_ir1(
IR_CALL, 0,
new_labelir(name)
)
);
}
static void insn_simple(int opcode)
{
switch (opcode)
@ -437,6 +462,7 @@ static void insn_simple(int opcode)
case op_cii: simple_convert(IR_FROMSI); break;
case op_ciu: simple_convert(IR_FROMSI); break;
case op_cui: simple_convert(IR_FROMUI); break;
case op_cuu: simple_convert(IR_FROMUI); break;
case op_cfu: simple_convert(IR_FROMUF); break;
case op_cfi: simple_convert(IR_FROMSF); break;
case op_cif: simple_convert(IR_FROMSI); break;
@ -496,10 +522,12 @@ static void insn_simple(int opcode)
case op_lim:
{
/* Traps use only 16 bits of .ignmask, but we keep an
* entire word, even if a word has more than 2 bytes. */
push(
new_ir1(
(EM_wordsize == 2) ? IR_LOAD : IR_LOADH, EM_wordsize,
new_labelir(".ignmask")
load(
EM_wordsize,
new_labelir(".ignmask"), 0
)
);
break;
@ -507,26 +535,34 @@ static void insn_simple(int opcode)
case op_sim:
{
sequence_point();
appendir(
new_ir2(
(EM_wordsize == 2) ? IR_STORE : IR_STOREH, EM_wordsize,
new_labelir(".ignmask"),
store(
EM_wordsize,
new_labelir(".ignmask"), 0,
pop(EM_wordsize)
)
);
break;
}
case op_trp: helper_function(".trp"); break;
case op_trp:
helper_function_with_arg(".trp", pop(EM_wordsize));
break;
case op_sig:
{
struct ir* label = new_labelir(".trppc");
struct ir* value = pop(EM_pointersize);
push(
load(
EM_pointersize,
label, 0
)
);
appendir(
store(
EM_pointersize,
new_labelir(".trppc"), 0,
label, 0,
value
)
);
@ -539,12 +575,13 @@ static void insn_simple(int opcode)
break;
}
/* FIXME: These instructions are really complex and barely used
* (Modula-2 and Pascal set support, I believe). Leave them until
* later. */
case op_set: helper_function(".unimplemented_set"); break;
case op_ior: helper_function(".unimplemented_ior"); break;
case op_and: helper_function(".and"); break;
case op_ior: helper_function(".ior"); break;
case op_xor: helper_function(".xor"); break;
case op_com: helper_function(".com"); break;
case op_cms: helper_function(".cms"); break;
case op_set: helper_function(".set"); break;
case op_inn: helper_function(".inn"); break;
case op_dch:
push(
@ -670,6 +707,31 @@ static void simple_alu2(int opcode, int size, int irop, const char* fallback)
}
}
/* Builds the IR for a rotate by combining two opposite shifts with OR,
 * and pushes the result.
 *
 * opcode:       EM opcode; used only to name the instruction in the
 *               fatal error message.
 * size:         operand size in bytes; anything above 2*EM_wordsize is
 *               fatal.
 * irop:         shift in the direction of the rotate.
 * irop_reverse: shift in the opposite direction. */
static void rotate(int opcode, int size, int irop, int irop_reverse)
{
if (size > (2*EM_wordsize))
fatal("treebuilder: can't do opcode %s with size %d", em_mnem[opcode - sp_fmnem], size);
else
{
/* `right` is popped first and is used as the shift count; `left` is
 * the value being rotated. */
struct ir* right = pop(size);
struct ir* left = pop(size);
/* Width of the operand in bits. */
struct ir* bits = new_wordir(8 * size);
/* a rol b -> (a << b) | (a >> (8*size - b))
 * Note that `left` and `right` are each referenced from two places in
 * the resulting tree. */
push(
new_ir2(
IR_OR, size,
new_ir2(irop, size, left, right),
new_ir2(
irop_reverse, size,
left,
new_ir2(IR_SUB, size, bits, right)
)
)
);
}
}
static struct ir* extract_block_refs(struct basicblock* bb)
{
struct ir* outir = NULL;
@ -720,27 +782,29 @@ static struct ir* ptradd(struct ir* address, int offset)
);
}
static void blockmove(struct ir* dest, struct ir* src, struct ir* size)
static struct ir* walk_static_chain(int level)
{
/* memmove stack: ( size src dest -- ) */
push(size);
push(src);
push(dest);
struct ir* ir;
materialise_stack();
appendir(
new_ir1(
IR_CALL, 0,
new_labelir("memmove")
)
/* The static chain, when it exists, is the first argument of each
* procedure. The chain begins with the current frame at level 0,
* and continues until we reach the outermost procedure. */
ir = new_ir0(
IR_GETFP, EM_pointersize
);
appendir(
while (level--)
{
/* Walk to the next frame pointer. */
ir = load(
EM_pointersize,
new_ir1(
IR_STACKADJUST, EM_pointersize,
new_wordir(EM_pointersize*2 + EM_wordsize)
)
IR_FPTOAB, EM_pointersize,
ir
), 0
);
}
return ir;
}
static void insn_ivalue(int opcode, arith value)
{
@ -765,8 +829,10 @@ static void insn_ivalue(int opcode, arith value)
case op_and: simple_alu2(opcode, value, IR_AND, ".and"); break;
case op_ior: simple_alu2(opcode, value, IR_OR, ".ior"); break;
case op_xor: simple_alu2(opcode, value, IR_EOR, NULL); break;
case op_xor: simple_alu2(opcode, value, IR_EOR, ".xor"); break;
case op_com: simple_alu1(opcode, value, IR_NOT, ".com"); break;
case op_rol: rotate(opcode, value, IR_LSL, IR_LSR); break;
case op_ror: rotate(opcode, value, IR_LSR, IR_LSL); break;
case op_adf: simple_alu2(opcode, value, IR_ADDF, NULL); break;
case op_sbf: simple_alu2(opcode, value, IR_SUBF, NULL); break;
@ -774,12 +840,23 @@ static void insn_ivalue(int opcode, arith value)
case op_dvf: simple_alu2(opcode, value, IR_DIVF, NULL); break;
case op_ngf: simple_alu1(opcode, value, IR_NEGF, NULL); break;
case op_cmu: /* fall through */
case op_cms: push(tristate_compare(value, IR_COMPAREUI)); break;
case op_cms:
if (value > (2*EM_wordsize))
{
push(new_wordir(value));
helper_function(".cms");
break;
}
/* fall through */
case op_cmu: push(tristate_compare(value, IR_COMPAREUI)); break;
case op_cmi: push(tristate_compare(value, IR_COMPARESI)); break;
case op_cmf: push(tristate_compare(value, IR_COMPAREF)); break;
case op_rck: helper_function(".rck"); break;
case op_rck:
if (value != EM_wordsize)
fatal("'rck %d' not supported", value);
helper_function(".rck");
break;
case op_set: push(new_wordir(value)); helper_function(".set"); break;
case op_inn: push(new_wordir(value)); helper_function(".inn"); break;
@ -930,26 +1007,24 @@ static void insn_ivalue(int opcode, arith value)
if (value > (EM_wordsize*2))
{
/* We're going to need to do multiple stores; fix the address
/* We're going to need to do multiple loads; fix the address
* so it'll go into a register and we can do maths on it. */
appendir(ptr);
}
/* Stack grows down. Load backwards. */
while (value > 0)
{
int s = EM_wordsize*2;
if (value < s)
s = value;
value -= s;
push(
load(
s,
ptr, offset
ptr, value
)
);
value -= s;
offset += s;
}
assert(value == 0);
@ -1099,7 +1174,12 @@ static void insn_ivalue(int opcode, arith value)
case op_dup:
{
sequence_point();
if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize))
if (value > (2*EM_wordsize))
{
push(new_wordir(value));
helper_function(".dus4");
}
else if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize))
{
struct ir* v1 = pop(EM_wordsize);
struct ir* v2 = pop(EM_wordsize);
@ -1117,12 +1197,30 @@ static void insn_ivalue(int opcode, arith value)
break;
}
case op_dus:
{
if (value != EM_wordsize)
fatal("'dus %d' not supported", value);
helper_function(".dus4");
break;
}
case op_exg:
{
if (value > (2*EM_wordsize))
{
push(
new_wordir(value)
);
helper_function(".exg");
}
else
{
struct ir* v1 = pop(value);
struct ir* v2 = pop(value);
push(v1);
push(v2);
}
break;
}
@ -1285,53 +1383,19 @@ static void insn_ivalue(int opcode, arith value)
}
case op_lxl:
{
struct ir* ir;
/* Walk the static chain. */
ir = new_ir0(
IR_GETFP, EM_pointersize
push(
walk_static_chain(value)
);
while (value--)
{
ir = new_ir1(
IR_CHAINFP, EM_pointersize,
ir
);
}
push(ir);
break;
}
case op_lxa:
{
struct ir* ir;
/* Walk the static chain. */
ir = new_ir0(
IR_GETFP, EM_pointersize
);
while (value--)
{
ir = new_ir1(
IR_CHAINFP, EM_pointersize,
ir
);
}
push(
new_ir1(
IR_FPTOAB, EM_pointersize,
ir
walk_static_chain(value)
)
);
break;
}
case op_fef:
{
@ -1394,6 +1458,7 @@ static void insn_ivalue(int opcode, arith value)
break;
case 1:
materialise_stack();
push(
appendir(
new_ir0(
@ -1403,10 +1468,6 @@ static void insn_ivalue(int opcode, arith value)
);
break;
case 2:
helper_function(".unimplemented_lor_2");
break;
default:
fatal("'lor %d' not supported", value);
}
@ -1436,10 +1497,6 @@ static void insn_ivalue(int opcode, arith value)
);
break;
case 2:
helper_function(".unimplemented_str_2");
break;
default:
fatal("'str %d' not supported", value);
}
@ -1448,100 +1505,27 @@ static void insn_ivalue(int opcode, arith value)
}
case op_blm:
{
/* Input stack: ( src dest -- ) */
struct ir* dest = pop(EM_pointersize);
struct ir* src = pop(EM_pointersize);
blockmove(dest, src, new_wordir(value));
push(new_wordir(value));
helper_function(".bls4");
break;
}
case op_bls:
{
/* Input stack: ( src dest size -- ) */
struct ir* dest = pop(EM_pointersize);
struct ir* src = pop(EM_pointersize);
struct ir* size = pop(EM_wordsize);
blockmove(dest, src, size);
if (value != EM_wordsize)
fatal("'bls %d' not supported", value);
helper_function(".bls4");
break;
}
case op_los:
{
/* Copy an arbitrary amount to the stack. */
struct ir* bytes = pop(EM_wordsize);
struct ir* address = pop(EM_pointersize);
materialise_stack();
appendir(
new_ir1(
IR_STACKADJUST, EM_pointersize,
new_ir1(
IR_NEG, EM_wordsize,
bytes
)
)
);
push(
new_ir0(
IR_GETSP, EM_pointersize
)
);
push(address);
push(bytes);
materialise_stack();
appendir(
new_ir1(
IR_CALL, 0,
new_labelir("memcpy")
)
);
appendir(
new_ir1(
IR_STACKADJUST, EM_pointersize,
new_wordir(EM_pointersize*2 + EM_wordsize)
)
);
if (value != EM_wordsize)
fatal("'los %d' not supported", value);
helper_function_with_arg(".los4", pop(EM_wordsize));
break;
}
case op_sts:
{
/* Copy an arbitrary amount from the stack. */
struct ir* bytes = pop(EM_wordsize);
struct ir* dest = pop(EM_pointersize);
struct ir* src;
materialise_stack();
src = appendir(
new_ir0(
IR_GETSP, EM_pointersize
)
);
push(dest);
push(src);
push(bytes);
materialise_stack();
appendir(
new_ir1(
IR_CALL, 0,
new_labelir("memcpy")
)
);
appendir(
new_ir1(
IR_STACKADJUST, EM_pointersize,
new_ir2(
IR_ADD, EM_wordsize,
new_wordir(EM_pointersize*2 + EM_wordsize),
bytes
)
)
);
if (value != EM_wordsize)
fatal("'sts %d' not supported", value);
helper_function_with_arg(".sts4", pop(EM_wordsize));
break;
}
case op_lin:
{
@ -1677,17 +1661,17 @@ static void insn_lvalue(int opcode, const char* label, arith offset)
case op_gto:
{
struct ir* descriptor = pop(EM_pointersize);
struct ir* descriptor = address_of_external(label, offset);
appendir(
new_ir1(
IR_SETSP, EM_pointersize,
IR_SETFP, EM_pointersize,
load(EM_pointersize, descriptor, EM_pointersize*2)
)
);
appendir(
new_ir1(
IR_SETFP, EM_pointersize,
IR_SETSP, EM_pointersize,
load(EM_pointersize, descriptor, EM_pointersize*1)
)
);