diff --git a/mach/powerpc/libem/bls4.s b/mach/powerpc/libem/bls4.s new file mode 100644 index 000000000..a36faca68 --- /dev/null +++ b/mach/powerpc/libem/bls4.s @@ -0,0 +1,19 @@ +.sect .text + +! Does a block move of words between non-overlapping buffers. +! Stack: ( src dst len -- ) + +.define .bls4 +.bls4: + lwz r3, 0(sp) ! len + lwz r4, 4(sp) ! dst + lwz r5, 8(sp) ! src + addi sp, sp, 12 + srwi r3, r3, 2 + mtspr ctr, r3 + addi r5, r5, -4 + addi r4, r4, -4 +1: lwzu r3, 4(r5) + stwu r3, 4(r4) + bdnz 1b + blr diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua index 2709a4770..5ed9b52e8 100644 --- a/mach/powerpc/libem/build.lua +++ b/mach/powerpc/libem/build.lua @@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do acklibrary { name = "lib_"..plat, srcs = { - "./*.s", -- fif4.s + "./*.s", -- dus4.s }, vars = { plat = plat }, deps = { diff --git a/mach/powerpc/libem/dus4.s b/mach/powerpc/libem/dus4.s new file mode 100644 index 000000000..9c751947a --- /dev/null +++ b/mach/powerpc/libem/dus4.s @@ -0,0 +1,16 @@ +.sect .text + +! Duplicates some words on top of stack. +! Stack: ( a size -- a a ) + +.define .dus4 +.dus4: + lwz r3, 0(sp) + addi sp, sp, 4 + srwi r4, r3, 2 + mtspr ctr, r4 + add r5, sp, r3 +1: lwzu r4, -4(r5) + stwu r4, -4(sp) + bdnz 1b + blr diff --git a/mach/powerpc/mcg/table b/mach/powerpc/mcg/table index b72990c36..ca44ce869 100644 --- a/mach/powerpc/mcg/table +++ b/mach/powerpc/mcg/table @@ -237,10 +237,13 @@ PATTERNS SETSP.I(in:(int)reg) emit "mr sp, %in" cost 4; - + out:(int)reg = ANY.I cost 1; + out:(long)reg = ANY.L + cost 1; + out:(int)reg = COPYF.I(in:(float)reg) emit "stfsu %in, -4(sp)" emit "lwz %out, 0(sp)" @@ -306,10 +309,21 @@ PATTERNS emit "lwz %out, %addr" cost 4; +#if 0 + /* FIXME: Doesn't work because %out.0 and %addr might share a + * register, so it corrupts %addr before it loads %out.1. */ out:(long)reg = LOAD.L(addr:address) emit "lwz %out.0, 4+%addr" emit "lwz %out.1, 0+%addr" cost 8; +#else + /* Works, but costs an extra instruction. */ + out:(long)reg = LOAD.L(addr:address) + emit "la %out.1, %addr" + emit "lwz %out.0, 4(%out.1)" + emit "lwz %out.1, 0(%out.1)" + cost 12; +#endif out:(int)ushort0 = LOADH.I(addr:address) emit "lhz %out, %addr" @@ -566,6 +580,13 @@ PATTERNS emit "! COMPARESI.I(cr, 0)" cost 4; + cr:(cr)cr = COMPAREUL.I(left:(long)reg, right:(long)reg) + emit "cmpl %cr, 0, %left.1, %right.1" + emit "bne 1f" + emit "cmpl %cr, 0, %left.0, %right.0" + emit "1:" + cost 12; + /* Booleans */ diff --git a/mach/proto/mcg/treebuilder.c b/mach/proto/mcg/treebuilder.c index eed770170..ac811fc14 100644 --- a/mach/proto/mcg/treebuilder.c +++ b/mach/proto/mcg/treebuilder.c @@ -274,7 +274,7 @@ static struct ir* store(int size, struct ir* address, int offset, struct ir* val else opcode = IR_STORE; - if (offset > 0) + if (offset != 0) address = new_ir2( IR_ADD, EM_pointersize, address, new_wordir(offset) @@ -304,7 +304,7 @@ static struct ir* load(int size, struct ir* address, int offset) else opcode = IR_LOAD; - if (offset > 0) + if (offset != 0) address = new_ir2( IR_ADD, EM_pointersize, address, new_wordir(offset) @@ -416,6 +416,31 @@ static void helper_function(const char* name) ); } +static void helper_function_with_arg(const char* name, struct ir* arg) +{ + /* Abuses IR_SETRET to set a register to pass one argument to a + * helper function. + * + * FIXME: As of January 2018, mach/powerpc/libem takes an + * argument in register r3 only for ".los4", ".sts4", ".trp". + * This is an accident. Should the argument be on the stack, or + * should other helpers use a register? */ + + materialise_stack(); + appendir( + new_ir1( + IR_SETRET, arg->size, + arg + ) + ); + appendir( + new_ir1( + IR_CALL, 0, + new_labelir(name) + ) + ); +} + static void insn_simple(int opcode) { switch (opcode) @@ -437,6 +462,7 @@ static void insn_simple(int opcode) case op_cii: simple_convert(IR_FROMSI); break; case op_ciu: simple_convert(IR_FROMSI); break; case op_cui: simple_convert(IR_FROMUI); break; + case op_cuu: simple_convert(IR_FROMUI); break; case op_cfu: simple_convert(IR_FROMUF); break; case op_cfi: simple_convert(IR_FROMSF); break; case op_cif: simple_convert(IR_FROMSI); break; @@ -496,10 +522,12 @@ static void insn_simple(int opcode) case op_lim: { + /* Traps use only 16 bits of .ignmask, but we keep an + * entire word, even if a word has more than 2 bytes. */ push( - new_ir1( - (EM_wordsize == 2) ? IR_LOAD : IR_LOADH, EM_wordsize, - new_labelir(".ignmask") + load( + EM_wordsize, + new_labelir(".ignmask"), 0 ) ); break; @@ -507,26 +535,34 @@ static void insn_simple(int opcode) case op_sim: { - sequence_point(); appendir( - new_ir2( - (EM_wordsize == 2) ? IR_STORE : IR_STOREH, EM_wordsize, - new_labelir(".ignmask"), + store( + EM_wordsize, + new_labelir(".ignmask"), 0, pop(EM_wordsize) ) ); break; } - case op_trp: helper_function(".trp"); break; + case op_trp: + helper_function_with_arg(".trp", pop(EM_wordsize)); + break; case op_sig: { + struct ir* label = new_labelir(".trppc"); struct ir* value = pop(EM_pointersize); + push( + load( + EM_pointersize, + label, 0 + ) + ); appendir( store( EM_pointersize, - new_labelir(".trppc"), 0, + label, 0, value ) ); @@ -539,12 +575,13 @@ static void insn_simple(int opcode) break; } - /* FIXME: These instructions are really complex and barely used - * (Modula-2 and Pascal set support, I believe). Leave them until - * later. */ - case op_set: helper_function(".unimplemented_set"); break; - case op_ior: helper_function(".unimplemented_ior"); break; - + case op_and: helper_function(".and"); break; + case op_ior: helper_function(".ior"); break; + case op_xor: helper_function(".xor"); break; + case op_com: helper_function(".com"); break; + case op_cms: helper_function(".cms"); break; + case op_set: helper_function(".set"); break; + case op_inn: helper_function(".inn"); break; case op_dch: push( @@ -670,6 +707,31 @@ static void simple_alu2(int opcode, int size, int irop, const char* fallback) } } +static void rotate(int opcode, int size, int irop, int irop_reverse) +{ + if (size > (2*EM_wordsize)) + fatal("treebuilder: can't do opcode %s with size %d", em_mnem[opcode - sp_fmnem], size); + else + { + struct ir* right = pop(size); + struct ir* left = pop(size); + struct ir* bits = new_wordir(8 * size); + + /* a rol b -> (a << b) | (a >> (32 - b)) */ + push( + new_ir2( + IR_OR, size, + new_ir2(irop, size, left, right), + new_ir2( + irop_reverse, size, + left, + new_ir2(IR_SUB, size, bits, right) + ) + ) + ); + } +} + static struct ir* extract_block_refs(struct basicblock* bb) { struct ir* outir = NULL; @@ -720,26 +782,28 @@ static struct ir* ptradd(struct ir* address, int offset) ); } -static void blockmove(struct ir* dest, struct ir* src, struct ir* size) +static struct ir* walk_static_chain(int level) { - /* memmove stack: ( size src dest -- ) */ - push(size); - push(src); - push(dest); + struct ir* ir; - materialise_stack(); - appendir( - new_ir1( - IR_CALL, 0, - new_labelir("memmove") - ) - ); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_wordir(EM_pointersize*2 + EM_wordsize) - ) + /* The static chain, when it exists, is the first argument of each + * procedure. The chain begins with the current frame at level 0, + * and continues until we reach the outermost procedure. */ + ir = new_ir0( + IR_GETFP, EM_pointersize ); + while (level--) + { + /* Walk to the next frame pointer. */ + ir = load( + EM_pointersize, + new_ir1( + IR_FPTOAB, EM_pointersize, + ir + ), 0 + ); + } + return ir; } static void insn_ivalue(int opcode, arith value) @@ -765,8 +829,10 @@ static void insn_ivalue(int opcode, arith value) case op_and: simple_alu2(opcode, value, IR_AND, ".and"); break; case op_ior: simple_alu2(opcode, value, IR_OR, ".ior"); break; - case op_xor: simple_alu2(opcode, value, IR_EOR, NULL); break; + case op_xor: simple_alu2(opcode, value, IR_EOR, ".xor"); break; case op_com: simple_alu1(opcode, value, IR_NOT, ".com"); break; + case op_rol: rotate(opcode, value, IR_LSL, IR_LSR); break; + case op_ror: rotate(opcode, value, IR_LSR, IR_LSL); break; case op_adf: simple_alu2(opcode, value, IR_ADDF, NULL); break; case op_sbf: simple_alu2(opcode, value, IR_SUBF, NULL); break; @@ -774,12 +840,23 @@ static void insn_ivalue(int opcode, arith value) case op_dvf: simple_alu2(opcode, value, IR_DIVF, NULL); break; case op_ngf: simple_alu1(opcode, value, IR_NEGF, NULL); break; - case op_cmu: /* fall through */ - case op_cms: push(tristate_compare(value, IR_COMPAREUI)); break; + case op_cms: + if (value > (2*EM_wordsize)) + { + push(new_wordir(value)); + helper_function(".cms"); + break; + } + /* fall through */ + case op_cmu: push(tristate_compare(value, IR_COMPAREUI)); break; case op_cmi: push(tristate_compare(value, IR_COMPARESI)); break; case op_cmf: push(tristate_compare(value, IR_COMPAREF)); break; - case op_rck: helper_function(".rck"); break; + case op_rck: + if (value != EM_wordsize) + fatal("'rck %d' not supported", value); + helper_function(".rck"); + break; case op_set: push(new_wordir(value)); helper_function(".set"); break; case op_inn: push(new_wordir(value)); helper_function(".inn"); break; @@ -930,26 +1007,24 @@ static void insn_ivalue(int opcode, arith value) if (value > (EM_wordsize*2)) { - /* We're going to need to do multiple stores; fix the address + /* We're going to need to do multiple loads; fix the address * so it'll go into a register and we can do maths on it. */ appendir(ptr); } + /* Stack grows down. Load backwards. */ while (value > 0) { int s = EM_wordsize*2; if (value < s) s = value; - + value -= s; push( load( s, - ptr, offset + ptr, value ) ); - - value -= s; - offset += s; } assert(value == 0); @@ -1099,7 +1174,12 @@ static void insn_ivalue(int opcode, arith value) case op_dup: { sequence_point(); - if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize)) + if (value > (2*EM_wordsize)) + { + push(new_wordir(value)); + helper_function(".dus4"); + } + else if ((value == (EM_wordsize*2)) && (peek(0) == EM_wordsize) && (peek(1) == EM_wordsize)) { struct ir* v1 = pop(EM_wordsize); struct ir* v2 = pop(EM_wordsize); @@ -1117,12 +1197,30 @@ static void insn_ivalue(int opcode, arith value) break; } + case op_dus: + { + if (value != EM_wordsize) + fatal("'dus %d' not supported", value); + helper_function(".dus4"); + break; + } + case op_exg: { - struct ir* v1 = pop(value); - struct ir* v2 = pop(value); - push(v1); - push(v2); + if (value > (2*EM_wordsize)) + { + push( + new_wordir(value) + ); + helper_function(".exg"); + } + else + { + struct ir* v1 = pop(value); + struct ir* v2 = pop(value); + push(v1); + push(v2); + } break; } @@ -1285,53 +1383,19 @@ static void insn_ivalue(int opcode, arith value) } case op_lxl: - { - struct ir* ir; - - /* Walk the static chain. */ - - ir = new_ir0( - IR_GETFP, EM_pointersize + push( + walk_static_chain(value) ); - - while (value--) - { - ir = new_ir1( - IR_CHAINFP, EM_pointersize, - ir - ); - } - - push(ir); break; - } case op_lxa: - { - struct ir* ir; - - /* Walk the static chain. */ - - ir = new_ir0( - IR_GETFP, EM_pointersize - ); - - while (value--) - { - ir = new_ir1( - IR_CHAINFP, EM_pointersize, - ir - ); - } - push( new_ir1( IR_FPTOAB, EM_pointersize, - ir + walk_static_chain(value) ) ); break; - } case op_fef: { @@ -1394,6 +1458,7 @@ static void insn_ivalue(int opcode, arith value) break; case 1: + materialise_stack(); push( appendir( new_ir0( @@ -1403,10 +1468,6 @@ static void insn_ivalue(int opcode, arith value) ); break; - case 2: - helper_function(".unimplemented_lor_2"); - break; - default: fatal("'lor %d' not supported", value); } @@ -1436,10 +1497,6 @@ static void insn_ivalue(int opcode, arith value) ); break; - case 2: - helper_function(".unimplemented_str_2"); - break; - default: fatal("'str %d' not supported", value); } @@ -1448,100 +1505,27 @@ static void insn_ivalue(int opcode, arith value) } case op_blm: - { - /* Input stack: ( src dest -- ) */ - struct ir* dest = pop(EM_pointersize); - struct ir* src = pop(EM_pointersize); - blockmove(dest, src, new_wordir(value)); + push(new_wordir(value)); + helper_function(".bls4"); break; - } case op_bls: - { - /* Input stack: ( src dest size -- ) */ - struct ir* dest = pop(EM_pointersize); - struct ir* src = pop(EM_pointersize); - struct ir* size = pop(EM_wordsize); - blockmove(dest, src, size); + if (value != EM_wordsize) + fatal("'bls %d' not supported", value); + helper_function(".bls4"); break; - } case op_los: - { - /* Copy an arbitrary amount to the stack. */ - struct ir* bytes = pop(EM_wordsize); - struct ir* address = pop(EM_pointersize); - - materialise_stack(); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_ir1( - IR_NEG, EM_wordsize, - bytes - ) - ) - ); - - push( - new_ir0( - IR_GETSP, EM_pointersize - ) - ); - push(address); - push(bytes); - materialise_stack(); - appendir( - new_ir1( - IR_CALL, 0, - new_labelir("memcpy") - ) - ); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_wordir(EM_pointersize*2 + EM_wordsize) - ) - ); + if (value != EM_wordsize) + fatal("'los %d' not supported", value); + helper_function_with_arg(".los4", pop(EM_wordsize)); break; - } case op_sts: - { - /* Copy an arbitrary amount from the stack. */ - struct ir* bytes = pop(EM_wordsize); - struct ir* dest = pop(EM_pointersize); - struct ir* src; - - materialise_stack(); - src = appendir( - new_ir0( - IR_GETSP, EM_pointersize - ) - ); - - push(dest); - push(src); - push(bytes); - materialise_stack(); - appendir( - new_ir1( - IR_CALL, 0, - new_labelir("memcpy") - ) - ); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_ir2( - IR_ADD, EM_wordsize, - new_wordir(EM_pointersize*2 + EM_wordsize), - bytes - ) - ) - ); + if (value != EM_wordsize) + fatal("'sts %d' not supported", value); + helper_function_with_arg(".sts4", pop(EM_wordsize)); break; - } case op_lin: { @@ -1677,17 +1661,17 @@ static void insn_lvalue(int opcode, const char* label, arith offset) case op_gto: { - struct ir* descriptor = pop(EM_pointersize); + struct ir* descriptor = address_of_external(label, offset); appendir( new_ir1( - IR_SETSP, EM_pointersize, + IR_SETFP, EM_pointersize, load(EM_pointersize, descriptor, EM_pointersize*2) ) ); appendir( new_ir1( - IR_SETFP, EM_pointersize, + IR_SETSP, EM_pointersize, load(EM_pointersize, descriptor, EM_pointersize*1) ) );