Lots more opcodes. Rearrange the stack layout so that fp->ab is a fixed value

(needed for CHAINFP and FPTOAB). Wire up lfrs to calls via a phi when
necessary, to allow call-bra-lfr chains.
This commit is contained in:
David Given 2016-10-29 11:57:56 +02:00
parent bfa65168e2
commit 2cc2c0ae98
11 changed files with 318 additions and 200 deletions

View file

@ -4,19 +4,20 @@
*
* | ...params...
* | --------------- <- ab
* | old FR
* | old FP
* | --------------- <- st, fp (a.k.a. lb)
* | spills
* | ---------------
* | saved regs
* | LR
* | FP
* | --------------- <- st, fp (a.k.a. lb)
* | ---------------
* | locals
* | --------------- <- sp
* V ...user area...
*
* st indexes up; lb indexes down.
*
* We ensure that dereferencing fp always produces the caller's fp.
* Note that [fp] == old_fp and ab == fp + 8.
*/
static ARRAYOF(struct hreg) saved_regs;
@ -39,28 +40,29 @@ void platform_calculate_offsets(void)
}
current_proc->fp_to_st = 0;
current_proc->fp_to_ab = current_proc->spills_size + current_proc->saved_size + 8;
current_proc->fp_to_lb = 0;
current_proc->fp_to_ab = 8;
current_proc->fp_to_lb = -(current_proc->spills_size + current_proc->saved_size);
}
struct hop* platform_prologue(void)
{
int i;
int saved_offset;
int spoffset = current_proc->saved_size + current_proc->spills_size +
current_proc->locals_size;
struct hop* hop = new_hop(current_proc->entry, NULL);
hop_add_insel(hop, "! saved_size = %d+8 bytes", current_proc->saved_size);
hop_add_insel(hop, "! saved_size = %d bytes", current_proc->saved_size);
hop_add_insel(hop, "! spills_size = %d bytes", current_proc->spills_size);
hop_add_insel(hop, "! locals_size = %d bytes", current_proc->locals_size);
hop_add_insel(hop, "addi sp, sp, %d", -(current_proc->fp_to_ab + current_proc->locals_size));
hop_add_insel(hop, "addi sp, sp, %d", -(spoffset + 8));
hop_add_insel(hop, "mfspr r0, lr");
hop_add_insel(hop, "stw fp, %d(sp)", current_proc->locals_size + 0);
hop_add_insel(hop, "stw r0, %d(sp)", current_proc->locals_size + 4);
hop_add_insel(hop, "addi fp, sp, %d", current_proc->locals_size);
hop_add_insel(hop, "stw fp, %d(sp)", spoffset + 0);
hop_add_insel(hop, "stw r0, %d(sp)", spoffset + 4);
hop_add_insel(hop, "addi fp, sp, %d", spoffset);
/* Saved reg offsets are negative. */
saved_offset = current_proc->saved_size + 8;
saved_offset = -current_proc->spills_size;
for (i=0; i<saved_regs.count; i++)
{
struct hreg* hreg = saved_regs.item[i];
@ -80,7 +82,7 @@ struct hop* platform_epilogue(void)
int saved_offset;
/* Saved reg offsets are negative. */
saved_offset = current_proc->saved_size + 8;
saved_offset = -current_proc->spills_size;
for (i=0; i<saved_regs.count; i++)
{
struct hreg* hreg = saved_regs.item[i];

View file

@ -210,18 +210,26 @@ PATTERNS
emit "mr %out, fp"
cost 4;
SETFP.I(in:(int)reg)
emit "mr fp, %in"
cost 4;
out:(int)reg = CHAINFP.I(in:(int)reg)
emit "lwz %out, 0(%in)"
cost 4;
out:(int)reg = FPTOARGS.I(GETFP.I)
out:(int)reg = FPTOAB.I(GETFP.I)
emit "addi %out, fp, 8"
cost 4;
out:(int)reg = FPTOARGS.I(in:(int)reg)
out:(int)reg = FPTOAB.I(in:(int)reg)
emit "addi %out, %in, 8"
cost 4;
out:(int)reg = FPTOLB.I(in:(int)reg)
with %out == %in
cost 1;
out:(int)reg = GETSP.I
emit "mr %out, sp"
cost 4;
@ -230,6 +238,9 @@ PATTERNS
emit "mr sp, %in"
cost 4;
out:(int)reg = ANY.I
cost 1;
out:(int)reg = COPYF.I(in:(float)reg)
emit "stfsu %in, -4(sp)"
emit "lwz %out, 0(sp)"
@ -376,6 +387,11 @@ PATTERNS
emit "srawi %out.1, %out.0, 31"
cost 8;
out:(ret)reg = FROMF.I(in:(dret)reg)
with corrupted(volatile)
emit "bl .fromf2i"
cost 4;
out:(ret)reg = FROMD.I(in:(dret)reg)
with corrupted(volatile)
emit "bl .fromd2i"
@ -391,54 +407,16 @@ PATTERNS
emit "bl .fromsi2d"
cost 4;
#if 0
/* byte conversions */
out:(int)ubyte0 = CIU14(in:(int)ubyte0)
with %out == %in
emit "! CIU14(ubyte0) -> ubyte0"
cost 1;
out:(int)ubyte0 = CIU41(in:(int)ubyte0)
with %out == %in
emit "! CIU41(ubyte0) -> ubyte0"
cost 1;
out:(int)ubyteX = CIU41(in:(int)ubyteX)
with %out == %in
emit "! CIU41(ubyteX) -> ubyteX"
cost 1;
out:(int)reg = CII14(in:(int)ubyteX)
emit "extsb %out, %in ! CII14(ubyteX) -> reg"
out:(fret)reg = FROMUI.F(in:(ret)reg)
with corrupted(volatile)
emit "bl .fromui2f"
cost 4;
/* short conversions */
out:(int)ushort0 = CIU24(in:(int)ushort0)
with %out == %in
emit "! CIU24(ushort0) -> ushort0"
cost 1;
out:(int)ushort0 = CIU42(in:(int)ushort0)
with %out == %in
emit "! CIU42(ushort0) -> ushort0"
cost 1;
out:(int)ushortX = CIU42(in:(int)ushortX)
with %out == %in
emit "! CIU42(ushortX) -> ushortX"
cost 1;
out:(int)reg = CII24(in:(int)ushort0)
with %out == %in
emit "! CII24(ushort0) -> reg"
out:(dret)reg = FROMUI.D(in:(ret)reg)
with corrupted(volatile)
emit "bl .fromui2d"
cost 4;
out:(int)reg = CII24(in:(int)ushortX)
emit "extsh %out, %in"
cost 4;
#endif
/* Locals */
@ -452,6 +430,7 @@ PATTERNS
/* Memory addressing modes */
address = ADD.I(addr:(int)reg, offset:CONST.I)
@ -484,38 +463,26 @@ PATTERNS
emit "b $false"
cost 8;
CALL(dest:LABEL.I)
with corrupted(volatile)
emit "bl $dest"
#define CALLLABEL(insn) \
insn (dest:LABEL.I) \
with corrupted(volatile) \
emit "bl $dest" \
cost 4;
out:(ret)reg = CALL.I(dest:LABEL.I)
with corrupted(volatile)
emit "bl $dest"
cost 4;
CALLLABEL(CALL)
out:(int)reg = CALLLABEL(CALL.I)
out:(long)reg = CALLLABEL(CALL.L)
out:(lret)reg = CALL.L(dest:LABEL.I)
with corrupted(volatile)
emit "bl $dest"
cost 4;
CALL(dest:(int)reg)
with corrupted(volatile)
emit "mtspr ctr, %dest"
emit "bcctrl 20, 0, 0"
#define CALLINDIRECT(insn) \
insn (dest:(int)reg) \
with corrupted(volatile) \
emit "mtspr ctr, %dest" \
emit "bcctrl 20, 0, 0" \
cost 8;
out:(ret)reg = CALL.I(dest:(int)reg)
with corrupted(volatile)
emit "mtspr ctr, %dest"
emit "bcctrl 20, 0, 0"
cost 8;
out:(lret)reg = CALL.L(dest:(int)reg)
with corrupted(volatile)
emit "mtspr ctr, %dest"
emit "bcctrl 20, 0, 0"
cost 8;
CALLINDIRECT(CALL)
out:(int)reg = CALLINDIRECT(CALL.I)
out:(long)reg = CALLINDIRECT(CALL.L)
JUMP(dest:LABEL.I)
emit "b $dest"
@ -701,6 +668,12 @@ PATTERNS
emit "lfd %out, %addr"
cost 4;
out:(float)reg = in:CONST.F
when specific_constant(%in, 0)
emit "la r0, .fd_00000000"
emit "lfs %out, 0(r0)"
cost 12;
FPU4R(ADDF.F, "fadds")
FPU8R(ADDF.D, "fadd")
@ -729,35 +702,5 @@ PATTERNS
emit "fcmpu %cr, %left, %right"
cost 4;
#if 0
out:(ret)reg = CFI44(val:(fret)reg)
with corrupted(volatile)
emit "bl .cfi44"
cost 4;
out:(fret)reg = CIF44(val:(ret)reg)
with corrupted(volatile)
emit "bl .cif44"
cost 4;
out:(ret)reg = CFI84(val:(dret)reg)
with corrupted(volatile)
emit "bl .cfi84"
cost 4;
out:(dret)reg = CIF48(val:(ret)reg)
with corrupted(volatile)
emit "bl .cif48"
cost 4;
out:(float)reg = CFF84(val:(double)reg)
emit "frsp %out, %val"
cost 4;
out:(double)reg = CFF48(val:(float)reg)
emit "fmr %out, %val"
cost 1;
#endif
/* vim: set sw=4 ts=4 expandtab : */

View file

@ -282,9 +282,15 @@ char* hop_render(struct hop* hop)
case IR_CONST:
appendf("%d", ir->u.ivalue);
break;
default:
assert(false);
}
break;
}
default:
assert(false);
}
}

View file

@ -118,6 +118,7 @@ extern void pass_register_allocator(void);
extern void pass_remove_dead_blocks(void);
extern void pass_remove_dead_phis(void);
extern void pass_split_critical_edges(void);
extern void pass_wire_up_return_values(void);
extern void platform_calculate_offsets(void);
extern struct hop* platform_prologue(void);

View file

@ -67,6 +67,8 @@ void pass_live_vreg_analysis(void)
propagate_liveness(dominance.postorder.item[i]);
}
while (!finished);
//assert(cfg.entry->liveins.count == 0);
}
/* vim: set sw=4 ts=4 expandtab : */

View file

@ -120,7 +120,7 @@ static struct hreg* evict(struct vreg* vreg)
* Shouldn't really happen in real life. */
return hreg;
}
if (candidatein == candidateout)
if (candidatein && candidateout && (candidatein == candidateout))
{
/* This is a through register. */
tracef('R', "R: evicting %%%d from %s\n", candidatein->id, hreg->id);
@ -513,8 +513,9 @@ static void assign_hregs_to_vregs(void)
phi->prev->regsout, phi->ir->result);
if (hreg && !pmap_findleft(old, hreg))
{
tracef('R', "R: import hreg %s for phi input %%%d from %s\n",
hreg->id, vreg->id, phi->prev->name);
tracef('R', "R: import hreg %s for %%%d, imported from %s %%%d\n",
hreg->id, vreg->id,
phi->prev->name, phi->ir->id);
pmap_put(old, hreg, vreg);
}
}
@ -534,8 +535,9 @@ static void assign_hregs_to_vregs(void)
struct phicongruence* c = vreg->congruence;
struct hreg* hreg = allocate_phi_hreg(old, vreg, c->type);
tracef('R', "R: import fallback hreg %s for phi input %%%d from %s\n",
hreg->id, vreg->id, phi->prev->name);
tracef('R', "R: import fallback hreg %s for %%%d, imported from %s %%%d\n",
hreg->id, vreg->id,
phi->prev->name, phi->ir->id);
pmap_add(old, hreg, vreg);
}
}

View file

@ -0,0 +1,123 @@
#include "mcg.h"
/* The ACK returns values from functions not on the stack but in a special
* 'register' which is read with lfr. This register is defined to survive
* asp, bra and gto. The way it's intended to work is that the value just gets
* put in a particular hreg and stays there until lfr brings it to the
* attention of the code generator.
*
* Trouble is, while that worked on ncg, it doesn't work here because the
* register allocator may decide to insert moves arbitrarily. So we need to
* somehow turn this special register into a real register so that it can be
* kept alive.
*
* The easiest thing to do is to just push the result of call onto the stack...
* but that doesn't work either, because if someone does a call without an lfr,
* we don't want to be left with an unpopped value.
*
* So what we do is we find lfrs, and then we search for the call which
* generated the value, and then we hook up the IRs so there's a connection
* between the two. But beware! The lfr value survives bra! Which means a
* single lfr may actually read the value produced by *several* call
* instructions. You know what that means? Phis.
*
* (Luckily a single call instruction can't be read by multiple lfrs, because
* conditional branches trash the lfr value.)
*/
/* Finds the IR which produces the return value read by 'lfr'.
 *
 * The search walks backwards through 'bb', starting at position 'index' (or
 * at the last IR of the block if 'index' is -1). IR_STACKADJUST, IR_GETRET
 * and IR_JUMP are stepped over; an IR_CALL ends the search and has its size
 * set to the size of 'lfr'; any other opcode is reported as a fatal error.
 *
 * If the top of the block is reached without finding a call, the search
 * continues in the predecessors of 'bb':
 *   - no predecessors: there is nowhere for the value to come from, so this
 *     is reported as a fatal error;
 *   - one predecessor: the search simply carries on in that block;
 *   - several predecessors: a new IR_PHI is inserted at the start of 'bb'
 *     with one input per predecessor, and the phi itself is the result.
 *
 * On return, *callbb and *callir describe the block and IR which supply the
 * value. (fatal() is assumed not to return.)
 */
static void find_call(struct basicblock* bb, int index, struct ir* lfr,
        struct basicblock** callbb, struct ir** callir)
{
    /* -1 is the caller's way of saying 'start from the end of the block'. */
    if (index == -1)
        index = bb->irs.count - 1;

    while (index >= 0)
    {
        struct ir* ir = bb->irs.item[index];
        switch (ir->opcode)
        {
            case IR_CALL:
                /* This is the producer; give it the size the reader wants. */
                ir->size = lfr->size;
                *callbb = bb;
                *callir = ir;
                return;

            case IR_STACKADJUST:
            case IR_GETRET:
            case IR_JUMP:
                /* lfr value preserved */
                break;

            default:
                /* lfr value has been corrupted. */
                fatal("lfr reading corrupted value in %s", bb->name);
        }

        index--;
    }

    /* Our search hit the top of the block; we need to import the
     * lfr value from a previous block. */

    if (bb->prevs.count == 0)
    {
        /* No predecessors at all: without this check we would build a phi
         * with no inputs and silently wire the lfr up to it. */
        fatal("lfr in %s is not preceded by a call", bb->name);
    }
    else if (bb->prevs.count == 1)
    {
        /* Only a single predecessor, so no phi is necessary. */

        find_call(bb->prevs.item[0], -1, lfr, callbb, callir);
    }
    else
    {
        /* We have multiple predecessors. This means that the lfr value may
         * come from any of these blocks. We need a phi. */

        int i;
        struct ir* phi = new_ir0(IR_PHI, lfr->size);

        phi->root = phi;
        /* The phi goes into the block before recursing, at the very top. */
        array_insert(&bb->irs, phi, 0);

        for (i=0; i<bb->prevs.count; i++)
        {
            struct basicblock* prev = bb->prevs.item[i];
            struct basicblock* parentbb;
            struct ir* parentir;

            /* The phi stands in for the lfr while searching each
             * predecessor, so that any call found gets the phi's size. */
            find_call(prev, -1, phi, &parentbb, &parentir);

            pmap_add(&phi->u.phivalue, parentbb, parentir);
        }

        *callbb = bb;
        *callir = phi;
    }
}
/* Connects the IR at position 'index' of 'bb' to whatever find_call()
 * reports as the source of its value: the source becomes the IR's left
 * child and the IR itself is turned into an IR_NOP. */
static void wire_up_ir(struct basicblock* bb, int index)
{
    struct basicblock* sourcebb;
    struct ir* source;
    struct ir* getret = bb->irs.item[index];

    find_call(bb, index, getret, &sourcebb, &source);

    /* The block the value came from is not needed here, only the IR. */
    getret->opcode = IR_NOP;
    getret->left = source;
}
/* Visits every IR of every block in cfg.preorder and wires up each
 * IR_GETRET it finds via wire_up_ir(). */
void pass_wire_up_return_values(void)
{
    int blockno = 0;

    while (blockno < cfg.preorder.count)
    {
        struct basicblock* block = cfg.preorder.item[blockno];
        int pos;

        /* The IR count is re-read on every iteration because wiring up an
         * IR may insert a phi into a block's IR list. */
        for (pos = 0; pos < block->irs.count; pos++)
        {
            struct ir* candidate = block->irs.item[pos];

            if (candidate->opcode != IR_GETRET)
                continue;

            wire_up_ir(block, pos);
        }

        blockno++;
    }
}
/* vim: set sw=4 ts=4 expandtab : */

View file

@ -218,8 +218,8 @@ static struct ir* new_copy(char wanted, char real, struct ir* ir)
else if ((wanted == 'L') && (real == 'D'))
opcode = IR_COPYD;
else
fatal("type mismatch: parent IR wanted %c, child IR provided %c",
wanted, real);
fatal("type mismatch: parent IR $%d wanted %c, child IR provided %c",
ir->id, wanted, real);
copy = new_ir1(opcode, ir->size, ir);
copy->type = wanted;

View file

@ -179,6 +179,7 @@ void procedure_compile(struct procedure* proc)
* and nexts (and then calling update_graph_data()). */
print_blocks('3');
pass_wire_up_return_values();
pass_convert_stack_ops();
print_blocks('4');
pass_convert_locals_to_ssa();

View file

@ -4,7 +4,6 @@ static struct basicblock* current_bb;
static int stackptr;
static struct ir* stack[64];
static struct ir* lastcall;
static struct ir* convert(struct ir* src, int srcsize, int destsize, int opcode);
static struct ir* appendir(struct ir* ir);
@ -340,6 +339,20 @@ static void simple_test_neg(int size, int irop)
);
}
/* Emits a call to the named helper routine. Helpers leave their result on
 * the stack rather than returning values through lfr, so the stack is
 * materialised first and the call IR is created with size 0. */
static void helper_function(const char* name)
{
    materialise_stack();

    appendir(new_ir1(IR_CALL, 0, new_labelir(name)));
}
static void insn_simple(int opcode)
{
switch (opcode)
@ -364,6 +377,7 @@ static void insn_simple(int opcode)
case op_cfu: simple_convert(IR_FROMF); break; /* FIXME: technically wrong */
case op_cfi: simple_convert(IR_FROMF); break;
case op_cif: simple_convert(IR_FROMSI); break;
case op_cuf: simple_convert(IR_FROMUI); break;
case op_cff: simple_convert(IR_FROMF); break;
case op_cmp:
@ -384,7 +398,7 @@ static void insn_simple(int opcode)
struct ir* dest = pop(EM_pointersize);
materialise_stack();
lastcall = appendir(
appendir(
new_ir1(
IR_CALL, 0,
dest
@ -421,7 +435,7 @@ static void insn_simple(int opcode)
{
push(
new_ir1(
IR_LOAD, 2,
(EM_wordsize == 2) ? IR_LOAD : IR_LOADH, EM_wordsize,
new_labelir(".ignmask")
)
);
@ -432,7 +446,7 @@ static void insn_simple(int opcode)
{
appendir(
new_ir2(
IR_STORE, 2,
(EM_wordsize == 2) ? IR_STORE : IR_STOREH, EM_wordsize,
new_labelir(".ignmask"),
pop(EM_wordsize)
)
@ -440,31 +454,32 @@ static void insn_simple(int opcode)
break;
}
case op_trp:
{
materialise_stack();
appendir(
new_ir1(
IR_CALL, 0,
new_labelir(".trp")
)
);
break;
}
case op_trp: helper_function(".trp"); break;
case op_sig: helper_function(".sig"); break;
case op_rtt: helper_function(".rtt"); break;
/* FIXME: These instructions are really complex and barely used
* (Modula-2 bitset support, I believe). Leave them until leter. */
case op_set:
case op_ior:
{
appendir(
* (Modula-2 bitset support, I believe). Leave them until later. */
case op_set: helper_function(".unimplemented_set"); break;
case op_ior: helper_function(".unimplemented_ior"); break;
case op_dch:
push(
new_ir1(
IR_CALL, 0,
new_labelir(".unimplemented")
IR_CHAINFP, EM_pointersize,
pop(EM_pointersize)
)
);
break;
case op_lpb:
push(
new_ir1(
IR_FPTOAB, EM_pointersize,
pop(EM_pointersize)
)
);
break;
}
case op_lni:
{
@ -598,6 +613,28 @@ static struct ir* ptradd(struct ir* address, int offset)
);
}
/* Emits a call to memmove for the given destination, source and size IRs.
 *
 * The arguments are pushed in the order size, src, dest (memmove stack:
 * ( size src dest -- )), the stack is materialised, memmove is called, and
 * finally the stack is adjusted by two pointers plus one word. */
static void blockmove(struct ir* dest, struct ir* src, struct ir* size)
{
    push(size);
    push(src);
    push(dest);

    materialise_stack();

    appendir(new_ir1(IR_CALL, 0, new_labelir("memmove")));

    /* Stack adjustment following the call, sized for the three arguments. */
    appendir(
        new_ir1(IR_STACKADJUST, EM_pointersize,
            new_wordir(EM_pointersize*2 + EM_wordsize)));
}
static void insn_ivalue(int opcode, arith value)
{
switch (opcode)
@ -725,7 +762,7 @@ static void insn_ivalue(int opcode, arith value)
case op_zrf:
{
struct ir* ir = new_constir(value, 0);
ir->opcode = IR_CONSTF;
ir->opcode = IR_CONST;
push(ir);
break;
}
@ -1012,9 +1049,13 @@ static void insn_ivalue(int opcode, arith value)
case op_lfr:
{
assert(lastcall != NULL);
lastcall->size = value;
push(lastcall);
push(
appendir(
new_ir0(
IR_GETRET, value
)
)
);
break;
}
@ -1063,7 +1104,7 @@ static void insn_ivalue(int opcode, arith value)
* the physical stack (which is very dubious). */
appendir(
new_ir1(
IR_CALL, EM_wordsize,
IR_CALL, 0,
new_labelir(helper)
)
);
@ -1094,26 +1135,11 @@ static void insn_ivalue(int opcode, arith value)
case op_lxa:
{
struct ir* ir;
/* Walk the static chain. */
ir = new_ir0(
IR_GETFP, EM_pointersize
);
while (value--)
{
ir = new_ir1(
IR_CHAINFP, EM_pointersize,
ir
);
}
push(
/* What does this actually *do*? The spec doesn't say. */
appendir(
new_ir1(
IR_FPTOARGS, EM_pointersize,
ir
IR_CALL, 0,
new_labelir(".unimplemented_lxa")
)
);
break;
@ -1166,6 +1192,17 @@ static void insn_ivalue(int opcode, arith value)
{
switch (value)
{
case 0:
appendir(
new_ir1(
IR_FPTOLB, EM_pointersize,
new_ir0(
IR_GETFP, EM_pointersize
)
)
);
break;
case 1:
appendir(
new_ir0(
@ -1185,6 +1222,15 @@ static void insn_ivalue(int opcode, arith value)
{
switch (value)
{
case 0:
appendir(
new_ir1(
IR_SETFP, EM_pointersize,
pop(EM_pointersize)
)
);
break;
case 1:
appendir(
new_ir1(
@ -1204,27 +1250,19 @@ static void insn_ivalue(int opcode, arith value)
case op_blm:
{
/* Input stack: ( src dest -- ) */
/* Memmove stack: ( size src dest -- ) */
struct ir* dest = pop(EM_pointersize);
struct ir* src = pop(EM_pointersize);
blockmove(dest, src, new_wordir(value));
break;
}
push(new_wordir(value));
push(src);
push(dest);
materialise_stack();
appendir(
new_ir1(
IR_CALL, 0,
new_labelir("memmove")
)
);
appendir(
new_ir1(
IR_STACKADJUST, EM_pointersize,
new_wordir(EM_pointersize*2 + EM_wordsize)
)
);
case op_bls:
{
/* Input stack: ( src dest size -- ) */
struct ir* dest = pop(EM_pointersize);
struct ir* src = pop(EM_pointersize);
struct ir* size = pop(EM_wordsize);
blockmove(dest, src, size);
break;
}
@ -1426,7 +1464,7 @@ static void insn_lvalue(int opcode, const char* label, arith offset)
case op_cal:
assert(offset == 0);
materialise_stack();
lastcall = appendir(
appendir(
new_ir1(
IR_CALL, 0,
new_labelir(label)
@ -1513,8 +1551,6 @@ void tb_procedure(void)
for (i=0; i<current_proc->blocks.count; i++)
generate_tree(current_proc->blocks.item[i]);
}
/* vim: set sw=4 ts=4 expandtab : */

View file

@ -6,8 +6,7 @@
# ?: pull/push types from other ? parameters
# Simple terminals
S ?=.. CONST # must be followed by float form
S ?=.. CONSTF
S ?=.. CONST
V ?=.. REG
S ?=?. NOP
S I=.. LABEL
@ -95,7 +94,9 @@ S I=I. IFLT
S I=I. IFLE
# Procedures
S i=.. CALL
S i=I. CALL
S i=?. GETRET
S ?=i. SETRET
# Flow control --- these never return
V .=i. JUMP
@ -106,10 +107,11 @@ V .=.. RET
# Special
S ?=i. STACKADJUST
S ?=i. SETRET
S i=.. GETFP
S ?=i. SETFP
S i=.. GETSP
S ?=i. SETSP
S i=i. CHAINFP
S i=i. FPTOARGS
S i=i. FPTOAB
S i=i. FPTOLB