x86-64: Rewrite Linux parameter passing
This fixes two ABI test cases involving large arguments when there are still registers available for later arguments.
This commit is contained in:
parent
307b7b183d
commit
53c5fc2246
2 changed files with 86 additions and 176 deletions
|
@ -637,10 +637,10 @@ int main(int argc, char **argv) {
|
||||||
RUN_TEST(ret_longdouble_test);
|
RUN_TEST(ret_longdouble_test);
|
||||||
RUN_TEST(ret_2float_test);
|
RUN_TEST(ret_2float_test);
|
||||||
RUN_TEST(ret_2double_test);
|
RUN_TEST(ret_2double_test);
|
||||||
#if !defined __x86_64__ || defined _WIN32
|
|
||||||
/* currently broken on x86_64 linux */
|
|
||||||
RUN_TEST(ret_8plus2double_test);
|
RUN_TEST(ret_8plus2double_test);
|
||||||
RUN_TEST(ret_6plus2longlong_test);
|
RUN_TEST(ret_6plus2longlong_test);
|
||||||
|
#if !defined __x86_64__ || defined _WIN32
|
||||||
|
/* currently broken on x86_64 linux */
|
||||||
RUN_TEST(ret_mixed_test);
|
RUN_TEST(ret_mixed_test);
|
||||||
RUN_TEST(ret_mixed2_test);
|
RUN_TEST(ret_mixed2_test);
|
||||||
#endif
|
#endif
|
||||||
|
|
258
x86_64-gen.c
258
x86_64-gen.c
|
@ -1196,209 +1196,119 @@ void gfunc_call(int nb_args)
|
||||||
{
|
{
|
||||||
X86_64_Mode mode;
|
X86_64_Mode mode;
|
||||||
CType type;
|
CType type;
|
||||||
int size, align, r, args_size, stack_adjust, run_start, run_end, i, reg_count;
|
int size, align, r, args_size, stack_adjust, i, reg_count;
|
||||||
int nb_reg_args = 0;
|
int nb_reg_args = 0;
|
||||||
int nb_sse_args = 0;
|
int nb_sse_args = 0;
|
||||||
int sse_reg, gen_reg;
|
int sse_reg, gen_reg;
|
||||||
|
char _onstack[nb_args], *onstack = _onstack;
|
||||||
|
|
||||||
/* calculate the number of integer/float register arguments */
|
/* calculate the number of integer/float register arguments, remember
|
||||||
for(i = 0; i < nb_args; i++) {
|
arguments to be passed via stack (in onstack[]), and also remember
|
||||||
|
if we have to align the stack pointer to 16 (onstack[i] == 2). Needs
|
||||||
|
to be done in a left-to-right pass over arguments. */
|
||||||
|
stack_adjust = 0;
|
||||||
|
for(i = nb_args - 1; i >= 0; i--) {
|
||||||
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
|
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
|
||||||
if (mode == x86_64_mode_sse)
|
if (mode == x86_64_mode_sse && nb_sse_args + reg_count <= 8) {
|
||||||
nb_sse_args += reg_count;
|
nb_sse_args += reg_count;
|
||||||
else if (mode == x86_64_mode_integer)
|
onstack[i] = 0;
|
||||||
|
} else if (mode == x86_64_mode_integer && nb_reg_args + reg_count <= REGN) {
|
||||||
nb_reg_args += reg_count;
|
nb_reg_args += reg_count;
|
||||||
|
onstack[i] = 0;
|
||||||
|
} else if (mode == x86_64_mode_none) {
|
||||||
|
onstack[i] = 0;
|
||||||
|
} else {
|
||||||
|
if (align == 16 && (stack_adjust &= 15)) {
|
||||||
|
onstack[i] = 2;
|
||||||
|
stack_adjust = 0;
|
||||||
|
} else
|
||||||
|
onstack[i] = 1;
|
||||||
|
stack_adjust += size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nb_sse_args && tcc_state->nosse)
|
if (nb_sse_args && tcc_state->nosse)
|
||||||
tcc_error("SSE disabled but floating point arguments passed");
|
tcc_error("SSE disabled but floating point arguments passed");
|
||||||
|
|
||||||
/* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
|
/* fetch cpu flag before generating any code */
|
||||||
and ended by a 16-byte aligned argument. This is because, from the point of view of
|
if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
|
||||||
the callee, argument alignment is computed from the bottom up. */
|
gv(RC_INT);
|
||||||
|
|
||||||
/* for struct arguments, we need to call memcpy and the function
|
/* for struct arguments, we need to call memcpy and the function
|
||||||
call breaks register passing arguments we are preparing.
|
call breaks register passing arguments we are preparing.
|
||||||
So, we process arguments which will be passed by stack first. */
|
So, we process arguments which will be passed by stack first. */
|
||||||
gen_reg = nb_reg_args;
|
gen_reg = nb_reg_args;
|
||||||
sse_reg = nb_sse_args;
|
sse_reg = nb_sse_args;
|
||||||
run_start = 0;
|
|
||||||
args_size = 0;
|
args_size = 0;
|
||||||
while (run_start != nb_args) {
|
stack_adjust &= 15;
|
||||||
int run_gen_reg = gen_reg, run_sse_reg = sse_reg;
|
for (i = 0; i < nb_args;) {
|
||||||
|
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
|
||||||
run_end = nb_args;
|
if (!onstack[i]) {
|
||||||
stack_adjust = 0;
|
++i;
|
||||||
for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
|
continue;
|
||||||
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
|
}
|
||||||
switch (mode) {
|
/* Possibly adjust stack to align SSE boundary. We're processing
|
||||||
case x86_64_mode_memory:
|
args from right to left while allocating happens left to right
|
||||||
case x86_64_mode_x87:
|
(stack grows down), so the adjustment needs to happen _after_
|
||||||
stack_arg:
|
an argument that requires it. */
|
||||||
if (align == 16)
|
if (stack_adjust) {
|
||||||
run_end = i;
|
o(0x50); /* push %rax; aka sub $8,%rsp */
|
||||||
else
|
args_size += 8;
|
||||||
stack_adjust += size;
|
stack_adjust = 0;
|
||||||
break;
|
|
||||||
|
|
||||||
case x86_64_mode_sse:
|
|
||||||
sse_reg -= reg_count;
|
|
||||||
if (sse_reg + reg_count > 8) goto stack_arg;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case x86_64_mode_integer:
|
|
||||||
gen_reg -= reg_count;
|
|
||||||
if (gen_reg + reg_count > REGN) goto stack_arg;
|
|
||||||
break;
|
|
||||||
default: break; /* nothing to be done for x86_64_mode_none */
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if (onstack[i] == 2)
|
||||||
gen_reg = run_gen_reg;
|
stack_adjust = 1;
|
||||||
sse_reg = run_sse_reg;
|
|
||||||
|
|
||||||
/* adjust stack to align SSE boundary */
|
|
||||||
if (stack_adjust &= 15) {
|
|
||||||
/* fetch cpu flag before the following sub will change the value */
|
|
||||||
if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
|
|
||||||
gv(RC_INT);
|
|
||||||
|
|
||||||
stack_adjust = 16 - stack_adjust;
|
vrotb(i+1);
|
||||||
o(0x48);
|
|
||||||
oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
|
|
||||||
args_size += stack_adjust;
|
|
||||||
}
|
|
||||||
|
|
||||||
for(i = run_start; i < run_end;) {
|
|
||||||
/* Swap argument to top, it will possibly be changed here,
|
|
||||||
and might use more temps. At the end of the loop we keep
|
|
||||||
in on the stack and swap it back to its original position
|
|
||||||
if it is a register. */
|
|
||||||
SValue tmp = vtop[0];
|
|
||||||
int arg_stored = 1;
|
|
||||||
|
|
||||||
vtop[0] = vtop[-i];
|
switch (vtop->type.t & VT_BTYPE) {
|
||||||
vtop[-i] = tmp;
|
case VT_STRUCT:
|
||||||
mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, &reg_count);
|
/* allocate the necessary size on stack */
|
||||||
|
o(0x48);
|
||||||
switch (vtop->type.t & VT_BTYPE) {
|
oad(0xec81, size); /* sub $xxx, %rsp */
|
||||||
case VT_STRUCT:
|
/* generate structure store */
|
||||||
if (mode == x86_64_mode_sse) {
|
r = get_reg(RC_INT);
|
||||||
if (sse_reg > 8)
|
orex(1, r, 0, 0x89); /* mov %rsp, r */
|
||||||
sse_reg -= reg_count;
|
o(0xe0 + REG_VALUE(r));
|
||||||
else
|
vset(&vtop->type, r | VT_LVAL, 0);
|
||||||
arg_stored = 0;
|
vswap();
|
||||||
} else if (mode == x86_64_mode_integer) {
|
vstore();
|
||||||
if (gen_reg > REGN)
|
break;
|
||||||
gen_reg -= reg_count;
|
|
||||||
else
|
|
||||||
arg_stored = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (arg_stored) {
|
|
||||||
/* allocate the necessary size on stack */
|
|
||||||
o(0x48);
|
|
||||||
oad(0xec81, size); /* sub $xxx, %rsp */
|
|
||||||
/* generate structure store */
|
|
||||||
r = get_reg(RC_INT);
|
|
||||||
orex(1, r, 0, 0x89); /* mov %rsp, r */
|
|
||||||
o(0xe0 + REG_VALUE(r));
|
|
||||||
vset(&vtop->type, r | VT_LVAL, 0);
|
|
||||||
vswap();
|
|
||||||
vstore();
|
|
||||||
args_size += size;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case VT_LDOUBLE:
|
|
||||||
assert(0);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case VT_FLOAT:
|
|
||||||
case VT_DOUBLE:
|
|
||||||
assert(mode == x86_64_mode_sse);
|
|
||||||
if (sse_reg > 8) {
|
|
||||||
--sse_reg;
|
|
||||||
r = gv(RC_FLOAT);
|
|
||||||
o(0x50); /* push $rax */
|
|
||||||
/* movq %xmmN, (%rsp) */
|
|
||||||
o(0xd60f66);
|
|
||||||
o(0x04 + REG_VALUE(r)*8);
|
|
||||||
o(0x24);
|
|
||||||
args_size += size;
|
|
||||||
} else {
|
|
||||||
arg_stored = 0;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
assert(mode == x86_64_mode_integer);
|
|
||||||
/* simple type */
|
|
||||||
/* XXX: implicit cast ? */
|
|
||||||
if (gen_reg > REGN) {
|
|
||||||
--gen_reg;
|
|
||||||
r = gv(RC_INT);
|
|
||||||
orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
|
|
||||||
args_size += size;
|
|
||||||
} else {
|
|
||||||
arg_stored = 0;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* And swap the argument back to it's original position. */
|
|
||||||
tmp = vtop[0];
|
|
||||||
vtop[0] = vtop[-i];
|
|
||||||
vtop[-i] = tmp;
|
|
||||||
|
|
||||||
if (arg_stored) {
|
case VT_LDOUBLE:
|
||||||
vrotb(i+1);
|
|
||||||
assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
|
|
||||||
vpop();
|
|
||||||
--nb_args;
|
|
||||||
--run_end;
|
|
||||||
} else {
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* handle 16 byte aligned arguments at end of run */
|
|
||||||
run_start = i = run_end;
|
|
||||||
while (i < nb_args) {
|
|
||||||
/* Rotate argument to top since it will always be popped */
|
|
||||||
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
|
|
||||||
if (align != 16)
|
|
||||||
break;
|
|
||||||
|
|
||||||
vrotb(i+1);
|
|
||||||
|
|
||||||
if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
|
|
||||||
gv(RC_ST0);
|
gv(RC_ST0);
|
||||||
oad(0xec8148, size); /* sub $xxx, %rsp */
|
oad(0xec8148, size); /* sub $xxx, %rsp */
|
||||||
o(0x7cdb); /* fstpt 0(%rsp) */
|
o(0x7cdb); /* fstpt 0(%rsp) */
|
||||||
g(0x24);
|
g(0x24);
|
||||||
g(0x00);
|
g(0x00);
|
||||||
args_size += size;
|
break;
|
||||||
} else {
|
|
||||||
assert(mode == x86_64_mode_memory);
|
|
||||||
|
|
||||||
/* allocate the necessary size on stack */
|
case VT_FLOAT:
|
||||||
o(0x48);
|
case VT_DOUBLE:
|
||||||
oad(0xec81, size); /* sub $xxx, %rsp */
|
assert(mode == x86_64_mode_sse);
|
||||||
/* generate structure store */
|
r = gv(RC_FLOAT);
|
||||||
r = get_reg(RC_INT);
|
o(0x50); /* push $rax */
|
||||||
orex(1, r, 0, 0x89); /* mov %rsp, r */
|
/* movq %xmmN, (%rsp) */
|
||||||
o(0xe0 + REG_VALUE(r));
|
o(0xd60f66);
|
||||||
vset(&vtop->type, r | VT_LVAL, 0);
|
o(0x04 + REG_VALUE(r)*8);
|
||||||
vswap();
|
o(0x24);
|
||||||
vstore();
|
break;
|
||||||
args_size += size;
|
|
||||||
}
|
default:
|
||||||
|
assert(mode == x86_64_mode_integer);
|
||||||
vpop();
|
/* simple type */
|
||||||
--nb_args;
|
/* XXX: implicit cast ? */
|
||||||
}
|
r = gv(RC_INT);
|
||||||
|
orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
args_size += size;
|
||||||
|
|
||||||
|
vpop();
|
||||||
|
--nb_args;
|
||||||
|
onstack++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* XXX This should be superfluous. */
|
/* XXX This should be superfluous. */
|
||||||
save_regs(0); /* save used temporary registers */
|
save_regs(0); /* save used temporary registers */
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue