From aeb8f427e2be133d6a0a67b695eb9579f5fa4232 Mon Sep 17 00:00:00 2001 From: grischka Date: Mon, 4 Jan 2021 13:16:05 +0100 Subject: [PATCH] tccgen: introduce TOK_NEG for unary minus for floats (currently only). On x86_64 uses built-in fp constants (in libtcc1.c) to avoid multiple anonymous instances. Also: win32/i386: use __alloca for big struct stack store - use new function int tok_alloc_const(const char*); - change alloca86.S to preserve EDX tccelf.c: fix a warning with 'roinf_use' --- i386-asm.c | 8 ++-- i386-gen.c | 38 +++++++++---------- lib/alloca86.S | 28 +++++++------- lib/libtcc1.c | 6 +++ tcc.h | 2 + tccasm.c | 19 ++++------ tccelf.c | 9 +++-- tccgen.c | 99 ++++++++++++++++++++++++++++---------------------- tccpp.c | 6 +++ tcctok.h | 2 + x86_64-gen.c | 28 +++++++++++++- 11 files changed, 145 insertions(+), 100 deletions(-) diff --git a/i386-asm.c b/i386-asm.c index 10926fef..23e1fbbb 100644 --- a/i386-asm.c +++ b/i386-asm.c @@ -1207,7 +1207,7 @@ ST_FUNC int asm_parse_regvar (int t) s = table_ident[t - TOK_IDENT]->str; if (s[0] != '%') return -1; - t = tok_alloc(s+1, strlen(s)-1)->tok; + t = tok_alloc_const(s + 1); unget_tok(t); unget_tok('%'); parse_operand(tcc_state, &op); @@ -1488,7 +1488,7 @@ ST_FUNC void subst_asm_operand(CString *add_str, in the C symbol table when later looking up this name. So enter them now into the asm label list when we still know the symbol. */ - get_asm_sym(tok_alloc(name, strlen(name))->tok, sv->sym); + get_asm_sym(tok_alloc_const(name), sv->sym); } if (tcc_state->leading_underscore) cstr_ccat(add_str, '_'); @@ -1698,7 +1698,6 @@ ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands, ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str) { int reg; - TokenSym *ts; #ifdef TCC_TARGET_X86_64 unsigned int type; #endif @@ -1707,8 +1706,7 @@ ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str) !strcmp(str, "cc") || !strcmp(str, "flags")) return; - ts = tok_alloc(str, strlen(str)); - reg = ts->tok; + reg = tok_alloc_const(str); if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) { reg -= TOK_ASM_eax; } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) { diff --git a/i386-gen.c b/i386-gen.c index d83418ef..f34072dd 100644 --- a/i386-gen.c +++ b/i386-gen.c @@ -423,26 +423,20 @@ ST_FUNC void gfunc_call(int nb_args) size = (size + 3) & ~3; /* allocate the necessary size on stack */ #ifdef TCC_TARGET_PE - if (size >= 0x4096) { - /* cannot call alloca with bound checking. Do stack probing. */ - o(0x50); // push %eax - oad(0xb8, size - 4); // mov size-4,%eax - oad(0x3d, 4096); // p1: cmp $4096,%eax - o(0x1476); // jbe - oad(0x248485,-4096); // test %eax,-4096(%esp) - oad(0xec81, 4096); // sub $4096,%esp - oad(0x2d, 4096); // sub $4096,%eax - o(0xe5eb); // jmp - o(0xc429); // p2: sub %eax,%esp - oad(0xc481, size - 4); // add size-4,%esp - o(0x58); // pop %eax - } + if (size >= 4096) { + r = get_reg(RC_EAX); + oad(0x68, size); // push size + /* cannot call normal 'alloca' with bound checking */ + gen_static_call(tok_alloc_const("__alloca")); + gadd_sp(4); + } else #endif - oad(0xec81, size); /* sub $xxx, %esp */ - /* generate structure store */ - r = get_reg(RC_INT); - o(0x89); /* mov %esp, r */ - o(0xe0 + r); + { + oad(0xec81, size); /* sub $xxx, %esp */ + /* generate structure store */ + r = get_reg(RC_INT); + o(0xe089 + (r << 8)); /* mov %esp, r */ + } vset(&vtop->type, r | VT_LVAL, 0); vswap(); vstore(); @@ -844,6 +838,12 @@ ST_FUNC void gen_opf(int op) { int a, ft, fc, swapped, r; + if (op == TOK_NEG) { /* unary minus */ + gv(RC_FLOAT); + o(0xe0d9); /* fchs */ + return; + } + /* convert constants to memory references */ if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { vswap(); diff --git a/lib/alloca86.S b/lib/alloca86.S index bdc73911..426c67da 100644 --- a/lib/alloca86.S +++ b/lib/alloca86.S @@ -7,30 +7,30 @@ # define _(s) s #endif -.globl _(alloca) +.globl _(alloca), _(__alloca) _(alloca): - pop %edx - pop %eax +_(__alloca): + push %ebp + mov %esp,%ebp + mov 8(%ebp),%eax add $3,%eax and $-4,%eax - jz p3 - #ifdef _WIN32 + jmp .+16 #p2 p1: - cmp $4096,%eax - jbe p2 - test %eax,-4096(%esp) sub $4096,%esp sub $4096,%eax - jmp p1 + test %eax,(%esp) p2: + cmp $4096,%eax + jae p1 #endif - sub %eax,%esp - mov %esp,%eax -p3: - push %edx - push %edx + mov 4(%ebp),%eax + mov 0(%ebp),%ebp + add $8,%esp + push %eax + lea 8(%esp),%eax ret /* ---------------------------------------------- */ diff --git a/lib/libtcc1.c b/lib/libtcc1.c index 5d1942d8..d6d8dd28 100644 --- a/lib/libtcc1.c +++ b/lib/libtcc1.c @@ -625,3 +625,9 @@ long long __fixxfdi (long double a1) return s ? ret : -ret; } #endif /* !ARM */ + +#if defined __x86_64__ +/* float constants used for unary minus operation */ +const float __mzerosf = -0.0; +const double __mzerodf = -0.0; +#endif diff --git a/tcc.h b/tcc.h index 3bf68a58..72c8c496 100644 --- a/tcc.h +++ b/tcc.h @@ -1090,6 +1090,7 @@ struct filespec { #define TOK_SHL '<' /* shift left */ #define TOK_SAR '>' /* signed shift right */ #define TOK_SHR 0x8b /* unsigned shift right */ +#define TOK_NEG TOK_MID /* unary minus operation (for floats) */ #define TOK_ARROW 0xa0 /* -> */ #define TOK_DOTS 0xa1 /* three dots */ @@ -1378,6 +1379,7 @@ ST_DATA TokenSym **table_ident; #define IS_NUM 4 ST_FUNC TokenSym *tok_alloc(const char *str, int len); +ST_FUNC int tok_alloc_const(const char *str); ST_FUNC const char *get_tok_str(int v, CValue *cv); ST_FUNC void begin_macro(TokenString *str, int alloc); ST_FUNC void end_macro(void); diff --git a/tccasm.c b/tccasm.c index 911052de..097f41ca 100644 --- a/tccasm.c +++ b/tccasm.c @@ -27,11 +27,8 @@ static Section *last_text_section; /* to handle .previous asm directive */ ST_FUNC int asm_get_local_label_name(TCCState *s1, unsigned int n) { char buf[64]; - TokenSym *ts; - snprintf(buf, sizeof(buf), "L..%u", n); - ts = tok_alloc(buf, strlen(buf)); - return ts->tok; + return tok_alloc_const(buf); } static int tcc_assemble_internal(TCCState *s1, int do_preprocess, int global); @@ -54,12 +51,11 @@ static int asm2cname(int v, int *addeddot) if (!name) return v; if (name[0] == '_') { - v = tok_alloc(name + 1, strlen(name) - 1)->tok; + v = tok_alloc_const(name + 1); } else if (!strchr(name, '.')) { - int n = strlen(name) + 2; char newname[256]; snprintf(newname, sizeof newname, ".%s", name); - v = tok_alloc(newname, n - 1)->tok; + v = tok_alloc_const(newname); *addeddot = 1; } return v; @@ -111,11 +107,10 @@ ST_FUNC Sym* get_asm_sym(int name, Sym *csym) static Sym* asm_section_sym(TCCState *s1, Section *sec) { - char buf[100]; - int label = tok_alloc(buf, - snprintf(buf, sizeof buf, "L.%s", sec->name) - )->tok; - Sym *sym = asm_label_find(label); + char buf[100]; int label; Sym *sym; + snprintf(buf, sizeof buf, "L.%s", sec->name); + label = tok_alloc_const(buf); + sym = asm_label_find(label); return sym ? sym : asm_new_label1(s1, label, 1, sec->sh_num, 0); } diff --git a/tccelf.c b/tccelf.c index 4cc37202..2453b2f6 100644 --- a/tccelf.c +++ b/tccelf.c @@ -2511,9 +2511,12 @@ static int elf_output_file(TCCState *s1, const char *filename) { int i, ret, phnum, phfill, shnum, file_type, file_offset, *sec_order; struct dyn_inf dyninf = {0}; - struct ro_inf roinf, *roinf_use = &roinf; + struct ro_inf roinf; ElfW(Phdr) *phdr; Section *strsec, *interp, *dynamic, *dynstr, *note = NULL; +#ifndef ELF_OBJ_ONLY + struct ro_inf *roinf_use = NULL; +#endif file_type = s1->output_type; @@ -2648,10 +2651,8 @@ static int elf_output_file(TCCState *s1, const char *filename) #if !TARGETOS_FreeBSD && !TARGETOS_NetBSD && !defined(__APPLE__) && !defined(_WIN32) /* GNU_RELRO */ if (file_type != TCC_OUTPUT_OBJ) - phnum++; - else + phnum++, roinf_use = &roinf; #endif - roinf_use = NULL; /* allocate program segment headers */ phdr = tcc_mallocz(phnum * sizeof(ElfW(Phdr))); diff --git a/tccgen.c b/tccgen.c index dabc897f..914294bd 100644 --- a/tccgen.c +++ b/tccgen.c @@ -2790,6 +2790,33 @@ static void gen_opic(int op) } } +#if defined TCC_TARGET_X86_64 || defined TCC_TARGET_I386 +# define gen_negf gen_opf +#else +/* XXX: implement in gen_opf() for other backends too */ +void gen_negf(int op) +{ + /* In IEEE negate(x) isn't subtract(0,x). Without NaNs it's + subtract(-0, x), but with them it's really a sign flip + operation. We implement this with bit manipulation and have + to do some type reinterpretation for this, which TCC can do + only via memory. */ + + int align, size, bt; + + size = type_size(&vtop->type, &align); + bt = vtop->type.t & VT_BTYPE; + save_reg(gv(RC_TYPE(bt))); + vdup(); + incr_bf_adr(size - 1); + vdup(); + vpushi(0x80); /* flip sign */ + gen_op('^'); + vstore(); + vpop(); +} +#endif + /* generate a floating point operation with constant propagation */ static void gen_opif(int op) { @@ -2803,6 +2830,9 @@ static void gen_opif(int op) v1 = vtop - 1; v2 = vtop; + if (op == TOK_NEG) + v1 = v2; + /* currently, we cannot do computations with forward symbols */ c1 = (v1->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST; c2 = (v2->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST; @@ -2817,29 +2847,43 @@ static void gen_opif(int op) f1 = v1->c.ld; f2 = v2->c.ld; } - /* NOTE: we only do constant propagation if finite number (not NaN or infinity) (ANSI spec) */ - if (!ieee_finite(f1) || !ieee_finite(f2)) + if (!(ieee_finite(f1) || !ieee_finite(f2)) && !const_wanted) goto general_case; - switch(op) { case '+': f1 += f2; break; case '-': f1 -= f2; break; case '*': f1 *= f2; break; case '/': if (f2 == 0.0) { + union { float f; unsigned u; } x1, x2, y; /* If not in initializer we need to potentially generate FP exceptions at runtime, otherwise we want to fold. */ if (!const_wanted) goto general_case; + /* the run-time result of 0.0/0.0 on x87, also of other compilers + when used to compile the f1 /= f2 below, would be -nan */ + x1.f = f1, x2.f = f2; + if (f1 == 0.0) + y.u = 0x7fc00000; /* nan */ + else + y.u = 0x7f800000; /* infinity */ + y.u |= (x1.u ^ x2.u) & 0x80000000; /* set sign */ + f1 = y.f; + break; } - f1 /= f2; + f1 /= f2; break; + case TOK_NEG: + f1 = -f1; + goto unary_result; /* XXX: also handles tests ? */ default: goto general_case; } + vtop--; + unary_result: /* XXX: overflow test ? */ if (v1->type.t == VT_FLOAT) { v1->c.f = f1; @@ -2848,10 +2892,13 @@ static void gen_opif(int op) } else { v1->c.ld = f1; } - vtop--; } else { general_case: - gen_opf(op); + if (op == TOK_NEG) { + gen_negf(op); + } else { + gen_opf(op); + } } } @@ -5878,44 +5925,8 @@ ST_FUNC void unary(void) case '-': next(); unary(); - t = vtop->type.t & VT_BTYPE; - if (is_float(t)) { - if ((vtop->r & VT_VALMASK) == VT_CONST) { - /* This is what gen_opif would do if we had a NEG operation. */ - if (t == VT_FLOAT) - vtop->c.f = -vtop->c.f; - else if (t == VT_DOUBLE) - vtop->c.d = -vtop->c.d; - else - vtop->c.ld = -vtop->c.ld; - } else { - /* In IEEE negate(x) isn't subtract(0,x). Without NaNs it's - subtract(-0, x), but with them it's really a sign flip - operation. We implement this with bit manipulation and have - to do some type reinterpretation for this, which TCC can do - only via memory. */ - int align, size = type_size(&vtop->type, &align); - save_reg(gv(RC_TYPE(t))); - vdup(); - gaddrof(); - vtop->type = char_pointer_type; - /* Byte of sign bit. For big endian, this would have to - add zero always. */ -#if defined(TCC_TARGET_X86_64) || defined(TCC_TARGET_I386) - /* sizeof long double is 12 or 16 here, but it's - really the 80bit extended float format. */ - if (t == VT_LDOUBLE) - size = 10; -#endif - vpushi(size - 1); - gen_op('+'); - indir(); - vdup(); - vpushi(0x80); /* flip sign */ - gen_op('^'); - vstore(); - vpop(); - } + if (is_float(vtop->type.t)) { + gen_opif(TOK_NEG); } else { vpushi(0); vswap(); diff --git a/tccpp.c b/tccpp.c index e0821a60..cd9bd684 100644 --- a/tccpp.c +++ b/tccpp.c @@ -487,6 +487,12 @@ ST_FUNC TokenSym *tok_alloc(const char *str, int len) return tok_alloc_new(pts, str, len); } +ST_FUNC int tok_alloc_const(const char *str) +{ + return tok_alloc(str, strlen(str))->tok; +} + + /* XXX: buffer overflow */ /* XXX: float tokens */ ST_FUNC const char *get_tok_str(int v, CValue *cv) diff --git a/tcctok.h b/tcctok.h index a7552393..6fc04af7 100644 --- a/tcctok.h +++ b/tcctok.h @@ -100,6 +100,8 @@ DEF(TOK___NAN__, "__nan__") DEF(TOK___SNAN__, "__snan__") DEF(TOK___INF__, "__inf__") + DEF(TOK___mzerosf, "__mzerosf") + DEF(TOK___mzerodf, "__mzerodf") /* attribute identifiers */ /* XXX: handle all tokens generically since speed is not critical */ diff --git a/x86_64-gen.c b/x86_64-gen.c index 222249d4..a8eef52a 100644 --- a/x86_64-gen.c +++ b/x86_64-gen.c @@ -1813,14 +1813,38 @@ void gen_opl(int op) gen_opi(op); } +void vpush_const(int t, int v) +{ + CType ctype = { t | VT_CONSTANT, 0 }; + vpushsym(&ctype, external_global_sym(v, &ctype)); + vtop->r |= VT_LVAL; +} + /* generate a floating point operation 'v = t1 op t2' instruction. The two operands are guaranteed to have the same floating point type */ /* XXX: need to use ST1 too */ void gen_opf(int op) { int a, ft, fc, swapped, r; - int float_type = - (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT; + int bt = vtop->type.t & VT_BTYPE; + int float_type = bt == VT_LDOUBLE ? RC_ST0 : RC_FLOAT; + + if (op == TOK_NEG) { /* unary minus */ + gv(float_type); + if (float_type == RC_ST0) { + o(0xe0d9); /* fchs */ + } else { + /* -0.0, in libtcc1.c */ + vpush_const(bt, bt == VT_FLOAT ? TOK___mzerosf : TOK___mzerodf); + gv(RC_FLOAT); + if (bt == VT_DOUBLE) + o(0x66); + /* xorp[sd] %xmm1, %xmm0 */ + o(0xc0570f | (REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8) << 16); + vtop--; + } + return; + } /* convert constants to memory references */ if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {