From 2309517066c545f79b7228cfc2f4cf5539ffd6ff Mon Sep 17 00:00:00 2001 From: Michael Matz Date: Thu, 7 Jul 2022 17:02:39 +0200 Subject: [PATCH] x86asm: Add popcnt as this is the first opcode TCC supports that has a 0xf3 prefix and uses integer registers (not SSE ones) this also needs some shuffling of the prefix code to not generate invalid instructions (the REX prefix _must_ come directly before the main opcode (including 0f prefix), and hence needs to come after the 0xf3 prefix). Also disable some mnemonics in asmtest.S that new GAS doesn't support anymore. The only difference to GAS (in asmtest.S) is now the 'lock negl' instruction which TCC emits as 'lock; negl'. That's fine. --- i386-asm.c | 63 +++++++++++++++++++++++++++++-------------------- i386-asm.h | 2 ++ i386-tok.h | 1 + tests/Makefile | 2 +- tests/asmtest.S | 24 ++++++++++++++----- x86_64-asm.h | 2 ++ 6 files changed, 62 insertions(+), 32 deletions(-) diff --git a/i386-asm.c b/i386-asm.c index ebdfe036..22bb4340 100644 --- a/i386-asm.c +++ b/i386-asm.c @@ -682,7 +682,7 @@ static void maybe_print_stats (void) ST_FUNC void asm_opcode(TCCState *s1, int opcode) { const ASMInstr *pa; - int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc; + int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc, p; int nb_ops, s; Operand ops[MAX_OPERANDS], *pop; int op_type[3]; /* decoded op type */ @@ -891,30 +891,6 @@ again: } } -#ifdef TCC_TARGET_X86_64 - /* Generate addr32 prefix if needed */ - for(i = 0; i < nb_ops; i++) { - if (ops[i].type & OP_EA32) { - g(0x67); - break; - } - } -#endif - /* generate data16 prefix if needed */ - p66 = 0; - if (s == 1) - p66 = 1; - else { - /* accepting mmx+sse in all operands --> needs 0x66 to - switch to sse mode. Accepting only sse in an operand --> is - already SSE insn and needs 0x66/f2/f3 handling. */ - for (i = 0; i < nb_ops; i++) - if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE) - && ops[i].type & OP_SSE) - p66 = 1; - } - if (p66) - g(0x66); #ifdef TCC_TARGET_X86_64 rex64 = 0; if (pa->instr_type & OPC_48) @@ -947,8 +923,45 @@ again: g(0x9b); if (seg_prefix) g(seg_prefix); +#ifdef TCC_TARGET_X86_64 + /* Generate addr32 prefix if needed */ + for(i = 0; i < nb_ops; i++) { + if (ops[i].type & OP_EA32) { + g(0x67); + break; + } + } +#endif + /* generate data16 prefix if needed */ + p66 = 0; + if (s == 1) + p66 = 1; + else { + /* accepting mmx+sse in all operands --> needs 0x66 to + switch to sse mode. Accepting only sse in an operand --> is + already SSE insn and needs 0x66/f2/f3 handling. */ + for (i = 0; i < nb_ops; i++) + if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE) + && ops[i].type & OP_SSE) + p66 = 1; + } + if (p66) + g(0x66); v = pa->opcode; + p = v >> 8; /* possibly prefix byte(s) */ + switch (p) { + case 0: break; /* no prefix */ + case 0x48: break; /* REX, handled elsewhere */ + case 0x66: + case 0x67: + case 0xf2: + case 0xf3: v = v & 0xff; g(p); break; + case 0xd4: case 0xd5: break; /* aam and aad, not prefix, but hardcoded immediate argument "10" */ + case 0xd8: case 0xd9: case 0xda: case 0xdb: /* x87, no normal prefix */ + case 0xdc: case 0xdd: case 0xde: case 0xdf: break; + default: tcc_error("bad prefix 0x%2x in opcode table", p); break; + } if (pa->instr_type & OPC_0F) v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff); if ((v == 0x69 || v == 0x6b) && nb_ops == 2) { diff --git a/i386-asm.h b/i386-asm.h index dfc51837..16ed5116 100644 --- a/i386-asm.h +++ b/i386-asm.h @@ -73,6 +73,8 @@ ALT(DEF_ASM_OP2(btrw, 0x0fba, 6, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WLX, OPT_REGW, OPT_REGW | OPT_EA)) ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA)) +ALT(DEF_ASM_OP2(popcntw, 0xf30fb8, 0, OPC_MODRM | OPC_WLX, OPT_REGW | OPT_EA, OPT_REGW)) + /* prefixes */ DEF_ASM_OP0(wait, 0x9b) DEF_ASM_OP0(fwait, 0x9b) diff --git a/i386-tok.h b/i386-tok.h index e3ef2c75..3145aa63 100644 --- a/i386-tok.h +++ b/i386-tok.h @@ -268,6 +268,7 @@ DEF_WLX(bts) DEF_WLX(btr) DEF_WLX(btc) + DEF_WLX(popcnt) DEF_WLX(lar) DEF_WLX(lsl) diff --git a/tests/Makefile b/tests/Makefile index 2405e954..fa97c74a 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -220,7 +220,7 @@ ex%: $(TOPSRC)/examples/ex%.c # tiny assembler testing asmtest.ref: asmtest.S - $(CC) -Wa,-W -o asmtest.ref.o -c asmtest.S + $(CC) -Wa,-W -Wa,-mx86-used-note=no -o asmtest.ref.o -c asmtest.S objdump -D asmtest.ref.o > asmtest.ref ifeq ($(ARCH),arm) diff --git a/tests/asmtest.S b/tests/asmtest.S index e9c0e32f..3d2080bd 100644 --- a/tests/asmtest.S +++ b/tests/asmtest.S @@ -425,7 +425,7 @@ L3: fsubp %st(5) fsubp - fsubp %st(1), %st + //fsubp %st(1), %st # not accepted by new GAS anymore fsubs 0x1000 fisubs 0x1002 @@ -438,7 +438,7 @@ L3: fsubrp %st(5) fsubrp - fsubrp %st(1), %st + //fsubrp %st(1), %st # not accepted by new GAS anymore fsubrs 0x1000 fisubrs 0x1002 @@ -451,7 +451,7 @@ L3: fdivp %st(5) fdivp - fdivp %st(1), %st + //fdivp %st(1), %st # not accepted by new GAS anymore fdivs 0x1000 fidivs 0x1002 @@ -745,7 +745,14 @@ int $0x10 bts %edx, 0x1000 btsl $2, 0x1000 - + popcnt %ax, %si + popcntw %ax, %si + popcnt 0x1000, %edx + popcntl 0x1000, %edx +#ifdef __x86_64__ + popcnt %rbx, %rdi + popcntq %rcx, %r8 +#endif #ifdef __i386__ boundl %edx, 0x10000 @@ -768,8 +775,13 @@ int $0x10 swapgs - str %rdx - str %r9 + /* Newer gas assemble 'str %rdx' as 'str %edx', based on the observation + that the 16bit value of the task register is zero-extended into the + destination anyway, and hence storing into %edx is the same as storing + into %rdx. TCC doesn't do that micro-optimization, hence just store + into the 32bit reg as well. */ + str %edx + str %r9d #endif lmsw 0x1000 diff --git a/x86_64-asm.h b/x86_64-asm.h index 4e037732..fdfd8bc0 100644 --- a/x86_64-asm.h +++ b/x86_64-asm.h @@ -73,6 +73,8 @@ ALT(DEF_ASM_OP2(btrw, 0x0fba, 6, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WLX, OPT_REGW, OPT_REGW | OPT_EA)) ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA)) +ALT(DEF_ASM_OP2(popcntw, 0xf30fb8, 0, OPC_MODRM | OPC_WLX, OPT_REGW | OPT_EA, OPT_REGW)) + /* prefixes */ DEF_ASM_OP0(lock, 0xf0) DEF_ASM_OP0(rep, 0xf3)