diff --git a/i386-asm.c b/i386-asm.c index ebdfe036..22bb4340 100644 --- a/i386-asm.c +++ b/i386-asm.c @@ -682,7 +682,7 @@ static void maybe_print_stats (void) ST_FUNC void asm_opcode(TCCState *s1, int opcode) { const ASMInstr *pa; - int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc; + int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc, p; int nb_ops, s; Operand ops[MAX_OPERANDS], *pop; int op_type[3]; /* decoded op type */ @@ -891,30 +891,6 @@ again: } } -#ifdef TCC_TARGET_X86_64 - /* Generate addr32 prefix if needed */ - for(i = 0; i < nb_ops; i++) { - if (ops[i].type & OP_EA32) { - g(0x67); - break; - } - } -#endif - /* generate data16 prefix if needed */ - p66 = 0; - if (s == 1) - p66 = 1; - else { - /* accepting mmx+sse in all operands --> needs 0x66 to - switch to sse mode. Accepting only sse in an operand --> is - already SSE insn and needs 0x66/f2/f3 handling. */ - for (i = 0; i < nb_ops; i++) - if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE) - && ops[i].type & OP_SSE) - p66 = 1; - } - if (p66) - g(0x66); #ifdef TCC_TARGET_X86_64 rex64 = 0; if (pa->instr_type & OPC_48) @@ -947,8 +923,45 @@ again: g(0x9b); if (seg_prefix) g(seg_prefix); +#ifdef TCC_TARGET_X86_64 + /* Generate addr32 prefix if needed */ + for(i = 0; i < nb_ops; i++) { + if (ops[i].type & OP_EA32) { + g(0x67); + break; + } + } +#endif + /* generate data16 prefix if needed */ + p66 = 0; + if (s == 1) + p66 = 1; + else { + /* accepting mmx+sse in all operands --> needs 0x66 to + switch to sse mode. Accepting only sse in an operand --> is + already SSE insn and needs 0x66/f2/f3 handling. 
*/ + for (i = 0; i < nb_ops; i++) + if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE) + && ops[i].type & OP_SSE) + p66 = 1; + } + if (p66) + g(0x66); v = pa->opcode; + p = v >> 8; /* possibly prefix byte(s) */ + switch (p) { + case 0: break; /* no prefix */ + case 0x48: break; /* REX, handled elsewhere */ + case 0x66: + case 0x67: + case 0xf2: + case 0xf3: v = v & 0xff; g(p); break; + case 0xd4: case 0xd5: break; /* aam and aad, not prefix, but hardcoded immediate argument "10" */ + case 0xd8: case 0xd9: case 0xda: case 0xdb: /* x87, no normal prefix */ + case 0xdc: case 0xdd: case 0xde: case 0xdf: break; + default: /* none of the high bytes handled above: report it zero-padded */ tcc_error("bad prefix 0x%02x in opcode table", p); break; + } if (pa->instr_type & OPC_0F) v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff); if ((v == 0x69 || v == 0x6b) && nb_ops == 2) { diff --git a/i386-asm.h b/i386-asm.h index dfc51837..16ed5116 100644 --- a/i386-asm.h +++ b/i386-asm.h @@ -73,6 +73,8 @@ ALT(DEF_ASM_OP2(btrw, 0x0fba, 6, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WLX, OPT_REGW, OPT_REGW | OPT_EA)) ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA)) +ALT(DEF_ASM_OP2(popcntw, 0xf30fb8, 0, OPC_MODRM | OPC_WLX, OPT_REGW | OPT_EA, OPT_REGW)) + /* prefixes */ DEF_ASM_OP0(wait, 0x9b) DEF_ASM_OP0(fwait, 0x9b) diff --git a/i386-tok.h b/i386-tok.h index e3ef2c75..3145aa63 100644 --- a/i386-tok.h +++ b/i386-tok.h @@ -268,6 +268,7 @@ DEF_WLX(bts) DEF_WLX(btr) DEF_WLX(btc) + DEF_WLX(popcnt) DEF_WLX(lar) DEF_WLX(lsl) diff --git a/tests/Makefile b/tests/Makefile index 2405e954..fa97c74a 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -220,7 +220,7 @@ ex%: $(TOPSRC)/examples/ex%.c # tiny assembler testing asmtest.ref: asmtest.S - $(CC) -Wa,-W -o asmtest.ref.o -c asmtest.S + $(CC) -Wa,-W -Wa,-mx86-used-note=no -o asmtest.ref.o -c asmtest.S objdump -D asmtest.ref.o > asmtest.ref ifeq ($(ARCH),arm) diff --git a/tests/asmtest.S b/tests/asmtest.S index 
e9c0e32f..3d2080bd 100644 --- a/tests/asmtest.S +++ b/tests/asmtest.S @@ -425,7 +425,7 @@ L3: fsubp %st(5) fsubp - fsubp %st(1), %st + //fsubp %st(1), %st # not accepted by new GAS anymore fsubs 0x1000 fisubs 0x1002 @@ -438,7 +438,7 @@ L3: fsubrp %st(5) fsubrp - fsubrp %st(1), %st + //fsubrp %st(1), %st # not accepted by new GAS anymore fsubrs 0x1000 fisubrs 0x1002 @@ -451,7 +451,7 @@ L3: fdivp %st(5) fdivp - fdivp %st(1), %st + //fdivp %st(1), %st # not accepted by new GAS anymore fdivs 0x1000 fidivs 0x1002 @@ -745,7 +745,14 @@ int $0x10 bts %edx, 0x1000 btsl $2, 0x1000 - + popcnt %ax, %si + popcntw %ax, %si + popcnt 0x1000, %edx + popcntl 0x1000, %edx +#ifdef __x86_64__ + popcnt %rbx, %rdi + popcntq %rcx, %r8 +#endif #ifdef __i386__ boundl %edx, 0x10000 @@ -768,8 +775,13 @@ int $0x10 swapgs - str %rdx - str %r9 + /* Newer gas assemble 'str %rdx' as 'str %edx', based on the observation + that the 16bit value of the task register is zero-extended into the + destination anyway, and hence storing into %edx is the same as storing + into %rdx. TCC doesn't do that micro-optimization, hence just store + into the 32bit reg as well. */ + str %edx + str %r9d #endif lmsw 0x1000 diff --git a/x86_64-asm.h b/x86_64-asm.h index 4e037732..fdfd8bc0 100644 --- a/x86_64-asm.h +++ b/x86_64-asm.h @@ -73,6 +73,8 @@ ALT(DEF_ASM_OP2(btrw, 0x0fba, 6, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WLX, OPT_REGW, OPT_REGW | OPT_EA)) ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA)) +ALT(DEF_ASM_OP2(popcntw, 0xf30fb8, 0, OPC_MODRM | OPC_WLX, OPT_REGW | OPT_EA, OPT_REGW)) + /* prefixes */ DEF_ASM_OP0(lock, 0xf0) DEF_ASM_OP0(rep, 0xf3)