x86asm: Add popcnt

As this is the first opcode TCC supports that has a 0xf3 prefix
and uses integer registers (not SSE ones), this also needs some shuffling
of the prefix code so as not to generate invalid instructions: the REX
prefix _must_ come directly before the main opcode (including the 0f
prefix), and hence needs to come after the 0xf3 prefix.  Also disable some
mnemonics in asmtest.S that newer GAS doesn't support anymore.  The only
remaining difference to GAS (in asmtest.S) is now the 'lock negl'
instruction, which TCC emits as 'lock; negl'.  That's fine.
This commit is contained in:
Michael Matz 2022-07-07 17:02:39 +02:00
parent 98bab41cba
commit 2309517066
6 changed files with 62 additions and 32 deletions

View file

@ -682,7 +682,7 @@ static void maybe_print_stats (void)
ST_FUNC void asm_opcode(TCCState *s1, int opcode) ST_FUNC void asm_opcode(TCCState *s1, int opcode)
{ {
const ASMInstr *pa; const ASMInstr *pa;
int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc; int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc, p;
int nb_ops, s; int nb_ops, s;
Operand ops[MAX_OPERANDS], *pop; Operand ops[MAX_OPERANDS], *pop;
int op_type[3]; /* decoded op type */ int op_type[3]; /* decoded op type */
@ -891,30 +891,6 @@ again:
} }
} }
#ifdef TCC_TARGET_X86_64
/* Generate addr32 prefix if needed */
for(i = 0; i < nb_ops; i++) {
if (ops[i].type & OP_EA32) {
g(0x67);
break;
}
}
#endif
/* generate data16 prefix if needed */
p66 = 0;
if (s == 1)
p66 = 1;
else {
/* accepting mmx+sse in all operands --> needs 0x66 to
switch to sse mode. Accepting only sse in an operand --> is
already SSE insn and needs 0x66/f2/f3 handling. */
for (i = 0; i < nb_ops; i++)
if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
&& ops[i].type & OP_SSE)
p66 = 1;
}
if (p66)
g(0x66);
#ifdef TCC_TARGET_X86_64 #ifdef TCC_TARGET_X86_64
rex64 = 0; rex64 = 0;
if (pa->instr_type & OPC_48) if (pa->instr_type & OPC_48)
@ -947,8 +923,45 @@ again:
g(0x9b); g(0x9b);
if (seg_prefix) if (seg_prefix)
g(seg_prefix); g(seg_prefix);
#ifdef TCC_TARGET_X86_64
/* Generate addr32 prefix if needed */
for(i = 0; i < nb_ops; i++) {
if (ops[i].type & OP_EA32) {
g(0x67);
break;
}
}
#endif
/* generate data16 prefix if needed */
p66 = 0;
if (s == 1)
p66 = 1;
else {
/* accepting mmx+sse in all operands --> needs 0x66 to
switch to sse mode. Accepting only sse in an operand --> is
already SSE insn and needs 0x66/f2/f3 handling. */
for (i = 0; i < nb_ops; i++)
if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
&& ops[i].type & OP_SSE)
p66 = 1;
}
if (p66)
g(0x66);
v = pa->opcode; v = pa->opcode;
p = v >> 8; /* possibly prefix byte(s) */
switch (p) {
case 0: break; /* no prefix */
case 0x48: break; /* REX, handled elsewhere */
case 0x66:
case 0x67:
case 0xf2:
case 0xf3: v = v & 0xff; g(p); break;
case 0xd4: case 0xd5: break; /* aam and aad, not prefix, but hardcoded immediate argument "10" */
case 0xd8: case 0xd9: case 0xda: case 0xdb: /* x87, no normal prefix */
case 0xdc: case 0xdd: case 0xde: case 0xdf: break;
default: tcc_error("bad prefix 0x%2x in opcode table", p); break;
}
if (pa->instr_type & OPC_0F) if (pa->instr_type & OPC_0F)
v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff); v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
if ((v == 0x69 || v == 0x6b) && nb_ops == 2) { if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {

View file

@ -73,6 +73,8 @@ ALT(DEF_ASM_OP2(btrw, 0x0fba, 6, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA
ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WLX, OPT_REGW, OPT_REGW | OPT_EA)) ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WLX, OPT_REGW, OPT_REGW | OPT_EA))
ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA)) ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA))
ALT(DEF_ASM_OP2(popcntw, 0xf30fb8, 0, OPC_MODRM | OPC_WLX, OPT_REGW | OPT_EA, OPT_REGW))
/* prefixes */ /* prefixes */
DEF_ASM_OP0(wait, 0x9b) DEF_ASM_OP0(wait, 0x9b)
DEF_ASM_OP0(fwait, 0x9b) DEF_ASM_OP0(fwait, 0x9b)

View file

@ -268,6 +268,7 @@
DEF_WLX(bts) DEF_WLX(bts)
DEF_WLX(btr) DEF_WLX(btr)
DEF_WLX(btc) DEF_WLX(btc)
DEF_WLX(popcnt)
DEF_WLX(lar) DEF_WLX(lar)
DEF_WLX(lsl) DEF_WLX(lsl)

View file

@ -220,7 +220,7 @@ ex%: $(TOPSRC)/examples/ex%.c
# tiny assembler testing # tiny assembler testing
asmtest.ref: asmtest.S asmtest.ref: asmtest.S
$(CC) -Wa,-W -o asmtest.ref.o -c asmtest.S $(CC) -Wa,-W -Wa,-mx86-used-note=no -o asmtest.ref.o -c asmtest.S
objdump -D asmtest.ref.o > asmtest.ref objdump -D asmtest.ref.o > asmtest.ref
ifeq ($(ARCH),arm) ifeq ($(ARCH),arm)

View file

@ -425,7 +425,7 @@ L3:
fsubp %st(5) fsubp %st(5)
fsubp fsubp
fsubp %st(1), %st //fsubp %st(1), %st # not accepted by new GAS anymore
fsubs 0x1000 fsubs 0x1000
fisubs 0x1002 fisubs 0x1002
@ -438,7 +438,7 @@ L3:
fsubrp %st(5) fsubrp %st(5)
fsubrp fsubrp
fsubrp %st(1), %st //fsubrp %st(1), %st # not accepted by new GAS anymore
fsubrs 0x1000 fsubrs 0x1000
fisubrs 0x1002 fisubrs 0x1002
@ -451,7 +451,7 @@ L3:
fdivp %st(5) fdivp %st(5)
fdivp fdivp
fdivp %st(1), %st //fdivp %st(1), %st # not accepted by new GAS anymore
fdivs 0x1000 fdivs 0x1000
fidivs 0x1002 fidivs 0x1002
@ -745,7 +745,14 @@ int $0x10
bts %edx, 0x1000 bts %edx, 0x1000
btsl $2, 0x1000 btsl $2, 0x1000
popcnt %ax, %si
popcntw %ax, %si
popcnt 0x1000, %edx
popcntl 0x1000, %edx
#ifdef __x86_64__
popcnt %rbx, %rdi
popcntq %rcx, %r8
#endif
#ifdef __i386__ #ifdef __i386__
boundl %edx, 0x10000 boundl %edx, 0x10000
@ -768,8 +775,13 @@ int $0x10
swapgs swapgs
str %rdx /* Newer gas assemble 'str %rdx' as 'str %edx', based on the observation
str %r9 that the 16bit value of the task register is zero-extended into the
destination anyway, and hence storing into %edx is the same as storing
into %rdx. TCC doesn't do that micro-optimization, hence just store
into the 32bit reg as well. */
str %edx
str %r9d
#endif #endif
lmsw 0x1000 lmsw 0x1000

View file

@ -73,6 +73,8 @@ ALT(DEF_ASM_OP2(btrw, 0x0fba, 6, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA
ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WLX, OPT_REGW, OPT_REGW | OPT_EA)) ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WLX, OPT_REGW, OPT_REGW | OPT_EA))
ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA)) ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA))
ALT(DEF_ASM_OP2(popcntw, 0xf30fb8, 0, OPC_MODRM | OPC_WLX, OPT_REGW | OPT_EA, OPT_REGW))
/* prefixes */ /* prefixes */
DEF_ASM_OP0(lock, 0xf0) DEF_ASM_OP0(lock, 0xf0)
DEF_ASM_OP0(rep, 0xf3) DEF_ASM_OP0(rep, 0xf3)