x86asm: Add popcnt
As this is the first opcode TCC supports that has a 0xf3 prefix and uses integer registers (not SSE ones), this also needs some shuffling of the prefix code to avoid generating invalid instructions: the REX prefix _must_ come directly before the main opcode (including the 0f prefix), and hence needs to come after the 0xf3 prefix. Also disable some mnemonics in asmtest.S that newer GAS doesn't support anymore. The only remaining difference from GAS (in asmtest.S) is now the 'lock negl' instruction, which TCC emits as 'lock; negl'. That's fine.
This commit is contained in:
parent
98bab41cba
commit
2309517066
6 changed files with 62 additions and 32 deletions
63
i386-asm.c
63
i386-asm.c
|
@ -682,7 +682,7 @@ static void maybe_print_stats (void)
|
|||
ST_FUNC void asm_opcode(TCCState *s1, int opcode)
|
||||
{
|
||||
const ASMInstr *pa;
|
||||
int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc;
|
||||
int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc, p;
|
||||
int nb_ops, s;
|
||||
Operand ops[MAX_OPERANDS], *pop;
|
||||
int op_type[3]; /* decoded op type */
|
||||
|
@ -891,30 +891,6 @@ again:
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef TCC_TARGET_X86_64
|
||||
/* Generate addr32 prefix if needed */
|
||||
for(i = 0; i < nb_ops; i++) {
|
||||
if (ops[i].type & OP_EA32) {
|
||||
g(0x67);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/* generate data16 prefix if needed */
|
||||
p66 = 0;
|
||||
if (s == 1)
|
||||
p66 = 1;
|
||||
else {
|
||||
/* accepting mmx+sse in all operands --> needs 0x66 to
|
||||
switch to sse mode. Accepting only sse in an operand --> is
|
||||
already SSE insn and needs 0x66/f2/f3 handling. */
|
||||
for (i = 0; i < nb_ops; i++)
|
||||
if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
|
||||
&& ops[i].type & OP_SSE)
|
||||
p66 = 1;
|
||||
}
|
||||
if (p66)
|
||||
g(0x66);
|
||||
#ifdef TCC_TARGET_X86_64
|
||||
rex64 = 0;
|
||||
if (pa->instr_type & OPC_48)
|
||||
|
@ -947,8 +923,45 @@ again:
|
|||
g(0x9b);
|
||||
if (seg_prefix)
|
||||
g(seg_prefix);
|
||||
#ifdef TCC_TARGET_X86_64
|
||||
/* Generate addr32 prefix if needed */
|
||||
for(i = 0; i < nb_ops; i++) {
|
||||
if (ops[i].type & OP_EA32) {
|
||||
g(0x67);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/* generate data16 prefix if needed */
|
||||
p66 = 0;
|
||||
if (s == 1)
|
||||
p66 = 1;
|
||||
else {
|
||||
/* accepting mmx+sse in all operands --> needs 0x66 to
|
||||
switch to sse mode. Accepting only sse in an operand --> is
|
||||
already SSE insn and needs 0x66/f2/f3 handling. */
|
||||
for (i = 0; i < nb_ops; i++)
|
||||
if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
|
||||
&& ops[i].type & OP_SSE)
|
||||
p66 = 1;
|
||||
}
|
||||
if (p66)
|
||||
g(0x66);
|
||||
|
||||
v = pa->opcode;
|
||||
p = v >> 8; /* possibly prefix byte(s) */
|
||||
switch (p) {
|
||||
case 0: break; /* no prefix */
|
||||
case 0x48: break; /* REX, handled elsewhere */
|
||||
case 0x66:
|
||||
case 0x67:
|
||||
case 0xf2:
|
||||
case 0xf3: v = v & 0xff; g(p); break;
|
||||
case 0xd4: case 0xd5: break; /* aam and aad, not prefix, but hardcoded immediate argument "10" */
|
||||
case 0xd8: case 0xd9: case 0xda: case 0xdb: /* x87, no normal prefix */
|
||||
case 0xdc: case 0xdd: case 0xde: case 0xdf: break;
|
||||
default: tcc_error("bad prefix 0x%2x in opcode table", p); break;
|
||||
}
|
||||
if (pa->instr_type & OPC_0F)
|
||||
v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
|
||||
if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
|
||||
|
|
|
@ -73,6 +73,8 @@ ALT(DEF_ASM_OP2(btrw, 0x0fba, 6, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA
|
|||
ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WLX, OPT_REGW, OPT_REGW | OPT_EA))
|
||||
ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA))
|
||||
|
||||
ALT(DEF_ASM_OP2(popcntw, 0xf30fb8, 0, OPC_MODRM | OPC_WLX, OPT_REGW | OPT_EA, OPT_REGW))
|
||||
|
||||
/* prefixes */
|
||||
DEF_ASM_OP0(wait, 0x9b)
|
||||
DEF_ASM_OP0(fwait, 0x9b)
|
||||
|
|
|
@ -268,6 +268,7 @@
|
|||
DEF_WLX(bts)
|
||||
DEF_WLX(btr)
|
||||
DEF_WLX(btc)
|
||||
DEF_WLX(popcnt)
|
||||
|
||||
DEF_WLX(lar)
|
||||
DEF_WLX(lsl)
|
||||
|
|
|
@ -220,7 +220,7 @@ ex%: $(TOPSRC)/examples/ex%.c
|
|||
|
||||
# tiny assembler testing
|
||||
asmtest.ref: asmtest.S
|
||||
$(CC) -Wa,-W -o asmtest.ref.o -c asmtest.S
|
||||
$(CC) -Wa,-W -Wa,-mx86-used-note=no -o asmtest.ref.o -c asmtest.S
|
||||
objdump -D asmtest.ref.o > asmtest.ref
|
||||
|
||||
ifeq ($(ARCH),arm)
|
||||
|
|
|
@ -425,7 +425,7 @@ L3:
|
|||
|
||||
fsubp %st(5)
|
||||
fsubp
|
||||
fsubp %st(1), %st
|
||||
//fsubp %st(1), %st # not accepted by new GAS anymore
|
||||
|
||||
fsubs 0x1000
|
||||
fisubs 0x1002
|
||||
|
@ -438,7 +438,7 @@ L3:
|
|||
|
||||
fsubrp %st(5)
|
||||
fsubrp
|
||||
fsubrp %st(1), %st
|
||||
//fsubrp %st(1), %st # not accepted by new GAS anymore
|
||||
|
||||
fsubrs 0x1000
|
||||
fisubrs 0x1002
|
||||
|
@ -451,7 +451,7 @@ L3:
|
|||
|
||||
fdivp %st(5)
|
||||
fdivp
|
||||
fdivp %st(1), %st
|
||||
//fdivp %st(1), %st # not accepted by new GAS anymore
|
||||
|
||||
fdivs 0x1000
|
||||
fidivs 0x1002
|
||||
|
@ -745,7 +745,14 @@ int $0x10
|
|||
bts %edx, 0x1000
|
||||
btsl $2, 0x1000
|
||||
|
||||
|
||||
popcnt %ax, %si
|
||||
popcntw %ax, %si
|
||||
popcnt 0x1000, %edx
|
||||
popcntl 0x1000, %edx
|
||||
#ifdef __x86_64__
|
||||
popcnt %rbx, %rdi
|
||||
popcntq %rcx, %r8
|
||||
#endif
|
||||
|
||||
#ifdef __i386__
|
||||
boundl %edx, 0x10000
|
||||
|
@ -768,8 +775,13 @@ int $0x10
|
|||
|
||||
swapgs
|
||||
|
||||
str %rdx
|
||||
str %r9
|
||||
/* Newer gas assembles 'str %rdx' as 'str %edx', based on the observation
|
||||
that the 16bit value of the task register is zero-extended into the
|
||||
destination anyway, and hence storing into %edx is the same as storing
|
||||
into %rdx. TCC doesn't do that micro-optimization, hence just store
|
||||
into the 32bit reg as well. */
|
||||
str %edx
|
||||
str %r9d
|
||||
#endif
|
||||
|
||||
lmsw 0x1000
|
||||
|
|
|
@ -73,6 +73,8 @@ ALT(DEF_ASM_OP2(btrw, 0x0fba, 6, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA
|
|||
ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WLX, OPT_REGW, OPT_REGW | OPT_EA))
|
||||
ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WLX, OPT_IM8, OPT_REGW | OPT_EA))
|
||||
|
||||
ALT(DEF_ASM_OP2(popcntw, 0xf30fb8, 0, OPC_MODRM | OPC_WLX, OPT_REGW | OPT_EA, OPT_REGW))
|
||||
|
||||
/* prefixes */
|
||||
DEF_ASM_OP0(lock, 0xf0)
|
||||
DEF_ASM_OP0(rep, 0xf3)
|
||||
|
|
Loading…
Reference in a new issue