From eb870b006c969d16002fc724d5bbfe33287801e5 Mon Sep 17 00:00:00 2001 From: seyko Date: Wed, 23 Sep 2015 14:58:06 +0300 Subject: [PATCH] SSE opcodes to TCC assembler (i386, x86_64) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit patch from Anaƫl Seghezzi a test program: ============================ #include struct fl4{ float x, y, z, w; }; void asm_test(void) { struct fl4 v1, v2, v3; v1.x = 0.1; v1.y = 0.2; v1.z = 0.4; v1.w = 0.3; v2.x = 0.11; v2.y = 0.0; v2.z = 0.01; v2.w = 0.04; asm volatile ( "movups %0, %%xmm0;" "movups %1, %%xmm1;" "addps %%xmm1, %%xmm0;" "movups %%xmm0, %2" :: "g" (v1), "g" (v2), "g" (v3) : "memory"); printf("sse fl4 add : %f %f %f %f\n", v3.x, v3.y, v3.z, v3.w); printf("expected : %f %f %f %f\n", v1.x+v2.x, v1.y+v2.y, v1.z+v2.z, v1.w+v2.w); } int main() { asm_test(); } /* sse fl4 add : 0.210000 0.200000 0.410000 0.340000 expected : 0.210000 0.200000 0.410000 0.340000 */ ============================ --- i386-asm.h | 28 +++++++++++++++++++++++++++- x86_64-asm.h | 28 +++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/i386-asm.h b/i386-asm.h index 43c88679..486cffe9 100644 --- a/i386-asm.h +++ b/i386-asm.h @@ -464,7 +464,33 @@ ALT(DEF_ASM_OP2(psrlq, 0x0f73, 2, OPC_MODRM, OPT_IM8, OPT_MMX )) DEF_ASM_OP2(punpcklbw, 0x0f60, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) DEF_ASM_OP2(punpcklwd, 0x0f61, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) DEF_ASM_OP2(punpckldq, 0x0f62, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) - DEF_ASM_OP2(pxor, 0x0fef, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) /* must be last !OP0 */ + DEF_ASM_OP2(pxor, 0x0fef, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + + /* sse */ + DEF_ASM_OP2(movups, 0x0f10, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_SSE ) +ALT(DEF_ASM_OP2(movups, 0x0f11, 0, OPC_MODRM, OPT_SSE, OPT_EA | OPT_REG32 )) + DEF_ASM_OP2(movaps, 0x0f28, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_SSE ) +ALT(DEF_ASM_OP2(movaps, 0x0f29, 0, OPC_MODRM, OPT_SSE, OPT_EA | OPT_REG32 )) + DEF_ASM_OP2(movhps, 0x0f16, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_SSE ) +ALT(DEF_ASM_OP2(movhps, 0x0f17, 0, OPC_MODRM, OPT_SSE, OPT_EA | OPT_REG32 )) + DEF_ASM_OP2(addps, 0x0f58, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(cvtpi2ps, 0x0f2a, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_SSE ) + DEF_ASM_OP2(cvtps2pi, 0x0f2d, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_MMX ) + DEF_ASM_OP2(cvttps2pi, 0x0f2c, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_MMX ) + DEF_ASM_OP2(divps, 0x0f5e, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(maxps, 0x0f5f, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(minps, 0x0f5d, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(mulps, 0x0f59, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(pavgb, 0x0fe0, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(pavgw, 0x0fe3, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(pmaxsw, 0x0fee, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pmaxub, 0x0fde, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pminsw, 0x0fea, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pminub, 0x0fda, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(rcpss, 0x0f53, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(rsqrtps, 0x0f52, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(sqrtps, 0x0f51, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(subps, 0x0f5c, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) #undef ALT #undef DEF_ASM_OP0 diff --git a/x86_64-asm.h b/x86_64-asm.h index 2f80e4ba..df76bf0f 100644 --- a/x86_64-asm.h +++ b/x86_64-asm.h @@ -439,7 +439,33 @@ ALT(DEF_ASM_OP2(psrlq, 0x0f73, 2, OPC_MODRM, OPT_IM8, OPT_MMX )) DEF_ASM_OP2(punpcklbw, 0x0f60, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) DEF_ASM_OP2(punpcklwd, 0x0f61, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) DEF_ASM_OP2(punpckldq, 0x0f62, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) - DEF_ASM_OP2(pxor, 0x0fef, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) /* must be last !OP0 */ + DEF_ASM_OP2(pxor, 0x0fef, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + + /* sse */ + DEF_ASM_OP2(movups, 0x0f10, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_SSE ) +ALT(DEF_ASM_OP2(movups, 0x0f11, 0, OPC_MODRM, OPT_SSE, OPT_EA | OPT_REG32 )) + DEF_ASM_OP2(movaps, 0x0f28, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_SSE ) +ALT(DEF_ASM_OP2(movaps, 0x0f29, 0, OPC_MODRM, OPT_SSE, OPT_EA | OPT_REG32 )) + DEF_ASM_OP2(movhps, 0x0f16, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_SSE ) +ALT(DEF_ASM_OP2(movhps, 0x0f17, 0, OPC_MODRM, OPT_SSE, OPT_EA | OPT_REG32 )) + DEF_ASM_OP2(addps, 0x0f58, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(cvtpi2ps, 0x0f2a, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_SSE ) + DEF_ASM_OP2(cvtps2pi, 0x0f2d, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_MMX ) + DEF_ASM_OP2(cvttps2pi, 0x0f2c, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_MMX ) + DEF_ASM_OP2(divps, 0x0f5e, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(maxps, 0x0f5f, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(minps, 0x0f5d, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(mulps, 0x0f59, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(pavgb, 0x0fe0, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(pavgw, 0x0fe3, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(pmaxsw, 0x0fee, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pmaxub, 0x0fde, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pminsw, 0x0fea, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pminub, 0x0fda, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(rcpss, 0x0f53, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(rsqrtps, 0x0f52, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(sqrtps, 0x0f51, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) + DEF_ASM_OP2(subps, 0x0f5c, 0, OPC_MODRM, OPT_EA | OPT_SSE, OPT_SSE ) #undef ALT #undef DEF_ASM_OP0