| /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */ |
| /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */ |
| /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */ |
| /* VMOVSS xmm1, m32 = VEX.LIG.F3.0F.WIG 11 /r */ |
| /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */ |
| /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */ |
| /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */ |
| /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */ |
| /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */ |
| /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */ |
| |
| /* . VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */ |
| /* . VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */ |
| /* . VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */ |
| /* . VCVTTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2C /r */ |
| /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */ |
| /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */ |
| /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */ |
| /* . VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */ |
| /* VANDPD r/m, rV, r ::: r = rV & r/m (MVR format) */ |
| /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m (MVR format) */ |
| /* VORPD r/m, rV, r ::: r = rV | r/m (MVR format) */ |
| /* VXORPD r/m, rV, r ::: r = rV ^ r/m (MVR format) */ |
| /* VXORPS r/m, rV, r ::: r = rV ^ r/m (MVR format) */ |
| /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */ |
| /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */ |
| /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */ |
| /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */ |
| /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */ |
| /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */ |
| /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */ |
| |
| /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */ |
| /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */ |
| /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */ |
| /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */ |
| |
| /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */ |
| /* VPSLLD imm8, xmm2, xmm1 = VEX.128.66.0F.WIG 72 /6 ib */ |
| /* VPSRLDQ VEX.NDD.128.66.0F.WIG 73 /3 ib */ |
| /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m (MVR format) */ |
| |
| /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */ |
| /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */ |
| /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */ |
| |
| /* . VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */ |
| /* . VPOR = VEX.NDS.128.66.0F.WIG EB /r */ |
| /* . VPXOR = VEX.NDS.128.66.0F.WIG EF /r */ |
| /* . VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */ |
| /* . VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */ |
| /* . VPADDD = VEX.NDS.128.66.0F.WIG FE /r */ |
| /* . VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) (MVR format) */ |
| /* . VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */ |
| /* . VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */ |
| /* . VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */ |
| /* . VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */ |
| /* VPEXTRD imm8, r32/m32, xmm2 */ |
| /* VINSERTF128 r/m, rV, rD */ |
| /* VEXTRACTF128 rS, r/m */ |
| |
| /* . VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4 */ |
| /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG 12 /r */ |
| /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */ |
| /* . VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */ |
| /* . VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */ |
| /* . VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */ |
| /* . VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */ |
| /* . VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */ |
| /* . VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */ |
| /* . VANDPS = VEX.NDS.128.0F.WIG 54 /r */ |
| /* . VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */ |
| /* . VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */ |
| |
| /* really needs testing -- Intel docs don't make sense */ |
| /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 */ |
| |
| /* really needs testing -- Intel docs don't make sense */ |
| /* of the form vmovq %xmm0,-0x8(%rsp) */ |
| |
| /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */ |
| /* . VANDNPS = VEX.NDS.128.0F.WIG 55 /r */ |
| /* . VORPS = VEX.NDS.128.0F.WIG 56 /r */ |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <malloc.h> |
| |
/* Short aliases for the unsigned integer types used by this test. */
typedef unsigned char      UChar;
typedef unsigned int       UInt;
typedef unsigned long      UWord;
typedef unsigned long long ULong;

/* Non-zero when the low five bits of _ptr (converted to UWord) are all
   clear, i.e. the address is a multiple of 32. */
#define IS_32_ALIGNED(_ptr) (((UWord)(_ptr) & 0x1F) == 0)

/* One vector-register image, viewable either as 32 bytes or as
   eight UInt words. */
typedef union {
   UChar u8[32];
   UInt  u32[8];
} YMM;

/* The complete working set handed to each test: four vector images
   followed by one 64-bit scalar.  The inline asm in GEN_test_RandM
   addresses these members at byte offsets 0, 32, 64, 96 and 128, so
   the member order must not change. */
typedef struct {
   YMM   a1;
   YMM   a2;
   YMM   a3;
   YMM   a4;
   ULong u64;
} Block;
| |
| void showYMM ( YMM* vec ) |
| { |
| int i; |
| assert(IS_32_ALIGNED(vec)); |
| for (i = 31; i >= 0; i--) { |
| printf("%02x", (UInt)vec->u8[i]); |
| if (i > 0 && 0 == ((i+0) & 7)) printf("."); |
| } |
| } |
| |
| void showBlock ( char* msg, Block* block ) |
| { |
| printf(" %s\n", msg); |
| printf(" "); showYMM(&block->a1); printf("\n"); |
| printf(" "); showYMM(&block->a2); printf("\n"); |
| printf(" "); showYMM(&block->a3); printf("\n"); |
| printf(" "); showYMM(&block->a4); printf("\n"); |
| printf(" %016llx\n", block->u64); |
| } |
| |
| UChar randUChar ( void ) |
| { |
| static UInt seed = 80021; |
| seed = 1103515245 * seed + 12345; |
| return (seed >> 17) & 0xFF; |
| } |
| |
| void randBlock ( Block* b ) |
| { |
| int i; |
| UChar* p = (UChar*)b; |
| for (i = 0; i < sizeof(Block); i++) |
| p[i] = randUChar(); |
| } |
| |
| |
/* Generate a function test_NAME, that tests the given insn, in both
   its mem and reg forms.  The reg form of the insn may mention, as
   operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14.  The mem form of
   the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9
   and %r14.

   The generated function allocates one 32-byte-aligned Block and
   then, for each form in turn (reg first, then mem):
     - fills the Block with bytes from randBlock and prints it
       ("before");
     - loads the Block into registers, runs the insn, and stores the
       registers back into the Block;
     - prints the Block again ("after").

   Register/Block mapping, by byte offset into the Block:
     reg form:  0 -> %ymm7, 32 -> %ymm8, 64 -> %ymm6, 96 -> %ymm9,
                128 -> %r14
     mem form:  %rax holds the Block's address, so (%rax) is the data
                at offset 0, which stays in memory and is not loaded
                into or stored back from a register by the harness;
                32 -> %ymm8, 64 -> %ymm7, 96 -> %ymm9, 128 -> %r14

   If the allocation fails, a message is written to stderr and the
   program exits with status 1 instead of dereferencing NULL. */

#define GEN_test_RandM(_name, _reg_form, _mem_form) \
   \
   static void test_##_name ( void ) \
   { \
      Block* b = memalign(32, sizeof(Block)); \
      if (b == NULL) { \
         fprintf(stderr, "%s: memalign failed\n", #_name); \
         exit(1); \
      } \
      randBlock(b); \
      printf("%s(reg)\n", #_name); \
      showBlock("before", b); \
      __asm__ __volatile__( \
         "vmovdqa 0(%0),%%ymm7" "\n\t" \
         "vmovdqa 32(%0),%%ymm8" "\n\t" \
         "vmovdqa 64(%0),%%ymm6" "\n\t" \
         "vmovdqa 96(%0),%%ymm9" "\n\t" \
         "movq 128(%0),%%r14" "\n\t" \
         _reg_form "\n\t" \
         "vmovdqa %%ymm7, 0(%0)" "\n\t" \
         "vmovdqa %%ymm8, 32(%0)" "\n\t" \
         "vmovdqa %%ymm6, 64(%0)" "\n\t" \
         "vmovdqa %%ymm9, 96(%0)" "\n\t" \
         "movq %%r14, 128(%0)" "\n\t" \
         : /*OUT*/ \
         : /*IN*/"r"(b) \
         : /*TRASH*/"xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
      ); \
      showBlock("after", b); \
      randBlock(b); \
      printf("%s(mem)\n", #_name); \
      showBlock("before", b); \
      __asm__ __volatile__( \
         "leaq 0(%0),%%rax" "\n\t" \
         "vmovdqa 32(%0),%%ymm8" "\n\t" \
         "vmovdqa 64(%0),%%ymm7" "\n\t" \
         "vmovdqa 96(%0),%%ymm9" "\n\t" \
         "movq 128(%0),%%r14" "\n\t" \
         _mem_form "\n\t" \
         "vmovdqa %%ymm8, 32(%0)" "\n\t" \
         "vmovdqa %%ymm7, 64(%0)" "\n\t" \
         "vmovdqa %%ymm9, 96(%0)" "\n\t" \
         "movq %%r14, 128(%0)" "\n\t" \
         : /*OUT*/ \
         : /*IN*/"r"(b) \
         : /*TRASH*/"xmm8","xmm7","xmm9","r14","rax","memory","cc" \
      ); \
      showBlock("after", b); \
      printf("\n"); \
      free(b); \
   }
| |
| GEN_test_RandM(VPOR_128, |
| "vpor %%xmm6, %%xmm8, %%xmm7", |
| "vpor (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VPXOR_128, |
| "vpxor %%xmm6, %%xmm8, %%xmm7", |
| "vpxor (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VPSUBB_128, |
| "vpsubb %%xmm6, %%xmm8, %%xmm7", |
| "vpsubb (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VPSUBD_128, |
| "vpsubd %%xmm6, %%xmm8, %%xmm7", |
| "vpsubd (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VPADDD_128, |
| "vpaddd %%xmm6, %%xmm8, %%xmm7", |
| "vpaddd (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VPMOVZXWD_128, |
| "vpmovzxwd %%xmm6, %%xmm8", |
| "vpmovzxwd (%%rax), %%xmm8") |
| |
| GEN_test_RandM(VPMOVZXBW_128, |
| "vpmovzxbw %%xmm6, %%xmm8", |
| "vpmovzxbw (%%rax), %%xmm8") |
| |
| GEN_test_RandM(VPBLENDVB_128, |
| "vpblendvb %%xmm9, %%xmm6, %%xmm8, %%xmm7", |
| "vpblendvb %%xmm9, (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VPMINSD_128, |
| "vpminsd %%xmm6, %%xmm8, %%xmm7", |
| "vpminsd (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VPMAXSD_128, |
| "vpmaxsd %%xmm6, %%xmm8, %%xmm7", |
| "vpmaxsd (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VANDPD_128, |
| "vandpd %%xmm6, %%xmm8, %%xmm7", |
| "vandpd (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VCVTSI2SD_32, |
| "vcvtsi2sdl %%r14d, %%xmm8, %%xmm7", |
| "vcvtsi2sdl (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VCVTSI2SD_64, |
| "vcvtsi2sdq %%r14, %%xmm8, %%xmm7", |
| "vcvtsi2sdq (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VCVTSI2SS_64, |
| "vcvtsi2ssq %%r14, %%xmm8, %%xmm7", |
| "vcvtsi2ssq (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VCVTTSD2SI_32, |
| "vcvttsd2si %%xmm8, %%r14d", |
| "vcvttsd2si (%%rax), %%r14d") |
| |
| GEN_test_RandM(VPSHUFB_128, |
| "vpshufb %%xmm6, %%xmm8, %%xmm7", |
| "vpshufb (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VCMPSD_128_0x0, |
| "vcmpsd $0, %%xmm6, %%xmm8, %%xmm7", |
| "vcmpsd $0, (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VCMPSD_128_0xD, |
| "vcmpsd $0xd, %%xmm6, %%xmm8, %%xmm7", |
| "vcmpsd $0xd, (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VSQRTSD_128, |
| "vsqrtsd %%xmm6, %%xmm8, %%xmm7", |
| "vsqrtsd (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VORPS_128, |
| "vorps %%xmm6, %%xmm8, %%xmm7", |
| "vorps (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VANDNPS_128, |
| "vandnps %%xmm6, %%xmm8, %%xmm7", |
| "vandnps (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VMAXSS_128, |
| "vmaxss %%xmm6, %%xmm8, %%xmm7", |
| "vmaxss (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VMINSS_128, |
| "vminss %%xmm6, %%xmm8, %%xmm7", |
| "vminss (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VANDPS_128, |
| "vandps %%xmm6, %%xmm8, %%xmm7", |
| "vandps (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VCVTSI2SS_128, |
| "vcvtsi2ssl %%r14d, %%xmm8, %%xmm7", |
| "vcvtsi2ssl (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VUNPCKLPS_128, |
| "vunpcklps %%xmm6, %%xmm8, %%xmm7", |
| "vunpcklps (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VDIVSS_128, |
| "vdivss %%xmm6, %%xmm8, %%xmm7", |
| "vdivss (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VADDSS_128, |
| "vaddss %%xmm6, %%xmm8, %%xmm7", |
| "vaddss (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VSUBSS_128, |
| "vsubss %%xmm6, %%xmm8, %%xmm7", |
| "vsubss (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VMULSS_128, |
| "vmulss %%xmm6, %%xmm8, %%xmm7", |
| "vmulss (%%rax), %%xmm8, %%xmm7") |
| |
| int main ( void ) |
| { |
| test_VMULSS_128(); |
| test_VSUBSS_128(); |
| test_VADDSS_128(); |
| test_VDIVSS_128(); |
| test_VUNPCKLPS_128(); |
| test_VCVTSI2SS_128(); |
| test_VANDPS_128(); |
| test_VMINSS_128(); |
| test_VMAXSS_128(); |
| test_VANDNPS_128(); |
| test_VORPS_128(); |
| test_VSQRTSD_128(); |
| // test_VCMPSD_128_0xD(); BORKED |
| test_VCMPSD_128_0x0(); |
| test_VPSHUFB_128(); |
| test_VCVTTSD2SI_32(); |
| test_VCVTSI2SS_64(); |
| test_VCVTSI2SD_64(); |
| test_VCVTSI2SD_32(); |
| test_VPOR_128(); |
| test_VPXOR_128(); |
| test_VPSUBB_128(); |
| test_VPSUBD_128(); |
| test_VPADDD_128(); |
| test_VPMOVZXBW_128(); |
| test_VPMOVZXWD_128(); |
| test_VPBLENDVB_128(); |
| test_VPMINSD_128(); |
| test_VPMAXSD_128(); |
| test_VANDPD_128(); |
| return 0; |
| } |