| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include "tests/malloc.h" |
| /* Short integer aliases used throughout this file. */ |
| typedef unsigned char UChar; |
| typedef unsigned int UInt; |
| typedef unsigned long int UWord; |
| typedef unsigned long long int ULong; |
| /* Global byte array; the VGATHER* tests below take its address as the gather base. NOTE(review): nothing in view fills it in - confirm where it is initialised. */ |
| UChar randArray[1027] __attribute__((used)); |
| /* True iff the low five bits of the pointer value are zero, i.e. the address is a multiple of 32. */ |
| #define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr))) |
| /* One vector's worth of data, viewable as 32 UChars or as 8 UInts. */ |
| typedef union { UChar u8[32]; UInt u32[8]; } YMM; |
| /* Test state: four vectors plus one 64-bit scalar. The harness asm addresses these fields at byte offsets 0, 32, 64, 96 and 128. */ |
| typedef struct { YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; } Block; |
| |
| void showYMM ( YMM* vec ) |
| { |
| int i; |
| assert(IS_32_ALIGNED(vec)); |
| for (i = 31; i >= 0; i--) { |
| printf("%02x", (UInt)vec->u8[i]); |
| if (i > 0 && 0 == ((i+0) & 7)) printf("."); |
| } |
| } |
| |
| void showBlock ( char* msg, Block* block ) |
| { |
| printf(" %s\n", msg); |
| printf(" "); showYMM(&block->a1); printf("\n"); |
| printf(" "); showYMM(&block->a2); printf("\n"); |
| printf(" "); showYMM(&block->a3); printf("\n"); |
| printf(" "); showYMM(&block->a4); printf("\n"); |
| printf(" %016llx\n", block->u64); |
| } |
| |
| UChar randUChar ( void ) |
| { |
| static UInt seed = 80021; |
| seed = 1103515245 * seed + 12345; |
| return (seed >> 17) & 0xFF; |
| } |
| |
| void randBlock ( Block* b ) |
| { |
| int i; |
| UChar* p = (UChar*)b; |
| for (i = 0; i < sizeof(Block); i++) |
| p[i] = randUChar(); |
| } |
| |
| |
| /* Generate a function test_NAME, that tests the given insn, in both |
| its mem and reg forms. The reg form of the insn may mention, as |
| operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14. The mem form of |
| the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9 |
| and %r14. It's OK for the insn to clobber ymm0, as this is needed |
| for testing PCMPxSTRx, and ymm6, as this is needed for testing |
| MOVMASK variants. */ |
| |
/* Harness layout (byte offsets into the Block):
      a1 @ 0, a2 @ 32, a3 @ 64, a4 @ 96, u64 @ 128.
   Reg pass: a1->ymm7, a2->ymm8, a3->ymm6, a4->ymm9, u64->r14; all five
   are written back after _reg_form.
   Mem pass: rax points at a1 (the memory operand), a2->ymm8, a3->ymm7,
   a4->ymm9, u64->r14; a2, a3, a4 and u64 are written back after
   _mem_form.  a1 changes only if _mem_form itself stores through rax,
   and ymm6 is not loaded in this pass.
   The Block is printed before and after each pass. */
#define GEN_test_RandM(_name, _reg_form, _mem_form)   \
    \
    __attribute__ ((noinline)) static void test_##_name ( void )   \
    { \
       /* Several mem forms in this file (e.g. VPSLLW_256) use a      */ \
       /* 128-bit memory operand at 128(%rax), i.e. bytes 128..143,   */ \
       /* while sizeof(Block) is 136.  Allocate 8 zeroed tail bytes   */ \
       /* so that read stays inside the allocation.  randBlock and    */ \
       /* showBlock only touch sizeof(Block) bytes, so the printed    */ \
       /* output is unaffected.                                       */ \
       Block* b = memalign32(sizeof(Block) + 8); \
       { \
          UChar* tail = (UChar*)b + sizeof(Block); \
          int    k; \
          for (k = 0; k < 8; k++) tail[k] = 0; \
       } \
       randBlock(b); \
       printf("%s(reg)\n", #_name); \
       showBlock("before", b); \
       __asm__ __volatile__( \
          "vmovdqa 0(%0),%%ymm7" "\n\t"     /* a1  -> ymm7 */ \
          "vmovdqa 32(%0),%%ymm8" "\n\t"    /* a2  -> ymm8 */ \
          "vmovdqa 64(%0),%%ymm6" "\n\t"    /* a3  -> ymm6 */ \
          "vmovdqa 96(%0),%%ymm9" "\n\t"    /* a4  -> ymm9 */ \
          "movq 128(%0),%%r14" "\n\t"       /* u64 -> r14  */ \
          _reg_form "\n\t"                  /* insn under test */ \
          "vmovdqa %%ymm7, 0(%0)" "\n\t" \
          "vmovdqa %%ymm8, 32(%0)" "\n\t" \
          "vmovdqa %%ymm6, 64(%0)" "\n\t" \
          "vmovdqa %%ymm9, 96(%0)" "\n\t" \
          "movq %%r14, 128(%0)" "\n\t" \
          : /*OUT*/  \
          : /*IN*/"r"(b) \
          : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
       ); \
       showBlock("after", b); \
       randBlock(b); \
       printf("%s(mem)\n", #_name); \
       showBlock("before", b); \
       __asm__ __volatile__( \
          "leaq 0(%0),%%rax" "\n\t"         /* rax = &b->a1 */ \
          "vmovdqa 32(%0),%%ymm8" "\n\t"    /* a2  -> ymm8 */ \
          "vmovdqa 64(%0),%%ymm7" "\n\t"    /* a3  -> ymm7 (not ymm6) */ \
          "vmovdqa 96(%0),%%ymm9" "\n\t"    /* a4  -> ymm9 */ \
          "movq 128(%0),%%r14" "\n\t"       /* u64 -> r14  */ \
          _mem_form "\n\t"                  /* insn under test */ \
          "vmovdqa %%ymm8, 32(%0)" "\n\t" \
          "vmovdqa %%ymm7, 64(%0)" "\n\t" \
          "vmovdqa %%ymm9, 96(%0)" "\n\t" \
          "movq %%r14, 128(%0)" "\n\t" \
          : /*OUT*/  \
          : /*IN*/"r"(b) \
          : /*TRASH*/"xmm6", \
            "xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
       ); \
       showBlock("after", b); \
       printf("\n"); \
       free(b); \
    }
| |
/* Register-form-only test: the mem pass of GEN_test_RandM still runs,
   but with an empty string in place of the instruction. */
#define GEN_test_Ronly(insn_name, reg_insn) \
   GEN_test_RandM(insn_name, reg_insn, "")
/* Memory-form-only test: the reg pass of GEN_test_RandM still runs,
   but with an empty string in place of the instruction. */
#define GEN_test_Monly(insn_name, mem_insn) \
   GEN_test_RandM(insn_name, "", mem_insn)
| |
| /* Vector integer instructions promoted from 128-bit in AVX to 256-bit in AVX2. */ |
| /* Each GEN_test_RandM below defines test_<NAME>, running one instruction with a register source and then with a (%rax) memory source. */ |
| GEN_test_RandM(VPOR_256, |
| "vpor %%ymm6, %%ymm8, %%ymm7", |
| "vpor (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPXOR_256, |
| "vpxor %%ymm6, %%ymm8, %%ymm7", |
| "vpxor (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSUBB_256, |
| "vpsubb %%ymm6, %%ymm8, %%ymm7", |
| "vpsubb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSUBD_256, |
| "vpsubd %%ymm6, %%ymm8, %%ymm7", |
| "vpsubd (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPADDD_256, |
| "vpaddd %%ymm6, %%ymm8, %%ymm7", |
| "vpaddd (%%rax), %%ymm8, %%ymm7") |
| /* Widening moves: the register form names xmm6 as its source and writes all of ymm8. */ |
| GEN_test_RandM(VPMOVZXWD_256, |
| "vpmovzxwd %%xmm6, %%ymm8", |
| "vpmovzxwd (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPMOVZXBW_256, |
| "vpmovzxbw %%xmm6, %%ymm8", |
| "vpmovzxbw (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPBLENDVB_256, |
| "vpblendvb %%ymm9, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendvb %%ymm9, (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMINSD_256, |
| "vpminsd %%ymm6, %%ymm8, %%ymm7", |
| "vpminsd (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMAXSD_256, |
| "vpmaxsd %%ymm6, %%ymm8, %%ymm7", |
| "vpmaxsd (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSHUFB_256, |
| "vpshufb %%ymm6, %%ymm8, %%ymm7", |
| "vpshufb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPUNPCKLBW_256, |
| "vpunpcklbw %%ymm6, %%ymm8, %%ymm7", |
| "vpunpcklbw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPUNPCKHBW_256, |
| "vpunpckhbw %%ymm6, %%ymm8, %%ymm7", |
| "vpunpckhbw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPABSD_256, |
| "vpabsd %%ymm6, %%ymm8", |
| "vpabsd (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPACKUSWB_256, |
| "vpackuswb %%ymm9, %%ymm8, %%ymm7", |
| "vpackuswb (%%rax), %%ymm8, %%ymm7") |
| /* Register form only; the result is written to r14, which the harness stores into the u64 field. */ |
| GEN_test_Ronly(VPMOVMSKB_256, |
| "vpmovmskb %%ymm8, %%r14") |
| |
| GEN_test_RandM(VPAND_256, |
| "vpand %%ymm9, %%ymm8, %%ymm7", |
| "vpand (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPCMPEQB_256, |
| "vpcmpeqb %%ymm9, %%ymm8, %%ymm7", |
| "vpcmpeqb (%%rax), %%ymm8, %%ymm7") |
| /* NOTE: the mem forms of the next two tests use immediate 0xC6 and destination ymm8, not the 0x39 / ymm7 of the reg forms. */ |
| GEN_test_RandM(VPSHUFLW_0x39_256, |
| "vpshuflw $0x39, %%ymm9, %%ymm7", |
| "vpshuflw $0xC6, (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPSHUFHW_0x39_256, |
| "vpshufhw $0x39, %%ymm9, %%ymm7", |
| "vpshufhw $0xC6, (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPMULLW_256, |
| "vpmullw %%ymm9, %%ymm8, %%ymm7", |
| "vpmullw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPADDUSW_256, |
| "vpaddusw %%ymm9, %%ymm8, %%ymm7", |
| "vpaddusw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMULHUW_256, |
| "vpmulhuw %%ymm9, %%ymm8, %%ymm7", |
| "vpmulhuw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPADDUSB_256, |
| "vpaddusb %%ymm9, %%ymm8, %%ymm7", |
| "vpaddusb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPUNPCKLWD_256, |
| "vpunpcklwd %%ymm6, %%ymm8, %%ymm7", |
| "vpunpcklwd (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPUNPCKHWD_256, |
| "vpunpckhwd %%ymm6, %%ymm8, %%ymm7", |
| "vpunpckhwd (%%rax), %%ymm8, %%ymm7") |
| /* In this group the immediate-count shifts are exercised in register form only (GEN_test_Ronly). */ |
| GEN_test_Ronly(VPSLLD_0x05_256, |
| "vpslld $0x5, %%ymm9, %%ymm7") |
| |
| GEN_test_Ronly(VPSRLD_0x05_256, |
| "vpsrld $0x5, %%ymm9, %%ymm7") |
| |
| GEN_test_Ronly(VPSRAD_0x05_256, |
| "vpsrad $0x5, %%ymm9, %%ymm7") |
| |
| GEN_test_RandM(VPSUBUSB_256, |
| "vpsubusb %%ymm9, %%ymm8, %%ymm7", |
| "vpsubusb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSUBSB_256, |
| "vpsubsb %%ymm9, %%ymm8, %%ymm7", |
| "vpsubsb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_Ronly(VPSRLDQ_0x05_256, |
| "vpsrldq $0x5, %%ymm9, %%ymm7") |
| |
| GEN_test_Ronly(VPSLLDQ_0x05_256, |
| "vpslldq $0x5, %%ymm9, %%ymm7") |
| |
| GEN_test_RandM(VPANDN_256, |
| "vpandn %%ymm9, %%ymm8, %%ymm7", |
| "vpandn (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPUNPCKLQDQ_256, |
| "vpunpcklqdq %%ymm6, %%ymm8, %%ymm7", |
| "vpunpcklqdq (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_Ronly(VPSRLW_0x05_256, |
| "vpsrlw $0x5, %%ymm9, %%ymm7") |
| |
| GEN_test_Ronly(VPSLLW_0x05_256, |
| "vpsllw $0x5, %%ymm9, %%ymm7") |
| |
| GEN_test_RandM(VPADDW_256, |
| "vpaddw %%ymm6, %%ymm8, %%ymm7", |
| "vpaddw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPACKSSDW_256, |
| "vpackssdw %%ymm9, %%ymm8, %%ymm7", |
| "vpackssdw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPUNPCKLDQ_256, |
| "vpunpckldq %%ymm6, %%ymm8, %%ymm7", |
| "vpunpckldq (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPCMPEQD_256, |
| "vpcmpeqd %%ymm6, %%ymm8, %%ymm7", |
| "vpcmpeqd (%%rax), %%ymm8, %%ymm7") |
| /* NOTE: the reg form uses immediate 0x39 with destination ymm8; the mem form uses 0xC6 with destination ymm7. */ |
| GEN_test_RandM(VPSHUFD_0x39_256, |
| "vpshufd $0x39, %%ymm9, %%ymm8", |
| "vpshufd $0xC6, (%%rax), %%ymm7") |
| |
| GEN_test_RandM(VPADDQ_256, |
| "vpaddq %%ymm6, %%ymm8, %%ymm7", |
| "vpaddq (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSUBQ_256, |
| "vpsubq %%ymm6, %%ymm8, %%ymm7", |
| "vpsubq (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSUBW_256, |
| "vpsubw %%ymm6, %%ymm8, %%ymm7", |
| "vpsubw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPCMPEQQ_256, |
| "vpcmpeqq %%ymm6, %%ymm8, %%ymm7", |
| "vpcmpeqq (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPCMPGTQ_256, |
| "vpcmpgtq %%ymm6, %%ymm8, %%ymm7", |
| "vpcmpgtq (%%rax), %%ymm8, %%ymm7") |
| /* Further reg/mem pairs in the same pattern; in this group only the immediate-count shifts are reg-only. */ |
| GEN_test_Ronly(VPSRLQ_0x05_256, |
| "vpsrlq $0x5, %%ymm9, %%ymm7") |
| |
| GEN_test_RandM(VPMULUDQ_256, |
| "vpmuludq %%ymm6, %%ymm8, %%ymm7", |
| "vpmuludq (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMULDQ_256, |
| "vpmuldq %%ymm6, %%ymm8, %%ymm7", |
| "vpmuldq (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_Ronly(VPSLLQ_0x05_256, |
| "vpsllq $0x5, %%ymm9, %%ymm7") |
| |
| GEN_test_RandM(VPMAXUD_256, |
| "vpmaxud %%ymm6, %%ymm8, %%ymm7", |
| "vpmaxud (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMINUD_256, |
| "vpminud %%ymm6, %%ymm8, %%ymm7", |
| "vpminud (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMULLD_256, |
| "vpmulld %%ymm6, %%ymm8, %%ymm7", |
| "vpmulld (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMAXUW_256, |
| "vpmaxuw %%ymm6, %%ymm8, %%ymm7", |
| "vpmaxuw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMINUW_256, |
| "vpminuw %%ymm6, %%ymm8, %%ymm7", |
| "vpminuw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMAXSW_256, |
| "vpmaxsw %%ymm6, %%ymm8, %%ymm7", |
| "vpmaxsw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMINSW_256, |
| "vpminsw %%ymm6, %%ymm8, %%ymm7", |
| "vpminsw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMAXUB_256, |
| "vpmaxub %%ymm6, %%ymm8, %%ymm7", |
| "vpmaxub (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMINUB_256, |
| "vpminub %%ymm6, %%ymm8, %%ymm7", |
| "vpminub (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMAXSB_256, |
| "vpmaxsb %%ymm6, %%ymm8, %%ymm7", |
| "vpmaxsb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMINSB_256, |
| "vpminsb %%ymm6, %%ymm8, %%ymm7", |
| "vpminsb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMOVSXBW_256, |
| "vpmovsxbw %%xmm6, %%ymm8", |
| "vpmovsxbw (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPSUBUSW_256, |
| "vpsubusw %%ymm9, %%ymm8, %%ymm7", |
| "vpsubusw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSUBSW_256, |
| "vpsubsw %%ymm9, %%ymm8, %%ymm7", |
| "vpsubsw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPCMPEQW_256, |
| "vpcmpeqw %%ymm6, %%ymm8, %%ymm7", |
| "vpcmpeqw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPADDB_256, |
| "vpaddb %%ymm6, %%ymm8, %%ymm7", |
| "vpaddb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPUNPCKHDQ_256, |
| "vpunpckhdq %%ymm6, %%ymm8, %%ymm7", |
| "vpunpckhdq (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMOVSXDQ_256, |
| "vpmovsxdq %%xmm6, %%ymm8", |
| "vpmovsxdq (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPMOVSXWD_256, |
| "vpmovsxwd %%xmm6, %%ymm8", |
| "vpmovsxwd (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPMULHW_256, |
| "vpmulhw %%ymm9, %%ymm8, %%ymm7", |
| "vpmulhw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPUNPCKHQDQ_256, |
| "vpunpckhqdq %%ymm6, %%ymm8, %%ymm7", |
| "vpunpckhqdq (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_Ronly(VPSRAW_0x05_256, |
| "vpsraw $0x5, %%ymm9, %%ymm7") |
| |
| GEN_test_RandM(VPCMPGTB_256, |
| "vpcmpgtb %%ymm6, %%ymm8, %%ymm7", |
| "vpcmpgtb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPCMPGTW_256, |
| "vpcmpgtw %%ymm6, %%ymm8, %%ymm7", |
| "vpcmpgtw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPCMPGTD_256, |
| "vpcmpgtd %%ymm6, %%ymm8, %%ymm7", |
| "vpcmpgtd (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMOVZXBD_256, |
| "vpmovzxbd %%xmm6, %%ymm8", |
| "vpmovzxbd (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPMOVSXBD_256, |
| "vpmovsxbd %%xmm6, %%ymm8", |
| "vpmovsxbd (%%rax), %%ymm8") |
| /* VPALIGNR: the reg forms use immediates 0, 6 and 12; the mem forms use 3, 9 and 15. */ |
| GEN_test_RandM(VPALIGNR_256_1of3, |
| "vpalignr $0, %%ymm6, %%ymm8, %%ymm7", |
| "vpalignr $3, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPALIGNR_256_2of3, |
| "vpalignr $6, %%ymm6, %%ymm8, %%ymm7", |
| "vpalignr $9, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPALIGNR_256_3of3, |
| "vpalignr $12, %%ymm6, %%ymm8, %%ymm7", |
| "vpalignr $15, (%%rax), %%ymm8, %%ymm7") |
| /* VPBLENDW: the test name carries the reg-form immediate; each mem form uses a different immediate. */ |
| GEN_test_RandM(VPBLENDW_256_0x00, |
| "vpblendw $0x00, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendw $0x01, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDW_256_0xFE, |
| "vpblendw $0xFE, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendw $0xFF, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDW_256_0x30, |
| "vpblendw $0x30, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendw $0x03, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDW_256_0x21, |
| "vpblendw $0x21, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendw $0x12, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDW_256_0xD7, |
| "vpblendw $0xD7, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendw $0x6C, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDW_256_0xB5, |
| "vpblendw $0xB5, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendw $0x4A, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDW_256_0x85, |
| "vpblendw $0x85, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendw $0xDC, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDW_256_0x29, |
| "vpblendw $0x29, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendw $0x92, (%%rax), %%ymm8, %%ymm7") |
| /* Shifts by a run-time count. Reg form: r14d is masked and copied into xmm6. Mem form: the u64 field at 128(%rax) is masked in place and that address is used directly as the count operand. */ |
| GEN_test_RandM(VPSLLW_256, |
| "andl $15, %%r14d;" |
| "vmovd %%r14d, %%xmm6;" |
| "vpsllw %%xmm6, %%ymm8, %%ymm9", |
| "andq $15, 128(%%rax);" |
| "vpsllw 128(%%rax), %%ymm8, %%ymm9") |
| |
| GEN_test_RandM(VPSRLW_256, |
| "andl $15, %%r14d;" |
| "vmovd %%r14d, %%xmm6;" |
| "vpsrlw %%xmm6, %%ymm8, %%ymm9", |
| "andq $15, 128(%%rax);" |
| "vpsrlw 128(%%rax), %%ymm8, %%ymm9") |
| |
| GEN_test_RandM(VPSRAW_256, |
| "andl $31, %%r14d;" /* NOTE(review): mask is 31 here but 15 in the mem form; possibly deliberate, to cover counts above 15 - confirm */ |
| "vmovd %%r14d, %%xmm6;" |
| "vpsraw %%xmm6, %%ymm8, %%ymm9", |
| "andq $15, 128(%%rax);" |
| "vpsraw 128(%%rax), %%ymm8, %%ymm9") |
| |
| GEN_test_RandM(VPSLLD_256, |
| "andl $31, %%r14d;" |
| "vmovd %%r14d, %%xmm6;" |
| "vpslld %%xmm6, %%ymm8, %%ymm9", |
| "andq $31, 128(%%rax);" |
| "vpslld 128(%%rax), %%ymm8, %%ymm9") |
| |
| GEN_test_RandM(VPSRLD_256, |
| "andl $31, %%r14d;" |
| "vmovd %%r14d, %%xmm6;" |
| "vpsrld %%xmm6, %%ymm8, %%ymm9", |
| "andq $31, 128(%%rax);" |
| "vpsrld 128(%%rax), %%ymm8, %%ymm9") |
| |
| GEN_test_RandM(VPSRAD_256, |
| "andl $31, %%r14d;" |
| "vmovd %%r14d, %%xmm6;" |
| "vpsrad %%xmm6, %%ymm8, %%ymm9", |
| "andq $31, 128(%%rax);" |
| "vpsrad 128(%%rax), %%ymm8, %%ymm9") |
| |
| GEN_test_RandM(VPSLLQ_256, |
| "andl $63, %%r14d;" |
| "vmovd %%r14d, %%xmm6;" |
| "vpsllq %%xmm6, %%ymm8, %%ymm9", |
| "andq $63, 128(%%rax);" |
| "vpsllq 128(%%rax), %%ymm8, %%ymm9") |
| |
| GEN_test_RandM(VPSRLQ_256, |
| "andl $63, %%r14d;" |
| "vmovd %%r14d, %%xmm6;" |
| "vpsrlq %%xmm6, %%ymm8, %%ymm9", |
| "andq $63, 128(%%rax);" |
| "vpsrlq 128(%%rax), %%ymm8, %%ymm9") |
| /* More reg/mem pairs. VMOVNTDQA is tested with a memory source only (GEN_test_Monly). */ |
| GEN_test_RandM(VPMADDWD_256, |
| "vpmaddwd %%ymm6, %%ymm8, %%ymm7", |
| "vpmaddwd (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_Monly(VMOVNTDQA_256, |
| "vmovntdqa (%%rax), %%ymm9") |
| |
| GEN_test_RandM(VPACKSSWB_256, |
| "vpacksswb %%ymm6, %%ymm8, %%ymm7", |
| "vpacksswb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPAVGB_256, |
| "vpavgb %%ymm6, %%ymm8, %%ymm7", |
| "vpavgb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPAVGW_256, |
| "vpavgw %%ymm6, %%ymm8, %%ymm7", |
| "vpavgw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPADDSB_256, |
| "vpaddsb %%ymm6, %%ymm8, %%ymm7", |
| "vpaddsb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPADDSW_256, |
| "vpaddsw %%ymm6, %%ymm8, %%ymm7", |
| "vpaddsw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPHADDW_256, |
| "vphaddw %%ymm6, %%ymm8, %%ymm7", |
| "vphaddw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPHADDD_256, |
| "vphaddd %%ymm6, %%ymm8, %%ymm7", |
| "vphaddd (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPHADDSW_256, |
| "vphaddsw %%ymm6, %%ymm8, %%ymm7", |
| "vphaddsw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMADDUBSW_256, |
| "vpmaddubsw %%ymm6, %%ymm8, %%ymm7", |
| "vpmaddubsw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPHSUBW_256, |
| "vphsubw %%ymm6, %%ymm8, %%ymm7", |
| "vphsubw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPHSUBD_256, |
| "vphsubd %%ymm6, %%ymm8, %%ymm7", |
| "vphsubd (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPHSUBSW_256, |
| "vphsubsw %%ymm6, %%ymm8, %%ymm7", |
| "vphsubsw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPABSB_256, |
| "vpabsb %%ymm6, %%ymm7", |
| "vpabsb (%%rax), %%ymm7") |
| |
| GEN_test_RandM(VPABSW_256, |
| "vpabsw %%ymm6, %%ymm7", |
| "vpabsw (%%rax), %%ymm7") |
| |
| GEN_test_RandM(VPMOVSXBQ_256, |
| "vpmovsxbq %%xmm6, %%ymm8", |
| "vpmovsxbq (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPMOVSXWQ_256, |
| "vpmovsxwq %%xmm6, %%ymm8", |
| "vpmovsxwq (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPACKUSDW_256, |
| "vpackusdw %%ymm6, %%ymm8, %%ymm7", |
| "vpackusdw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMOVZXBQ_256, |
| "vpmovzxbq %%xmm6, %%ymm8", |
| "vpmovzxbq (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPMOVZXWQ_256, |
| "vpmovzxwq %%xmm6, %%ymm8", |
| "vpmovzxwq (%%rax), %%ymm8") |
| |
| GEN_test_RandM(VPMOVZXDQ_256, |
| "vpmovzxdq %%xmm6, %%ymm8", |
| "vpmovzxdq (%%rax), %%ymm8") |
| /* VMPSADBW with eight different immediates; here the reg and mem forms of each test share the same immediate. */ |
| GEN_test_RandM(VMPSADBW_256_0x0, |
| "vmpsadbw $0, %%ymm6, %%ymm8, %%ymm7", |
| "vmpsadbw $0, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VMPSADBW_256_0x39, |
| "vmpsadbw $0x39, %%ymm6, %%ymm8, %%ymm7", |
| "vmpsadbw $0x39, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VMPSADBW_256_0x32, |
| "vmpsadbw $0x32, %%ymm6, %%ymm8, %%ymm7", |
| "vmpsadbw $0x32, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VMPSADBW_256_0x2b, |
| "vmpsadbw $0x2b, %%ymm6, %%ymm8, %%ymm7", |
| "vmpsadbw $0x2b, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VMPSADBW_256_0x24, |
| "vmpsadbw $0x24, %%ymm6, %%ymm8, %%ymm7", |
| "vmpsadbw $0x24, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VMPSADBW_256_0x1d, |
| "vmpsadbw $0x1d, %%ymm6, %%ymm8, %%ymm7", |
| "vmpsadbw $0x1d, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VMPSADBW_256_0x16, |
| "vmpsadbw $0x16, %%ymm6, %%ymm8, %%ymm7", |
| "vmpsadbw $0x16, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VMPSADBW_256_0x0f, |
| "vmpsadbw $0x0f, %%ymm6, %%ymm8, %%ymm7", |
| "vmpsadbw $0x0f, (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSADBW_256, |
| "vpsadbw %%ymm6, %%ymm8, %%ymm7", |
| "vpsadbw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSIGNB_256, |
| "vpsignb %%ymm6, %%ymm8, %%ymm7", |
| "vpsignb (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSIGNW_256, |
| "vpsignw %%ymm6, %%ymm8, %%ymm7", |
| "vpsignw (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSIGND_256, |
| "vpsignd %%ymm6, %%ymm8, %%ymm7", |
| "vpsignd (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPMULHRSW_256, |
| "vpmulhrsw %%ymm6, %%ymm8, %%ymm7", |
| "vpmulhrsw (%%rax), %%ymm8, %%ymm7") |
| |
| /* Instructions new in AVX2. */ |
| /* VBROADCASTI128 is tested from memory only. The VEXTRACTI128 mem forms store to (%rax), which the harness points at the a1 field. */ |
| GEN_test_Monly(VBROADCASTI128, |
| "vbroadcasti128 (%%rax), %%ymm9") |
| |
| GEN_test_RandM(VEXTRACTI128_0x0, |
| "vextracti128 $0x0, %%ymm7, %%xmm9", |
| "vextracti128 $0x0, %%ymm7, (%%rax)") |
| |
| GEN_test_RandM(VEXTRACTI128_0x1, |
| "vextracti128 $0x1, %%ymm7, %%xmm9", |
| "vextracti128 $0x1, %%ymm7, (%%rax)") |
| |
| GEN_test_RandM(VINSERTI128_0x0, |
| "vinserti128 $0x0, %%xmm9, %%ymm7, %%ymm8", |
| "vinserti128 $0x0, (%%rax), %%ymm7, %%ymm8") |
| |
| GEN_test_RandM(VINSERTI128_0x1, |
| "vinserti128 $0x1, %%xmm9, %%ymm7, %%ymm8", |
| "vinserti128 $0x1, (%%rax), %%ymm7, %%ymm8") |
| /* VPERM2I128 with eight immediates; the reg and mem forms of each test share the same immediate. */ |
| GEN_test_RandM(VPERM2I128_0x00, |
| "vperm2i128 $0x00, %%ymm6, %%ymm8, %%ymm7", |
| "vperm2i128 $0x00, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPERM2I128_0xFF, |
| "vperm2i128 $0xFF, %%ymm6, %%ymm8, %%ymm7", |
| "vperm2i128 $0xFF, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPERM2I128_0x30, |
| "vperm2i128 $0x30, %%ymm6, %%ymm8, %%ymm7", |
| "vperm2i128 $0x30, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPERM2I128_0x21, |
| "vperm2i128 $0x21, %%ymm6, %%ymm8, %%ymm7", |
| "vperm2i128 $0x21, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPERM2I128_0x12, |
| "vperm2i128 $0x12, %%ymm6, %%ymm8, %%ymm7", |
| "vperm2i128 $0x12, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPERM2I128_0x03, |
| "vperm2i128 $0x03, %%ymm6, %%ymm8, %%ymm7", |
| "vperm2i128 $0x03, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPERM2I128_0x85, |
| "vperm2i128 $0x85, %%ymm6, %%ymm8, %%ymm7", |
| "vperm2i128 $0x85, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPERM2I128_0x5A, |
| "vperm2i128 $0x5A, %%ymm6, %%ymm8, %%ymm7", |
| "vperm2i128 $0x5A, (%%rax), %%ymm8, %%ymm7") |
| /* VBROADCASTSS / VBROADCASTSD are tested here with a register source only. */ |
| GEN_test_Ronly(VBROADCASTSS_128, |
| "vbroadcastss %%xmm9, %%xmm7") |
| |
| GEN_test_Ronly(VBROADCASTSS_256, |
| "vbroadcastss %%xmm9, %%ymm7") |
| |
| GEN_test_Ronly(VBROADCASTSD_256, |
| "vbroadcastsd %%xmm9, %%ymm7") |
| |
| GEN_test_RandM(VPERMD, |
| "vpermd %%ymm6, %%ymm7, %%ymm9", |
| "vpermd (%%rax), %%ymm7, %%ymm9") |
| /* VPERMQ: the test name carries the reg-form immediate; each mem form uses a different immediate. */ |
| GEN_test_RandM(VPERMQ_0x00, |
| "vpermq $0x00, %%ymm6, %%ymm7", |
| "vpermq $0x01, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMQ_0xFE, |
| "vpermq $0xFE, %%ymm6, %%ymm7", |
| "vpermq $0xFF, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMQ_0x30, |
| "vpermq $0x30, %%ymm6, %%ymm7", |
| "vpermq $0x03, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMQ_0x21, |
| "vpermq $0x21, %%ymm6, %%ymm7", |
| "vpermq $0x12, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMQ_0xD7, |
| "vpermq $0xD7, %%ymm6, %%ymm7", |
| "vpermq $0x6C, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMQ_0xB5, |
| "vpermq $0xB5, %%ymm6, %%ymm7", |
| "vpermq $0x4A, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMQ_0x85, |
| "vpermq $0x85, %%ymm6, %%ymm7", |
| "vpermq $0xDC, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMQ_0x29, |
| "vpermq $0x29, %%ymm6, %%ymm7", |
| "vpermq $0x92, (%%rax), %%ymm7") |
| |
| GEN_test_RandM(VPERMPS, |
| "vpermps %%ymm6, %%ymm7, %%ymm9", |
| "vpermps (%%rax), %%ymm7, %%ymm9") |
| /* VPERMPD: same reg/mem immediate pairs as the VPERMQ tests. */ |
| GEN_test_RandM(VPERMPD_0x00, |
| "vpermpd $0x00, %%ymm6, %%ymm7", |
| "vpermpd $0x01, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMPD_0xFE, |
| "vpermpd $0xFE, %%ymm6, %%ymm7", |
| "vpermpd $0xFF, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMPD_0x30, |
| "vpermpd $0x30, %%ymm6, %%ymm7", |
| "vpermpd $0x03, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMPD_0x21, |
| "vpermpd $0x21, %%ymm6, %%ymm7", |
| "vpermpd $0x12, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMPD_0xD7, |
| "vpermpd $0xD7, %%ymm6, %%ymm7", |
| "vpermpd $0x6C, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMPD_0xB5, |
| "vpermpd $0xB5, %%ymm6, %%ymm7", |
| "vpermpd $0x4A, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMPD_0x85, |
| "vpermpd $0x85, %%ymm6, %%ymm7", |
| "vpermpd $0xDC, (%%rax), %%ymm7") |
| GEN_test_RandM(VPERMPD_0x29, |
| "vpermpd $0x29, %%ymm6, %%ymm7", |
| "vpermpd $0x92, (%%rax), %%ymm7") |
| /* VPBLENDD, 128-bit: the reg forms use the even immediates 0x00..0x0E; each mem form uses the next odd value. */ |
| GEN_test_RandM(VPBLENDD_128_0x00, |
| "vpblendd $0x00, %%xmm6, %%xmm8, %%xmm7", |
| "vpblendd $0x01, (%%rax), %%xmm8, %%xmm7") |
| GEN_test_RandM(VPBLENDD_128_0x02, |
| "vpblendd $0x02, %%xmm6, %%xmm8, %%xmm7", |
| "vpblendd $0x03, (%%rax), %%xmm8, %%xmm7") |
| GEN_test_RandM(VPBLENDD_128_0x04, |
| "vpblendd $0x04, %%xmm6, %%xmm8, %%xmm7", |
| "vpblendd $0x05, (%%rax), %%xmm8, %%xmm7") |
| GEN_test_RandM(VPBLENDD_128_0x06, |
| "vpblendd $0x06, %%xmm6, %%xmm8, %%xmm7", |
| "vpblendd $0x07, (%%rax), %%xmm8, %%xmm7") |
| GEN_test_RandM(VPBLENDD_128_0x08, |
| "vpblendd $0x08, %%xmm6, %%xmm8, %%xmm7", |
| "vpblendd $0x09, (%%rax), %%xmm8, %%xmm7") |
| GEN_test_RandM(VPBLENDD_128_0x0A, |
| "vpblendd $0x0A, %%xmm6, %%xmm8, %%xmm7", |
| "vpblendd $0x0B, (%%rax), %%xmm8, %%xmm7") |
| GEN_test_RandM(VPBLENDD_128_0x0C, |
| "vpblendd $0x0C, %%xmm6, %%xmm8, %%xmm7", |
| "vpblendd $0x0D, (%%rax), %%xmm8, %%xmm7") |
| GEN_test_RandM(VPBLENDD_128_0x0E, |
| "vpblendd $0x0E, %%xmm6, %%xmm8, %%xmm7", |
| "vpblendd $0x0F, (%%rax), %%xmm8, %%xmm7") |
| /* VPBLENDD, 256-bit: the test name carries the reg-form immediate; each mem form uses a different immediate. */ |
| GEN_test_RandM(VPBLENDD_256_0x00, |
| "vpblendd $0x00, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendd $0x01, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDD_256_0xFE, |
| "vpblendd $0xFE, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendd $0xFF, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDD_256_0x30, |
| "vpblendd $0x30, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendd $0x03, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDD_256_0x21, |
| "vpblendd $0x21, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendd $0x12, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDD_256_0xD7, |
| "vpblendd $0xD7, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendd $0x6C, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDD_256_0xB5, |
| "vpblendd $0xB5, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendd $0x4A, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDD_256_0x85, |
| "vpblendd $0x85, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendd $0xDC, (%%rax), %%ymm8, %%ymm7") |
| GEN_test_RandM(VPBLENDD_256_0x29, |
| "vpblendd $0x29, %%ymm6, %%ymm8, %%ymm7", |
| "vpblendd $0x92, (%%rax), %%ymm8, %%ymm7") |
| /* Per-element variable shifts. Reg forms reduce every count in xmm6/ymm6 with a shift-left/shift-right pair; mem forms AND selected dwords of the operand at (%rax) in place. */ |
| GEN_test_RandM(VPSLLVD_128, |
| "vpslld $27, %%xmm6, %%xmm6;" |
| "vpsrld $27, %%xmm6, %%xmm6;" |
| "vpsllvd %%xmm6, %%xmm8, %%xmm7", |
| "andl $31, (%%rax);" |
| "andl $31, 4(%%rax);" |
| "andl $31, 8(%%rax);" |
| "vpsllvd (%%rax), %%xmm8, %%xmm7") |
| /* NOTE(review): the dword mem forms never mask offsets 12 and 28, so those counts can exceed 31 - presumably deliberate; confirm. */ |
| GEN_test_RandM(VPSLLVD_256, |
| "vpslld $27, %%ymm6, %%ymm6;" |
| "vpsrld $27, %%ymm6, %%ymm6;" |
| "vpsllvd %%ymm6, %%ymm8, %%ymm7", |
| "andl $31, (%%rax);" |
| "andl $31, 4(%%rax);" |
| "andl $31, 8(%%rax);" |
| "andl $31, 16(%%rax);" |
| "andl $31, 20(%%rax);" |
| "andl $31, 24(%%rax);" |
| "vpsllvd (%%rax), %%ymm8, %%ymm7") |
| /* NOTE(review): in the qword mem forms, andl changes only the low 32 bits of each 64-bit count; the upper 32 bits are left as randomised - confirm this is intended. */ |
| GEN_test_RandM(VPSLLVQ_128, |
| "vpsllq $58, %%xmm6, %%xmm6;" |
| "vpsrlq $58, %%xmm6, %%xmm6;" |
| "vpsllvq %%xmm6, %%xmm8, %%xmm7", |
| "andl $63, (%%rax);" |
| "vpsllvq (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VPSLLVQ_256, |
| "vpsllq $58, %%ymm6, %%ymm6;" |
| "vpsrlq $58, %%ymm6, %%ymm6;" |
| "vpsllvq %%ymm6, %%ymm8, %%ymm7", |
| "andl $63, (%%rax);" |
| "andl $63, 8(%%rax);" |
| "andl $63, 16(%%rax);" |
| "vpsllvq (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSRLVD_128, |
| "vpslld $27, %%xmm6, %%xmm6;" |
| "vpsrld $27, %%xmm6, %%xmm6;" |
| "vpsrlvd %%xmm6, %%xmm8, %%xmm7", |
| "andl $31, (%%rax);" |
| "andl $31, 4(%%rax);" |
| "andl $31, 8(%%rax);" |
| "vpsrlvd (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VPSRLVD_256, |
| "vpslld $27, %%ymm6, %%ymm6;" |
| "vpsrld $27, %%ymm6, %%ymm6;" |
| "vpsrlvd %%ymm6, %%ymm8, %%ymm7", |
| "andl $31, (%%rax);" |
| "andl $31, 4(%%rax);" |
| "andl $31, 8(%%rax);" |
| "andl $31, 16(%%rax);" |
| "andl $31, 20(%%rax);" |
| "andl $31, 24(%%rax);" |
| "vpsrlvd (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSRLVQ_128, |
| "vpsllq $58, %%xmm6, %%xmm6;" |
| "vpsrlq $58, %%xmm6, %%xmm6;" |
| "vpsrlvq %%xmm6, %%xmm8, %%xmm7", |
| "andl $63, (%%rax);" |
| "vpsrlvq (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VPSRLVQ_256, |
| "vpsllq $58, %%ymm6, %%ymm6;" |
| "vpsrlq $58, %%ymm6, %%ymm6;" |
| "vpsrlvq %%ymm6, %%ymm8, %%ymm7", |
| "andl $63, (%%rax);" |
| "andl $63, 8(%%rax);" |
| "andl $63, 16(%%rax);" |
| "vpsrlvq (%%rax), %%ymm8, %%ymm7") |
| |
| GEN_test_RandM(VPSRAVD_128, |
| "vpslld $27, %%xmm6, %%xmm6;" |
| "vpsrld $27, %%xmm6, %%xmm6;" |
| "vpsravd %%xmm6, %%xmm8, %%xmm7", |
| "andl $31, (%%rax);" |
| "andl $31, 4(%%rax);" |
| "andl $31, 8(%%rax);" |
| "vpsravd (%%rax), %%xmm8, %%xmm7") |
| |
| GEN_test_RandM(VPSRAVD_256, |
| "vpslld $27, %%ymm6, %%ymm6;" |
| "vpsrld $27, %%ymm6, %%ymm6;" |
| "vpsravd %%ymm6, %%ymm8, %%ymm7", |
| "andl $31, (%%rax);" |
| "andl $31, 4(%%rax);" |
| "andl $31, 8(%%rax);" |
| "andl $31, 16(%%rax);" |
| "andl $31, 20(%%rax);" |
| "andl $31, 24(%%rax);" |
| "vpsravd (%%rax), %%ymm8, %%ymm7") |
| /* VPBROADCAST{B,W,D,Q}: the source is xmm9 (reg form) or memory at (%rax) (mem form); the destination is xmm7 or ymm7. */ |
| GEN_test_RandM(VPBROADCASTB_128, |
| "vpbroadcastb %%xmm9, %%xmm7", |
| "vpbroadcastb (%%rax), %%xmm7") |
| |
| GEN_test_RandM(VPBROADCASTB_256, |
| "vpbroadcastb %%xmm9, %%ymm7", |
| "vpbroadcastb (%%rax), %%ymm7") |
| |
| GEN_test_RandM(VPBROADCASTW_128, |
| "vpbroadcastw %%xmm9, %%xmm7", |
| "vpbroadcastw (%%rax), %%xmm7") |
| |
| GEN_test_RandM(VPBROADCASTW_256, |
| "vpbroadcastw %%xmm9, %%ymm7", |
| "vpbroadcastw (%%rax), %%ymm7") |
| |
| GEN_test_RandM(VPBROADCASTD_128, |
| "vpbroadcastd %%xmm9, %%xmm7", |
| "vpbroadcastd (%%rax), %%xmm7") |
| |
| GEN_test_RandM(VPBROADCASTD_256, |
| "vpbroadcastd %%xmm9, %%ymm7", |
| "vpbroadcastd (%%rax), %%ymm7") |
| |
| GEN_test_RandM(VPBROADCASTQ_128, |
| "vpbroadcastq %%xmm9, %%xmm7", |
| "vpbroadcastq (%%rax), %%xmm7") |
| |
| GEN_test_RandM(VPBROADCASTQ_256, |
| "vpbroadcastq %%xmm9, %%ymm7", |
| "vpbroadcastq (%%rax), %%ymm7") |
| /* VPMASKMOV loads and stores. Each test does one masked access at (%rax) with a randomised mask register, then clears xmm6/ymm6 and repeats with that all-zero mask at address (%rax,%rax,4). */ |
| GEN_test_Monly(VPMASKMOVD_128_LoadForm, |
| "vpmaskmovd (%%rax), %%xmm8, %%xmm7;" |
| "vxorps %%xmm6, %%xmm6, %%xmm6;" |
| "vpmaskmovd (%%rax,%%rax,4), %%xmm6, %%xmm9") |
| |
| GEN_test_Monly(VPMASKMOVD_256_LoadForm, |
| "vpmaskmovd (%%rax), %%ymm8, %%ymm7;" |
| "vxorps %%ymm6, %%ymm6, %%ymm6;" |
| "vpmaskmovd (%%rax,%%rax,4), %%ymm6, %%ymm9") |
| |
| GEN_test_Monly(VPMASKMOVQ_128_LoadForm, |
| "vpmaskmovq (%%rax), %%xmm8, %%xmm7;" |
| "vxorpd %%xmm6, %%xmm6, %%xmm6;" |
| "vpmaskmovq (%%rax,%%rax,4), %%xmm6, %%xmm9") |
| |
| GEN_test_Monly(VPMASKMOVQ_256_LoadForm, |
| "vpmaskmovq (%%rax), %%ymm8, %%ymm7;" |
| "vxorpd %%ymm6, %%ymm6, %%ymm6;" |
| "vpmaskmovq (%%rax,%%rax,4), %%ymm6, %%ymm9") |
| /* NOTE(review): (%rax,%rax,4) is 5*rax, not a location inside the Block; with an all-zero mask no element should be accessed - presumably that is what these second accesses check. */ |
| GEN_test_Monly(VPMASKMOVD_128_StoreForm, |
| "vpmaskmovd %%xmm8, %%xmm7, (%%rax);" |
| "vxorps %%xmm6, %%xmm6, %%xmm6;" |
| "vpmaskmovd %%xmm9, %%xmm6, (%%rax,%%rax,4)") |
| |
| GEN_test_Monly(VPMASKMOVD_256_StoreForm, |
| "vpmaskmovd %%ymm8, %%ymm7, (%%rax);" |
| "vxorps %%ymm6, %%ymm6, %%ymm6;" |
| "vpmaskmovd %%ymm9, %%ymm6, (%%rax,%%rax,4)") |
| |
| GEN_test_Monly(VPMASKMOVQ_128_StoreForm, |
| "vpmaskmovq %%xmm8, %%xmm7, (%%rax);" |
| "vxorpd %%xmm6, %%xmm6, %%xmm6;" |
| "vpmaskmovq %%xmm9, %%xmm6, (%%rax,%%rax,4)") |
| |
| GEN_test_Monly(VPMASKMOVQ_256_StoreForm, |
| "vpmaskmovq %%ymm8, %%ymm7, (%%rax);" |
| "vxorpd %%ymm6, %%ymm6, %%ymm6;" |
| "vpmaskmovq %%ymm9, %%ymm6, (%%rax,%%rax,4)") |
| |
| GEN_test_Ronly(VGATHERDPS_128, |
| "vpslld $25, %%xmm7, %%xmm8;" |
| "vpsrld $25, %%xmm8, %%xmm8;" |
| "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;" |
| "leaq randArray(%%rip), %%r14;" |
| "vgatherdps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;" |
| "xorl %%r14d, %%r14d") |
| |
| GEN_test_Ronly(VGATHERDPS_256, |
| "vpslld $25, %%ymm7, %%ymm8;" |
| "vpsrld $25, %%ymm8, %%ymm8;" |
| "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;" |
| "leaq randArray(%%rip), %%r14;" |
| "vgatherdps %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;" |
| "xorl %%r14d, %%r14d") |
| |
| GEN_test_Ronly(VGATHERQPS_128_1, |
| "vpsllq $57, %%xmm7, %%xmm8;" |
| "vpsrlq $57, %%xmm8, %%xmm8;" |
| "vpmovsxdq %%xmm6, %%xmm9;" |
| "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" |
| "vmovdqa 96(%0), %%ymm9;" |
| "leaq randArray(%%rip), %%r14;" |
| "vgatherqps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;" |
| "xorl %%r14d, %%r14d") |
| |
| GEN_test_Ronly(VGATHERQPS_256_1, |
| "vpsllq $57, %%ymm7, %%ymm8;" |
| "vpsrlq $57, %%ymm8, %%ymm8;" |
| "vpmovsxdq %%xmm6, %%ymm9;" |
| "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" |
| "vmovdqa 96(%0), %%ymm9;" |
| "leaq randArray(%%rip), %%r14;" |
| "vgatherqps %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;" |
| "xorl %%r14d, %%r14d") |
| |
| GEN_test_Ronly(VGATHERQPS_128_2, |
| "vpsllq $57, %%xmm7, %%xmm8;" |
| "vpsrlq $57, %%xmm8, %%xmm8;" |
| "vpmovsxdq %%xmm6, %%xmm9;" |
| "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" |
| "vmovdqa 96(%0), %%ymm9;" |
| "leaq randArray(%%rip), %%r14;" |
| "vmovq %%r14, %%xmm7;" |
| "vpsllq $2, %%xmm8, %%xmm8;" |
| "vpbroadcastq %%xmm7, %%xmm7;" |
| "vpaddq %%xmm7, %%xmm8, %%xmm8;" |
| "vgatherqps %%xmm6, 1(,%%xmm8,1), %%xmm9;" |
| "vpsubq %%xmm7, %%xmm8, %%xmm8;" |
| "vmovdqa 0(%0), %%ymm7;" |
| "xorl %%r14d, %%r14d") |
| |
| GEN_test_Ronly(VGATHERQPS_256_2, /* defines test_VGATHERQPS_256_2: like the _1 form but with no base register; the index lanes carry absolute addresses */ |
| "vpsllq $57, %%ymm7, %%ymm8;" /* together with the next shift: keep only the low 7 bits (0..127) of each qword of ymm7 */ |
| "vpsrlq $57, %%ymm8, %%ymm8;" /* ymm8 now holds the bounded indices */ |
| "vpmovsxdq %%xmm6, %%ymm9;" /* sign-extend the four dword mask elements of xmm6 to qwords so they line up with the qword index lanes */ |
| "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" /* lanes whose widened mask sign bit is set take the bounded index; the others keep the raw ymm7 value */ |
| "vmovdqa 96(%0), %%ymm9;" /* reload ymm9 (scratch above) from offset 96 of asm operand 0; presumably the Block's a4 field -- confirm in GEN_test_Ronly */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray */ |
| "vmovq %%r14, %%xmm7;" /* copy that address into the low qword of xmm7 (overwrites the test value in ymm7) */ |
| "vpsllq $2, %%ymm8, %%ymm8;" /* multiply every qword lane of ymm8 by 4 */ |
| "vpbroadcastq %%xmm7, %%ymm7;" /* replicate the randArray address into all four qword lanes */ |
| "vpaddq %%ymm7, %%ymm8, %%ymm8;" /* add the address to every lane, giving absolute addresses */ |
| "vgatherqps %%xmm6, 1(,%%ymm8,1), %%xmm9;" /* insn under test: mask xmm6, address 1 + lane value (no base, scale 1), destination xmm9 */ |
| "vpsubq %%ymm7, %%ymm8, %%ymm8;" /* subtract the address again so ymm8 no longer depends on where randArray lives */ |
| "vmovdqa 0(%0), %%ymm7;" /* reload ymm7 from offset 0 of asm operand 0; presumably the Block's a1 field -- confirm in GEN_test_Ronly */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VGATHERDPD_128, /* defines test_VGATHERDPD_128: masked gather of two 64-bit elements via dword indices, base register plus scaled index */ |
| "vpslld $26, %%xmm7, %%xmm8;" /* together with the next shift: keep only the low 6 bits (0..63) of each dword of xmm7 */ |
| "vpsrld $26, %%xmm8, %%xmm8;" /* xmm8 now holds the bounded indices */ |
| "vshufps $13, %%xmm6, %%xmm6, %%xmm9;" /* xmm9 dwords 0,1 = dwords 1,3 of xmm6, i.e. the sign-carrying upper halves of the two qword mask elements */ |
| "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;" /* dword lanes whose xmm9 sign bit is set take the bounded index; the others keep the raw xmm7 value */ |
| "vmovdqa 96(%0), %%ymm9;" /* reload ymm9 (scratch above) from offset 96 of asm operand 0; presumably the Block's a4 field -- confirm in GEN_test_Ronly */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray, used as the gather base */ |
| "vgatherdpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;" /* insn under test: mask xmm6, address 3 + r14 + 8*index, destination xmm9 */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VGATHERDPD_256, /* defines test_VGATHERDPD_256: masked gather of four 64-bit elements via dword indices in xmm8, base register plus scaled index */ |
| "vpslld $26, %%ymm7, %%ymm8;" /* together with the next shift: keep only the low 6 bits (0..63) of each dword of ymm7 */ |
| "vpsrld $26, %%ymm8, %%ymm8;" /* ymm8 now holds the bounded indices */ |
| "vextracti128 $1, %%ymm6, %%xmm9;" /* xmm9 = upper 128 bits of the mask ymm6 (qword mask elements 2 and 3) */ |
| "vshufps $221, %%ymm9, %%ymm6, %%ymm9;" /* low 128 bits of ymm9 = dwords 1,3 of ymm6's low half then dwords 1,3 of xmm9: the upper halves of all four qword mask elements */ |
| "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;" /* dword lanes whose ymm9 sign bit is set take the bounded index; the others keep the raw ymm7 value */ |
| "vmovdqa 96(%0), %%ymm9;" /* reload ymm9 (scratch above) from offset 96 of asm operand 0; presumably the Block's a4 field -- confirm in GEN_test_Ronly */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray, used as the gather base */ |
| "vgatherdpd %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;" /* insn under test: mask ymm6, address 3 + r14 + 8*index (indices from xmm8), destination ymm9 */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VGATHERQPD_128_1, /* defines test_VGATHERQPD_128_1: masked gather of two 64-bit elements via qword indices, base register plus scaled index */ |
| "vpsllq $58, %%xmm7, %%xmm8;" /* together with the next shift: keep only the low 6 bits (0..63) of each qword of xmm7 */ |
| "vpsrlq $58, %%xmm8, %%xmm8;" /* xmm8 now holds the bounded indices */ |
| "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;" /* lanes whose xmm6 sign bit is set take the bounded index; the others keep the raw xmm7 value */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray, used as the gather base */ |
| "vgatherqpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;" /* insn under test: mask xmm6, address 3 + r14 + 8*index, destination xmm9 */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VGATHERQPD_256_1, /* defines test_VGATHERQPD_256_1: masked gather of four 64-bit elements via qword indices, base register plus scaled index */ |
| "vpsllq $58, %%ymm7, %%ymm8;" /* together with the next shift: keep only the low 6 bits (0..63) of each qword of ymm7 */ |
| "vpsrlq $58, %%ymm8, %%ymm8;" /* ymm8 now holds the bounded indices */ |
| "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;" /* lanes whose ymm6 sign bit is set take the bounded index; the others keep the raw ymm7 value */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray, used as the gather base */ |
| "vgatherqpd %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;" /* insn under test: mask ymm6, address 3 + r14 + 8*index, destination ymm9 */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VGATHERQPD_128_2, /* defines test_VGATHERQPD_128_2: like the _1 form but with no base register; the index lanes carry absolute addresses */ |
| "vpsllq $58, %%xmm7, %%xmm8;" /* together with the next shift: keep only the low 6 bits (0..63) of each qword of xmm7 */ |
| "vpsrlq $58, %%xmm8, %%xmm8;" /* xmm8 now holds the bounded indices */ |
| "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;" /* lanes whose xmm6 sign bit is set take the bounded index; the others keep the raw xmm7 value */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray */ |
| "vmovq %%r14, %%xmm7;" /* copy that address into the low qword of xmm7 (overwrites the test value in ymm7) */ |
| "vpsllq $2, %%xmm8, %%xmm8;" /* multiply every qword lane of xmm8 by 4 */ |
| "vpbroadcastq %%xmm7, %%xmm7;" /* replicate the randArray address into both qword lanes */ |
| "vpaddq %%xmm7, %%xmm8, %%xmm8;" /* add the address to every lane, giving absolute addresses */ |
| "vgatherqpd %%xmm6, 1(,%%xmm8,1), %%xmm9;" /* insn under test: mask xmm6, address 1 + lane value (no base, scale 1), destination xmm9 */ |
| "vpsubq %%xmm7, %%xmm8, %%xmm8;" /* subtract the address again so xmm8 no longer depends on where randArray lives */ |
| "vmovdqa 0(%0), %%ymm7;" /* reload ymm7 from offset 0 of asm operand 0; presumably the Block's a1 field -- confirm in GEN_test_Ronly */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VGATHERQPD_256_2, /* defines test_VGATHERQPD_256_2: like the _1 form but with no base register; the index lanes carry absolute addresses */ |
| "vpsllq $58, %%ymm7, %%ymm8;" /* together with the next shift: keep only the low 6 bits (0..63) of each qword of ymm7 */ |
| "vpsrlq $58, %%ymm8, %%ymm8;" /* ymm8 now holds the bounded indices */ |
| "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;" /* lanes whose ymm6 sign bit is set take the bounded index; the others keep the raw ymm7 value */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray */ |
| "vmovq %%r14, %%xmm7;" /* copy that address into the low qword of xmm7 (overwrites the test value in ymm7) */ |
| "vpsllq $2, %%ymm8, %%ymm8;" /* multiply every qword lane of ymm8 by 4 */ |
| "vpbroadcastq %%xmm7, %%ymm7;" /* replicate the randArray address into all four qword lanes */ |
| "vpaddq %%ymm7, %%ymm8, %%ymm8;" /* add the address to every lane, giving absolute addresses */ |
| "vgatherqpd %%ymm6, 1(,%%ymm8,1), %%ymm9;" /* insn under test: mask ymm6, address 1 + lane value (no base, scale 1), destination ymm9 */ |
| "vpsubq %%ymm7, %%ymm8, %%ymm8;" /* subtract the address again so ymm8 no longer depends on where randArray lives */ |
| "vmovdqa 0(%0), %%ymm7;" /* reload ymm7 from offset 0 of asm operand 0; presumably the Block's a1 field -- confirm in GEN_test_Ronly */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VPGATHERDD_128, /* defines test_VPGATHERDD_128: masked gather of four 32-bit elements via dword indices, base register plus scaled index */ |
| "vpslld $25, %%xmm7, %%xmm8;" /* together with the next shift: keep only the low 7 bits (0..127) of each dword of xmm7 */ |
| "vpsrld $25, %%xmm8, %%xmm8;" /* xmm8 now holds the bounded indices */ |
| "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;" /* lanes whose xmm6 sign bit is set take the bounded index; the others keep the raw xmm7 value */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray, used as the gather base */ |
| "vpgatherdd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;" /* insn under test: mask xmm6, address 3 + r14 + 4*index, destination xmm9 */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VPGATHERDD_256, /* defines test_VPGATHERDD_256: masked gather of eight 32-bit elements via dword indices, base register plus scaled index */ |
| "vpslld $25, %%ymm7, %%ymm8;" /* together with the next shift: keep only the low 7 bits (0..127) of each dword of ymm7 */ |
| "vpsrld $25, %%ymm8, %%ymm8;" /* ymm8 now holds the bounded indices */ |
| "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;" /* lanes whose ymm6 sign bit is set take the bounded index; the others keep the raw ymm7 value */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray, used as the gather base */ |
| "vpgatherdd %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;" /* insn under test: mask ymm6, address 3 + r14 + 4*index, destination ymm9 */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VPGATHERQD_128_1, /* defines test_VPGATHERQD_128_1: masked gather of two 32-bit elements via qword indices, base register plus scaled index */ |
| "vpsllq $57, %%xmm7, %%xmm8;" /* together with the next shift: keep only the low 7 bits (0..127) of each qword of xmm7 */ |
| "vpsrlq $57, %%xmm8, %%xmm8;" /* xmm8 now holds the bounded indices */ |
| "vpmovsxdq %%xmm6, %%xmm9;" /* sign-extend the low two dword mask elements of xmm6 to qwords so they line up with the qword index lanes */ |
| "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" /* lanes whose widened mask sign bit is set take the bounded index; the others keep the raw xmm7 value */ |
| "vmovdqa 96(%0), %%ymm9;" /* reload ymm9 (scratch above) from offset 96 of asm operand 0; presumably the Block's a4 field -- confirm in GEN_test_Ronly */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray, used as the gather base */ |
| "vpgatherqd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;" /* insn under test: mask xmm6, address 3 + r14 + 4*index, destination xmm9 */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VPGATHERQD_256_1, /* defines test_VPGATHERQD_256_1: masked gather of four 32-bit elements via qword indices in ymm8, base register plus scaled index */ |
| "vpsllq $57, %%ymm7, %%ymm8;" /* together with the next shift: keep only the low 7 bits (0..127) of each qword of ymm7 */ |
| "vpsrlq $57, %%ymm8, %%ymm8;" /* ymm8 now holds the bounded indices */ |
| "vpmovsxdq %%xmm6, %%ymm9;" /* sign-extend the four dword mask elements of xmm6 to qwords so they line up with the qword index lanes */ |
| "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" /* lanes whose widened mask sign bit is set take the bounded index; the others keep the raw ymm7 value */ |
| "vmovdqa 96(%0), %%ymm9;" /* reload ymm9 (scratch above) from offset 96 of asm operand 0; presumably the Block's a4 field -- confirm in GEN_test_Ronly */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray, used as the gather base */ |
| "vpgatherqd %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;" /* insn under test: mask xmm6, address 3 + r14 + 4*index, destination xmm9 */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VPGATHERQD_128_2, /* defines test_VPGATHERQD_128_2: like the _1 form but with no base register; the index lanes carry absolute addresses */ |
| "vpsllq $57, %%xmm7, %%xmm8;" /* together with the next shift: keep only the low 7 bits (0..127) of each qword of xmm7 */ |
| "vpsrlq $57, %%xmm8, %%xmm8;" /* xmm8 now holds the bounded indices */ |
| "vpmovsxdq %%xmm6, %%xmm9;" /* sign-extend the low two dword mask elements of xmm6 to qwords so they line up with the qword index lanes */ |
| "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" /* lanes whose widened mask sign bit is set take the bounded index; the others keep the raw xmm7 value */ |
| "vmovdqa 96(%0), %%ymm9;" /* reload ymm9 (scratch above) from offset 96 of asm operand 0; presumably the Block's a4 field -- confirm in GEN_test_Ronly */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray */ |
| "vmovq %%r14, %%xmm7;" /* copy that address into the low qword of xmm7 (overwrites the test value in ymm7) */ |
| "vpsllq $2, %%xmm8, %%xmm8;" /* multiply every qword lane of xmm8 by 4 */ |
| "vpbroadcastq %%xmm7, %%xmm7;" /* replicate the randArray address into both qword lanes */ |
| "vpaddq %%xmm7, %%xmm8, %%xmm8;" /* add the address to every lane, giving absolute addresses */ |
| "vpgatherqd %%xmm6, 1(,%%xmm8,1), %%xmm9;" /* insn under test: mask xmm6, address 1 + lane value (no base, scale 1), destination xmm9 */ |
| "vpsubq %%xmm7, %%xmm8, %%xmm8;" /* subtract the address again so xmm8 no longer depends on where randArray lives */ |
| "vmovdqa 0(%0), %%ymm7;" /* reload ymm7 from offset 0 of asm operand 0; presumably the Block's a1 field -- confirm in GEN_test_Ronly */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VPGATHERQD_256_2, /* defines test_VPGATHERQD_256_2: like the _1 form but with no base register; the index lanes carry absolute addresses */ |
| "vpsllq $57, %%ymm7, %%ymm8;" /* together with the next shift: keep only the low 7 bits (0..127) of each qword of ymm7 */ |
| "vpsrlq $57, %%ymm8, %%ymm8;" /* ymm8 now holds the bounded indices */ |
| "vpmovsxdq %%xmm6, %%ymm9;" /* sign-extend the four dword mask elements of xmm6 to qwords so they line up with the qword index lanes */ |
| "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" /* lanes whose widened mask sign bit is set take the bounded index; the others keep the raw ymm7 value */ |
| "vmovdqa 96(%0), %%ymm9;" /* reload ymm9 (scratch above) from offset 96 of asm operand 0; presumably the Block's a4 field -- confirm in GEN_test_Ronly */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray */ |
| "vmovq %%r14, %%xmm7;" /* copy that address into the low qword of xmm7 (overwrites the test value in ymm7) */ |
| "vpsllq $2, %%ymm8, %%ymm8;" /* multiply every qword lane of ymm8 by 4 */ |
| "vpbroadcastq %%xmm7, %%ymm7;" /* replicate the randArray address into all four qword lanes */ |
| "vpaddq %%ymm7, %%ymm8, %%ymm8;" /* add the address to every lane, giving absolute addresses */ |
| "vpgatherqd %%xmm6, 1(,%%ymm8,1), %%xmm9;" /* insn under test: mask xmm6, address 1 + lane value (no base, scale 1), destination xmm9 */ |
| "vpsubq %%ymm7, %%ymm8, %%ymm8;" /* subtract the address again so ymm8 no longer depends on where randArray lives */ |
| "vmovdqa 0(%0), %%ymm7;" /* reload ymm7 from offset 0 of asm operand 0; presumably the Block's a1 field -- confirm in GEN_test_Ronly */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VPGATHERDQ_128, /* defines test_VPGATHERDQ_128: masked gather of two 64-bit elements via dword indices, base register plus scaled index */ |
| "vpslld $26, %%xmm7, %%xmm8;" /* together with the next shift: keep only the low 6 bits (0..63) of each dword of xmm7 */ |
| "vpsrld $26, %%xmm8, %%xmm8;" /* xmm8 now holds the bounded indices */ |
| "vshufps $13, %%xmm6, %%xmm6, %%xmm9;" /* xmm9 dwords 0,1 = dwords 1,3 of xmm6, i.e. the sign-carrying upper halves of the two qword mask elements */ |
| "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;" /* dword lanes whose xmm9 sign bit is set take the bounded index; the others keep the raw xmm7 value */ |
| "vmovdqa 96(%0), %%ymm9;" /* reload ymm9 (scratch above) from offset 96 of asm operand 0; presumably the Block's a4 field -- confirm in GEN_test_Ronly */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray, used as the gather base */ |
| "vpgatherdq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;" /* insn under test: mask xmm6, address 3 + r14 + 8*index, destination xmm9 */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VPGATHERDQ_256, /* defines test_VPGATHERDQ_256: masked gather of four 64-bit elements via dword indices in xmm8, base register plus scaled index */ |
| "vpslld $26, %%ymm7, %%ymm8;" /* together with the next shift: keep only the low 6 bits (0..63) of each dword of ymm7 */ |
| "vpsrld $26, %%ymm8, %%ymm8;" /* ymm8 now holds the bounded indices */ |
| "vextracti128 $1, %%ymm6, %%xmm9;" /* xmm9 = upper 128 bits of the mask ymm6 (qword mask elements 2 and 3) */ |
| "vshufps $221, %%ymm9, %%ymm6, %%ymm9;" /* low 128 bits of ymm9 = dwords 1,3 of ymm6's low half then dwords 1,3 of xmm9: the upper halves of all four qword mask elements */ |
| "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;" /* dword lanes whose ymm9 sign bit is set take the bounded index; the others keep the raw ymm7 value */ |
| "vmovdqa 96(%0), %%ymm9;" /* reload ymm9 (scratch above) from offset 96 of asm operand 0; presumably the Block's a4 field -- confirm in GEN_test_Ronly */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray, used as the gather base */ |
| "vpgatherdq %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;" /* insn under test: mask ymm6, address 3 + r14 + 8*index (indices from xmm8), destination ymm9 */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VPGATHERQQ_128_1, /* defines test_VPGATHERQQ_128_1: masked gather of two 64-bit elements via qword indices, base register plus scaled index */ |
| "vpsllq $58, %%xmm7, %%xmm8;" /* together with the next shift: keep only the low 6 bits (0..63) of each qword of xmm7 */ |
| "vpsrlq $58, %%xmm8, %%xmm8;" /* xmm8 now holds the bounded indices */ |
| "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;" /* lanes whose xmm6 sign bit is set take the bounded index; the others keep the raw xmm7 value */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray, used as the gather base */ |
| "vpgatherqq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;" /* insn under test: mask xmm6, address 3 + r14 + 8*index, destination xmm9 */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VPGATHERQQ_256_1, /* defines test_VPGATHERQQ_256_1: masked gather of four 64-bit elements via qword indices, base register plus scaled index */ |
| "vpsllq $58, %%ymm7, %%ymm8;" /* together with the next shift: keep only the low 6 bits (0..63) of each qword of ymm7 */ |
| "vpsrlq $58, %%ymm8, %%ymm8;" /* ymm8 now holds the bounded indices */ |
| "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;" /* lanes whose ymm6 sign bit is set take the bounded index; the others keep the raw ymm7 value */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray, used as the gather base */ |
| "vpgatherqq %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;" /* insn under test: mask ymm6, address 3 + r14 + 8*index, destination ymm9 */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VPGATHERQQ_128_2, /* defines test_VPGATHERQQ_128_2: like the _1 form but with no base register; the index lanes carry absolute addresses */ |
| "vpsllq $58, %%xmm7, %%xmm8;" /* together with the next shift: keep only the low 6 bits (0..63) of each qword of xmm7 */ |
| "vpsrlq $58, %%xmm8, %%xmm8;" /* xmm8 now holds the bounded indices */ |
| "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;" /* lanes whose xmm6 sign bit is set take the bounded index; the others keep the raw xmm7 value */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray */ |
| "vmovq %%r14, %%xmm7;" /* copy that address into the low qword of xmm7 (overwrites the test value in ymm7) */ |
| "vpsllq $2, %%xmm8, %%xmm8;" /* multiply every qword lane of xmm8 by 4 */ |
| "vpbroadcastq %%xmm7, %%xmm7;" /* replicate the randArray address into both qword lanes */ |
| "vpaddq %%xmm7, %%xmm8, %%xmm8;" /* add the address to every lane, giving absolute addresses */ |
| "vpgatherqq %%xmm6, 1(,%%xmm8,1), %%xmm9;" /* insn under test: mask xmm6, address 1 + lane value (no base, scale 1), destination xmm9 */ |
| "vpsubq %%xmm7, %%xmm8, %%xmm8;" /* subtract the address again so xmm8 no longer depends on where randArray lives */ |
| "vmovdqa 0(%0), %%ymm7;" /* reload ymm7 from offset 0 of asm operand 0; presumably the Block's a1 field -- confirm in GEN_test_Ronly */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| GEN_test_Ronly(VPGATHERQQ_256_2, /* defines test_VPGATHERQQ_256_2: like the _1 form but with no base register; the index lanes carry absolute addresses */ |
| "vpsllq $58, %%ymm7, %%ymm8;" /* together with the next shift: keep only the low 6 bits (0..63) of each qword of ymm7 */ |
| "vpsrlq $58, %%ymm8, %%ymm8;" /* ymm8 now holds the bounded indices */ |
| "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;" /* lanes whose ymm6 sign bit is set take the bounded index; the others keep the raw ymm7 value */ |
| "leaq randArray(%%rip), %%r14;" /* r14 = address of randArray */ |
| "vmovq %%r14, %%xmm7;" /* copy that address into the low qword of xmm7 (overwrites the test value in ymm7) */ |
| "vpsllq $2, %%ymm8, %%ymm8;" /* multiply every qword lane of ymm8 by 4 */ |
| "vpbroadcastq %%xmm7, %%ymm7;" /* replicate the randArray address into all four qword lanes */ |
| "vpaddq %%ymm7, %%ymm8, %%ymm8;" /* add the address to every lane, giving absolute addresses */ |
| "vpgatherqq %%ymm6, 1(,%%ymm8,1), %%ymm9;" /* insn under test: mask ymm6, address 1 + lane value (no base, scale 1), destination ymm9 */ |
| "vpsubq %%ymm7, %%ymm8, %%ymm8;" /* subtract the address again so ymm8 no longer depends on where randArray lives */ |
| "vmovdqa 0(%0), %%ymm7;" /* reload ymm7 from offset 0 of asm operand 0; presumably the Block's a1 field -- confirm in GEN_test_Ronly */ |
| "xorl %%r14d, %%r14d") /* zero r14, which held the randArray address */ |
| |
| /* Comment duplicated above, for convenient reference: |
| Allowed operands in test insns: |
| Reg form: %ymm6, %ymm7, %ymm8, %ymm9 and %r14. |
| Mem form: (%rax), %ymm7, %ymm8, %ymm9 and %r14. |
| Imm8 etc fields are also allowed, where they make sense. |
| Both forms may use ymm0 as scratch. Mem form may also use |
| ymm6 as scratch. |
| */ |
| |
| /* How many times DO_D runs each test. */ |
| #define N_DEFAULT_ITERS 3 |
| |
| /* Call test_<_testfn>() back to back, _iters times. */ |
| #define DO_N(_iters, _testfn) \ |
| do { \ |
| int n_done; \ |
| for (n_done = 0; n_done < (_iters); n_done++) \ |
| test_##_testfn(); \ |
| } while (0) |
| |
| /* Call test_<_testfn>() N_DEFAULT_ITERS times. */ |
| #define DO_D(_testfn) DO_N(N_DEFAULT_ITERS, _testfn) |
| |
| |
| int main ( void ) /* Test driver: runs every test_<NAME> routine N_DEFAULT_ITERS times, in the fixed order listed below; always returns 0. */ |
| { |
| DO_D( VPOR_256 ); /* DO_D(X) expands to a loop that calls test_X() N_DEFAULT_ITERS times */ |
| DO_D( VPXOR_256 ); |
| DO_D( VPSUBB_256 ); |
| DO_D( VPSUBD_256 ); |
| DO_D( VPADDD_256 ); |
| DO_D( VPMOVZXWD_256 ); |
| DO_D( VPMOVZXBW_256 ); |
| DO_D( VPBLENDVB_256 ); |
| DO_D( VPMINSD_256 ); |
| DO_D( VPMAXSD_256 ); |
| DO_D( VPSHUFB_256 ); |
| DO_D( VPUNPCKLBW_256 ); |
| DO_D( VPUNPCKHBW_256 ); |
| DO_D( VPABSD_256 ); |
| DO_D( VPACKUSWB_256 ); |
| DO_D( VPMOVMSKB_256 ); |
| DO_D( VPAND_256 ); |
| DO_D( VPCMPEQB_256 ); |
| DO_D( VPSHUFLW_0x39_256 ); |
| DO_D( VPSHUFHW_0x39_256 ); |
| DO_D( VPMULLW_256 ); |
| DO_D( VPADDUSW_256 ); |
| DO_D( VPMULHUW_256 ); |
| DO_D( VPADDUSB_256 ); |
| DO_D( VPUNPCKLWD_256 ); |
| DO_D( VPUNPCKHWD_256 ); |
| DO_D( VPSLLD_0x05_256 ); |
| DO_D( VPSRLD_0x05_256 ); |
| DO_D( VPSRAD_0x05_256 ); |
| DO_D( VPSUBUSB_256 ); |
| DO_D( VPSUBSB_256 ); |
| DO_D( VPSRLDQ_0x05_256 ); |
| DO_D( VPSLLDQ_0x05_256 ); |
| DO_D( VPANDN_256 ); |
| DO_D( VPUNPCKLQDQ_256 ); |
| DO_D( VPSRLW_0x05_256 ); |
| DO_D( VPSLLW_0x05_256 ); |
| DO_D( VPADDW_256 ); |
| DO_D( VPACKSSDW_256 ); |
| DO_D( VPUNPCKLDQ_256 ); |
| DO_D( VPCMPEQD_256 ); |
| DO_D( VPSHUFD_0x39_256 ); |
| DO_D( VPADDQ_256 ); |
| DO_D( VPSUBQ_256 ); |
| DO_D( VPSUBW_256 ); |
| DO_D( VPCMPEQQ_256 ); |
| DO_D( VPCMPGTQ_256 ); |
| DO_D( VPSRLQ_0x05_256 ); |
| DO_D( VPMULUDQ_256 ); |
| DO_D( VPMULDQ_256 ); |
| DO_D( VPSLLQ_0x05_256 ); |
| DO_D( VPMAXUD_256 ); |
| DO_D( VPMINUD_256 ); |
| DO_D( VPMULLD_256 ); |
| DO_D( VPMAXUW_256 ); |
| DO_D( VPMINUW_256 ); |
| DO_D( VPMAXSW_256 ); |
| DO_D( VPMINSW_256 ); |
| DO_D( VPMAXUB_256 ); |
| DO_D( VPMINUB_256 ); |
| DO_D( VPMAXSB_256 ); |
| DO_D( VPMINSB_256 ); |
| DO_D( VPMOVSXBW_256 ); |
| DO_D( VPSUBUSW_256 ); |
| DO_D( VPSUBSW_256 ); |
| DO_D( VPCMPEQW_256 ); |
| DO_D( VPADDB_256 ); |
| DO_D( VPUNPCKHDQ_256 ); |
| DO_D( VPMOVSXDQ_256 ); |
| DO_D( VPMOVSXWD_256 ); |
| DO_D( VPMULHW_256 ); |
| DO_D( VPUNPCKHQDQ_256 ); |
| DO_D( VPSRAW_0x05_256 ); |
| DO_D( VPCMPGTB_256 ); |
| DO_D( VPCMPGTW_256 ); |
| DO_D( VPCMPGTD_256 ); |
| DO_D( VPMOVZXBD_256 ); |
| DO_D( VPMOVSXBD_256 ); |
| DO_D( VPALIGNR_256_1of3 ); |
| DO_D( VPALIGNR_256_2of3 ); |
| DO_D( VPALIGNR_256_3of3 ); |
| DO_D( VPBLENDW_256_0x00 ); |
| DO_D( VPBLENDW_256_0xFE ); |
| DO_D( VPBLENDW_256_0x30 ); |
| DO_D( VPBLENDW_256_0x21 ); |
| DO_D( VPBLENDW_256_0xD7 ); |
| DO_D( VPBLENDW_256_0xB5 ); |
| DO_D( VPBLENDW_256_0x85 ); |
| DO_D( VPBLENDW_256_0x29 ); |
| DO_D( VPSLLW_256 ); |
| DO_D( VPSRLW_256 ); |
| DO_D( VPSRAW_256 ); |
| DO_D( VPSLLD_256 ); |
| DO_D( VPSRLD_256 ); |
| DO_D( VPSRAD_256 ); |
| DO_D( VPSLLQ_256 ); |
| DO_D( VPSRLQ_256 ); |
| DO_D( VPMADDWD_256 ); |
| DO_D( VMOVNTDQA_256 ); |
| DO_D( VPACKSSWB_256 ); |
| DO_D( VPAVGB_256 ); |
| DO_D( VPAVGW_256 ); |
| DO_D( VPADDSB_256 ); |
| DO_D( VPADDSW_256 ); |
| DO_D( VPHADDW_256 ); |
| DO_D( VPHADDD_256 ); |
| DO_D( VPHADDSW_256 ); |
| DO_D( VPMADDUBSW_256 ); |
| DO_D( VPHSUBW_256 ); |
| DO_D( VPHSUBD_256 ); |
| DO_D( VPHSUBSW_256 ); |
| DO_D( VPABSB_256 ); |
| DO_D( VPABSW_256 ); |
| DO_D( VPMOVSXBQ_256 ); |
| DO_D( VPMOVSXWQ_256 ); |
| DO_D( VPACKUSDW_256 ); |
| DO_D( VPMOVZXBQ_256 ); |
| DO_D( VPMOVZXWQ_256 ); |
| DO_D( VPMOVZXDQ_256 ); |
| DO_D( VMPSADBW_256_0x0 ); |
| DO_D( VMPSADBW_256_0x39 ); |
| DO_D( VMPSADBW_256_0x32 ); |
| DO_D( VMPSADBW_256_0x2b ); |
| DO_D( VMPSADBW_256_0x24 ); |
| DO_D( VMPSADBW_256_0x1d ); |
| DO_D( VMPSADBW_256_0x16 ); |
| DO_D( VMPSADBW_256_0x0f ); |
| DO_D( VPSADBW_256 ); |
| DO_D( VPSIGNB_256 ); |
| DO_D( VPSIGNW_256 ); |
| DO_D( VPSIGND_256 ); |
| DO_D( VPMULHRSW_256 ); |
| DO_D( VBROADCASTI128 ); |
| DO_D( VEXTRACTI128_0x0 ); |
| DO_D( VEXTRACTI128_0x1 ); |
| DO_D( VINSERTI128_0x0 ); |
| DO_D( VINSERTI128_0x1 ); |
| DO_D( VPERM2I128_0x00 ); |
| DO_D( VPERM2I128_0xFF ); |
| DO_D( VPERM2I128_0x30 ); |
| DO_D( VPERM2I128_0x21 ); |
| DO_D( VPERM2I128_0x12 ); |
| DO_D( VPERM2I128_0x03 ); |
| DO_D( VPERM2I128_0x85 ); |
| DO_D( VPERM2I128_0x5A ); |
| DO_D( VBROADCASTSS_128 ); |
| DO_D( VBROADCASTSS_256 ); |
| DO_D( VBROADCASTSD_256 ); |
| DO_D( VPERMD ); |
| DO_D( VPERMQ_0x00 ); |
| DO_D( VPERMQ_0xFE ); |
| DO_D( VPERMQ_0x30 ); |
| DO_D( VPERMQ_0x21 ); |
| DO_D( VPERMQ_0xD7 ); |
| DO_D( VPERMQ_0xB5 ); |
| DO_D( VPERMQ_0x85 ); |
| DO_D( VPERMQ_0x29 ); |
| DO_D( VPERMPS ); |
| DO_D( VPERMPD_0x00 ); |
| DO_D( VPERMPD_0xFE ); |
| DO_D( VPERMPD_0x30 ); |
| DO_D( VPERMPD_0x21 ); |
| DO_D( VPERMPD_0xD7 ); |
| DO_D( VPERMPD_0xB5 ); |
| DO_D( VPERMPD_0x85 ); |
| DO_D( VPERMPD_0x29 ); |
| DO_D( VPBLENDD_128_0x00 ); |
| DO_D( VPBLENDD_128_0x02 ); |
| DO_D( VPBLENDD_128_0x04 ); |
| DO_D( VPBLENDD_128_0x06 ); |
| DO_D( VPBLENDD_128_0x08 ); |
| DO_D( VPBLENDD_128_0x0A ); |
| DO_D( VPBLENDD_128_0x0C ); |
| DO_D( VPBLENDD_128_0x0E ); |
| DO_D( VPBLENDD_256_0x00 ); |
| DO_D( VPBLENDD_256_0xFE ); |
| DO_D( VPBLENDD_256_0x30 ); |
| DO_D( VPBLENDD_256_0x21 ); |
| DO_D( VPBLENDD_256_0xD7 ); |
| DO_D( VPBLENDD_256_0xB5 ); |
| DO_D( VPBLENDD_256_0x85 ); |
| DO_D( VPBLENDD_256_0x29 ); |
| DO_D( VPSLLVD_128 ); |
| DO_D( VPSLLVD_256 ); |
| DO_D( VPSLLVQ_128 ); |
| DO_D( VPSLLVQ_256 ); |
| DO_D( VPSRLVD_128 ); |
| DO_D( VPSRLVD_256 ); |
| DO_D( VPSRLVQ_128 ); |
| DO_D( VPSRLVQ_256 ); |
| DO_D( VPSRAVD_128 ); |
| DO_D( VPSRAVD_256 ); |
| DO_D( VPBROADCASTB_128 ); |
| DO_D( VPBROADCASTB_256 ); |
| DO_D( VPBROADCASTW_128 ); |
| DO_D( VPBROADCASTW_256 ); |
| DO_D( VPBROADCASTD_128 ); |
| DO_D( VPBROADCASTD_256 ); |
| DO_D( VPBROADCASTQ_128 ); |
| DO_D( VPBROADCASTQ_256 ); |
| DO_D( VPMASKMOVD_128_LoadForm ); |
| DO_D( VPMASKMOVD_256_LoadForm ); |
| DO_D( VPMASKMOVQ_128_LoadForm ); |
| DO_D( VPMASKMOVQ_256_LoadForm ); |
| DO_D( VPMASKMOVD_128_StoreForm ); |
| DO_D( VPMASKMOVD_256_StoreForm ); |
| DO_D( VPMASKMOVQ_128_StoreForm ); |
| DO_D( VPMASKMOVQ_256_StoreForm ); |
| { int i; for (i = 0; i < sizeof(randArray); i++) randArray[i] = randUChar(); } /* fill randArray from randUChar() before the gather tests, whose asm reads it via randArray(%rip); randUChar() keeps one static seed, so moving this line changes the bytes it produces */ |
| DO_D( VGATHERDPS_128 ); /* gather tests start here */ |
| DO_D( VGATHERDPS_256 ); |
| DO_D( VGATHERQPS_128_1 ); |
| DO_D( VGATHERQPS_256_1 ); |
| DO_D( VGATHERQPS_128_2 ); |
| DO_D( VGATHERQPS_256_2 ); |
| DO_D( VGATHERDPD_128 ); |
| DO_D( VGATHERDPD_256 ); |
| DO_D( VGATHERQPD_128_1 ); |
| DO_D( VGATHERQPD_256_1 ); |
| DO_D( VGATHERQPD_128_2 ); |
| DO_D( VGATHERQPD_256_2 ); |
| DO_D( VPGATHERDD_128 ); |
| DO_D( VPGATHERDD_256 ); |
| DO_D( VPGATHERQD_128_1 ); |
| DO_D( VPGATHERQD_256_1 ); |
| DO_D( VPGATHERQD_128_2 ); |
| DO_D( VPGATHERQD_256_2 ); |
| DO_D( VPGATHERDQ_128 ); |
| DO_D( VPGATHERDQ_256 ); |
| DO_D( VPGATHERQQ_128_1 ); |
| DO_D( VPGATHERQQ_256_1 ); |
| DO_D( VPGATHERQQ_128_2 ); |
| DO_D( VPGATHERQQ_256_2 ); |
| return 0; |
| } |