sewardj | f014538 | 2013-03-27 11:42:05 +0000 | [diff] [blame] | 1 | |
| 2 | #include <stdio.h> |
| 3 | #include <stdlib.h> |
| 4 | #include <assert.h> |
rhyskidd | 917ca31 | 2015-08-15 11:40:27 +0000 | [diff] [blame] | 5 | #include "tests/malloc.h" |
sewardj | f014538 | 2013-03-27 11:42:05 +0000 | [diff] [blame] | 6 | |
| 7 | typedef unsigned char UChar; |
| 8 | typedef unsigned int UInt; |
| 9 | typedef unsigned long int UWord; |
| 10 | typedef unsigned long long int ULong; |
| 11 | |
/* Scratch array referenced by the test machinery via its linker-level
   symbol.  NOTE(review): the plain spelling on Darwin vs the
   underscore-prefixed spelling elsewhere presumably compensates for
   Darwin's leading-underscore C symbol mangling, so the same assembly
   symbol name works on both -- confirm against whatever references it. */
#if defined(VGO_darwin)
UChar randArray[1027] __attribute__((used));
#else
UChar _randArray[1027] __attribute__((used));
#endif
sewardj | f014538 | 2013-03-27 11:42:05 +0000 | [diff] [blame] | 17 | |
/* True iff _ptr is 32-byte aligned (required for vmovdqa accesses). */
#define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr)))

/* One 256-bit vector register's worth of data, viewable as bytes or
   32-bit words. */
typedef union { UChar u8[32]; UInt u32[8]; } YMM;

/* The state each generated test loads, operates on, and stores back:
   four YMM values plus one 64-bit scalar (held in %r14 / offset 128,
   also used as a shift count by some tests). */
typedef struct { YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; } Block;
| 23 | |
| 24 | void showYMM ( YMM* vec ) |
| 25 | { |
| 26 | int i; |
| 27 | assert(IS_32_ALIGNED(vec)); |
| 28 | for (i = 31; i >= 0; i--) { |
| 29 | printf("%02x", (UInt)vec->u8[i]); |
| 30 | if (i > 0 && 0 == ((i+0) & 7)) printf("."); |
| 31 | } |
| 32 | } |
| 33 | |
| 34 | void showBlock ( char* msg, Block* block ) |
| 35 | { |
| 36 | printf(" %s\n", msg); |
| 37 | printf(" "); showYMM(&block->a1); printf("\n"); |
| 38 | printf(" "); showYMM(&block->a2); printf("\n"); |
| 39 | printf(" "); showYMM(&block->a3); printf("\n"); |
| 40 | printf(" "); showYMM(&block->a4); printf("\n"); |
| 41 | printf(" %016llx\n", block->u64); |
| 42 | } |
| 43 | |
| 44 | UChar randUChar ( void ) |
| 45 | { |
| 46 | static UInt seed = 80021; |
| 47 | seed = 1103515245 * seed + 12345; |
| 48 | return (seed >> 17) & 0xFF; |
| 49 | } |
| 50 | |
| 51 | void randBlock ( Block* b ) |
| 52 | { |
| 53 | int i; |
| 54 | UChar* p = (UChar*)b; |
| 55 | for (i = 0; i < sizeof(Block); i++) |
| 56 | p[i] = randUChar(); |
| 57 | } |
| 58 | |
| 59 | |
| 60 | /* Generate a function test_NAME, that tests the given insn, in both |
| 61 | its mem and reg forms. The reg form of the insn may mention, as |
| 62 | operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14. The mem form of |
| 63 | the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9 |
| 64 | and %r14. It's OK for the insn to clobber ymm0, as this is needed |
sewardj | 44e5376 | 2015-01-28 11:11:02 +0000 | [diff] [blame] | 65 | for testing PCMPxSTRx, and ymm6, as this is needed for testing |
| 66 | MOVMASK variants. */ |
sewardj | f014538 | 2013-03-27 11:42:05 +0000 | [diff] [blame] | 67 | |
/* Expands to a static function test_NAME that runs _reg_form and then
   _mem_form on freshly randomised Block state, printing the Block
   before and after each, so correct emulation can be checked by
   diffing output.  Register choreography:
     - reg variant: a1..a4 -> ymm7,ymm8,ymm6,ymm9; u64 -> r14;
       everything written back afterwards.
     - mem variant: rax points at a1 (the memory operand); a2..a4 ->
       ymm8,ymm7,ymm9; u64 -> r14; ymm6 is deliberately neither loaded
       nor stored, since tests may clobber it (see comment above).
   memalign32 (tests/malloc.h) provides the 32-byte alignment the
   vmovdqa accesses require; the block is released with free(). */
#define GEN_test_RandM(_name, _reg_form, _mem_form) \
\
__attribute__ ((noinline)) static void test_##_name ( void ) \
{ \
   Block* b = memalign32(sizeof(Block)); \
   randBlock(b); \
   printf("%s(reg)\n", #_name); \
   showBlock("before", b); \
   /* reg form: load vector/scalar state, run insn, store it back */ \
   __asm__ __volatile__( \
      "vmovdqa 0(%0),%%ymm7" "\n\t" \
      "vmovdqa 32(%0),%%ymm8" "\n\t" \
      "vmovdqa 64(%0),%%ymm6" "\n\t" \
      "vmovdqa 96(%0),%%ymm9" "\n\t" \
      "movq 128(%0),%%r14" "\n\t" \
      _reg_form "\n\t" \
      "vmovdqa %%ymm7, 0(%0)" "\n\t" \
      "vmovdqa %%ymm8, 32(%0)" "\n\t" \
      "vmovdqa %%ymm6, 64(%0)" "\n\t" \
      "vmovdqa %%ymm9, 96(%0)" "\n\t" \
      "movq %%r14, 128(%0)" "\n\t" \
      : /*OUT*/ \
      : /*IN*/"r"(b) \
      : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
   ); \
   showBlock("after", b); \
   randBlock(b); \
   printf("%s(mem)\n", #_name); \
   showBlock("before", b); \
   /* mem form: rax = &a1 serves as the insn's memory operand */ \
   __asm__ __volatile__( \
      "leaq 0(%0),%%rax" "\n\t" \
      "vmovdqa 32(%0),%%ymm8" "\n\t" \
      "vmovdqa 64(%0),%%ymm7" "\n\t" \
      "vmovdqa 96(%0),%%ymm9" "\n\t" \
      "movq 128(%0),%%r14" "\n\t" \
      _mem_form "\n\t" \
      "vmovdqa %%ymm8, 32(%0)" "\n\t" \
      "vmovdqa %%ymm7, 64(%0)" "\n\t" \
      "vmovdqa %%ymm9, 96(%0)" "\n\t" \
      "movq %%r14, 128(%0)" "\n\t" \
      : /*OUT*/ \
      : /*IN*/"r"(b) \
      : /*TRASH*/"xmm6", \
        "xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
   ); \
   showBlock("after", b); \
   printf("\n"); \
   free(b); \
}
| 116 | |
/* For insns with only a register form (or only a memory form): the
   unused variant degenerates to an empty insn sequence, so the
   corresponding before/after dump is a no-op round trip. */
#define GEN_test_Ronly(_name, _reg_form) \
   GEN_test_RandM(_name, _reg_form, "")
#define GEN_test_Monly(_name, _mem_form) \
   GEN_test_RandM(_name, "", _mem_form)
| 121 | |
/* Vector integers promoted from 128-bit in AVX to 256-bit in AVX2. */
| 123 | |
GEN_test_RandM(VPOR_256,
               "vpor %%ymm6, %%ymm8, %%ymm7",
               "vpor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPXOR_256,
               "vpxor %%ymm6, %%ymm8, %%ymm7",
               "vpxor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBB_256,
               "vpsubb %%ymm6, %%ymm8, %%ymm7",
               "vpsubb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBD_256,
               "vpsubd %%ymm6, %%ymm8, %%ymm7",
               "vpsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDD_256,
               "vpaddd %%ymm6, %%ymm8, %%ymm7",
               "vpaddd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXWD_256,
               "vpmovzxwd %%xmm6, %%ymm8",
               "vpmovzxwd (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXBW_256,
               "vpmovzxbw %%xmm6, %%ymm8",
               "vpmovzxbw (%%rax), %%ymm8")

GEN_test_RandM(VPBLENDVB_256,
               "vpblendvb %%ymm9, %%ymm6, %%ymm8, %%ymm7",
               "vpblendvb %%ymm9, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSD_256,
               "vpminsd %%ymm6, %%ymm8, %%ymm7",
               "vpminsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSD_256,
               "vpmaxsd %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFB_256,
               "vpshufb %%ymm6, %%ymm8, %%ymm7",
               "vpshufb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLBW_256,
               "vpunpcklbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHBW_256,
               "vpunpckhbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSD_256,
               "vpabsd %%ymm6, %%ymm8",
               "vpabsd (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSWB_256,
               "vpackuswb %%ymm9, %%ymm8, %%ymm7",
               "vpackuswb (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPMOVMSKB_256,
               "vpmovmskb %%ymm8, %%r14")

GEN_test_RandM(VPAND_256,
               "vpand %%ymm9, %%ymm8, %%ymm7",
               "vpand (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQB_256,
               "vpcmpeqb %%ymm9, %%ymm8, %%ymm7",
               "vpcmpeqb (%%rax), %%ymm8, %%ymm7")

/* NB: in the immediate-carrying tests below, the mem form often uses a
   different immediate (and sometimes different registers) from the reg
   form, so the two sub-tests exercise distinct selector values. */
GEN_test_RandM(VPSHUFLW_0x39_256,
               "vpshuflw $0x39, %%ymm9, %%ymm7",
               "vpshuflw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPSHUFHW_0x39_256,
               "vpshufhw $0x39, %%ymm9, %%ymm7",
               "vpshufhw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPMULLW_256,
               "vpmullw %%ymm9, %%ymm8, %%ymm7",
               "vpmullw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSW_256,
               "vpaddusw %%ymm9, %%ymm8, %%ymm7",
               "vpaddusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHUW_256,
               "vpmulhuw %%ymm9, %%ymm8, %%ymm7",
               "vpmulhuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSB_256,
               "vpaddusb %%ymm9, %%ymm8, %%ymm7",
               "vpaddusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLWD_256,
               "vpunpcklwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklwd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHWD_256,
               "vpunpckhwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhwd (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSLLD_0x05_256,
               "vpslld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRLD_0x05_256,
               "vpsrld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRAD_0x05_256,
               "vpsrad $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPSUBUSB_256,
               "vpsubusb %%ymm9, %%ymm8, %%ymm7",
               "vpsubusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSB_256,
               "vpsubsb %%ymm9, %%ymm8, %%ymm7",
               "vpsubsb (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLDQ_0x05_256,
               "vpsrldq $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLDQ_0x05_256,
               "vpslldq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPANDN_256,
               "vpandn %%ymm9, %%ymm8, %%ymm7",
               "vpandn (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLQDQ_256,
               "vpunpcklqdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklqdq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLW_0x05_256,
               "vpsrlw $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLW_0x05_256,
               "vpsllw $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPADDW_256,
               "vpaddw %%ymm6, %%ymm8, %%ymm7",
               "vpaddw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPACKSSDW_256,
               "vpackssdw %%ymm9, %%ymm8, %%ymm7",
               "vpackssdw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLDQ_256,
               "vpunpckldq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckldq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQD_256,
               "vpcmpeqd %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFD_0x39_256,
               "vpshufd $0x39, %%ymm9, %%ymm8",
               "vpshufd $0xC6, (%%rax), %%ymm7")

GEN_test_RandM(VPADDQ_256,
               "vpaddq %%ymm6, %%ymm8, %%ymm7",
               "vpaddq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBQ_256,
               "vpsubq %%ymm6, %%ymm8, %%ymm7",
               "vpsubq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBW_256,
               "vpsubw %%ymm6, %%ymm8, %%ymm7",
               "vpsubw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQQ_256,
               "vpcmpeqq %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTQ_256,
               "vpcmpgtq %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLQ_0x05_256,
               "vpsrlq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPMULUDQ_256,
               "vpmuludq %%ymm6, %%ymm8, %%ymm7",
               "vpmuludq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULDQ_256,
               "vpmuldq %%ymm6, %%ymm8, %%ymm7",
               "vpmuldq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSLLQ_0x05_256,
               "vpsllq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPMAXUD_256,
               "vpmaxud %%ymm6, %%ymm8, %%ymm7",
               "vpmaxud (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUD_256,
               "vpminud %%ymm6, %%ymm8, %%ymm7",
               "vpminud (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULLD_256,
               "vpmulld %%ymm6, %%ymm8, %%ymm7",
               "vpmulld (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXUW_256,
               "vpmaxuw %%ymm6, %%ymm8, %%ymm7",
               "vpmaxuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUW_256,
               "vpminuw %%ymm6, %%ymm8, %%ymm7",
               "vpminuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSW_256,
               "vpmaxsw %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSW_256,
               "vpminsw %%ymm6, %%ymm8, %%ymm7",
               "vpminsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXUB_256,
               "vpmaxub %%ymm6, %%ymm8, %%ymm7",
               "vpmaxub (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUB_256,
               "vpminub %%ymm6, %%ymm8, %%ymm7",
               "vpminub (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSB_256,
               "vpmaxsb %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSB_256,
               "vpminsb %%ymm6, %%ymm8, %%ymm7",
               "vpminsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVSXBW_256,
               "vpmovsxbw %%xmm6, %%ymm8",
               "vpmovsxbw (%%rax), %%ymm8")

GEN_test_RandM(VPSUBUSW_256,
               "vpsubusw %%ymm9, %%ymm8, %%ymm7",
               "vpsubusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSW_256,
               "vpsubsw %%ymm9, %%ymm8, %%ymm7",
               "vpsubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQW_256,
               "vpcmpeqw %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDB_256,
               "vpaddb %%ymm6, %%ymm8, %%ymm7",
               "vpaddb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHDQ_256,
               "vpunpckhdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhdq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVSXDQ_256,
               "vpmovsxdq %%xmm6, %%ymm8",
               "vpmovsxdq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXWD_256,
               "vpmovsxwd %%xmm6, %%ymm8",
               "vpmovsxwd (%%rax), %%ymm8")

GEN_test_RandM(VPMULHW_256,
               "vpmulhw %%ymm9, %%ymm8, %%ymm7",
               "vpmulhw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHQDQ_256,
               "vpunpckhqdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhqdq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRAW_0x05_256,
               "vpsraw $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPCMPGTB_256,
               "vpcmpgtb %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTW_256,
               "vpcmpgtw %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTD_256,
               "vpcmpgtd %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXBD_256,
               "vpmovzxbd %%xmm6, %%ymm8",
               "vpmovzxbd (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXBD_256,
               "vpmovsxbd %%xmm6, %%ymm8",
               "vpmovsxbd (%%rax), %%ymm8")

GEN_test_RandM(VPALIGNR_256_1of3,
               "vpalignr $0, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $3, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPALIGNR_256_2of3,
               "vpalignr $6, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $9, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPALIGNR_256_3of3,
               "vpalignr $12, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $15, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDW_256_0x00,
               "vpblendw $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x01, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0xFE,
               "vpblendw $0xFE, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x30,
               "vpblendw $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x03, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x21,
               "vpblendw $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x12, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0xD7,
               "vpblendw $0xD7, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x6C, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0xB5,
               "vpblendw $0xB5, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x4A, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x85,
               "vpblendw $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0xDC, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x29,
               "vpblendw $0x29, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x92, (%%rax), %%ymm8, %%ymm7")

/* Shift-by-count-in-xmm/m128 forms.  The count (from the Block's u64
   field, at offset 128) is first masked so it stays below the element
   width -- presumably to keep most results non-degenerate. */
GEN_test_RandM(VPSLLW_256,
               "andl $15, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsllw 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLW_256,
               "andl $15, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsrlw 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRAW_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsraw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsraw 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSLLD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpslld %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpslld 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrld %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpsrld 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRAD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrad %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpsrad 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSLLQ_256,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllq %%xmm6, %%ymm8, %%ymm9",
               "andq $63, 128(%%rax);"
               "vpsllq 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLQ_256,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlq %%xmm6, %%ymm8, %%ymm9",
               "andq $63, 128(%%rax);"
               "vpsrlq 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPMADDWD_256,
               "vpmaddwd %%ymm6, %%ymm8, %%ymm7",
               "vpmaddwd (%%rax), %%ymm8, %%ymm7")

GEN_test_Monly(VMOVNTDQA_256,
               "vmovntdqa (%%rax), %%ymm9")

GEN_test_RandM(VPACKSSWB_256,
               "vpacksswb %%ymm6, %%ymm8, %%ymm7",
               "vpacksswb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPAVGB_256,
               "vpavgb %%ymm6, %%ymm8, %%ymm7",
               "vpavgb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPAVGW_256,
               "vpavgw %%ymm6, %%ymm8, %%ymm7",
               "vpavgw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDSB_256,
               "vpaddsb %%ymm6, %%ymm8, %%ymm7",
               "vpaddsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDSW_256,
               "vpaddsw %%ymm6, %%ymm8, %%ymm7",
               "vpaddsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDW_256,
               "vphaddw %%ymm6, %%ymm8, %%ymm7",
               "vphaddw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDD_256,
               "vphaddd %%ymm6, %%ymm8, %%ymm7",
               "vphaddd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDSW_256,
               "vphaddsw %%ymm6, %%ymm8, %%ymm7",
               "vphaddsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMADDUBSW_256,
               "vpmaddubsw %%ymm6, %%ymm8, %%ymm7",
               "vpmaddubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBW_256,
               "vphsubw %%ymm6, %%ymm8, %%ymm7",
               "vphsubw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBD_256,
               "vphsubd %%ymm6, %%ymm8, %%ymm7",
               "vphsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBSW_256,
               "vphsubsw %%ymm6, %%ymm8, %%ymm7",
               "vphsubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSB_256,
               "vpabsb %%ymm6, %%ymm7",
               "vpabsb (%%rax), %%ymm7")

GEN_test_RandM(VPABSW_256,
               "vpabsw %%ymm6, %%ymm7",
               "vpabsw (%%rax), %%ymm7")

GEN_test_RandM(VPMOVSXBQ_256,
               "vpmovsxbq %%xmm6, %%ymm8",
               "vpmovsxbq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXWQ_256,
               "vpmovsxwq %%xmm6, %%ymm8",
               "vpmovsxwq (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSDW_256,
               "vpackusdw %%ymm6, %%ymm8, %%ymm7",
               "vpackusdw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXBQ_256,
               "vpmovzxbq %%xmm6, %%ymm8",
               "vpmovzxbq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXWQ_256,
               "vpmovzxwq %%xmm6, %%ymm8",
               "vpmovzxwq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXDQ_256,
               "vpmovzxdq %%xmm6, %%ymm8",
               "vpmovzxdq (%%rax), %%ymm8")

GEN_test_RandM(VMPSADBW_256_0x0,
               "vmpsadbw $0, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x39,
               "vmpsadbw $0x39, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x39, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x32,
               "vmpsadbw $0x32, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x32, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x2b,
               "vmpsadbw $0x2b, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x2b, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x24,
               "vmpsadbw $0x24, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x24, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x1d,
               "vmpsadbw $0x1d, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x1d, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x16,
               "vmpsadbw $0x16, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x16, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x0f,
               "vmpsadbw $0x0f, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x0f, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSADBW_256,
               "vpsadbw %%ymm6, %%ymm8, %%ymm7",
               "vpsadbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGNB_256,
               "vpsignb %%ymm6, %%ymm8, %%ymm7",
               "vpsignb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGNW_256,
               "vpsignw %%ymm6, %%ymm8, %%ymm7",
               "vpsignw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGND_256,
               "vpsignd %%ymm6, %%ymm8, %%ymm7",
               "vpsignd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHRSW_256,
               "vpmulhrsw %%ymm6, %%ymm8, %%ymm7",
               "vpmulhrsw (%%rax), %%ymm8, %%ymm7")
| 647 | |
| 648 | /* Instructions new in AVX2. */ |
| 649 | |
| 650 | GEN_test_Monly(VBROADCASTI128, |
| 651 | "vbroadcasti128 (%%rax), %%ymm9") |
| 652 | |
| 653 | GEN_test_RandM(VEXTRACTI128_0x0, |
| 654 | "vextracti128 $0x0, %%ymm7, %%xmm9", |
| 655 | "vextracti128 $0x0, %%ymm7, (%%rax)") |
| 656 | |
| 657 | GEN_test_RandM(VEXTRACTI128_0x1, |
| 658 | "vextracti128 $0x1, %%ymm7, %%xmm9", |
| 659 | "vextracti128 $0x1, %%ymm7, (%%rax)") |
| 660 | |
| 661 | GEN_test_RandM(VINSERTI128_0x0, |
| 662 | "vinserti128 $0x0, %%xmm9, %%ymm7, %%ymm8", |
| 663 | "vinserti128 $0x0, (%%rax), %%ymm7, %%ymm8") |
| 664 | |
| 665 | GEN_test_RandM(VINSERTI128_0x1, |
| 666 | "vinserti128 $0x1, %%xmm9, %%ymm7, %%ymm8", |
| 667 | "vinserti128 $0x1, (%%rax), %%ymm7, %%ymm8") |
| 668 | |
| 669 | GEN_test_RandM(VPERM2I128_0x00, |
| 670 | "vperm2i128 $0x00, %%ymm6, %%ymm8, %%ymm7", |
| 671 | "vperm2i128 $0x00, (%%rax), %%ymm8, %%ymm7") |
| 672 | GEN_test_RandM(VPERM2I128_0xFF, |
| 673 | "vperm2i128 $0xFF, %%ymm6, %%ymm8, %%ymm7", |
| 674 | "vperm2i128 $0xFF, (%%rax), %%ymm8, %%ymm7") |
| 675 | GEN_test_RandM(VPERM2I128_0x30, |
| 676 | "vperm2i128 $0x30, %%ymm6, %%ymm8, %%ymm7", |
| 677 | "vperm2i128 $0x30, (%%rax), %%ymm8, %%ymm7") |
| 678 | GEN_test_RandM(VPERM2I128_0x21, |
| 679 | "vperm2i128 $0x21, %%ymm6, %%ymm8, %%ymm7", |
| 680 | "vperm2i128 $0x21, (%%rax), %%ymm8, %%ymm7") |
| 681 | GEN_test_RandM(VPERM2I128_0x12, |
| 682 | "vperm2i128 $0x12, %%ymm6, %%ymm8, %%ymm7", |
| 683 | "vperm2i128 $0x12, (%%rax), %%ymm8, %%ymm7") |
| 684 | GEN_test_RandM(VPERM2I128_0x03, |
| 685 | "vperm2i128 $0x03, %%ymm6, %%ymm8, %%ymm7", |
| 686 | "vperm2i128 $0x03, (%%rax), %%ymm8, %%ymm7") |
| 687 | GEN_test_RandM(VPERM2I128_0x85, |
| 688 | "vperm2i128 $0x85, %%ymm6, %%ymm8, %%ymm7", |
| 689 | "vperm2i128 $0x85, (%%rax), %%ymm8, %%ymm7") |
| 690 | GEN_test_RandM(VPERM2I128_0x5A, |
| 691 | "vperm2i128 $0x5A, %%ymm6, %%ymm8, %%ymm7", |
| 692 | "vperm2i128 $0x5A, (%%rax), %%ymm8, %%ymm7") |
| 693 | |
| 694 | GEN_test_Ronly(VBROADCASTSS_128, |
| 695 | "vbroadcastss %%xmm9, %%xmm7") |
| 696 | |
| 697 | GEN_test_Ronly(VBROADCASTSS_256, |
| 698 | "vbroadcastss %%xmm9, %%ymm7") |
| 699 | |
| 700 | GEN_test_Ronly(VBROADCASTSD_256, |
| 701 | "vbroadcastsd %%xmm9, %%ymm7") |
| 702 | |
| 703 | GEN_test_RandM(VPERMD, |
| 704 | "vpermd %%ymm6, %%ymm7, %%ymm9", |
| 705 | "vpermd (%%rax), %%ymm7, %%ymm9") |
| 706 | |
| 707 | GEN_test_RandM(VPERMQ_0x00, |
| 708 | "vpermq $0x00, %%ymm6, %%ymm7", |
| 709 | "vpermq $0x01, (%%rax), %%ymm7") |
| 710 | GEN_test_RandM(VPERMQ_0xFE, |
| 711 | "vpermq $0xFE, %%ymm6, %%ymm7", |
| 712 | "vpermq $0xFF, (%%rax), %%ymm7") |
| 713 | GEN_test_RandM(VPERMQ_0x30, |
| 714 | "vpermq $0x30, %%ymm6, %%ymm7", |
| 715 | "vpermq $0x03, (%%rax), %%ymm7") |
| 716 | GEN_test_RandM(VPERMQ_0x21, |
| 717 | "vpermq $0x21, %%ymm6, %%ymm7", |
| 718 | "vpermq $0x12, (%%rax), %%ymm7") |
| 719 | GEN_test_RandM(VPERMQ_0xD7, |
| 720 | "vpermq $0xD7, %%ymm6, %%ymm7", |
| 721 | "vpermq $0x6C, (%%rax), %%ymm7") |
| 722 | GEN_test_RandM(VPERMQ_0xB5, |
| 723 | "vpermq $0xB5, %%ymm6, %%ymm7", |
| 724 | "vpermq $0x4A, (%%rax), %%ymm7") |
| 725 | GEN_test_RandM(VPERMQ_0x85, |
| 726 | "vpermq $0x85, %%ymm6, %%ymm7", |
| 727 | "vpermq $0xDC, (%%rax), %%ymm7") |
| 728 | GEN_test_RandM(VPERMQ_0x29, |
| 729 | "vpermq $0x29, %%ymm6, %%ymm7", |
| 730 | "vpermq $0x92, (%%rax), %%ymm7") |
| 731 | |
| 732 | GEN_test_RandM(VPERMPS, |
| 733 | "vpermps %%ymm6, %%ymm7, %%ymm9", |
| 734 | "vpermps (%%rax), %%ymm7, %%ymm9") |
| 735 | |
| 736 | GEN_test_RandM(VPERMPD_0x00, |
| 737 | "vpermpd $0x00, %%ymm6, %%ymm7", |
| 738 | "vpermpd $0x01, (%%rax), %%ymm7") |
| 739 | GEN_test_RandM(VPERMPD_0xFE, |
| 740 | "vpermpd $0xFE, %%ymm6, %%ymm7", |
| 741 | "vpermpd $0xFF, (%%rax), %%ymm7") |
| 742 | GEN_test_RandM(VPERMPD_0x30, |
| 743 | "vpermpd $0x30, %%ymm6, %%ymm7", |
| 744 | "vpermpd $0x03, (%%rax), %%ymm7") |
| 745 | GEN_test_RandM(VPERMPD_0x21, |
| 746 | "vpermpd $0x21, %%ymm6, %%ymm7", |
| 747 | "vpermpd $0x12, (%%rax), %%ymm7") |
| 748 | GEN_test_RandM(VPERMPD_0xD7, |
| 749 | "vpermpd $0xD7, %%ymm6, %%ymm7", |
| 750 | "vpermpd $0x6C, (%%rax), %%ymm7") |
| 751 | GEN_test_RandM(VPERMPD_0xB5, |
| 752 | "vpermpd $0xB5, %%ymm6, %%ymm7", |
| 753 | "vpermpd $0x4A, (%%rax), %%ymm7") |
| 754 | GEN_test_RandM(VPERMPD_0x85, |
| 755 | "vpermpd $0x85, %%ymm6, %%ymm7", |
| 756 | "vpermpd $0xDC, (%%rax), %%ymm7") |
| 757 | GEN_test_RandM(VPERMPD_0x29, |
| 758 | "vpermpd $0x29, %%ymm6, %%ymm7", |
| 759 | "vpermpd $0x92, (%%rax), %%ymm7") |
| 760 | |
| 761 | GEN_test_RandM(VPBLENDD_128_0x00, |
| 762 | "vpblendd $0x00, %%xmm6, %%xmm8, %%xmm7", |
| 763 | "vpblendd $0x01, (%%rax), %%xmm8, %%xmm7") |
| 764 | GEN_test_RandM(VPBLENDD_128_0x02, |
| 765 | "vpblendd $0x02, %%xmm6, %%xmm8, %%xmm7", |
| 766 | "vpblendd $0x03, (%%rax), %%xmm8, %%xmm7") |
| 767 | GEN_test_RandM(VPBLENDD_128_0x04, |
| 768 | "vpblendd $0x04, %%xmm6, %%xmm8, %%xmm7", |
| 769 | "vpblendd $0x05, (%%rax), %%xmm8, %%xmm7") |
| 770 | GEN_test_RandM(VPBLENDD_128_0x06, |
| 771 | "vpblendd $0x06, %%xmm6, %%xmm8, %%xmm7", |
| 772 | "vpblendd $0x07, (%%rax), %%xmm8, %%xmm7") |
| 773 | GEN_test_RandM(VPBLENDD_128_0x08, |
| 774 | "vpblendd $0x08, %%xmm6, %%xmm8, %%xmm7", |
| 775 | "vpblendd $0x09, (%%rax), %%xmm8, %%xmm7") |
| 776 | GEN_test_RandM(VPBLENDD_128_0x0A, |
| 777 | "vpblendd $0x0A, %%xmm6, %%xmm8, %%xmm7", |
| 778 | "vpblendd $0x0B, (%%rax), %%xmm8, %%xmm7") |
| 779 | GEN_test_RandM(VPBLENDD_128_0x0C, |
| 780 | "vpblendd $0x0C, %%xmm6, %%xmm8, %%xmm7", |
| 781 | "vpblendd $0x0D, (%%rax), %%xmm8, %%xmm7") |
| 782 | GEN_test_RandM(VPBLENDD_128_0x0E, |
| 783 | "vpblendd $0x0E, %%xmm6, %%xmm8, %%xmm7", |
| 784 | "vpblendd $0x0F, (%%rax), %%xmm8, %%xmm7") |
| 785 | |
| 786 | GEN_test_RandM(VPBLENDD_256_0x00, |
| 787 | "vpblendd $0x00, %%ymm6, %%ymm8, %%ymm7", |
| 788 | "vpblendd $0x01, (%%rax), %%ymm8, %%ymm7") |
| 789 | GEN_test_RandM(VPBLENDD_256_0xFE, |
| 790 | "vpblendd $0xFE, %%ymm6, %%ymm8, %%ymm7", |
| 791 | "vpblendd $0xFF, (%%rax), %%ymm8, %%ymm7") |
| 792 | GEN_test_RandM(VPBLENDD_256_0x30, |
| 793 | "vpblendd $0x30, %%ymm6, %%ymm8, %%ymm7", |
| 794 | "vpblendd $0x03, (%%rax), %%ymm8, %%ymm7") |
| 795 | GEN_test_RandM(VPBLENDD_256_0x21, |
| 796 | "vpblendd $0x21, %%ymm6, %%ymm8, %%ymm7", |
| 797 | "vpblendd $0x12, (%%rax), %%ymm8, %%ymm7") |
| 798 | GEN_test_RandM(VPBLENDD_256_0xD7, |
| 799 | "vpblendd $0xD7, %%ymm6, %%ymm8, %%ymm7", |
| 800 | "vpblendd $0x6C, (%%rax), %%ymm8, %%ymm7") |
| 801 | GEN_test_RandM(VPBLENDD_256_0xB5, |
| 802 | "vpblendd $0xB5, %%ymm6, %%ymm8, %%ymm7", |
| 803 | "vpblendd $0x4A, (%%rax), %%ymm8, %%ymm7") |
| 804 | GEN_test_RandM(VPBLENDD_256_0x85, |
| 805 | "vpblendd $0x85, %%ymm6, %%ymm8, %%ymm7", |
| 806 | "vpblendd $0xDC, (%%rax), %%ymm8, %%ymm7") |
| 807 | GEN_test_RandM(VPBLENDD_256_0x29, |
| 808 | "vpblendd $0x29, %%ymm6, %%ymm8, %%ymm7", |
| 809 | "vpblendd $0x92, (%%rax), %%ymm8, %%ymm7") |
| 810 | |
| 811 | GEN_test_RandM(VPSLLVD_128, |
| 812 | "vpslld $27, %%xmm6, %%xmm6;" |
| 813 | "vpsrld $27, %%xmm6, %%xmm6;" |
| 814 | "vpsllvd %%xmm6, %%xmm8, %%xmm7", |
| 815 | "andl $31, (%%rax);" |
| 816 | "andl $31, 4(%%rax);" |
| 817 | "andl $31, 8(%%rax);" |
| 818 | "vpsllvd (%%rax), %%xmm8, %%xmm7") |
| 819 | |
| 820 | GEN_test_RandM(VPSLLVD_256, |
| 821 | "vpslld $27, %%ymm6, %%ymm6;" |
| 822 | "vpsrld $27, %%ymm6, %%ymm6;" |
| 823 | "vpsllvd %%ymm6, %%ymm8, %%ymm7", |
| 824 | "andl $31, (%%rax);" |
| 825 | "andl $31, 4(%%rax);" |
| 826 | "andl $31, 8(%%rax);" |
| 827 | "andl $31, 16(%%rax);" |
| 828 | "andl $31, 20(%%rax);" |
| 829 | "andl $31, 24(%%rax);" |
| 830 | "vpsllvd (%%rax), %%ymm8, %%ymm7") |
| 831 | |
| 832 | GEN_test_RandM(VPSLLVQ_128, |
| 833 | "vpsllq $58, %%xmm6, %%xmm6;" |
| 834 | "vpsrlq $58, %%xmm6, %%xmm6;" |
| 835 | "vpsllvq %%xmm6, %%xmm8, %%xmm7", |
| 836 | "andl $63, (%%rax);" |
| 837 | "vpsllvq (%%rax), %%xmm8, %%xmm7") |
| 838 | |
| 839 | GEN_test_RandM(VPSLLVQ_256, |
| 840 | "vpsllq $58, %%ymm6, %%ymm6;" |
| 841 | "vpsrlq $58, %%ymm6, %%ymm6;" |
| 842 | "vpsllvq %%ymm6, %%ymm8, %%ymm7", |
| 843 | "andl $63, (%%rax);" |
| 844 | "andl $63, 8(%%rax);" |
| 845 | "andl $63, 16(%%rax);" |
| 846 | "vpsllvq (%%rax), %%ymm8, %%ymm7") |
| 847 | |
/* VPSRLVD: per-dword-lane variable logical right shift.  Same count
   clamping scheme as the VPSLLVD tests (low 5 bits in the reg form;
   andl in the mem form, with some lanes intentionally left
   unmasked -- see NOTE there). */
GEN_test_RandM(VPSRLVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsrlvd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsrlvd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRLVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsrlvd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsrlvd (%%rax), %%ymm8, %%ymm7")
| 868 | |
/* VPSRLVQ: per-qword-lane variable logical right shift.  Count
   clamping mirrors the VPSLLVQ tests (low 6 bits; 256-bit mem form
   leaves the qword at offset 24 unmasked). */
GEN_test_RandM(VPSRLVQ_128,
               "vpsllq $58, %%xmm6, %%xmm6;"
               "vpsrlq $58, %%xmm6, %%xmm6;"
               "vpsrlvq %%xmm6, %%xmm8, %%xmm7",
               "andl $63, (%%rax);"
               "vpsrlvq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRLVQ_256,
               "vpsllq $58, %%ymm6, %%ymm6;"
               "vpsrlq $58, %%ymm6, %%ymm6;"
               "vpsrlvq %%ymm6, %%ymm8, %%ymm7",
               "andl $63, (%%rax);"
               "andl $63, 8(%%rax);"
               "andl $63, 16(%%rax);"
               "vpsrlvq (%%rax), %%ymm8, %%ymm7")
| 884 | |
/* VPSRAVD: per-dword-lane variable arithmetic right shift.  Same
   clamping scheme as VPSLLVD/VPSRLVD above; the partially-unmasked
   mem forms additionally exercise out-of-range counts (which, for
   the arithmetic variant, sign-fill the lane -- NOTE(review):
   behavior per the ISA reference, not visible here; confirm). */
GEN_test_RandM(VPSRAVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsravd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsravd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRAVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsravd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsravd (%%rax), %%ymm8, %%ymm7")
| 905 | |
/* VPBROADCASTB/W/D/Q: replicate the lowest element of xmm9 (reg
   form) or of the memory operand (mem form) across all lanes of the
   destination.  Both 128- and 256-bit destinations are covered. */
GEN_test_RandM(VPBROADCASTB_128,
               "vpbroadcastb %%xmm9, %%xmm7",
               "vpbroadcastb (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTB_256,
               "vpbroadcastb %%xmm9, %%ymm7",
               "vpbroadcastb (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTW_128,
               "vpbroadcastw %%xmm9, %%xmm7",
               "vpbroadcastw (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTW_256,
               "vpbroadcastw %%xmm9, %%ymm7",
               "vpbroadcastw (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTD_128,
               "vpbroadcastd %%xmm9, %%xmm7",
               "vpbroadcastd (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTD_256,
               "vpbroadcastd %%xmm9, %%ymm7",
               "vpbroadcastd (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTQ_128,
               "vpbroadcastq %%xmm9, %%xmm7",
               "vpbroadcastq (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTQ_256,
               "vpbroadcastq %%xmm9, %%ymm7",
               "vpbroadcastq (%%rax), %%ymm7")
| 937 | |
/* VPMASKMOV load forms (memory-only tests).  Each test performs a
   normal masked load through (%%rax), then zeroes the mask register
   and issues a second masked load from a junk address
   ((%%rax,%%rax,4), i.e. 5*rax) -- presumably checking that with an
   all-zero mask no element is actually read (so the bogus address
   must not fault) and the destination is zeroed.  TODO confirm
   against the ISA reference. */
GEN_test_Monly(VPMASKMOVD_128_LoadForm,
               "vpmaskmovd (%%rax), %%xmm8, %%xmm7;"
               "vxorps %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovd (%%rax,%%rax,4), %%xmm6, %%xmm9")

GEN_test_Monly(VPMASKMOVD_256_LoadForm,
               "vpmaskmovd (%%rax), %%ymm8, %%ymm7;"
               "vxorps %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovd (%%rax,%%rax,4), %%ymm6, %%ymm9")

GEN_test_Monly(VPMASKMOVQ_128_LoadForm,
               "vpmaskmovq (%%rax), %%xmm8, %%xmm7;"
               "vxorpd %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovq (%%rax,%%rax,4), %%xmm6, %%xmm9")

GEN_test_Monly(VPMASKMOVQ_256_LoadForm,
               "vpmaskmovq (%%rax), %%ymm8, %%ymm7;"
               "vxorpd %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovq (%%rax,%%rax,4), %%ymm6, %%ymm9")
| 957 | |
sewardj | 44e5376 | 2015-01-28 11:11:02 +0000 | [diff] [blame] | 958 | GEN_test_Monly(VPMASKMOVD_128_StoreForm, |
| 959 | "vpmaskmovd %%xmm8, %%xmm7, (%%rax);" |
| 960 | "vxorps %%xmm6, %%xmm6, %%xmm6;" |
| 961 | "vpmaskmovd %%xmm9, %%xmm6, (%%rax,%%rax,4)") |
| 962 | |
| 963 | GEN_test_Monly(VPMASKMOVD_256_StoreForm, |
| 964 | "vpmaskmovd %%ymm8, %%ymm7, (%%rax);" |
| 965 | "vxorps %%ymm6, %%ymm6, %%ymm6;" |
| 966 | "vpmaskmovd %%ymm9, %%ymm6, (%%rax,%%rax,4)") |
| 967 | |
| 968 | GEN_test_Monly(VPMASKMOVQ_128_StoreForm, |
| 969 | "vpmaskmovq %%xmm8, %%xmm7, (%%rax);" |
| 970 | "vxorpd %%xmm6, %%xmm6, %%xmm6;" |
| 971 | "vpmaskmovq %%xmm9, %%xmm6, (%%rax,%%rax,4)") |
| 972 | |
| 973 | GEN_test_Monly(VPMASKMOVQ_256_StoreForm, |
| 974 | "vpmaskmovq %%ymm8, %%ymm7, (%%rax);" |
| 975 | "vxorpd %%ymm6, %%ymm6, %%ymm6;" |
| 976 | "vpmaskmovq %%ymm9, %%ymm6, (%%rax,%%rax,4)") |
| 977 | |
sewardj | f014538 | 2013-03-27 11:42:05 +0000 | [diff] [blame] | 978 | GEN_test_Ronly(VGATHERDPS_128, |
| 979 | "vpslld $25, %%xmm7, %%xmm8;" |
| 980 | "vpsrld $25, %%xmm8, %%xmm8;" |
| 981 | "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;" |
rhyskidd | 7232d21 | 2015-08-15 12:21:42 +0000 | [diff] [blame] | 982 | "leaq _randArray(%%rip), %%r14;" |
sewardj | f014538 | 2013-03-27 11:42:05 +0000 | [diff] [blame] | 983 | "vgatherdps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;" |
| 984 | "xorl %%r14d, %%r14d") |
| 985 | |
| 986 | GEN_test_Ronly(VGATHERDPS_256, |
| 987 | "vpslld $25, %%ymm7, %%ymm8;" |
| 988 | "vpsrld $25, %%ymm8, %%ymm8;" |
| 989 | "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;" |
rhyskidd | 7232d21 | 2015-08-15 12:21:42 +0000 | [diff] [blame] | 990 | "leaq _randArray(%%rip), %%r14;" |
sewardj | f014538 | 2013-03-27 11:42:05 +0000 | [diff] [blame] | 991 | "vgatherdps %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;" |
| 992 | "xorl %%r14d, %%r14d") |
| 993 | |
| 994 | GEN_test_Ronly(VGATHERQPS_128_1, |
| 995 | "vpsllq $57, %%xmm7, %%xmm8;" |
| 996 | "vpsrlq $57, %%xmm8, %%xmm8;" |
| 997 | "vpmovsxdq %%xmm6, %%xmm9;" |
| 998 | "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" |
| 999 | "vmovdqa 96(%0), %%ymm9;" |
rhyskidd | 7232d21 | 2015-08-15 12:21:42 +0000 | [diff] [blame] | 1000 | "leaq _randArray(%%rip), %%r14;" |
sewardj | f014538 | 2013-03-27 11:42:05 +0000 | [diff] [blame] | 1001 | "vgatherqps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;" |
| 1002 | "xorl %%r14d, %%r14d") |
| 1003 | |
| 1004 | GEN_test_Ronly(VGATHERQPS_256_1, |
| 1005 | "vpsllq $57, %%ymm7, %%ymm8;" |
| 1006 | "vpsrlq $57, %%ymm8, %%ymm8;" |
| 1007 | "vpmovsxdq %%xmm6, %%ymm9;" |
| 1008 | "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" |
| 1009 | "vmovdqa 96(%0), %%ymm9;" |
rhyskidd | 7232d21 | 2015-08-15 12:21:42 +0000 | [diff] [blame] | 1010 | "leaq _randArray(%%rip), %%r14;" |
sewardj | f014538 | 2013-03-27 11:42:05 +0000 | [diff] [blame] | 1011 | "vgatherqps %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;" |
| 1012 | "xorl %%r14d, %%r14d") |
| 1013 | |
| 1014 | GEN_test_Ronly(VGATHERQPS_128_2, |
| 1015 | "vpsllq $57, %%xmm7, %%xmm8;" |
| 1016 | "vpsrlq $57, %%xmm8, %%xmm8;" |
| 1017 | "vpmovsxdq %%xmm6, %%xmm9;" |
| 1018 | "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" |
| 1019 | "vmovdqa 96(%0), %%ymm9;" |
rhyskidd | 7232d21 | 2015-08-15 12:21:42 +0000 | [diff] [blame] | 1020 | "leaq _randArray(%%rip), %%r14;" |
sewardj | f014538 | 2013-03-27 11:42:05 +0000 | [diff] [blame] | 1021 | "vmovq %%r14, %%xmm7;" |
| 1022 | "vpsllq $2, %%xmm8, %%xmm8;" |
| 1023 | "vpbroadcastq %%xmm7, %%xmm7;" |
| 1024 | "vpaddq %%xmm7, %%xmm8, %%xmm8;" |
| 1025 | "vgatherqps %%xmm6, 1(,%%xmm8,1), %%xmm9;" |
| 1026 | "vpsubq %%xmm7, %%xmm8, %%xmm8;" |
| 1027 | "vmovdqa 0(%0), %%ymm7;" |
| 1028 | "xorl %%r14d, %%r14d") |
| 1029 | |
| 1030 | GEN_test_Ronly(VGATHERQPS_256_2, |
| 1031 | "vpsllq $57, %%ymm7, %%ymm8;" |
| 1032 | "vpsrlq $57, %%ymm8, %%ymm8;" |
| 1033 | "vpmovsxdq %%xmm6, %%ymm9;" |
| 1034 | "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" |
| 1035 | "vmovdqa 96(%0), %%ymm9;" |
rhyskidd | 7232d21 | 2015-08-15 12:21:42 +0000 | [diff] [blame] | 1036 | "leaq _randArray(%%rip), %%r14;" |
sewardj | f014538 | 2013-03-27 11:42:05 +0000 | [diff] [blame] | 1037 | "vmovq %%r14, %%xmm7;" |
| 1038 | "vpsllq $2, %%ymm8, %%ymm8;" |
| 1039 | "vpbroadcastq %%xmm7, %%ymm7;" |
| 1040 | "vpaddq %%ymm7, %%ymm8, %%ymm8;" |
| 1041 | "vgatherqps %%xmm6, 1(,%%ymm8,1), %%xmm9;" |
| 1042 | "vpsubq %%ymm7, %%ymm8, %%ymm8;" |
| 1043 | "vmovdqa 0(%0), %%ymm7;" |
| 1044 | "xorl %%r14d, %%r14d") |
| 1045 | |
/* Gather tests, double-precision forms.  Same scheme as the ps
   forms: clamp random data to in-range indices, blend junk into
   masked-off lanes, gather from randArray (asm symbol _randArray;
   see the VGO_darwin #if at the top of the file), then clear r14 so
   dumped state is reproducible.  For the dword-index DPD forms,
   vshufps packs one mask dword per qword lane out of (y)mm6 before
   the blend.  The _2 variants pre-add the base into the index vector
   and use the base-less VSIB form at scale 1, then undo the addition
   and restore ymm7 from the Block (offset 0). */
GEN_test_Ronly(VGATHERDPD_128,
               "vpslld $26, %%xmm7, %%xmm8;"
               "vpsrld $26, %%xmm8, %%xmm8;"
               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherdpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERDPD_256,
               "vpslld $26, %%ymm7, %%ymm8;"
               "vpsrld $26, %%ymm8, %%ymm8;"
               "vextracti128 $1, %%ymm6, %%xmm9;"
               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherdpd %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_128_1,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherqpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_256_1,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherqpd %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_128_2,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vgatherqpd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_256_2,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vgatherqpd %%ymm6, 1(,%%ymm8,1), %%ymm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")
| 1110 | |
/* Integer gather tests, dword-element forms (VPGATHERDD/VPGATHERQD).
   Structure matches the VGATHERDPS/VGATHERQPS tests: clamp random
   data to in-range indices, blend junk into masked-off lanes (which
   a correct gather must not dereference), gather from randArray via
   its asm-level symbol _randArray (spelling per the VGO_darwin #if
   at the top of the file), then clear r14 for reproducible dumps.
   The _2 variants pre-add the base into the index vector and use the
   base-less VSIB form at scale 1, undoing the addition and restoring
   ymm7 from the Block (offset 0) afterwards. */
GEN_test_Ronly(VPGATHERDD_128,
               "vpslld $25, %%xmm7, %%xmm8;"
               "vpsrld $25, %%xmm8, %%xmm8;"
               "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherdd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDD_256,
               "vpslld $25, %%ymm7, %%ymm8;"
               "vpsrld $25, %%ymm8, %%ymm8;"
               "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherdd %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_128_1,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherqd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_256_1,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherqd %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_128_2,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vpgatherqd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_256_2,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vpgatherqd %%xmm6, 1(,%%ymm8,1), %%xmm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")
| 1178 | |
/* Integer gather tests, qword-element forms (VPGATHERDQ/VPGATHERQQ).
   Structure matches the VGATHERDPD/VGATHERQPD tests: clamp random
   data to in-range indices, blend junk into masked-off lanes, gather
   from randArray via the asm-level symbol _randArray (spelling per
   the VGO_darwin #if at the top of the file), then clear r14.  For
   the dword-index DQ forms, vshufps packs one mask dword per qword
   lane before the blend.  The _2 variants exercise the base-less
   VSIB form with pre-computed absolute addresses, then remove the
   base again and restore ymm7 from the Block (offset 0) so the
   dumped state has no address-dependent bits. */
GEN_test_Ronly(VPGATHERDQ_128,
               "vpslld $26, %%xmm7, %%xmm8;"
               "vpsrld $26, %%xmm8, %%xmm8;"
               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherdq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDQ_256,
               "vpslld $26, %%ymm7, %%ymm8;"
               "vpsrld $26, %%ymm8, %%ymm8;"
               "vextracti128 $1, %%ymm6, %%xmm9;"
               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherdq %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_128_1,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherqq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_256_1,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherqq %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_128_2,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vpgatherqq %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_256_2,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vpgatherqq %%ymm6, 1(,%%ymm8,1), %%ymm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")
| 1243 | |
| 1244 | /* Comment duplicated above, for convenient reference: |
| 1245 | Allowed operands in test insns: |
| 1246 | Reg form: %ymm6, %ymm7, %ymm8, %ymm9 and %r14. |
| 1247 | Mem form: (%rax), %ymm7, %ymm8, %ymm9 and %r14. |
| 1248 | Imm8 etc fields are also allowed, where they make sense. |
sewardj | 44e5376 | 2015-01-28 11:11:02 +0000 | [diff] [blame] | 1249 | Both forms may use ymm0 as scratch. Mem form may also use |
| 1250 | ymm6 as scratch. |
sewardj | f014538 | 2013-03-27 11:42:05 +0000 | [diff] [blame] | 1251 | */ |
| 1252 | |
/* How many times each generated test is run by default. */
#define N_DEFAULT_ITERS 3

// Do the specified test some number of times
// (expands to a call of the test_<name> function emitted by the
// GEN_test_* macros; do{}while(0) keeps the expansion a single
// statement).
#define DO_N(_iters, _testfn) \
   do { int i; for (i = 0; i < (_iters); i++) { test_##_testfn(); } } while (0)

// Do the specified test the default number of times
#define DO_D(_testfn) DO_N(N_DEFAULT_ITERS, _testfn)
| 1261 | |
| 1262 | |
/* Run every generated test N_DEFAULT_ITERS times.  The order of the
   calls is load-bearing: the printed register dumps form a
   transcript that is presumably compared against a reference output,
   so do not reorder. */
int main ( void )
{
   DO_D( VPOR_256 );
   DO_D( VPXOR_256 );
   DO_D( VPSUBB_256 );
   DO_D( VPSUBD_256 );
   DO_D( VPADDD_256 );
   DO_D( VPMOVZXWD_256 );
   DO_D( VPMOVZXBW_256 );
   DO_D( VPBLENDVB_256 );
   DO_D( VPMINSD_256 );
   DO_D( VPMAXSD_256 );
   DO_D( VPSHUFB_256 );
   DO_D( VPUNPCKLBW_256 );
   DO_D( VPUNPCKHBW_256 );
   DO_D( VPABSD_256 );
   DO_D( VPACKUSWB_256 );
   DO_D( VPMOVMSKB_256 );
   DO_D( VPAND_256 );
   DO_D( VPCMPEQB_256 );
   DO_D( VPSHUFLW_0x39_256 );
   DO_D( VPSHUFHW_0x39_256 );
   DO_D( VPMULLW_256 );
   DO_D( VPADDUSW_256 );
   DO_D( VPMULHUW_256 );
   DO_D( VPADDUSB_256 );
   DO_D( VPUNPCKLWD_256 );
   DO_D( VPUNPCKHWD_256 );
   DO_D( VPSLLD_0x05_256 );
   DO_D( VPSRLD_0x05_256 );
   DO_D( VPSRAD_0x05_256 );
   DO_D( VPSUBUSB_256 );
   DO_D( VPSUBSB_256 );
   DO_D( VPSRLDQ_0x05_256 );
   DO_D( VPSLLDQ_0x05_256 );
   DO_D( VPANDN_256 );
   DO_D( VPUNPCKLQDQ_256 );
   DO_D( VPSRLW_0x05_256 );
   DO_D( VPSLLW_0x05_256 );
   DO_D( VPADDW_256 );
   DO_D( VPACKSSDW_256 );
   DO_D( VPUNPCKLDQ_256 );
   DO_D( VPCMPEQD_256 );
   DO_D( VPSHUFD_0x39_256 );
   DO_D( VPADDQ_256 );
   DO_D( VPSUBQ_256 );
   DO_D( VPSUBW_256 );
   DO_D( VPCMPEQQ_256 );
   DO_D( VPCMPGTQ_256 );
   DO_D( VPSRLQ_0x05_256 );
   DO_D( VPMULUDQ_256 );
   DO_D( VPMULDQ_256 );
   DO_D( VPSLLQ_0x05_256 );
   DO_D( VPMAXUD_256 );
   DO_D( VPMINUD_256 );
   DO_D( VPMULLD_256 );
   DO_D( VPMAXUW_256 );
   DO_D( VPMINUW_256 );
   DO_D( VPMAXSW_256 );
   DO_D( VPMINSW_256 );
   DO_D( VPMAXUB_256 );
   DO_D( VPMINUB_256 );
   DO_D( VPMAXSB_256 );
   DO_D( VPMINSB_256 );
   DO_D( VPMOVSXBW_256 );
   DO_D( VPSUBUSW_256 );
   DO_D( VPSUBSW_256 );
   DO_D( VPCMPEQW_256 );
   DO_D( VPADDB_256 );
   DO_D( VPUNPCKHDQ_256 );
   DO_D( VPMOVSXDQ_256 );
   DO_D( VPMOVSXWD_256 );
   DO_D( VPMULHW_256 );
   DO_D( VPUNPCKHQDQ_256 );
   DO_D( VPSRAW_0x05_256 );
   DO_D( VPCMPGTB_256 );
   DO_D( VPCMPGTW_256 );
   DO_D( VPCMPGTD_256 );
   DO_D( VPMOVZXBD_256 );
   DO_D( VPMOVSXBD_256 );
   DO_D( VPALIGNR_256_1of3 );
   DO_D( VPALIGNR_256_2of3 );
   DO_D( VPALIGNR_256_3of3 );
   DO_D( VPBLENDW_256_0x00 );
   DO_D( VPBLENDW_256_0xFE );
   DO_D( VPBLENDW_256_0x30 );
   DO_D( VPBLENDW_256_0x21 );
   DO_D( VPBLENDW_256_0xD7 );
   DO_D( VPBLENDW_256_0xB5 );
   DO_D( VPBLENDW_256_0x85 );
   DO_D( VPBLENDW_256_0x29 );
   DO_D( VPSLLW_256 );
   DO_D( VPSRLW_256 );
   DO_D( VPSRAW_256 );
   DO_D( VPSLLD_256 );
   DO_D( VPSRLD_256 );
   DO_D( VPSRAD_256 );
   DO_D( VPSLLQ_256 );
   DO_D( VPSRLQ_256 );
   DO_D( VPMADDWD_256 );
   DO_D( VMOVNTDQA_256 );
   DO_D( VPACKSSWB_256 );
   DO_D( VPAVGB_256 );
   DO_D( VPAVGW_256 );
   DO_D( VPADDSB_256 );
   DO_D( VPADDSW_256 );
   DO_D( VPHADDW_256 );
   DO_D( VPHADDD_256 );
   DO_D( VPHADDSW_256 );
   DO_D( VPMADDUBSW_256 );
   DO_D( VPHSUBW_256 );
   DO_D( VPHSUBD_256 );
   DO_D( VPHSUBSW_256 );
   DO_D( VPABSB_256 );
   DO_D( VPABSW_256 );
   DO_D( VPMOVSXBQ_256 );
   DO_D( VPMOVSXWQ_256 );
   DO_D( VPACKUSDW_256 );
   DO_D( VPMOVZXBQ_256 );
   DO_D( VPMOVZXWQ_256 );
   DO_D( VPMOVZXDQ_256 );
   DO_D( VMPSADBW_256_0x0 );
   DO_D( VMPSADBW_256_0x39 );
   DO_D( VMPSADBW_256_0x32 );
   DO_D( VMPSADBW_256_0x2b );
   DO_D( VMPSADBW_256_0x24 );
   DO_D( VMPSADBW_256_0x1d );
   DO_D( VMPSADBW_256_0x16 );
   DO_D( VMPSADBW_256_0x0f );
   DO_D( VPSADBW_256 );
   DO_D( VPSIGNB_256 );
   DO_D( VPSIGNW_256 );
   DO_D( VPSIGND_256 );
   DO_D( VPMULHRSW_256 );
   DO_D( VBROADCASTI128 );
   DO_D( VEXTRACTI128_0x0 );
   DO_D( VEXTRACTI128_0x1 );
   DO_D( VINSERTI128_0x0 );
   DO_D( VINSERTI128_0x1 );
   DO_D( VPERM2I128_0x00 );
   DO_D( VPERM2I128_0xFF );
   DO_D( VPERM2I128_0x30 );
   DO_D( VPERM2I128_0x21 );
   DO_D( VPERM2I128_0x12 );
   DO_D( VPERM2I128_0x03 );
   DO_D( VPERM2I128_0x85 );
   DO_D( VPERM2I128_0x5A );
   DO_D( VBROADCASTSS_128 );
   DO_D( VBROADCASTSS_256 );
   DO_D( VBROADCASTSD_256 );
   DO_D( VPERMD );
   DO_D( VPERMQ_0x00 );
   DO_D( VPERMQ_0xFE );
   DO_D( VPERMQ_0x30 );
   DO_D( VPERMQ_0x21 );
   DO_D( VPERMQ_0xD7 );
   DO_D( VPERMQ_0xB5 );
   DO_D( VPERMQ_0x85 );
   DO_D( VPERMQ_0x29 );
   DO_D( VPERMPS );
   DO_D( VPERMPD_0x00 );
   DO_D( VPERMPD_0xFE );
   DO_D( VPERMPD_0x30 );
   DO_D( VPERMPD_0x21 );
   DO_D( VPERMPD_0xD7 );
   DO_D( VPERMPD_0xB5 );
   DO_D( VPERMPD_0x85 );
   DO_D( VPERMPD_0x29 );
   DO_D( VPBLENDD_128_0x00 );
   DO_D( VPBLENDD_128_0x02 );
   DO_D( VPBLENDD_128_0x04 );
   DO_D( VPBLENDD_128_0x06 );
   DO_D( VPBLENDD_128_0x08 );
   DO_D( VPBLENDD_128_0x0A );
   DO_D( VPBLENDD_128_0x0C );
   DO_D( VPBLENDD_128_0x0E );
   DO_D( VPBLENDD_256_0x00 );
   DO_D( VPBLENDD_256_0xFE );
   DO_D( VPBLENDD_256_0x30 );
   DO_D( VPBLENDD_256_0x21 );
   DO_D( VPBLENDD_256_0xD7 );
   DO_D( VPBLENDD_256_0xB5 );
   DO_D( VPBLENDD_256_0x85 );
   DO_D( VPBLENDD_256_0x29 );
   DO_D( VPSLLVD_128 );
   DO_D( VPSLLVD_256 );
   DO_D( VPSLLVQ_128 );
   DO_D( VPSLLVQ_256 );
   DO_D( VPSRLVD_128 );
   DO_D( VPSRLVD_256 );
   DO_D( VPSRLVQ_128 );
   DO_D( VPSRLVQ_256 );
   DO_D( VPSRAVD_128 );
   DO_D( VPSRAVD_256 );
   DO_D( VPBROADCASTB_128 );
   DO_D( VPBROADCASTB_256 );
   DO_D( VPBROADCASTW_128 );
   DO_D( VPBROADCASTW_256 );
   DO_D( VPBROADCASTD_128 );
   DO_D( VPBROADCASTD_256 );
   DO_D( VPBROADCASTQ_128 );
   DO_D( VPBROADCASTQ_256 );
   DO_D( VPMASKMOVD_128_LoadForm );
   DO_D( VPMASKMOVD_256_LoadForm );
   DO_D( VPMASKMOVQ_128_LoadForm );
   DO_D( VPMASKMOVQ_256_LoadForm );
   DO_D( VPMASKMOVD_128_StoreForm );
   DO_D( VPMASKMOVD_256_StoreForm );
   DO_D( VPMASKMOVQ_128_StoreForm );
   DO_D( VPMASKMOVQ_256_StoreForm );
   /* Fill the byte array that the gather tests below index into.
      Its C name differs per platform (the asm in those tests always
      uses the symbol _randArray); see the #if at the top of the
      file. */
#if defined(VGO_darwin)
   { int i; for (i = 0; i < sizeof(randArray); i++) randArray[i] = randUChar(); }
#else
   { int i; for (i = 0; i < sizeof(_randArray); i++) _randArray[i] = randUChar(); }
#endif
   DO_D( VGATHERDPS_128 );
   DO_D( VGATHERDPS_256 );
   DO_D( VGATHERQPS_128_1 );
   DO_D( VGATHERQPS_256_1 );
   DO_D( VGATHERQPS_128_2 );
   DO_D( VGATHERQPS_256_2 );
   DO_D( VGATHERDPD_128 );
   DO_D( VGATHERDPD_256 );
   DO_D( VGATHERQPD_128_1 );
   DO_D( VGATHERQPD_256_1 );
   DO_D( VGATHERQPD_128_2 );
   DO_D( VGATHERQPD_256_2 );
   DO_D( VPGATHERDD_128 );
   DO_D( VPGATHERDD_256 );
   DO_D( VPGATHERQD_128_1 );
   DO_D( VPGATHERQD_256_1 );
   DO_D( VPGATHERQD_128_2 );
   DO_D( VPGATHERQD_256_2 );
   DO_D( VPGATHERDQ_128 );
   DO_D( VPGATHERDQ_256 );
   DO_D( VPGATHERQQ_128_1 );
   DO_D( VPGATHERQQ_256_1 );
   DO_D( VPGATHERQQ_128_2 );
   DO_D( VPGATHERQQ_256_2 );
   return 0;
}