| #include <stdio.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <math.h> |
| #include "tests/malloc.h" |
| |
/* Short aliases for the scalar types used throughout the test. */
typedef unsigned char      UChar;
typedef unsigned int       UInt;
typedef unsigned long      UWord;
typedef unsigned long long ULong;
typedef double             Double;
typedef float              Float;

/* True when the low five address bits of _ptr are clear, i.e. the
   pointer is a multiple of 32. */
#define IS_32_ALIGNED(_ptr) ((((UWord)(_ptr)) & 0x1F) == 0)

/* One 16-byte vector, viewable as bytes, 32-bit words, floats or doubles. */
typedef union {
   UChar  u8[16];
   UInt   u32[4];
   Float  f32[4];
   Double f64[2];
} XMM;

/* One 32-byte vector; xmm[0] overlays bytes 0..15 and xmm[1] bytes 16..31. */
typedef union {
   UChar u8[32];
   UInt  u32[8];
   XMM   xmm[2];
} YMM;

/* Working set of one test: four register images followed by one
   memory-operand image, in that order. */
typedef struct {
   YMM r1;
   YMM r2;
   YMM r3;
   YMM r4;
   YMM m;
} Block;
| |
| void showFloat ( XMM* vec, int idx ) |
| { |
| Float f = vec->f32[idx]; |
| int neg = signbit (f); |
| char sign = neg != 0 ? '-' : ' '; |
| switch (fpclassify (f)) { |
| case FP_NORMAL: { |
| for (int i = idx * 4 + 3; i >= idx * 4; i--) |
| printf("%02x", (UInt)vec->u8[i]); |
| break; |
| } |
| case FP_INFINITE: { |
| printf ("[ %cINF ]", sign); |
| break; |
| } |
| case FP_ZERO: { |
| printf ("[%cZERO ]", sign); |
| break; |
| } |
| case FP_NAN: { |
| printf ("[ NAN ]"); |
| break; |
| } |
| default: { |
| printf ("[%cSUBNR]", sign); |
| break; |
| } |
| } |
| } |
| |
| void showDouble ( XMM* vec, int idx ) |
| { |
| Double d = vec->f64[idx]; |
| int neg = signbit (d); |
| char sign = neg != 0 ? '-' : ' '; |
| switch (fpclassify (d)) { |
| case FP_NORMAL: { |
| for (int i = idx * 8 + 7; i >= idx * 8; i--) |
| printf("%02x", (UInt)vec->u8[i]); |
| break; |
| } |
| case FP_INFINITE: { |
| printf ("[ %cINF ]", sign); |
| break; |
| } |
| case FP_ZERO: { |
| printf ("[ %cZERO ]", sign); |
| break; |
| } |
| case FP_NAN: { |
| printf ("[ NAN ]"); |
| break; |
| } |
| default: { |
| printf ("[ %cSUBNORMAL ]", sign); |
| break; |
| } |
| } |
| } |
| |
| void showXMM ( XMM* vec, int isDouble ) |
| { |
| if (isDouble) { |
| showDouble ( vec, 1 ); |
| printf ("."); |
| showDouble ( vec, 0 ); |
| } else { |
| showFloat ( vec, 3 ); |
| printf ("."); |
| showFloat ( vec, 2 ); |
| printf ("."); |
| showFloat ( vec, 1 ); |
| printf ("."); |
| showFloat ( vec, 0 ); |
| } |
| } |
| |
| void showYMM ( YMM* vec, int isDouble ) |
| { |
| assert(IS_32_ALIGNED(vec)); |
| showXMM ( &vec->xmm[1], isDouble ); |
| printf("."); |
| showXMM ( &vec->xmm[0], isDouble ); |
| } |
| |
| void showBlock ( char* msg, Block* block, int isDouble ) |
| { |
| printf(" %s\n", msg); |
| printf("r1: "); showYMM(&block->r1, isDouble); printf("\n"); |
| printf("r2: "); showYMM(&block->r2, isDouble); printf("\n"); |
| printf("r3: "); showYMM(&block->r3, isDouble); printf("\n"); |
| printf("r4: "); showYMM(&block->r4, isDouble); printf("\n"); |
| printf(" m: "); showYMM(&block->m, isDouble); printf("\n"); |
| } |
| |
| static Double special_values[10]; |
| |
| static __attribute__((noinline)) |
| Double negate ( Double d ) { return -d; } |
| static __attribute__((noinline)) |
| Double divf64 ( Double x, Double y ) { return x/y; } |
| |
| static __attribute__((noinline)) |
| Double plusZero ( void ) { return 0.0; } |
| static __attribute__((noinline)) |
| Double minusZero ( void ) { return negate(plusZero()); } |
| |
| static __attribute__((noinline)) |
| Double plusOne ( void ) { return 1.0; } |
| static __attribute__((noinline)) |
| Double minusOne ( void ) { return negate(plusOne()); } |
| |
| static __attribute__((noinline)) |
| Double plusInf ( void ) { return 1.0 / 0.0; } |
| static __attribute__((noinline)) |
| Double minusInf ( void ) { return negate(plusInf()); } |
| |
| static __attribute__((noinline)) |
| Double plusNaN ( void ) { return divf64(plusInf(),plusInf()); } |
| static __attribute__((noinline)) |
| Double minusNaN ( void ) { return negate(plusNaN()); } |
| |
| static __attribute__((noinline)) |
| Double plusDenorm ( void ) { return 1.23e-315 / 1e3; } |
| static __attribute__((noinline)) |
| Double minusDenorm ( void ) { return negate(plusDenorm()); } |
| |
| static void init_special_values ( void ) |
| { |
| special_values[0] = plusZero(); |
| special_values[1] = minusZero(); |
| special_values[2] = plusOne(); |
| special_values[3] = minusOne(); |
| special_values[4] = plusInf(); |
| special_values[5] = minusInf(); |
| special_values[6] = plusNaN(); |
| special_values[7] = minusNaN(); |
| special_values[8] = plusDenorm(); |
| special_values[9] = minusDenorm(); |
| } |
| |
| void specialFBlock ( Block* b ) |
| { |
| int i; |
| Float* p = (Float*)b; |
| for (i = 0; i < sizeof(Block) / sizeof(Float); i++) |
| p[i] = (Float) special_values[i % 10]; |
| } |
| |
| void specialDBlock ( Block* b ) |
| { |
| int i; |
| Double* p = (Double*)b; |
| for (i = 0; i < sizeof(Block) / sizeof(Double); i++) |
| p[i] = special_values[i % 10]; |
| } |
| |
| UChar randUChar ( void ) |
| { |
| static UInt seed = 80021; |
| seed = 1103515245 * seed + 12345; |
| return (seed >> 17) & 0xFF; |
| } |
| |
| void randBlock ( Block* b ) |
| { |
| int i; |
| UChar* p = (UChar*)b; |
| for (i = 0; i < sizeof(Block); i++) |
| p[i] = randUChar(); |
| } |
| |
| void oneBlock ( Block* b ) |
| { |
| int i; |
| UChar* p = (UChar*)b; |
| for (i = 0; i < sizeof(Block); i++) |
| p[i] = 1; |
| } |
| |
/* GEN_test(_name, _instr, _isD) defines

      void test_<_name>(const char *n, Block *b)

   which prints "<_name> <n>", dumps the block, executes _instr once
   inside a single asm statement, and dumps the block again.

   The asm loads the 32-byte slots at byte offsets 0, 32, 64 and 96 of *b
   into ymm7, ymm8, ymm6 and ymm9, and points r14 at offset 128 so that
   _instr can name that slot as a memory operand.  After _instr the same
   four registers are stored back to the same offsets.  The loads and
   stores use vmovdqa, so *b has to be 32-byte aligned.

   _instr must be a string literal written for an extended-asm template
   (register names prefixed with "%%").  _isD is only forwarded to
   showBlock to choose between the double and float dump formats. */
#define GEN_test(_name, _instr, _isD)                                  \
   __attribute__ ((noinline)) void                                     \
   test_##_name ( const char *n, Block* b)                             \
   {                                                                   \
      printf("%s %s\n", #_name, n);                                    \
      showBlock("before", b, _isD);                                    \
      __asm__ __volatile__(                                            \
         /* load the four register images */                           \
         "vmovdqa 0(%0),%%ymm7\n\t"                                    \
         "vmovdqa 32(%0),%%ymm8\n\t"                                   \
         "vmovdqa 64(%0),%%ymm6\n\t"                                   \
         "vmovdqa 96(%0),%%ymm9\n\t"                                   \
         /* r14 = address of the memory-operand slot */                \
         "leaq 128(%0),%%r14\n\t"                                      \
         /* instruction under test */                                  \
         _instr "\n\t"                                                 \
         /* write all four registers back */                           \
         "vmovdqa %%ymm7, 0(%0)\n\t"                                   \
         "vmovdqa %%ymm8, 32(%0)\n\t"                                  \
         "vmovdqa %%ymm6, 64(%0)\n\t"                                  \
         "vmovdqa %%ymm9, 96(%0)\n\t"                                  \
         : /*OUT*/                                                     \
         : /*IN*/"r"(b)                                                \
         : /*TRASH*/"xmm7","xmm8","xmm6","xmm9","r14","memory","cc"    \
      );                                                               \
      showBlock("after", b, _isD);                                     \
      printf("\n");                                                    \
   }
| |
| /* All these defines do the same thing (and someone with stronger |
| preprocessor foo could probably express things much smaller). |
| They generate 4 different functions to test 4 variants of an |
| fma4 instruction. One with as input 4 registers, one where |
| the output register is also one of the input registers and |
| two versions where different inputs are a memory location. |
| The xmm variants create 128 versions, the ymm variants 256. */ |
| |
| #define GEN_test_VFMADDPD_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); |
| GEN_test_VFMADDPD_xmm(VFMADDPD) |
| |
| #define GEN_test_VFMADDPD_ymm(_name) \ |
| GEN_test(_name##_ymm, \ |
| "vfmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_src_dst, \ |
| "vfmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_mem1, \ |
| "vfmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_mem2, \ |
| "vfmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); |
| GEN_test_VFMADDPD_ymm(VFMADDPD) |
| |
| #define GEN_test_VFMADDPS_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); |
| GEN_test_VFMADDPS_xmm(VFMADDPS) |
| |
| #define GEN_test_VFMADDPS_ymm(_name) \ |
| GEN_test(_name##_ymm, \ |
| "vfmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_src_dst, \ |
| "vfmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_mem1, \ |
| "vfmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_mem2, \ |
| "vfmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); |
| GEN_test_VFMADDPS_ymm(VFMADDPS) |
| |
| #define GEN_test_VFMADDSD_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); |
| GEN_test_VFMADDSD_xmm(VFMADDSD) |
| |
| #define GEN_test_VFMADDSS_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); |
| GEN_test_VFMADDSS_xmm(VFMADDSS) |
| |
| #define GEN_test_VFMADDSUBPD_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfmaddsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfmaddsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfmaddsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfmaddsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); |
| GEN_test_VFMADDSUBPD_xmm(VFMADDSUBPD) |
| |
| #define GEN_test_VFMADDSUBPD_ymm(_name) \ |
| GEN_test(_name##_ymm, \ |
| "vfmaddsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_src_dst, \ |
| "vfmaddsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_mem1, \ |
| "vfmaddsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_mem2, \ |
| "vfmaddsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); |
| GEN_test_VFMADDSUBPD_ymm(VFMADDSUBPD) |
| |
| #define GEN_test_VFMADDSUBPS_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfmaddsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfmaddsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfmaddsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfmaddsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); |
| GEN_test_VFMADDSUBPS_xmm(VFMADDSUBPS) |
| |
| #define GEN_test_VFMADDSUBPS_ymm(_name) \ |
| GEN_test(_name##_ymm, \ |
| "vfmaddsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_src_dst, \ |
| "vfmaddsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_mem1, \ |
| "vfmaddsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_mem2, \ |
| "vfmaddsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); |
| GEN_test_VFMADDSUBPS_ymm(VFMADDSUBPS) |
| |
| #define GEN_test_VFMSUBADDPD_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfmsubaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfmsubaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfmsubaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfmsubaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); |
| GEN_test_VFMSUBADDPD_xmm(VFMSUBADDPD) |
| |
| #define GEN_test_VFMSUBADDPD_ymm(_name) \ |
| GEN_test(_name##_ymm, \ |
| "vfmsubaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_src_dst, \ |
| "vfmsubaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_mem1, \ |
| "vfmsubaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_mem2, \ |
| "vfmsubaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); |
| GEN_test_VFMSUBADDPD_ymm(VFMSUBADDPD) |
| |
| #define GEN_test_VFMSUBADDPS_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfmsubaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfmsubaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfmsubaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfmsubaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); |
| GEN_test_VFMSUBADDPS_xmm(VFMSUBADDPS) |
| |
| #define GEN_test_VFMSUBADDPS_ymm(_name) \ |
| GEN_test(_name##_ymm, \ |
| "vfmsubaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_src_dst, \ |
| "vfmsubaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_mem1, \ |
| "vfmsubaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_mem2, \ |
| "vfmsubaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); |
| GEN_test_VFMSUBADDPS_ymm(VFMSUBADDPS) |
| |
| #define GEN_test_VFMSUBPD_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); |
| GEN_test_VFMSUBPD_xmm(VFMSUBPD) |
| |
| #define GEN_test_VFMSUBPD_ymm(_name) \ |
| GEN_test(_name##_ymm, \ |
| "vfmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_src_dst, \ |
| "vfmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_mem1, \ |
| "vfmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_mem2, \ |
| "vfmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); |
| GEN_test_VFMSUBPD_ymm(VFMSUBPD) |
| |
| #define GEN_test_VFMSUBPS_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); |
| GEN_test_VFMSUBPS_xmm(VFMSUBPS) |
| |
| #define GEN_test_VFMSUBPS_ymm(_name) \ |
| GEN_test(_name##_ymm, \ |
| "vfmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_src_dst, \ |
| "vfmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_mem1, \ |
| "vfmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_mem2, \ |
| "vfmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); |
| GEN_test_VFMSUBPS_ymm(VFMSUBPS) |
| |
| #define GEN_test_VFMSUBSD_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); |
| GEN_test_VFMSUBSD_xmm(VFMSUBSD) |
| |
| #define GEN_test_VFMSUBSS_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); |
| GEN_test_VFMSUBSS_xmm(VFMSUBSS) |
| |
| #define GEN_test_VFNMADDPD_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfnmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfnmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfnmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfnmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); |
| GEN_test_VFNMADDPD_xmm(VFNMADDPD) |
| |
| #define GEN_test_VFNMADDPD_ymm(_name) \ |
| GEN_test(_name##_ymm, \ |
| "vfnmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_src_dst, \ |
| "vfnmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_mem1, \ |
| "vfnmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_mem2, \ |
| "vfnmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); |
| GEN_test_VFNMADDPD_ymm(VFNMADDPD) |
| |
| #define GEN_test_VFNMADDPS_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfnmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfnmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfnmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfnmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); |
| GEN_test_VFNMADDPS_xmm(VFNMADDPS) |
| |
| #define GEN_test_VFNMADDPS_ymm(_name) \ |
| GEN_test(_name##_ymm, \ |
| "vfnmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_src_dst, \ |
| "vfnmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_mem1, \ |
| "vfnmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_mem2, \ |
| "vfnmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); |
| GEN_test_VFNMADDPS_ymm(VFNMADDPS) |
| |
| #define GEN_test_VFNMADDSD_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfnmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfnmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfnmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfnmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); |
| GEN_test_VFNMADDSD_xmm(VFNMADDSD) |
| |
| #define GEN_test_VFNMADDSS_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfnmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfnmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfnmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfnmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); |
| GEN_test_VFNMADDSS_xmm(VFNMADDSS) |
| |
| #define GEN_test_VFNMSUBPD_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfnmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfnmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfnmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfnmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); |
| GEN_test_VFNMSUBPD_xmm(VFNMSUBPD) |
| |
| #define GEN_test_VFNMSUBPD_ymm(_name) \ |
| GEN_test(_name##_ymm, \ |
| "vfnmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_src_dst, \ |
| "vfnmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_mem1, \ |
| "vfnmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ |
| GEN_test(_name##_ymm_mem2, \ |
| "vfnmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); |
| GEN_test_VFNMSUBPD_ymm(VFNMSUBPD) |
| |
| #define GEN_test_VFNMSUBPS_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfnmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfnmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfnmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfnmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); |
| GEN_test_VFNMSUBPS_xmm(VFNMSUBPS) |
| |
| #define GEN_test_VFNMSUBPS_ymm(_name) \ |
| GEN_test(_name##_ymm, \ |
| "vfnmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_src_dst, \ |
| "vfnmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_mem1, \ |
| "vfnmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ |
| GEN_test(_name##_ymm_mem2, \ |
| "vfnmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); |
| GEN_test_VFNMSUBPS_ymm(VFNMSUBPS) |
| |
| #define GEN_test_VFNMSUBSD_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfnmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfnmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfnmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfnmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); |
| GEN_test_VFNMSUBSD_xmm(VFNMSUBSD) |
| |
| #define GEN_test_VFNMSUBSS_xmm(_name) \ |
| GEN_test(_name##_xmm, \ |
| "vfnmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_src_dst, \ |
| "vfnmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem1, \ |
| "vfnmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ |
| GEN_test(_name##_xmm_mem2, \ |
| "vfnmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); |
| GEN_test_VFNMSUBSS_xmm(VFNMSUBSS) |
| |
/* Call the generated function test_<_name>_<_sub> with a label for the
   data set and the block to operate on.

   The expansion is a plain call expression: the statement terminator is
   supplied by the user of the macro, so it behaves like an ordinary
   function call in any statement context. */
#define DO_test_block(_name, _sub, _bname, _block) \
   test_##_name##_##_sub(_bname,_block)
| |
/* Run test_<_name>_<_sub> on three data sets in a freshly allocated
   block: all bytes equal to 1, the special floating-point values
   (double table when _isD is non-zero, float table otherwise), and
   pseudo-random bytes.  The block is reused between the runs, so each
   fill overwrites the result of the previous run, and is freed at the
   end.

   Wrapped in do { } while (0) so the macro acts as a single statement
   when followed by ';', including under an unbraced if/else.  The
   allocation is checked before use; memalign32 comes from
   tests/malloc.h, whose failure behaviour is not visible here. */
#define DO_test(_name, _sub, _isD)                    \
   do {                                               \
      Block* b = memalign32(sizeof(Block));           \
      assert(b != NULL);                              \
      oneBlock(b);                                    \
      DO_test_block(_name, _sub, "ones", b);          \
      if (_isD) {                                     \
         specialDBlock(b);                            \
         DO_test_block(_name, _sub, "specialD", b);   \
      } else {                                        \
         specialFBlock(b);                            \
         DO_test_block(_name, _sub, "specialF", b);   \
      }                                               \
      randBlock(b);                                   \
      DO_test_block(_name, _sub, "rand", b);          \
      free(b);                                        \
   } while (0)
| |
/* Run the four 128-bit variants of one instruction, in the order
   xmm, xmm_src_dst, xmm_mem1, xmm_mem2.

   Wrapped in do { } while (0) so the four runs form a single statement
   when the macro is followed by ';', including under an unbraced
   if/else. */
#define DO_tests_xmm(_name,_isD)              \
   do {                                       \
      DO_test(_name, xmm, _isD);              \
      DO_test(_name, xmm_src_dst, _isD);      \
      DO_test(_name, xmm_mem1, _isD);         \
      DO_test(_name, xmm_mem2, _isD);         \
   } while (0)
| |
/* Run the four 256-bit variants of one instruction, in the order
   ymm, ymm_src_dst, ymm_mem1, ymm_mem2.

   Wrapped in do { } while (0) so the four runs form a single statement
   when the macro is followed by ';', including under an unbraced
   if/else. */
#define DO_tests_ymm(_name,_isD)              \
   do {                                       \
      DO_test(_name, ymm, _isD);              \
      DO_test(_name, ymm_src_dst, _isD);      \
      DO_test(_name, ymm_mem1, _isD);         \
      DO_test(_name, ymm_mem2, _isD);         \
   } while (0)
| |
| int main ( void ) |
| { |
| init_special_values(); |
| |
| // 128 |
| DO_tests_xmm(VFMADDPD, 1); |
| DO_tests_xmm(VFMADDPS, 0); |
| DO_tests_xmm(VFMADDSD, 1); |
| DO_tests_xmm(VFMADDSS, 0); |
| DO_tests_xmm(VFMADDSUBPD, 1); |
| DO_tests_xmm(VFMADDSUBPS, 0); |
| DO_tests_xmm(VFMSUBADDPD, 1); |
| DO_tests_xmm(VFMSUBADDPS, 0); |
| DO_tests_xmm(VFMSUBPD, 1); |
| DO_tests_xmm(VFMSUBPS, 0); |
| DO_tests_xmm(VFMSUBSD, 1); |
| DO_tests_xmm(VFMSUBSS, 0); |
| DO_tests_xmm(VFNMADDPD, 1); |
| DO_tests_xmm(VFNMADDPS, 0); |
| DO_tests_xmm(VFNMADDSD, 1); |
| DO_tests_xmm(VFNMADDSS, 0); |
| DO_tests_xmm(VFNMSUBPD, 1); |
| DO_tests_xmm(VFNMSUBPS, 0); |
| DO_tests_xmm(VFNMSUBSD, 1); |
| DO_tests_xmm(VFNMSUBSS, 0); |
| |
| // 256 |
| /* |
| DO_tests_ymm(VFMADDPD, 1); |
| DO_tests_ymm(VFMADDPS, 0); |
| DO_tests_ymm(VFMADDSUBPD, 1); |
| DO_tests_ymm(VFMADDSUBPS, 0); |
| DO_tests_ymm(VFMSUBADDPD, 1); |
| DO_tests_ymm(VFMSUBADDPS, 0); |
| DO_tests_ymm(VFMSUBPD, 1); |
| DO_tests_ymm(VFMSUBPS, 0); |
| DO_tests_ymm(VFNMADDPD, 1); |
| DO_tests_ymm(VFNMADDPS, 0); |
| DO_tests_ymm(VFNMSUBPD, 1); |
| DO_tests_ymm(VFNMSUBPS, 0); |
| */ |
| |
| return 0; |
| } |