| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 1 | /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 2 | * | 
|  | 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 
|  | 4 | * of this software and associated documentation files (the "Software"), to deal | 
|  | 5 | * in the Software without restriction, including without limitation the rights | 
|  | 6 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
|  | 7 | * copies of the Software, and to permit persons to whom the Software is | 
|  | 8 | * furnished to do so, subject to the following conditions: | 
|  | 9 | * | 
|  | 10 | * The above copyright notice and this permission notice shall be included in | 
|  | 11 | * all copies or substantial portions of the Software. | 
|  | 12 | * | 
|  | 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|  | 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|  | 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
|  | 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
|  | 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
|  | 18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 
|  | 19 | * THE SOFTWARE. | 
|  | 20 | * | 
|  | 21 | *===-----------------------------------------------------------------------=== | 
|  | 22 | */ | 
|  | 23 | #ifndef __IMMINTRIN_H | 
|  | 24 | #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." | 
|  | 25 | #endif | 
|  | 26 |  | 
|  | 27 | #ifndef __AVX512FINTRIN_H | 
|  | 28 | #define __AVX512FINTRIN_H | 
|  | 29 |  | 
/* Element-typed 64-byte vector types used for casts inside this header. */
typedef double    __v8df  __attribute__((__vector_size__(64)));
typedef float     __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di  __attribute__((__vector_size__(64)));
typedef int       __v16si __attribute__((__vector_size__(64)));

/* 64-byte vector types that appear in the intrinsic signatures below. */
typedef float     __m512  __attribute__((__vector_size__(64)));
typedef double    __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Write-mask types passed to the masked builtins. */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
|  | 41 |  | 
/* Rounding mode macros.
 *
 * NOTE(review): _MM_FROUND_FLOOR and _MM_FROUND_CEIL are used further down
 * but are not defined here, so another header presumably defines this family
 * as well. Keep the replacement tokens spelled exactly as they are so that a
 * duplicate definition stays a benign redefinition -- confirm.
 */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
|  | 48 |  | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 49 | /* Create vectors with repeated elements */ | 
|  | 50 |  | 
|  | 51 | static  __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 52 | _mm512_setzero_si512(void) | 
|  | 53 | { | 
|  | 54 | return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; | 
|  | 55 | } | 
|  | 56 |  | 
|  | 57 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 58 | _mm512_maskz_set1_epi32(__mmask16 __M, int __A) | 
|  | 59 | { | 
|  | 60 | return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, | 
|  | 61 | (__v16si) | 
|  | 62 | _mm512_setzero_si512 (), | 
|  | 63 | __M); | 
|  | 64 | } | 
|  | 65 |  | 
|  | 66 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 67 | _mm512_maskz_set1_epi64(__mmask8 __M, long long __A) | 
|  | 68 | { | 
|  | 69 | #ifdef __x86_64__ | 
|  | 70 | return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, | 
|  | 71 | (__v8di) | 
|  | 72 | _mm512_setzero_si512 (), | 
|  | 73 | __M); | 
|  | 74 | #else | 
|  | 75 | return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, | 
|  | 76 | (__v8di) | 
|  | 77 | _mm512_setzero_si512 (), | 
|  | 78 | __M); | 
|  | 79 | #endif | 
|  | 80 | } | 
|  | 81 |  | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 82 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 83 | _mm512_setzero_ps(void) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 84 | { | 
|  | 85 | return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, | 
|  | 86 | 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; | 
|  | 87 | } | 
|  | 88 | static  __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 89 | _mm512_setzero_pd(void) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 90 | { | 
|  | 91 | return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; | 
|  | 92 | } | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 93 |  | 
| Adam Nemet | f42e7a2 | 2014-07-30 16:51:22 +0000 | [diff] [blame] | 94 | static __inline __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 95 | _mm512_set1_ps(float __w) | 
|  | 96 | { | 
|  | 97 | return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, | 
|  | 98 | __w, __w, __w, __w, __w, __w, __w, __w  }; | 
|  | 99 | } | 
|  | 100 |  | 
|  | 101 | static __inline __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 102 | _mm512_set1_pd(double __w) | 
|  | 103 | { | 
|  | 104 | return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; | 
|  | 105 | } | 
|  | 106 |  | 
|  | 107 | static __inline __m512i __attribute__((__always_inline__, __nodebug__)) | 
|  | 108 | _mm512_set1_epi32(int __s) | 
|  | 109 | { | 
|  | 110 | return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, | 
|  | 111 | __s, __s, __s, __s, __s, __s, __s, __s }; | 
|  | 112 | } | 
|  | 113 |  | 
|  | 114 | static __inline __m512i __attribute__((__always_inline__, __nodebug__)) | 
|  | 115 | _mm512_set1_epi64(long long __d) | 
|  | 116 | { | 
|  | 117 | return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; | 
|  | 118 | } | 
|  | 119 |  | 
| Adam Nemet | 4abc07c | 2014-08-13 00:29:01 +0000 | [diff] [blame] | 120 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 121 | _mm512_broadcastss_ps(__m128 __X) | 
|  | 122 | { | 
|  | 123 | float __f = __X[0]; | 
|  | 124 | return (__v16sf){ __f, __f, __f, __f, | 
|  | 125 | __f, __f, __f, __f, | 
|  | 126 | __f, __f, __f, __f, | 
|  | 127 | __f, __f, __f, __f }; | 
|  | 128 | } | 
|  | 129 |  | 
|  | 130 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 131 | _mm512_broadcastsd_pd(__m128d __X) | 
|  | 132 | { | 
|  | 133 | double __d = __X[0]; | 
|  | 134 | return (__v8df){ __d, __d, __d, __d, | 
|  | 135 | __d, __d, __d, __d }; | 
|  | 136 | } | 
|  | 137 |  | 
| Adam Nemet | c871ff9 | 2014-07-30 16:51:24 +0000 | [diff] [blame] | 138 | /* Cast between vector types */ | 
|  | 139 |  | 
|  | 140 | static __inline __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 141 | _mm512_castpd256_pd512(__m256d __a) | 
|  | 142 | { | 
|  | 143 | return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); | 
|  | 144 | } | 
|  | 145 |  | 
|  | 146 | static __inline __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 147 | _mm512_castps256_ps512(__m256 __a) | 
|  | 148 | { | 
|  | 149 | return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7, | 
|  | 150 | -1, -1, -1, -1, -1, -1, -1, -1); | 
|  | 151 | } | 
|  | 152 |  | 
|  | 153 | static __inline __m128d __attribute__((__always_inline__, __nodebug__)) | 
|  | 154 | _mm512_castpd512_pd128(__m512d __a) | 
|  | 155 | { | 
|  | 156 | return __builtin_shufflevector(__a, __a, 0, 1); | 
|  | 157 | } | 
|  | 158 |  | 
|  | 159 | static __inline __m128 __attribute__((__always_inline__, __nodebug__)) | 
|  | 160 | _mm512_castps512_ps128(__m512 __a) | 
|  | 161 | { | 
|  | 162 | return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); | 
|  | 163 | } | 
|  | 164 |  | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 165 | /* Arithmetic */ | 
|  | 166 |  | 
| Adam Nemet | a3ebe62 | 2014-07-28 17:14:42 +0000 | [diff] [blame] | 167 | static __inline __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 168 | _mm512_add_pd(__m512d __a, __m512d __b) | 
|  | 169 | { | 
|  | 170 | return __a + __b; | 
|  | 171 | } | 
|  | 172 |  | 
|  | 173 | static __inline __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 174 | _mm512_add_ps(__m512 __a, __m512 __b) | 
|  | 175 | { | 
|  | 176 | return __a + __b; | 
|  | 177 | } | 
|  | 178 |  | 
|  | 179 | static __inline __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 180 | _mm512_mul_pd(__m512d __a, __m512d __b) | 
|  | 181 | { | 
|  | 182 | return __a * __b; | 
|  | 183 | } | 
|  | 184 |  | 
|  | 185 | static __inline __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 186 | _mm512_mul_ps(__m512 __a, __m512 __b) | 
|  | 187 | { | 
|  | 188 | return __a * __b; | 
|  | 189 | } | 
|  | 190 |  | 
|  | 191 | static __inline __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 192 | _mm512_sub_pd(__m512d __a, __m512d __b) | 
|  | 193 | { | 
|  | 194 | return __a - __b; | 
|  | 195 | } | 
|  | 196 |  | 
|  | 197 | static __inline __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 198 | _mm512_sub_ps(__m512 __a, __m512 __b) | 
|  | 199 | { | 
|  | 200 | return __a - __b; | 
|  | 201 | } | 
|  | 202 |  | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 203 | static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 204 | _mm512_max_pd(__m512d __A, __m512d __B) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 205 | { | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 206 | return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, | 
|  | 207 | (__v8df) __B, | 
|  | 208 | (__v8df) | 
|  | 209 | _mm512_setzero_pd (), | 
|  | 210 | (__mmask8) -1, | 
|  | 211 | _MM_FROUND_CUR_DIRECTION); | 
|  | 212 | } | 
|  | 213 |  | 
|  | 214 | static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 215 | _mm512_max_ps(__m512 __A, __m512 __B) | 
|  | 216 | { | 
|  | 217 | return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, | 
|  | 218 | (__v16sf) __B, | 
|  | 219 | (__v16sf) | 
|  | 220 | _mm512_setzero_ps (), | 
|  | 221 | (__mmask16) -1, | 
|  | 222 | _MM_FROUND_CUR_DIRECTION); | 
|  | 223 | } | 
|  | 224 |  | 
|  | 225 | static __inline __m512i | 
|  | 226 | __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 227 | _mm512_max_epi32(__m512i __A, __m512i __B) | 
|  | 228 | { | 
|  | 229 | return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, | 
|  | 230 | (__v16si) __B, | 
|  | 231 | (__v16si) | 
|  | 232 | _mm512_setzero_si512 (), | 
|  | 233 | (__mmask16) -1); | 
|  | 234 | } | 
|  | 235 |  | 
|  | 236 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 237 | _mm512_max_epu32(__m512i __A, __m512i __B) | 
|  | 238 | { | 
|  | 239 | return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, | 
|  | 240 | (__v16si) __B, | 
|  | 241 | (__v16si) | 
|  | 242 | _mm512_setzero_si512 (), | 
|  | 243 | (__mmask16) -1); | 
|  | 244 | } | 
|  | 245 |  | 
|  | 246 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 247 | _mm512_max_epi64(__m512i __A, __m512i __B) | 
|  | 248 | { | 
|  | 249 | return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, | 
|  | 250 | (__v8di) __B, | 
|  | 251 | (__v8di) | 
|  | 252 | _mm512_setzero_si512 (), | 
|  | 253 | (__mmask8) -1); | 
|  | 254 | } | 
|  | 255 |  | 
|  | 256 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 257 | _mm512_max_epu64(__m512i __A, __m512i __B) | 
|  | 258 | { | 
|  | 259 | return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, | 
|  | 260 | (__v8di) __B, | 
|  | 261 | (__v8di) | 
|  | 262 | _mm512_setzero_si512 (), | 
|  | 263 | (__mmask8) -1); | 
|  | 264 | } | 
|  | 265 |  | 
|  | 266 | static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 267 | _mm512_min_pd(__m512d __A, __m512d __B) | 
|  | 268 | { | 
|  | 269 | return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, | 
|  | 270 | (__v8df) __B, | 
|  | 271 | (__v8df) | 
|  | 272 | _mm512_setzero_pd (), | 
|  | 273 | (__mmask8) -1, | 
|  | 274 | _MM_FROUND_CUR_DIRECTION); | 
|  | 275 | } | 
|  | 276 |  | 
|  | 277 | static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 278 | _mm512_min_ps(__m512 __A, __m512 __B) | 
|  | 279 | { | 
|  | 280 | return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, | 
|  | 281 | (__v16sf) __B, | 
|  | 282 | (__v16sf) | 
|  | 283 | _mm512_setzero_ps (), | 
|  | 284 | (__mmask16) -1, | 
|  | 285 | _MM_FROUND_CUR_DIRECTION); | 
|  | 286 | } | 
|  | 287 |  | 
|  | 288 | static __inline __m512i | 
|  | 289 | __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 290 | _mm512_min_epi32(__m512i __A, __m512i __B) | 
|  | 291 | { | 
|  | 292 | return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, | 
|  | 293 | (__v16si) __B, | 
|  | 294 | (__v16si) | 
|  | 295 | _mm512_setzero_si512 (), | 
|  | 296 | (__mmask16) -1); | 
|  | 297 | } | 
|  | 298 |  | 
|  | 299 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 300 | _mm512_min_epu32(__m512i __A, __m512i __B) | 
|  | 301 | { | 
|  | 302 | return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, | 
|  | 303 | (__v16si) __B, | 
|  | 304 | (__v16si) | 
|  | 305 | _mm512_setzero_si512 (), | 
|  | 306 | (__mmask16) -1); | 
|  | 307 | } | 
|  | 308 |  | 
|  | 309 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 310 | _mm512_min_epi64(__m512i __A, __m512i __B) | 
|  | 311 | { | 
|  | 312 | return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, | 
|  | 313 | (__v8di) __B, | 
|  | 314 | (__v8di) | 
|  | 315 | _mm512_setzero_si512 (), | 
|  | 316 | (__mmask8) -1); | 
|  | 317 | } | 
|  | 318 |  | 
|  | 319 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 320 | _mm512_min_epu64(__m512i __A, __m512i __B) | 
|  | 321 | { | 
|  | 322 | return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, | 
|  | 323 | (__v8di) __B, | 
|  | 324 | (__v8di) | 
|  | 325 | _mm512_setzero_si512 (), | 
|  | 326 | (__mmask8) -1); | 
|  | 327 | } | 
|  | 328 |  | 
|  | 329 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 330 | _mm512_mul_epi32(__m512i __X, __m512i __Y) | 
|  | 331 | { | 
|  | 332 | return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, | 
|  | 333 | (__v16si) __Y, | 
|  | 334 | (__v8di) | 
|  | 335 | _mm512_setzero_si512 (), | 
|  | 336 | (__mmask8) -1); | 
|  | 337 | } | 
|  | 338 |  | 
|  | 339 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 340 | _mm512_mul_epu32(__m512i __X, __m512i __Y) | 
|  | 341 | { | 
|  | 342 | return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, | 
|  | 343 | (__v16si) __Y, | 
|  | 344 | (__v8di) | 
|  | 345 | _mm512_setzero_si512 (), | 
|  | 346 | (__mmask8) -1); | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 347 | } | 
|  | 348 |  | 
|  | 349 | static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 350 | _mm512_sqrt_pd(__m512d a) | 
|  | 351 | { | 
|  | 352 | return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a, | 
|  | 353 | (__v8df) _mm512_setzero_pd (), | 
|  | 354 | (__mmask8) -1, | 
|  | 355 | _MM_FROUND_CUR_DIRECTION); | 
|  | 356 | } | 
|  | 357 |  | 
|  | 358 | static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 359 | _mm512_sqrt_ps(__m512 a) | 
|  | 360 | { | 
|  | 361 | return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a, | 
|  | 362 | (__v16sf) _mm512_setzero_ps (), | 
|  | 363 | (__mmask16) -1, | 
|  | 364 | _MM_FROUND_CUR_DIRECTION); | 
|  | 365 | } | 
|  | 366 |  | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 367 | static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 368 | _mm512_rsqrt14_pd(__m512d __A) | 
|  | 369 | { | 
|  | 370 | return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, | 
|  | 371 | (__v8df) | 
|  | 372 | _mm512_setzero_pd (), | 
|  | 373 | (__mmask8) -1);} | 
|  | 374 |  | 
|  | 375 | static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 376 | _mm512_rsqrt14_ps(__m512 __A) | 
|  | 377 | { | 
|  | 378 | return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, | 
|  | 379 | (__v16sf) | 
|  | 380 | _mm512_setzero_ps (), | 
|  | 381 | (__mmask16) -1); | 
|  | 382 | } | 
|  | 383 |  | 
|  | 384 | static  __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | 
|  | 385 | _mm_rsqrt14_ss(__m128 __A, __m128 __B) | 
|  | 386 | { | 
|  | 387 | return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, | 
|  | 388 | (__v4sf) __B, | 
|  | 389 | (__v4sf) | 
|  | 390 | _mm_setzero_ps (), | 
|  | 391 | (__mmask8) -1); | 
|  | 392 | } | 
|  | 393 |  | 
|  | 394 | static  __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) | 
|  | 395 | _mm_rsqrt14_sd(__m128d __A, __m128d __B) | 
|  | 396 | { | 
|  | 397 | return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, | 
|  | 398 | (__v2df) __B, | 
|  | 399 | (__v2df) | 
|  | 400 | _mm_setzero_pd (), | 
|  | 401 | (__mmask8) -1); | 
|  | 402 | } | 
|  | 403 |  | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 404 | static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 405 | _mm512_rcp14_pd(__m512d __A) | 
|  | 406 | { | 
|  | 407 | return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, | 
|  | 408 | (__v8df) | 
|  | 409 | _mm512_setzero_pd (), | 
|  | 410 | (__mmask8) -1); | 
|  | 411 | } | 
|  | 412 |  | 
|  | 413 | static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 414 | _mm512_rcp14_ps(__m512 __A) | 
|  | 415 | { | 
|  | 416 | return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, | 
|  | 417 | (__v16sf) | 
|  | 418 | _mm512_setzero_ps (), | 
|  | 419 | (__mmask16) -1); | 
|  | 420 | } | 
|  | 421 | static  __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 422 | _mm_rcp14_ss(__m128 __A, __m128 __B) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 423 | { | 
|  | 424 | return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, | 
|  | 425 | (__v4sf) __B, | 
|  | 426 | (__v4sf) | 
|  | 427 | _mm_setzero_ps (), | 
|  | 428 | (__mmask8) -1); | 
|  | 429 | } | 
|  | 430 |  | 
|  | 431 | static  __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 432 | _mm_rcp14_sd(__m128d __A, __m128d __B) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 433 | { | 
|  | 434 | return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, | 
|  | 435 | (__v2df) __B, | 
|  | 436 | (__v2df) | 
|  | 437 | _mm_setzero_pd (), | 
|  | 438 | (__mmask8) -1); | 
|  | 439 | } | 
|  | 440 |  | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 441 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 442 | _mm512_floor_ps(__m512 __A) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 443 | { | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 444 | return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, | 
|  | 445 | _MM_FROUND_FLOOR, | 
|  | 446 | (__v16sf) __A, -1, | 
|  | 447 | _MM_FROUND_CUR_DIRECTION); | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 448 | } | 
|  | 449 |  | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 450 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 451 | _mm512_floor_pd(__m512d __A) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 452 | { | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 453 | return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, | 
|  | 454 | _MM_FROUND_FLOOR, | 
|  | 455 | (__v8df) __A, -1, | 
|  | 456 | _MM_FROUND_CUR_DIRECTION); | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 457 | } | 
|  | 458 |  | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 459 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 460 | _mm512_ceil_ps(__m512 __A) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 461 | { | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 462 | return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, | 
|  | 463 | _MM_FROUND_CEIL, | 
|  | 464 | (__v16sf) __A, -1, | 
|  | 465 | _MM_FROUND_CUR_DIRECTION); | 
|  | 466 | } | 
|  | 467 |  | 
|  | 468 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 469 | _mm512_ceil_pd(__m512d __A) | 
|  | 470 | { | 
|  | 471 | return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, | 
|  | 472 | _MM_FROUND_CEIL, | 
|  | 473 | (__v8df) __A, -1, | 
|  | 474 | _MM_FROUND_CUR_DIRECTION); | 
|  | 475 | } | 
|  | 476 |  | 
|  | 477 | static __inline __m512i __attribute__ (( __always_inline__, __nodebug__)) | 
|  | 478 | _mm512_abs_epi64(__m512i __A) | 
|  | 479 | { | 
|  | 480 | return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, | 
|  | 481 | (__v8di) | 
|  | 482 | _mm512_setzero_si512 (), | 
|  | 483 | (__mmask8) -1); | 
|  | 484 | } | 
|  | 485 |  | 
|  | 486 | static __inline __m512i __attribute__ (( __always_inline__, __nodebug__)) | 
|  | 487 | _mm512_abs_epi32(__m512i __A) | 
|  | 488 | { | 
|  | 489 | return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, | 
|  | 490 | (__v16si) | 
|  | 491 | _mm512_setzero_si512 (), | 
|  | 492 | (__mmask16) -1); | 
|  | 493 | } | 
|  | 494 |  | 
|  | 495 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 496 | _mm512_roundscale_ps(__m512 __A, const int __imm) | 
|  | 497 | { | 
|  | 498 | return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, | 
|  | 499 | (__v16sf) __A, -1, | 
|  | 500 | _MM_FROUND_CUR_DIRECTION); | 
|  | 501 | } | 
|  | 502 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 503 | _mm512_roundscale_pd(__m512d __A, const int __imm) | 
|  | 504 | { | 
|  | 505 | return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, | 
|  | 506 | (__v8df) __A, -1, | 
|  | 507 | _MM_FROUND_CUR_DIRECTION); | 
|  | 508 | } | 
|  | 509 |  | 
| Adam Nemet | 2278fcb | 2014-08-14 17:17:57 +0000 | [diff] [blame] | 510 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 511 | _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) | 
|  | 512 | { | 
|  | 513 | return (__m512d) | 
|  | 514 | __builtin_ia32_vfmaddpd512_mask(__A, | 
|  | 515 | __B, | 
|  | 516 | __C, | 
|  | 517 | (__mmask8) -1, | 
|  | 518 | _MM_FROUND_CUR_DIRECTION); | 
|  | 519 | } | 
|  | 520 |  | 
|  | 521 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 522 | _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) | 
|  | 523 | { | 
|  | 524 | return (__m512d) | 
|  | 525 | __builtin_ia32_vfmsubpd512_mask(__A, | 
|  | 526 | __B, | 
|  | 527 | __C, | 
|  | 528 | (__mmask8) -1, | 
|  | 529 | _MM_FROUND_CUR_DIRECTION); | 
|  | 530 | } | 
|  | 531 |  | 
|  | 532 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 533 | _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) | 
|  | 534 | { | 
|  | 535 | return (__m512d) | 
|  | 536 | __builtin_ia32_vfnmaddpd512_mask(__A, | 
|  | 537 | __B, | 
|  | 538 | __C, | 
|  | 539 | (__mmask8) -1, | 
|  | 540 | _MM_FROUND_CUR_DIRECTION); | 
|  | 541 | } | 
|  | 542 |  | 
|  | 543 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 544 | _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) | 
|  | 545 | { | 
|  | 546 | return (__m512) | 
|  | 547 | __builtin_ia32_vfmaddps512_mask(__A, | 
|  | 548 | __B, | 
|  | 549 | __C, | 
|  | 550 | (__mmask16) -1, | 
|  | 551 | _MM_FROUND_CUR_DIRECTION); | 
|  | 552 | } | 
|  | 553 |  | 
|  | 554 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 555 | _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) | 
|  | 556 | { | 
|  | 557 | return (__m512) | 
|  | 558 | __builtin_ia32_vfmsubps512_mask(__A, | 
|  | 559 | __B, | 
|  | 560 | __C, | 
|  | 561 | (__mmask16) -1, | 
|  | 562 | _MM_FROUND_CUR_DIRECTION); | 
|  | 563 | } | 
|  | 564 |  | 
|  | 565 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 566 | _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) | 
|  | 567 | { | 
|  | 568 | return (__m512) | 
|  | 569 | __builtin_ia32_vfnmaddps512_mask(__A, | 
|  | 570 | __B, | 
|  | 571 | __C, | 
|  | 572 | (__mmask16) -1, | 
|  | 573 | _MM_FROUND_CUR_DIRECTION); | 
|  | 574 | } | 
|  | 575 |  | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 576 | /* Vector permutations */ | 
|  | 577 |  | 
|  | 578 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 579 | _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) | 
|  | 580 | { | 
|  | 581 | return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I | 
|  | 582 | /* idx */ , | 
|  | 583 | (__v16si) __A, | 
|  | 584 | (__v16si) __B, | 
|  | 585 | (__mmask16) -1); | 
|  | 586 | } | 
|  | 587 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 588 | _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) | 
|  | 589 | { | 
|  | 590 | return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I | 
|  | 591 | /* idx */ , | 
|  | 592 | (__v8di) __A, | 
|  | 593 | (__v8di) __B, | 
|  | 594 | (__mmask8) -1); | 
|  | 595 | } | 
|  | 596 |  | 
|  | 597 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 598 | _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) | 
|  | 599 | { | 
|  | 600 | return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I | 
|  | 601 | /* idx */ , | 
|  | 602 | (__v8df) __A, | 
|  | 603 | (__v8df) __B, | 
|  | 604 | (__mmask8) -1); | 
|  | 605 | } | 
|  | 606 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 607 | _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) | 
|  | 608 | { | 
|  | 609 | return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I | 
|  | 610 | /* idx */ , | 
|  | 611 | (__v16sf) __A, | 
|  | 612 | (__v16sf) __B, | 
|  | 613 | (__mmask16) -1); | 
|  | 614 | } | 
|  | 615 |  | 
| Adam Nemet | 5bf7baa | 2014-08-05 17:28:23 +0000 | [diff] [blame] | 616 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 617 | _mm512_valign_epi64(__m512i __A, __m512i __B, const int __I) | 
|  | 618 | { | 
|  | 619 | return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A, | 
|  | 620 | (__v8di)__B, | 
|  | 621 | __I, | 
|  | 622 | (__v8di)_mm512_setzero_si512(), | 
|  | 623 | (__mmask8) -1); | 
|  | 624 | } | 
|  | 625 |  | 
|  | 626 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 627 | _mm512_valign_epi32(__m512i __A, __m512i __B, const int __I) | 
|  | 628 | { | 
|  | 629 | return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A, | 
|  | 630 | (__v16si)__B, | 
|  | 631 | __I, | 
|  | 632 | (__v16si)_mm512_setzero_si512(), | 
|  | 633 | (__mmask16) -1); | 
|  | 634 | } | 
|  | 635 |  | 
| Adam Nemet | f893ede | 2015-01-19 20:12:05 +0000 | [diff] [blame] | 636 | /* Vector Extract */ | 
|  | 637 |  | 
/* Unmasked wrapper over __builtin_ia32_extractf64x4_mask, yielding a
 * 4 x double vector from the 8 x double vector A; I is forwarded unchanged
 * as the builtin's selector argument. A is evaluated exactly once.
 *
 * The zero source operand is built with _mm256_setzero_pd() so that it
 * already has the double-vector type. The previous code reinterpreted an
 * integer zero vector from _mm256_setzero_si256(). This also matches
 * _mm512_extractf32x4_ps, which uses _mm_setzero_ps(). */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({                    \
      __m512d __A = (A);                                                 \
      (__m256d)                                                          \
        __builtin_ia32_extractf64x4_mask((__v8df)__A,                    \
                                         (I),                            \
                                         (__v4df)_mm256_setzero_pd(),    \
                                         (__mmask8) -1); })
|  | 645 |  | 
/* Unmasked wrapper over __builtin_ia32_extractf32x4_mask, yielding a
 * 4 x float vector from the 16 x float vector A; I is forwarded unchanged
 * as the builtin's selector argument. A is evaluated exactly once. The
 * source operand is a zero vector and the mask is all ones. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  __m512 __vec = (A); \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)__vec, (I), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1); })
|  | 653 |  | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 654 | /* Vector Blend */ | 
|  | 655 |  | 
|  | 656 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 657 | _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) | 
|  | 658 | { | 
|  | 659 | return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, | 
|  | 660 | (__v8df) __W, | 
|  | 661 | (__mmask8) __U); | 
|  | 662 | } | 
|  | 663 |  | 
|  | 664 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 665 | _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) | 
|  | 666 | { | 
|  | 667 | return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, | 
|  | 668 | (__v16sf) __W, | 
|  | 669 | (__mmask16) __U); | 
|  | 670 | } | 
|  | 671 |  | 
|  | 672 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 673 | _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) | 
|  | 674 | { | 
|  | 675 | return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A, | 
|  | 676 | (__v8di) __W, | 
|  | 677 | (__mmask8) __U); | 
|  | 678 | } | 
|  | 679 |  | 
|  | 680 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 681 | _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) | 
|  | 682 | { | 
|  | 683 | return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, | 
|  | 684 | (__v16si) __W, | 
|  | 685 | (__mmask16) __U); | 
|  | 686 | } | 
|  | 687 |  | 
|  | 688 | /* Compare */ | 
|  | 689 |  | 
/* Compare two 16 x float vectors with __builtin_ia32_cmpps512_mask, using
 * predicate p, an all-ones input mask and the current rounding direction.
 * Yields a __mmask16. Each of a and b is evaluated exactly once. */
#define _mm512_cmp_ps_mask(a, b, p) __extension__ ({ \
  __m512 __lhs = (a); \
  __m512 __rhs = (b); \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)__lhs, (__v16sf)__rhs, \
                                          (p), (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 696 |  | 
/* Compare two 8 x double vectors with __builtin_ia32_cmppd512_mask, using
 * predicate p, an all-ones input mask and the current rounding direction.
 * Yields a __mmask8. Each of a and b is evaluated exactly once.
 *
 * The temporaries are declared __m512d, the double-vector type matching the
 * (__v8df) casts below. They were previously declared __m512 (a float
 * vector), which forced a float/double vector reinterpretation on
 * initialisation. */
#define _mm512_cmp_pd_mask(a, b, p) __extension__ ({ \
  __m512d __a = (a); \
  __m512d __b = (b); \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)__a, (__v8df)__b, (p), \
                                         (__mmask8)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 703 |  | 
|  | 704 | /* Conversion */ | 
|  | 705 |  | 
|  | 706 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 707 | _mm512_cvttps_epu32(__m512 __A) | 
|  | 708 | { | 
|  | 709 | return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, | 
|  | 710 | (__v16si) | 
|  | 711 | _mm512_setzero_si512 (), | 
|  | 712 | (__mmask16) -1, | 
|  | 713 | _MM_FROUND_CUR_DIRECTION); | 
|  | 714 | } | 
|  | 715 |  | 
|  | 716 | static __inline __m512 __attribute__ (( __always_inline__, __nodebug__)) | 
|  | 717 | _mm512_cvt_roundepi32_ps(__m512i __A, const int __R) | 
|  | 718 | { | 
|  | 719 | return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, | 
|  | 720 | (__v16sf) | 
|  | 721 | _mm512_setzero_ps (), | 
|  | 722 | (__mmask16) -1, | 
|  | 723 | __R); | 
|  | 724 | } | 
|  | 725 |  | 
|  | 726 | static __inline __m512 __attribute__ (( __always_inline__, __nodebug__)) | 
|  | 727 | _mm512_cvt_roundepu32_ps(__m512i __A, const int __R) | 
|  | 728 | { | 
|  | 729 | return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, | 
|  | 730 | (__v16sf) | 
|  | 731 | _mm512_setzero_ps (), | 
|  | 732 | (__mmask16) -1, | 
|  | 733 | __R); | 
|  | 734 | } | 
|  | 735 |  | 
|  | 736 | static __inline __m512d __attribute__ (( __always_inline__, __nodebug__)) | 
|  | 737 | _mm512_cvtepi32_pd(__m256i __A) | 
|  | 738 | { | 
|  | 739 | return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, | 
|  | 740 | (__v8df) | 
|  | 741 | _mm512_setzero_pd (), | 
|  | 742 | (__mmask8) -1); | 
|  | 743 | } | 
|  | 744 |  | 
|  | 745 | static __inline __m512d __attribute__ (( __always_inline__, __nodebug__)) | 
|  | 746 | _mm512_cvtepu32_pd(__m256i __A) | 
|  | 747 | { | 
|  | 748 | return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, | 
|  | 749 | (__v8df) | 
|  | 750 | _mm512_setzero_pd (), | 
|  | 751 | (__mmask8) -1); | 
|  | 752 | } | 
|  | 753 | static __inline __m256 __attribute__ (( __always_inline__, __nodebug__)) | 
|  | 754 | _mm512_cvt_roundpd_ps(__m512d __A, const int __R) | 
|  | 755 | { | 
|  | 756 | return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, | 
|  | 757 | (__v8sf) | 
|  | 758 | _mm256_setzero_ps (), | 
|  | 759 | (__mmask8) -1, | 
|  | 760 | __R); | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 761 | } | 
|  | 762 |  | 
|  | 763 | static  __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 764 | _mm512_cvtps_ph(__m512 __A, const int __I) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 765 | { | 
|  | 766 | return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, | 
|  | 767 | __I, | 
|  | 768 | (__v16hi) | 
|  | 769 | _mm256_setzero_si256 (), | 
|  | 770 | -1); | 
|  | 771 | } | 
|  | 772 |  | 
|  | 773 | static  __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 774 | _mm512_cvtph_ps(__m256i __A) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 775 | { | 
|  | 776 | return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, | 
|  | 777 | (__v16sf) | 
|  | 778 | _mm512_setzero_ps (), | 
|  | 779 | (__mmask16) -1, | 
|  | 780 | _MM_FROUND_CUR_DIRECTION); | 
|  | 781 | } | 
|  | 782 |  | 
|  | 783 | static __inline __m512i __attribute__((__always_inline__, __nodebug__)) | 
|  | 784 | _mm512_cvttps_epi32(__m512 a) | 
|  | 785 | { | 
|  | 786 | return (__m512i) | 
|  | 787 | __builtin_ia32_cvttps2dq512_mask((__v16sf) a, | 
|  | 788 | (__v16si) _mm512_setzero_si512 (), | 
|  | 789 | (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); | 
|  | 790 | } | 
|  | 791 |  | 
|  | 792 | static __inline __m256i __attribute__((__always_inline__, __nodebug__)) | 
|  | 793 | _mm512_cvttpd_epi32(__m512d a) | 
|  | 794 | { | 
|  | 795 | return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a, | 
|  | 796 | (__v8si)_mm256_setzero_si256(), | 
|  | 797 | (__mmask8) -1, | 
|  | 798 | _MM_FROUND_CUR_DIRECTION); | 
|  | 799 | } | 
|  | 800 |  | 
|  | 801 | static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 802 | _mm512_cvtt_roundpd_epi32(__m512d __A, const int __R) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 803 | { | 
|  | 804 | return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, | 
|  | 805 | (__v8si) | 
|  | 806 | _mm256_setzero_si256 (), | 
|  | 807 | (__mmask8) -1, | 
|  | 808 | __R); | 
|  | 809 | } | 
|  | 810 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 811 | _mm512_cvtt_roundps_epi32(__m512 __A, const int __R) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 812 | { | 
|  | 813 | return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, | 
|  | 814 | (__v16si) | 
|  | 815 | _mm512_setzero_si512 (), | 
|  | 816 | (__mmask16) -1, | 
|  | 817 | __R); | 
|  | 818 | } | 
|  | 819 |  | 
|  | 820 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 821 | _mm512_cvt_roundps_epi32(__m512 __A, const int __R) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 822 | { | 
|  | 823 | return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, | 
|  | 824 | (__v16si) | 
|  | 825 | _mm512_setzero_si512 (), | 
|  | 826 | (__mmask16) -1, | 
|  | 827 | __R); | 
|  | 828 | } | 
|  | 829 | static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 830 | _mm512_cvt_roundpd_epi32(__m512d __A, const int __R) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 831 | { | 
|  | 832 | return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, | 
|  | 833 | (__v8si) | 
|  | 834 | _mm256_setzero_si256 (), | 
|  | 835 | (__mmask8) -1, | 
|  | 836 | __R); | 
|  | 837 | } | 
|  | 838 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 839 | _mm512_cvt_roundps_epu32(__m512 __A, const int __R) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 840 | { | 
|  | 841 | return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, | 
|  | 842 | (__v16si) | 
|  | 843 | _mm512_setzero_si512 (), | 
|  | 844 | (__mmask16) -1, | 
|  | 845 | __R); | 
|  | 846 | } | 
|  | 847 | static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 848 | _mm512_cvt_roundpd_epu32(__m512d __A, const int __R) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 849 | { | 
|  | 850 | return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, | 
|  | 851 | (__v8si) | 
|  | 852 | _mm256_setzero_si256 (), | 
|  | 853 | (__mmask8) -1, | 
|  | 854 | __R); | 
|  | 855 | } | 
|  | 856 |  | 
| Adam Nemet | 63a951e | 2015-01-14 01:31:17 +0000 | [diff] [blame] | 857 | /* Unpack and Interleave */ | 
|  | 858 | static __inline __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 859 | _mm512_unpackhi_pd(__m512d __a, __m512d __b) | 
|  | 860 | { | 
|  | 861 | return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); | 
|  | 862 | } | 
|  | 863 |  | 
|  | 864 | static __inline __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 865 | _mm512_unpacklo_pd(__m512d __a, __m512d __b) | 
|  | 866 | { | 
|  | 867 | return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); | 
|  | 868 | } | 
|  | 869 |  | 
|  | 870 | static __inline __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 871 | _mm512_unpackhi_ps(__m512 __a, __m512 __b) | 
|  | 872 | { | 
|  | 873 | return __builtin_shufflevector(__a, __b, | 
|  | 874 | 2,    18,    3,    19, | 
|  | 875 | 2+4,  18+4,  3+4,  19+4, | 
|  | 876 | 2+8,  18+8,  3+8,  19+8, | 
|  | 877 | 2+12, 18+12, 3+12, 19+12); | 
|  | 878 | } | 
|  | 879 |  | 
|  | 880 | static __inline __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 881 | _mm512_unpacklo_ps(__m512 __a, __m512 __b) | 
|  | 882 | { | 
|  | 883 | return __builtin_shufflevector(__a, __b, | 
|  | 884 | 0,    16,    1,    17, | 
|  | 885 | 0+4,  16+4,  1+4,  17+4, | 
|  | 886 | 0+8,  16+8,  1+8,  17+8, | 
|  | 887 | 0+12, 16+12, 1+12, 17+12); | 
|  | 888 | } | 
|  | 889 |  | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 890 | /* Bit Test */ | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 891 |  | 
|  | 892 | static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 893 | _mm512_test_epi32_mask(__m512i __A, __m512i __B) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 894 | { | 
|  | 895 | return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, | 
|  | 896 | (__v16si) __B, | 
|  | 897 | (__mmask16) -1); | 
|  | 898 | } | 
|  | 899 |  | 
|  | 900 | static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 901 | _mm512_test_epi64_mask(__m512i __A, __m512i __B) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 902 | { | 
|  | 903 | return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, | 
|  | 904 | (__v8di) __B, | 
|  | 905 | (__mmask8) -1); | 
|  | 906 | } | 
|  | 907 |  | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 908 | /* SIMD load ops */ | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 909 |  | 
|  | 910 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 911 | _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 912 | { | 
|  | 913 | return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P, | 
|  | 914 | (__v16si) | 
|  | 915 | _mm512_setzero_si512 (), | 
|  | 916 | (__mmask16) __U); | 
|  | 917 | } | 
|  | 918 |  | 
|  | 919 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 920 | _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 921 | { | 
|  | 922 | return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P, | 
|  | 923 | (__v8di) | 
|  | 924 | _mm512_setzero_si512 (), | 
|  | 925 | (__mmask8) __U); | 
|  | 926 | } | 
|  | 927 |  | 
|  | 928 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 929 | _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 930 | { | 
|  | 931 | return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P, | 
|  | 932 | (__v16sf) | 
|  | 933 | _mm512_setzero_ps (), | 
|  | 934 | (__mmask16) __U); | 
|  | 935 | } | 
|  | 936 |  | 
|  | 937 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 938 | _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 939 | { | 
|  | 940 | return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P, | 
|  | 941 | (__v8df) | 
|  | 942 | _mm512_setzero_pd (), | 
|  | 943 | (__mmask8) __U); | 
|  | 944 | } | 
|  | 945 |  | 
| Adam Nemet | c0cff24 | 2015-01-16 18:51:50 +0000 | [diff] [blame] | 946 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 947 | _mm512_maskz_load_ps(__mmask16 __U, void const *__P) | 
|  | 948 | { | 
|  | 949 | return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, | 
|  | 950 | (__v16sf) | 
|  | 951 | _mm512_setzero_ps (), | 
|  | 952 | (__mmask16) __U); | 
|  | 953 | } | 
|  | 954 |  | 
|  | 955 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 956 | _mm512_maskz_load_pd(__mmask8 __U, void const *__P) | 
|  | 957 | { | 
|  | 958 | return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, | 
|  | 959 | (__v8df) | 
|  | 960 | _mm512_setzero_pd (), | 
|  | 961 | (__mmask8) __U); | 
|  | 962 | } | 
|  | 963 |  | 
| Adam Nemet | da82bcc | 2014-07-31 04:00:39 +0000 | [diff] [blame] | 964 | static __inline __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 965 | _mm512_loadu_pd(double const *__p) | 
|  | 966 | { | 
|  | 967 | struct __loadu_pd { | 
|  | 968 | __m512d __v; | 
|  | 969 | } __attribute__((packed, may_alias)); | 
|  | 970 | return ((struct __loadu_pd*)__p)->__v; | 
|  | 971 | } | 
|  | 972 |  | 
|  | 973 | static __inline __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 974 | _mm512_loadu_ps(float const *__p) | 
|  | 975 | { | 
|  | 976 | struct __loadu_ps { | 
|  | 977 | __m512 __v; | 
|  | 978 | } __attribute__((packed, may_alias)); | 
|  | 979 | return ((struct __loadu_ps*)__p)->__v; | 
|  | 980 | } | 
|  | 981 |  | 
| Adam Nemet | c0cff24 | 2015-01-16 18:51:50 +0000 | [diff] [blame] | 982 | static __inline __m512 __attribute__((__always_inline__, __nodebug__)) | 
|  | 983 | _mm512_load_ps(double const *__p) | 
|  | 984 | { | 
|  | 985 | return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p, | 
|  | 986 | (__v16sf) | 
|  | 987 | _mm512_setzero_ps (), | 
|  | 988 | (__mmask16) -1); | 
|  | 989 | } | 
|  | 990 |  | 
|  | 991 | static __inline __m512d __attribute__((__always_inline__, __nodebug__)) | 
|  | 992 | _mm512_load_pd(float const *__p) | 
|  | 993 | { | 
|  | 994 | return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, | 
|  | 995 | (__v8df) | 
|  | 996 | _mm512_setzero_pd (), | 
|  | 997 | (__mmask8) -1); | 
|  | 998 | } | 
|  | 999 |  | 
| Adam Nemet | 0d5bb55 | 2014-07-28 17:14:40 +0000 | [diff] [blame] | 1000 | /* SIMD store ops */ | 
|  | 1001 |  | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 1002 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 1003 | _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 1004 | { | 
|  | 1005 | __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A, | 
|  | 1006 | (__mmask8) __U); | 
|  | 1007 | } | 
|  | 1008 |  | 
|  | 1009 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 1010 | _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 1011 | { | 
|  | 1012 | __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A, | 
|  | 1013 | (__mmask16) __U); | 
|  | 1014 | } | 
|  | 1015 |  | 
|  | 1016 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 1017 | _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 1018 | { | 
|  | 1019 | __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); | 
|  | 1020 | } | 
|  | 1021 |  | 
|  | 1022 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | fce1ad0 | 2014-07-28 17:14:45 +0000 | [diff] [blame] | 1023 | _mm512_storeu_pd(void *__P, __m512d __A) | 
|  | 1024 | { | 
|  | 1025 | __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1); | 
|  | 1026 | } | 
|  | 1027 |  | 
|  | 1028 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | 9a3ea60 | 2014-07-28 17:14:38 +0000 | [diff] [blame] | 1029 | _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) | 
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 1030 | { | 
|  | 1031 | __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A, | 
|  | 1032 | (__mmask16) __U); | 
|  | 1033 | } | 
|  | 1034 |  | 
| Adam Nemet | fce1ad0 | 2014-07-28 17:14:45 +0000 | [diff] [blame] | 1035 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 1036 | _mm512_storeu_ps(void *__P, __m512 __A) | 
|  | 1037 | { | 
|  | 1038 | __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1); | 
|  | 1039 | } | 
|  | 1040 |  | 
|  | 1041 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) | 
| Adam Nemet | c0cff24 | 2015-01-16 18:51:50 +0000 | [diff] [blame] | 1042 | _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) | 
| Adam Nemet | fce1ad0 | 2014-07-28 17:14:45 +0000 | [diff] [blame] | 1043 | { | 
| Adam Nemet | c0cff24 | 2015-01-16 18:51:50 +0000 | [diff] [blame] | 1044 | __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); | 
| Adam Nemet | fce1ad0 | 2014-07-28 17:14:45 +0000 | [diff] [blame] | 1045 | } | 
|  | 1046 |  | 
|  | 1047 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 1048 | _mm512_store_pd(void *__P, __m512d __A) | 
|  | 1049 | { | 
|  | 1050 | *(__m512d*)__P = __A; | 
|  | 1051 | } | 
|  | 1052 |  | 
| Adam Nemet | c0cff24 | 2015-01-16 18:51:50 +0000 | [diff] [blame] | 1053 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 1054 | _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) | 
|  | 1055 | { | 
|  | 1056 | __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, | 
|  | 1057 | (__mmask16) __U); | 
|  | 1058 | } | 
|  | 1059 |  | 
|  | 1060 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 1061 | _mm512_store_ps(void *__P, __m512 __A) | 
|  | 1062 | { | 
|  | 1063 | *(__m512*)__P = __A; | 
|  | 1064 | } | 
|  | 1065 |  | 
| Adam Nemet | 2db1d2f | 2014-07-30 16:51:27 +0000 | [diff] [blame] | 1066 | /* Mask ops */ | 
|  | 1067 |  | 
|  | 1068 | static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__)) | 
|  | 1069 | _mm512_knot(__mmask16 __M) | 
|  | 1070 | { | 
|  | 1071 | return __builtin_ia32_knothi(__M); | 
|  | 1072 | } | 
|  | 1073 |  | 
| Robert Khasanov | b9f3a91 | 2014-10-08 17:18:13 +0000 | [diff] [blame] | 1074 | /* Integer compare */ | 
|  | 1075 |  | 
|  | 1076 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1077 | _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { | 
|  | 1078 | return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, | 
|  | 1079 | (__mmask16)-1); | 
|  | 1080 | } | 
|  | 1081 |  | 
|  | 1082 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1083 | _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { | 
|  | 1084 | return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, | 
|  | 1085 | __u); | 
|  | 1086 | } | 
|  | 1087 |  | 
| Craig Topper | 4cac1c2 | 2015-01-25 23:30:07 +0000 | [diff] [blame] | 1088 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1089 | _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) { | 
|  | 1090 | return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, | 
|  | 1091 | (__mmask16)-1); | 
|  | 1092 | } | 
|  | 1093 |  | 
|  | 1094 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1095 | _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { | 
|  | 1096 | return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, | 
|  | 1097 | __u); | 
|  | 1098 | } | 
|  | 1099 |  | 
| Robert Khasanov | b9f3a91 | 2014-10-08 17:18:13 +0000 | [diff] [blame] | 1100 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1101 | _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { | 
|  | 1102 | return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, | 
|  | 1103 | __u); | 
|  | 1104 | } | 
|  | 1105 |  | 
|  | 1106 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1107 | _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { | 
|  | 1108 | return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, | 
|  | 1109 | (__mmask8)-1); | 
|  | 1110 | } | 
|  | 1111 |  | 
| Craig Topper | 4cac1c2 | 2015-01-25 23:30:07 +0000 | [diff] [blame] | 1112 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1113 | _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) { | 
|  | 1114 | return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, | 
|  | 1115 | (__mmask8)-1); | 
|  | 1116 | } | 
|  | 1117 |  | 
|  | 1118 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1119 | _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { | 
|  | 1120 | return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, | 
|  | 1121 | __u); | 
|  | 1122 | } | 
|  | 1123 |  | 
|  | 1124 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1125 | _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) { | 
|  | 1126 | return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, | 
|  | 1127 | (__mmask16)-1); | 
|  | 1128 | } | 
|  | 1129 |  | 
|  | 1130 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1131 | _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { | 
|  | 1132 | return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, | 
|  | 1133 | __u); | 
|  | 1134 | } | 
|  | 1135 |  | 
|  | 1136 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1137 | _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) { | 
|  | 1138 | return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, | 
|  | 1139 | (__mmask16)-1); | 
|  | 1140 | } | 
|  | 1141 |  | 
|  | 1142 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1143 | _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { | 
|  | 1144 | return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, | 
|  | 1145 | __u); | 
|  | 1146 | } | 
|  | 1147 |  | 
|  | 1148 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1149 | _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) { | 
|  | 1150 | return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, | 
|  | 1151 | (__mmask8)-1); | 
|  | 1152 | } | 
|  | 1153 |  | 
|  | 1154 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1155 | _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { | 
|  | 1156 | return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, | 
|  | 1157 | __u); | 
|  | 1158 | } | 
|  | 1159 |  | 
|  | 1160 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1161 | _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) { | 
|  | 1162 | return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, | 
|  | 1163 | (__mmask8)-1); | 
|  | 1164 | } | 
|  | 1165 |  | 
|  | 1166 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1167 | _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { | 
|  | 1168 | return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, | 
|  | 1169 | __u); | 
|  | 1170 | } | 
|  | 1171 |  | 
|  | 1172 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1173 | _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) { | 
|  | 1174 | return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, | 
|  | 1175 | (__mmask16)-1); | 
|  | 1176 | } | 
|  | 1177 |  | 
|  | 1178 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1179 | _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { | 
|  | 1180 | return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, | 
|  | 1181 | __u); | 
|  | 1182 | } | 
|  | 1183 |  | 
|  | 1184 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1185 | _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) { | 
|  | 1186 | return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, | 
|  | 1187 | (__mmask16)-1); | 
|  | 1188 | } | 
|  | 1189 |  | 
|  | 1190 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1191 | _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { | 
|  | 1192 | return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, | 
|  | 1193 | __u); | 
|  | 1194 | } | 
|  | 1195 |  | 
|  | 1196 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1197 | _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { | 
|  | 1198 | return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, | 
|  | 1199 | __u); | 
|  | 1200 | } | 
|  | 1201 |  | 
|  | 1202 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1203 | _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) { | 
|  | 1204 | return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, | 
|  | 1205 | (__mmask8)-1); | 
|  | 1206 | } | 
|  | 1207 |  | 
|  | 1208 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1209 | _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) { | 
|  | 1210 | return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, | 
|  | 1211 | (__mmask8)-1); | 
|  | 1212 | } | 
|  | 1213 |  | 
|  | 1214 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1215 | _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { | 
|  | 1216 | return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, | 
|  | 1217 | __u); | 
|  | 1218 | } | 
|  | 1219 |  | 
|  | 1220 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1221 | _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) { | 
|  | 1222 | return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, | 
|  | 1223 | (__mmask16)-1); | 
|  | 1224 | } | 
|  | 1225 |  | 
|  | 1226 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1227 | _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { | 
|  | 1228 | return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, | 
|  | 1229 | __u); | 
|  | 1230 | } | 
|  | 1231 |  | 
|  | 1232 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1233 | _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) { | 
|  | 1234 | return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, | 
|  | 1235 | (__mmask16)-1); | 
|  | 1236 | } | 
|  | 1237 |  | 
|  | 1238 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1239 | _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { | 
|  | 1240 | return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, | 
|  | 1241 | __u); | 
|  | 1242 | } | 
|  | 1243 |  | 
|  | 1244 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1245 | _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) { | 
|  | 1246 | return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, | 
|  | 1247 | (__mmask8)-1); | 
|  | 1248 | } | 
|  | 1249 |  | 
|  | 1250 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1251 | _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { | 
|  | 1252 | return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, | 
|  | 1253 | __u); | 
|  | 1254 | } | 
|  | 1255 |  | 
|  | 1256 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1257 | _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) { | 
|  | 1258 | return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, | 
|  | 1259 | (__mmask8)-1); | 
|  | 1260 | } | 
|  | 1261 |  | 
|  | 1262 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1263 | _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { | 
|  | 1264 | return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, | 
|  | 1265 | __u); | 
|  | 1266 | } | 
|  | 1267 |  | 
|  | 1268 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1269 | _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) { | 
|  | 1270 | return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, | 
|  | 1271 | (__mmask16)-1); | 
|  | 1272 | } | 
|  | 1273 |  | 
|  | 1274 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1275 | _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { | 
|  | 1276 | return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, | 
|  | 1277 | __u); | 
|  | 1278 | } | 
|  | 1279 |  | 
|  | 1280 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1281 | _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) { | 
|  | 1282 | return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, | 
|  | 1283 | (__mmask16)-1); | 
|  | 1284 | } | 
|  | 1285 |  | 
|  | 1286 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1287 | _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { | 
|  | 1288 | return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, | 
|  | 1289 | __u); | 
|  | 1290 | } | 
|  | 1291 |  | 
|  | 1292 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1293 | _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) { | 
|  | 1294 | return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, | 
|  | 1295 | (__mmask8)-1); | 
|  | 1296 | } | 
|  | 1297 |  | 
|  | 1298 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1299 | _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { | 
|  | 1300 | return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, | 
|  | 1301 | __u); | 
|  | 1302 | } | 
|  | 1303 |  | 
|  | 1304 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1305 | _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) { | 
|  | 1306 | return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, | 
|  | 1307 | (__mmask8)-1); | 
|  | 1308 | } | 
|  | 1309 |  | 
|  | 1310 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1311 | _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { | 
|  | 1312 | return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, | 
|  | 1313 | __u); | 
|  | 1314 | } | 
|  | 1315 |  | 
|  | 1316 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1317 | _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) { | 
|  | 1318 | return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, | 
|  | 1319 | (__mmask16)-1); | 
|  | 1320 | } | 
|  | 1321 |  | 
|  | 1322 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1323 | _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { | 
|  | 1324 | return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, | 
|  | 1325 | __u); | 
|  | 1326 | } | 
|  | 1327 |  | 
|  | 1328 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1329 | _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) { | 
|  | 1330 | return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, | 
|  | 1331 | (__mmask16)-1); | 
|  | 1332 | } | 
|  | 1333 |  | 
|  | 1334 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1335 | _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { | 
|  | 1336 | return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, | 
|  | 1337 | __u); | 
|  | 1338 | } | 
|  | 1339 |  | 
|  | 1340 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1341 | _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) { | 
|  | 1342 | return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, | 
|  | 1343 | (__mmask8)-1); | 
|  | 1344 | } | 
|  | 1345 |  | 
|  | 1346 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1347 | _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { | 
|  | 1348 | return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, | 
|  | 1349 | __u); | 
|  | 1350 | } | 
|  | 1351 |  | 
|  | 1352 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1353 | _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) { | 
|  | 1354 | return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, | 
|  | 1355 | (__mmask8)-1); | 
|  | 1356 | } | 
|  | 1357 |  | 
|  | 1358 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) | 
|  | 1359 | _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { | 
|  | 1360 | return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, | 
|  | 1361 | __u); | 
|  | 1362 | } | 
|  | 1363 |  | 
/* Compare the 32-bit lanes of a and b with the signed-compare builtin,
 * using p as the predicate immediate and an all-ones mask operand. The
 * result is a __mmask16.
 *
 * The operands are cast in place instead of being copied into macro-local
 * __a/__b temporaries: a temporary would capture any argument expression
 * that itself mentions __a or __b (e.g. `__m512i __a = (__a);` reads the
 * new, uninitialised object). Each argument is still expanded exactly once.
 */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (p), \
                                          (__mmask16)-1))
|  | 1369 |  | 
/* Compare the 32-bit lanes of a and b with the unsigned-compare builtin,
 * using p as the predicate immediate and an all-ones mask operand. The
 * result is a __mmask16.
 *
 * The operands are cast in place instead of being copied into macro-local
 * __a/__b temporaries: a temporary would capture any argument expression
 * that itself mentions __a or __b (e.g. `__m512i __a = (__a);` reads the
 * new, uninitialised object). Each argument is still expanded exactly once.
 */
#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (p), \
                                           (__mmask16)-1))
|  | 1375 |  | 
/* Compare the 64-bit lanes of a and b with the signed-compare builtin,
 * using p as the predicate immediate and an all-ones mask operand. The
 * result is a __mmask8.
 *
 * The operands are cast in place instead of being copied into macro-local
 * __a/__b temporaries: a temporary would capture any argument expression
 * that itself mentions __a or __b (e.g. `__m512i __a = (__a);` reads the
 * new, uninitialised object). Each argument is still expanded exactly once.
 */
#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (p), \
                                         (__mmask8)-1))
|  | 1381 |  | 
/* Compare the 64-bit lanes of a and b with the unsigned-compare builtin,
 * using p as the predicate immediate and an all-ones mask operand. The
 * result is a __mmask8.
 *
 * The operands are cast in place instead of being copied into macro-local
 * __a/__b temporaries: a temporary would capture any argument expression
 * that itself mentions __a or __b (e.g. `__m512i __a = (__a);` reads the
 * new, uninitialised object). Each argument is still expanded exactly once.
 */
#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (p), \
                                          (__mmask8)-1))
|  | 1387 |  | 
/* Compare the 32-bit lanes of a and b with the signed-compare builtin,
 * using p as the predicate immediate and m (converted to __mmask16) as the
 * mask operand. The result is a __mmask16.
 *
 * The operands are cast in place instead of being copied into macro-local
 * __a/__b temporaries: a temporary would capture any argument expression
 * (including m or p) that itself mentions __a or __b. Each argument is
 * still expanded exactly once.
 */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (p), \
                                          (__mmask16)(m)))
|  | 1393 |  | 
/* Compare the 32-bit lanes of a and b with the unsigned-compare builtin,
 * using p as the predicate immediate and m (converted to __mmask16) as the
 * mask operand. The result is a __mmask16.
 *
 * The operands are cast in place instead of being copied into macro-local
 * __a/__b temporaries: a temporary would capture any argument expression
 * (including m or p) that itself mentions __a or __b. Each argument is
 * still expanded exactly once.
 */
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (p), \
                                           (__mmask16)(m)))
|  | 1399 |  | 
/* Compare the 64-bit lanes of a and b with the signed-compare builtin,
 * using p as the predicate immediate and m (converted to __mmask8) as the
 * mask operand. The result is a __mmask8.
 *
 * The operands are cast in place instead of being copied into macro-local
 * __a/__b temporaries: a temporary would capture any argument expression
 * (including m or p) that itself mentions __a or __b. Each argument is
 * still expanded exactly once.
 */
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (p), \
                                         (__mmask8)(m)))
|  | 1405 |  | 
/* Compare the 64-bit lanes of a and b with the unsigned-compare builtin,
 * using p as the predicate immediate and m (converted to __mmask8) as the
 * mask operand. The result is a __mmask8.
 *
 * The operands are cast in place instead of being copied into macro-local
 * __a/__b temporaries: a temporary would capture any argument expression
 * (including m or p) that itself mentions __a or __b. Each argument is
 * still expanded exactly once.
 */
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (p), \
                                          (__mmask8)(m)))
| Elena Demikhovsky | fcc6df3 | 2014-07-22 11:31:39 +0000 | [diff] [blame] | 1411 | #endif // __AVX512FINTRIN_H |