blob: d5e04ee1d26f8b59af201104522a61a954017c2d [file] [log] [blame]
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* 64-byte vectors with an explicit element type, used for builtin operands. */
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));

/* 64-byte vector types exposed by the intrinsics in this header. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Mask types: an unsigned char and an unsigned short. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
41
/* Rounding mode macros: integer codes 0x00 through 0x04 that the intrinsics
 * below pass as the rounding argument of the builtins. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
48
/* Create vectors with repeated elements */
50
51static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
52_mm512_setzero_si512(void)
53{
54 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
55}
56
57static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
58_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
59{
60 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
61 (__v16si)
62 _mm512_setzero_si512 (),
63 __M);
64}
65
66static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
67_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
68{
69#ifdef __x86_64__
70 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
71 (__v8di)
72 _mm512_setzero_si512 (),
73 __M);
74#else
75 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
76 (__v8di)
77 _mm512_setzero_si512 (),
78 __M);
79#endif
80}
81
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000082static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000083_mm512_setzero_ps(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000084{
85 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
86 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
87}
88static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000089_mm512_setzero_pd(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000090{
91 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
92}
Adam Nemet0d5bb552014-07-28 17:14:40 +000093
Adam Nemetf42e7a22014-07-30 16:51:22 +000094static __inline __m512 __attribute__((__always_inline__, __nodebug__))
95_mm512_set1_ps(float __w)
96{
97 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
98 __w, __w, __w, __w, __w, __w, __w, __w };
99}
100
101static __inline __m512d __attribute__((__always_inline__, __nodebug__))
102_mm512_set1_pd(double __w)
103{
104 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
105}
106
107static __inline __m512i __attribute__((__always_inline__, __nodebug__))
108_mm512_set1_epi32(int __s)
109{
110 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
111 __s, __s, __s, __s, __s, __s, __s, __s };
112}
113
114static __inline __m512i __attribute__((__always_inline__, __nodebug__))
115_mm512_set1_epi64(long long __d)
116{
117 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
118}
119
Adam Nemet4abc07c2014-08-13 00:29:01 +0000120static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
121_mm512_broadcastss_ps(__m128 __X)
122{
123 float __f = __X[0];
124 return (__v16sf){ __f, __f, __f, __f,
125 __f, __f, __f, __f,
126 __f, __f, __f, __f,
127 __f, __f, __f, __f };
128}
129
130static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
131_mm512_broadcastsd_pd(__m128d __X)
132{
133 double __d = __X[0];
134 return (__v8df){ __d, __d, __d, __d,
135 __d, __d, __d, __d };
136}
137
/* Cast between vector types */
139
140static __inline __m512d __attribute__((__always_inline__, __nodebug__))
141_mm512_castpd256_pd512(__m256d __a)
142{
143 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
144}
145
146static __inline __m512 __attribute__((__always_inline__, __nodebug__))
147_mm512_castps256_ps512(__m256 __a)
148{
149 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
150 -1, -1, -1, -1, -1, -1, -1, -1);
151}
152
153static __inline __m128d __attribute__((__always_inline__, __nodebug__))
154_mm512_castpd512_pd128(__m512d __a)
155{
156 return __builtin_shufflevector(__a, __a, 0, 1);
157}
158
159static __inline __m128 __attribute__((__always_inline__, __nodebug__))
160_mm512_castps512_ps128(__m512 __a)
161{
162 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
163}
164
/* Arithmetic */
166
Adam Nemeta3ebe622014-07-28 17:14:42 +0000167static __inline __m512d __attribute__((__always_inline__, __nodebug__))
168_mm512_add_pd(__m512d __a, __m512d __b)
169{
170 return __a + __b;
171}
172
173static __inline __m512 __attribute__((__always_inline__, __nodebug__))
174_mm512_add_ps(__m512 __a, __m512 __b)
175{
176 return __a + __b;
177}
178
179static __inline __m512d __attribute__((__always_inline__, __nodebug__))
180_mm512_mul_pd(__m512d __a, __m512d __b)
181{
182 return __a * __b;
183}
184
185static __inline __m512 __attribute__((__always_inline__, __nodebug__))
186_mm512_mul_ps(__m512 __a, __m512 __b)
187{
188 return __a * __b;
189}
190
191static __inline __m512d __attribute__((__always_inline__, __nodebug__))
192_mm512_sub_pd(__m512d __a, __m512d __b)
193{
194 return __a - __b;
195}
196
197static __inline __m512 __attribute__((__always_inline__, __nodebug__))
198_mm512_sub_ps(__m512 __a, __m512 __b)
199{
200 return __a - __b;
201}
202
Adam Nemet0d5bb552014-07-28 17:14:40 +0000203static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
204_mm512_max_pd(__m512d __A, __m512d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000205{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000206 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
207 (__v8df) __B,
208 (__v8df)
209 _mm512_setzero_pd (),
210 (__mmask8) -1,
211 _MM_FROUND_CUR_DIRECTION);
212}
213
214static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
215_mm512_max_ps(__m512 __A, __m512 __B)
216{
217 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
218 (__v16sf) __B,
219 (__v16sf)
220 _mm512_setzero_ps (),
221 (__mmask16) -1,
222 _MM_FROUND_CUR_DIRECTION);
223}
224
225static __inline __m512i
226__attribute__ ((__always_inline__, __nodebug__))
227_mm512_max_epi32(__m512i __A, __m512i __B)
228{
229 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
230 (__v16si) __B,
231 (__v16si)
232 _mm512_setzero_si512 (),
233 (__mmask16) -1);
234}
235
236static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
237_mm512_max_epu32(__m512i __A, __m512i __B)
238{
239 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
240 (__v16si) __B,
241 (__v16si)
242 _mm512_setzero_si512 (),
243 (__mmask16) -1);
244}
245
246static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
247_mm512_max_epi64(__m512i __A, __m512i __B)
248{
249 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
250 (__v8di) __B,
251 (__v8di)
252 _mm512_setzero_si512 (),
253 (__mmask8) -1);
254}
255
256static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
257_mm512_max_epu64(__m512i __A, __m512i __B)
258{
259 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
260 (__v8di) __B,
261 (__v8di)
262 _mm512_setzero_si512 (),
263 (__mmask8) -1);
264}
265
266static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
267_mm512_min_pd(__m512d __A, __m512d __B)
268{
269 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
270 (__v8df) __B,
271 (__v8df)
272 _mm512_setzero_pd (),
273 (__mmask8) -1,
274 _MM_FROUND_CUR_DIRECTION);
275}
276
277static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
278_mm512_min_ps(__m512 __A, __m512 __B)
279{
280 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
281 (__v16sf) __B,
282 (__v16sf)
283 _mm512_setzero_ps (),
284 (__mmask16) -1,
285 _MM_FROUND_CUR_DIRECTION);
286}
287
288static __inline __m512i
289__attribute__ ((__always_inline__, __nodebug__))
290_mm512_min_epi32(__m512i __A, __m512i __B)
291{
292 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
293 (__v16si) __B,
294 (__v16si)
295 _mm512_setzero_si512 (),
296 (__mmask16) -1);
297}
298
299static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
300_mm512_min_epu32(__m512i __A, __m512i __B)
301{
302 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
303 (__v16si) __B,
304 (__v16si)
305 _mm512_setzero_si512 (),
306 (__mmask16) -1);
307}
308
309static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
310_mm512_min_epi64(__m512i __A, __m512i __B)
311{
312 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
313 (__v8di) __B,
314 (__v8di)
315 _mm512_setzero_si512 (),
316 (__mmask8) -1);
317}
318
319static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
320_mm512_min_epu64(__m512i __A, __m512i __B)
321{
322 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
323 (__v8di) __B,
324 (__v8di)
325 _mm512_setzero_si512 (),
326 (__mmask8) -1);
327}
328
329static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
330_mm512_mul_epi32(__m512i __X, __m512i __Y)
331{
332 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
333 (__v16si) __Y,
334 (__v8di)
335 _mm512_setzero_si512 (),
336 (__mmask8) -1);
337}
338
339static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
340_mm512_mul_epu32(__m512i __X, __m512i __Y)
341{
342 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
343 (__v16si) __Y,
344 (__v8di)
345 _mm512_setzero_si512 (),
346 (__mmask8) -1);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000347}
348
349static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
350_mm512_sqrt_pd(__m512d a)
351{
352 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
353 (__v8df) _mm512_setzero_pd (),
354 (__mmask8) -1,
355 _MM_FROUND_CUR_DIRECTION);
356}
357
358static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
359_mm512_sqrt_ps(__m512 a)
360{
361 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
362 (__v16sf) _mm512_setzero_ps (),
363 (__mmask16) -1,
364 _MM_FROUND_CUR_DIRECTION);
365}
366
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000367static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
368_mm512_rsqrt14_pd(__m512d __A)
369{
370 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
371 (__v8df)
372 _mm512_setzero_pd (),
373 (__mmask8) -1);}
374
375static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
376_mm512_rsqrt14_ps(__m512 __A)
377{
378 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
379 (__v16sf)
380 _mm512_setzero_ps (),
381 (__mmask16) -1);
382}
383
384static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
385_mm_rsqrt14_ss(__m128 __A, __m128 __B)
386{
387 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
388 (__v4sf) __B,
389 (__v4sf)
390 _mm_setzero_ps (),
391 (__mmask8) -1);
392}
393
394static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
395_mm_rsqrt14_sd(__m128d __A, __m128d __B)
396{
397 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
398 (__v2df) __B,
399 (__v2df)
400 _mm_setzero_pd (),
401 (__mmask8) -1);
402}
403
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000404static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
405_mm512_rcp14_pd(__m512d __A)
406{
407 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
408 (__v8df)
409 _mm512_setzero_pd (),
410 (__mmask8) -1);
411}
412
413static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
414_mm512_rcp14_ps(__m512 __A)
415{
416 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
417 (__v16sf)
418 _mm512_setzero_ps (),
419 (__mmask16) -1);
420}
421static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000422_mm_rcp14_ss(__m128 __A, __m128 __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000423{
424 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
425 (__v4sf) __B,
426 (__v4sf)
427 _mm_setzero_ps (),
428 (__mmask8) -1);
429}
430
431static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000432_mm_rcp14_sd(__m128d __A, __m128d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000433{
434 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
435 (__v2df) __B,
436 (__v2df)
437 _mm_setzero_pd (),
438 (__mmask8) -1);
439}
440
Adam Nemet0d5bb552014-07-28 17:14:40 +0000441static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
442_mm512_floor_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000443{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000444 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
445 _MM_FROUND_FLOOR,
446 (__v16sf) __A, -1,
447 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000448}
449
Adam Nemet0d5bb552014-07-28 17:14:40 +0000450static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
451_mm512_floor_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000452{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000453 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
454 _MM_FROUND_FLOOR,
455 (__v8df) __A, -1,
456 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000457}
458
Adam Nemet0d5bb552014-07-28 17:14:40 +0000459static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
460_mm512_ceil_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000461{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000462 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
463 _MM_FROUND_CEIL,
464 (__v16sf) __A, -1,
465 _MM_FROUND_CUR_DIRECTION);
466}
467
468static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
469_mm512_ceil_pd(__m512d __A)
470{
471 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
472 _MM_FROUND_CEIL,
473 (__v8df) __A, -1,
474 _MM_FROUND_CUR_DIRECTION);
475}
476
477static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
478_mm512_abs_epi64(__m512i __A)
479{
480 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
481 (__v8di)
482 _mm512_setzero_si512 (),
483 (__mmask8) -1);
484}
485
486static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
487_mm512_abs_epi32(__m512i __A)
488{
489 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
490 (__v16si)
491 _mm512_setzero_si512 (),
492 (__mmask16) -1);
493}
494
495static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
496_mm512_roundscale_ps(__m512 __A, const int __imm)
497{
498 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
499 (__v16sf) __A, -1,
500 _MM_FROUND_CUR_DIRECTION);
501}
502static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
503_mm512_roundscale_pd(__m512d __A, const int __imm)
504{
505 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
506 (__v8df) __A, -1,
507 _MM_FROUND_CUR_DIRECTION);
508}
509
Adam Nemet2278fcb2014-08-14 17:17:57 +0000510static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
511_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
512{
513 return (__m512d)
514 __builtin_ia32_vfmaddpd512_mask(__A,
515 __B,
516 __C,
517 (__mmask8) -1,
518 _MM_FROUND_CUR_DIRECTION);
519}
520
521static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
522_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
523{
524 return (__m512d)
525 __builtin_ia32_vfmsubpd512_mask(__A,
526 __B,
527 __C,
528 (__mmask8) -1,
529 _MM_FROUND_CUR_DIRECTION);
530}
531
532static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
533_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
534{
535 return (__m512d)
536 __builtin_ia32_vfnmaddpd512_mask(__A,
537 __B,
538 __C,
539 (__mmask8) -1,
540 _MM_FROUND_CUR_DIRECTION);
541}
542
543static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
544_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
545{
546 return (__m512)
547 __builtin_ia32_vfmaddps512_mask(__A,
548 __B,
549 __C,
550 (__mmask16) -1,
551 _MM_FROUND_CUR_DIRECTION);
552}
553
554static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
555_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
556{
557 return (__m512)
558 __builtin_ia32_vfmsubps512_mask(__A,
559 __B,
560 __C,
561 (__mmask16) -1,
562 _MM_FROUND_CUR_DIRECTION);
563}
564
565static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
566_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
567{
568 return (__m512)
569 __builtin_ia32_vfnmaddps512_mask(__A,
570 __B,
571 __C,
572 (__mmask16) -1,
573 _MM_FROUND_CUR_DIRECTION);
574}
575
/* Vector permutations */
577
578static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
579_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
580{
581 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
582 /* idx */ ,
583 (__v16si) __A,
584 (__v16si) __B,
585 (__mmask16) -1);
586}
587static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
588_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
589{
590 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
591 /* idx */ ,
592 (__v8di) __A,
593 (__v8di) __B,
594 (__mmask8) -1);
595}
596
597static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
598_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
599{
600 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
601 /* idx */ ,
602 (__v8df) __A,
603 (__v8df) __B,
604 (__mmask8) -1);
605}
606static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
607_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
608{
609 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
610 /* idx */ ,
611 (__v16sf) __A,
612 (__v16sf) __B,
613 (__mmask16) -1);
614}
615
Adam Nemet5bf7baa2014-08-05 17:28:23 +0000616static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
617_mm512_valign_epi64(__m512i __A, __m512i __B, const int __I)
618{
619 return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A,
620 (__v8di)__B,
621 __I,
622 (__v8di)_mm512_setzero_si512(),
623 (__mmask8) -1);
624}
625
626static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
627_mm512_valign_epi32(__m512i __A, __m512i __B, const int __I)
628{
629 return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A,
630 (__v16si)__B,
631 __I,
632 (__v16si)_mm512_setzero_si512(),
633 (__mmask16) -1);
634}
635
/* Vector Extract */
637
/* Evaluates A once into a __m512d temporary and forwards it, with the
 * selector I, to __builtin_ia32_extractf64x4_mask using a zero pass-through
 * and an all-ones mask.  The result is a __m256d.
 *
 * The pass-through is built with _mm256_setzero_pd() so that its type
 * matches the __v4df operand directly, mirroring _mm512_extractf32x4_ps,
 * rather than reinterpreting an integer zero vector. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({                     \
      __m512d __A = (A);                                                  \
      (__m256d)                                                           \
        __builtin_ia32_extractf64x4_mask((__v8df)__A,                     \
                                         (I),                             \
                                         (__v4df)_mm256_setzero_pd(),     \
                                         (__mmask8) -1); })
645
/* Evaluates A once into a __m512 temporary and forwards it, with the
 * selector I, to __builtin_ia32_extractf32x4_mask using a zero pass-through
 * and an all-ones mask.  The result is a __m128. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({                        \
      __m512 __A = (A);                                                      \
      (__m128) __builtin_ia32_extractf32x4_mask((__v16sf)__A, (I),           \
                                                (__v4sf)_mm_setzero_ps(),    \
                                                (__mmask8) -1); })
653
/* Vector Blend */
655
656static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
657_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
658{
659 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
660 (__v8df) __W,
661 (__mmask8) __U);
662}
663
664static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
665_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
666{
667 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
668 (__v16sf) __W,
669 (__mmask16) __U);
670}
671
672static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
673_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
674{
675 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
676 (__v8di) __W,
677 (__mmask8) __U);
678}
679
680static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
681_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
682{
683 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
684 (__v16si) __W,
685 (__mmask16) __U);
686}
687
/* Compare */
689
/* Evaluates a and b once into __m512 temporaries and forwards them, with the
 * predicate p, to __builtin_ia32_cmpps512_mask using an all-ones mask and
 * _MM_FROUND_CUR_DIRECTION.  The result is a __mmask16. */
#define _mm512_cmp_ps_mask(a, b, p) __extension__ ({                         \
      __m512 __a = (a);                                                      \
      __m512 __b = (b);                                                      \
      (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)__a,                  \
                                              (__v16sf)__b,                  \
                                              (p),                           \
                                              (__mmask16)-1,                 \
                                              _MM_FROUND_CUR_DIRECTION); })
Adam Nemet0d5bb552014-07-28 17:14:40 +0000696
/* Evaluates a and b once into __m512d temporaries and forwards them, with
 * the predicate p, to __builtin_ia32_cmppd512_mask using an all-ones mask and
 * _MM_FROUND_CUR_DIRECTION.  The result is a __mmask8.
 *
 * The temporaries are __m512d (eight doubles) to match the __v8df operands
 * of the builtin; declaring them as __m512 (sixteen floats) would mistype
 * the double-precision arguments. */
#define _mm512_cmp_pd_mask(a, b, p) __extension__ ({                         \
      __m512d __a = (a);                                                     \
      __m512d __b = (b);                                                     \
      (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)__a, (__v8df)__b, (p),  \
                                             (__mmask8)-1,                   \
                                             _MM_FROUND_CUR_DIRECTION); })
Adam Nemet0d5bb552014-07-28 17:14:40 +0000703
/* Conversion */
705
706static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
707_mm512_cvttps_epu32(__m512 __A)
708{
709 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
710 (__v16si)
711 _mm512_setzero_si512 (),
712 (__mmask16) -1,
713 _MM_FROUND_CUR_DIRECTION);
714}
715
716static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
717_mm512_cvt_roundepi32_ps(__m512i __A, const int __R)
718{
719 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
720 (__v16sf)
721 _mm512_setzero_ps (),
722 (__mmask16) -1,
723 __R);
724}
725
726static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
727_mm512_cvt_roundepu32_ps(__m512i __A, const int __R)
728{
729 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
730 (__v16sf)
731 _mm512_setzero_ps (),
732 (__mmask16) -1,
733 __R);
734}
735
736static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
737_mm512_cvtepi32_pd(__m256i __A)
738{
739 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
740 (__v8df)
741 _mm512_setzero_pd (),
742 (__mmask8) -1);
743}
744
745static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
746_mm512_cvtepu32_pd(__m256i __A)
747{
748 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
749 (__v8df)
750 _mm512_setzero_pd (),
751 (__mmask8) -1);
752}
753static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
754_mm512_cvt_roundpd_ps(__m512d __A, const int __R)
755{
756 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
757 (__v8sf)
758 _mm256_setzero_ps (),
759 (__mmask8) -1,
760 __R);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000761}
762
763static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000764_mm512_cvtps_ph(__m512 __A, const int __I)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000765{
766 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
767 __I,
768 (__v16hi)
769 _mm256_setzero_si256 (),
770 -1);
771}
772
773static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000774_mm512_cvtph_ps(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000775{
776 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
777 (__v16sf)
778 _mm512_setzero_ps (),
779 (__mmask16) -1,
780 _MM_FROUND_CUR_DIRECTION);
781}
782
783static __inline __m512i __attribute__((__always_inline__, __nodebug__))
784_mm512_cvttps_epi32(__m512 a)
785{
786 return (__m512i)
787 __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
788 (__v16si) _mm512_setzero_si512 (),
789 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
790}
791
792static __inline __m256i __attribute__((__always_inline__, __nodebug__))
793_mm512_cvttpd_epi32(__m512d a)
794{
795 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
796 (__v8si)_mm256_setzero_si256(),
797 (__mmask8) -1,
798 _MM_FROUND_CUR_DIRECTION);
799}
800
801static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000802_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000803{
804 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
805 (__v8si)
806 _mm256_setzero_si256 (),
807 (__mmask8) -1,
808 __R);
809}
810static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000811_mm512_cvtt_roundps_epi32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000812{
813 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
814 (__v16si)
815 _mm512_setzero_si512 (),
816 (__mmask16) -1,
817 __R);
818}
819
820static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000821_mm512_cvt_roundps_epi32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000822{
823 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
824 (__v16si)
825 _mm512_setzero_si512 (),
826 (__mmask16) -1,
827 __R);
828}
829static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000830_mm512_cvt_roundpd_epi32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000831{
832 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
833 (__v8si)
834 _mm256_setzero_si256 (),
835 (__mmask8) -1,
836 __R);
837}
838static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000839_mm512_cvt_roundps_epu32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000840{
841 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
842 (__v16si)
843 _mm512_setzero_si512 (),
844 (__mmask16) -1,
845 __R);
846}
847static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000848_mm512_cvt_roundpd_epu32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000849{
850 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
851 (__v8si)
852 _mm256_setzero_si256 (),
853 (__mmask8) -1,
854 __R);
855}
856
/* Unpack and Interleave */
858static __inline __m512d __attribute__((__always_inline__, __nodebug__))
859_mm512_unpackhi_pd(__m512d __a, __m512d __b)
860{
861 return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
862}
863
864static __inline __m512d __attribute__((__always_inline__, __nodebug__))
865_mm512_unpacklo_pd(__m512d __a, __m512d __b)
866{
867 return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
868}
869
870static __inline __m512 __attribute__((__always_inline__, __nodebug__))
871_mm512_unpackhi_ps(__m512 __a, __m512 __b)
872{
873 return __builtin_shufflevector(__a, __b,
874 2, 18, 3, 19,
875 2+4, 18+4, 3+4, 19+4,
876 2+8, 18+8, 3+8, 19+8,
877 2+12, 18+12, 3+12, 19+12);
878}
879
880static __inline __m512 __attribute__((__always_inline__, __nodebug__))
881_mm512_unpacklo_ps(__m512 __a, __m512 __b)
882{
883 return __builtin_shufflevector(__a, __b,
884 0, 16, 1, 17,
885 0+4, 16+4, 1+4, 17+4,
886 0+8, 16+8, 1+8, 17+8,
887 0+12, 16+12, 1+12, 17+12);
888}
889
/* Bit Test */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000891
892static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000893_mm512_test_epi32_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000894{
895 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
896 (__v16si) __B,
897 (__mmask16) -1);
898}
899
900static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000901_mm512_test_epi64_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000902{
903 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
904 (__v8di) __B,
905 (__mmask8) -1);
906}
907
/* SIMD load ops */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000909
910static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000911_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000912{
913 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
914 (__v16si)
915 _mm512_setzero_si512 (),
916 (__mmask16) __U);
917}
918
919static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000920_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000921{
922 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
923 (__v8di)
924 _mm512_setzero_si512 (),
925 (__mmask8) __U);
926}
927
928static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000929_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000930{
931 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
932 (__v16sf)
933 _mm512_setzero_ps (),
934 (__mmask16) __U);
935}
936
937static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000938_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000939{
940 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
941 (__v8df)
942 _mm512_setzero_pd (),
943 (__mmask8) __U);
944}
945
Adam Nemetc0cff242015-01-16 18:51:50 +0000946static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
947_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
948{
949 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
950 (__v16sf)
951 _mm512_setzero_ps (),
952 (__mmask16) __U);
953}
954
955static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
956_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
957{
958 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
959 (__v8df)
960 _mm512_setzero_pd (),
961 (__mmask8) __U);
962}
963
Adam Nemetda82bcc2014-07-31 04:00:39 +0000964static __inline __m512d __attribute__((__always_inline__, __nodebug__))
965_mm512_loadu_pd(double const *__p)
966{
967 struct __loadu_pd {
968 __m512d __v;
969 } __attribute__((packed, may_alias));
970 return ((struct __loadu_pd*)__p)->__v;
971}
972
973static __inline __m512 __attribute__((__always_inline__, __nodebug__))
974_mm512_loadu_ps(float const *__p)
975{
976 struct __loadu_ps {
977 __m512 __v;
978 } __attribute__((packed, may_alias));
979 return ((struct __loadu_ps*)__p)->__v;
980}
981
Adam Nemetc0cff242015-01-16 18:51:50 +0000982static __inline __m512 __attribute__((__always_inline__, __nodebug__))
983_mm512_load_ps(double const *__p)
984{
985 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
986 (__v16sf)
987 _mm512_setzero_ps (),
988 (__mmask16) -1);
989}
990
991static __inline __m512d __attribute__((__always_inline__, __nodebug__))
992_mm512_load_pd(float const *__p)
993{
994 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
995 (__v8df)
996 _mm512_setzero_pd (),
997 (__mmask8) -1);
998}
999
/* SIMD store ops */
1001
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001002static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +00001003_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001004{
1005 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
1006 (__mmask8) __U);
1007}
1008
1009static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +00001010_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001011{
1012 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
1013 (__mmask16) __U);
1014}
1015
1016static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +00001017_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001018{
1019 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
1020}
1021
1022static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemetfce1ad02014-07-28 17:14:45 +00001023_mm512_storeu_pd(void *__P, __m512d __A)
1024{
1025 __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
1026}
1027
1028static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +00001029_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001030{
1031 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
1032 (__mmask16) __U);
1033}
1034
Adam Nemetfce1ad02014-07-28 17:14:45 +00001035static __inline void __attribute__ ((__always_inline__, __nodebug__))
1036_mm512_storeu_ps(void *__P, __m512 __A)
1037{
1038 __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
1039}
1040
1041static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemetc0cff242015-01-16 18:51:50 +00001042_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
Adam Nemetfce1ad02014-07-28 17:14:45 +00001043{
Adam Nemetc0cff242015-01-16 18:51:50 +00001044 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
Adam Nemetfce1ad02014-07-28 17:14:45 +00001045}
1046
1047static __inline void __attribute__ ((__always_inline__, __nodebug__))
1048_mm512_store_pd(void *__P, __m512d __A)
1049{
1050 *(__m512d*)__P = __A;
1051}
1052
Adam Nemetc0cff242015-01-16 18:51:50 +00001053static __inline void __attribute__ ((__always_inline__, __nodebug__))
1054_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
1055{
1056 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
1057 (__mmask16) __U);
1058}
1059
1060static __inline void __attribute__ ((__always_inline__, __nodebug__))
1061_mm512_store_ps(void *__P, __m512 __A)
1062{
1063 *(__m512*)__P = __A;
1064}
1065
/* Mask ops */
1067
1068static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
1069_mm512_knot(__mmask16 __M)
1070{
1071 return __builtin_ia32_knothi(__M);
1072}
1073
/* Integer compare */
1075
1076static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1077_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
1078 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
1079 (__mmask16)-1);
1080}
1081
1082static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1083_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1084 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
1085 __u);
1086}
1087
Craig Topper4cac1c22015-01-25 23:30:07 +00001088static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1089_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
1090 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
1091 (__mmask16)-1);
1092}
1093
1094static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1095_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1096 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
1097 __u);
1098}
1099
Robert Khasanovb9f3a912014-10-08 17:18:13 +00001100static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1101_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1102 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
1103 __u);
1104}
1105
1106static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1107_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
1108 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
1109 (__mmask8)-1);
1110}
1111
Craig Topper4cac1c22015-01-25 23:30:07 +00001112static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1113_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
1114 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
1115 (__mmask8)-1);
1116}
1117
1118static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1119_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1120 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
1121 __u);
1122}
1123
1124static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1125_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
1126 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
1127 (__mmask16)-1);
1128}
1129
1130static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1131_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1132 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
1133 __u);
1134}
1135
1136static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1137_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
1138 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
1139 (__mmask16)-1);
1140}
1141
1142static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1143_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1144 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
1145 __u);
1146}
1147
1148static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1149_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
1150 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
1151 (__mmask8)-1);
1152}
1153
1154static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1155_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1156 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
1157 __u);
1158}
1159
1160static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1161_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
1162 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
1163 (__mmask8)-1);
1164}
1165
1166static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1167_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1168 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
1169 __u);
1170}
1171
1172static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1173_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
1174 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
1175 (__mmask16)-1);
1176}
1177
1178static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1179_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1180 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
1181 __u);
1182}
1183
1184static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1185_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
1186 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
1187 (__mmask16)-1);
1188}
1189
1190static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1191_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1192 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
1193 __u);
1194}
1195
1196static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1197_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1198 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
1199 __u);
1200}
1201
1202static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1203_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
1204 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
1205 (__mmask8)-1);
1206}
1207
1208static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1209_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
1210 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
1211 (__mmask8)-1);
1212}
1213
1214static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1215_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1216 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
1217 __u);
1218}
1219
1220static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1221_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
1222 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
1223 (__mmask16)-1);
1224}
1225
1226static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1227_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1228 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
1229 __u);
1230}
1231
1232static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1233_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
1234 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
1235 (__mmask16)-1);
1236}
1237
1238static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1239_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1240 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
1241 __u);
1242}
1243
1244static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1245_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
1246 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
1247 (__mmask8)-1);
1248}
1249
1250static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1251_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1252 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
1253 __u);
1254}
1255
1256static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1257_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
1258 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
1259 (__mmask8)-1);
1260}
1261
1262static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1263_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1264 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
1265 __u);
1266}
1267
1268static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1269_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
1270 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
1271 (__mmask16)-1);
1272}
1273
1274static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1275_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1276 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
1277 __u);
1278}
1279
1280static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1281_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
1282 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
1283 (__mmask16)-1);
1284}
1285
1286static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1287_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1288 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
1289 __u);
1290}
1291
1292static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1293_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
1294 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
1295 (__mmask8)-1);
1296}
1297
1298static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1299_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1300 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
1301 __u);
1302}
1303
1304static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1305_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
1306 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
1307 (__mmask8)-1);
1308}
1309
1310static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1311_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1312 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
1313 __u);
1314}
1315
1316static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1317_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
1318 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
1319 (__mmask16)-1);
1320}
1321
1322static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1323_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1324 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
1325 __u);
1326}
1327
1328static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1329_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
1330 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
1331 (__mmask16)-1);
1332}
1333
1334static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1335_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1336 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
1337 __u);
1338}
1339
1340static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1341_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
1342 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
1343 (__mmask8)-1);
1344}
1345
1346static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1347_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1348 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
1349 __u);
1350}
1351
1352static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1353_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
1354 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
1355 (__mmask8)-1);
1356}
1357
1358static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1359_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1360 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
1361 __u);
1362}
1363
/* Generic compare over 16 x 32-bit lanes via __builtin_ia32_cmpd512_mask with
 * caller-supplied predicate p and an all-ones (__mmask16)-1 mask.
 *
 * The arguments are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with such temporaries, an argument expression
 * that itself mentions __a or __b (the parameter names used by the inline
 * functions in this header) would bind to the uninitialised temporary.  Each
 * argument is still evaluated exactly once, and p is handed to the builtin
 * unchanged so it can remain an immediate. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (p), \
                                          (__mmask16)-1))
1369
/* Generic compare, epu32 variant, via __builtin_ia32_ucmpd512_mask with
 * caller-supplied predicate p and an all-ones (__mmask16)-1 mask.
 *
 * The arguments are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with such temporaries, an argument expression
 * that itself mentions __a or __b (the parameter names used by the inline
 * functions in this header) would bind to the uninitialised temporary.  Each
 * argument is still evaluated exactly once, and p is handed to the builtin
 * unchanged so it can remain an immediate. */
#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (p), \
                                           (__mmask16)-1))
1375
/* Generic compare over 8 x 64-bit lanes via __builtin_ia32_cmpq512_mask with
 * caller-supplied predicate p and an all-ones (__mmask8)-1 mask.
 *
 * The arguments are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with such temporaries, an argument expression
 * that itself mentions __a or __b (the parameter names used by the inline
 * functions in this header) would bind to the uninitialised temporary.  Each
 * argument is still evaluated exactly once, and p is handed to the builtin
 * unchanged so it can remain an immediate. */
#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (p), \
                                         (__mmask8)-1))
1381
/* Generic compare, epu64 variant, via __builtin_ia32_ucmpq512_mask with
 * caller-supplied predicate p and an all-ones (__mmask8)-1 mask.
 *
 * The arguments are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with such temporaries, an argument expression
 * that itself mentions __a or __b (the parameter names used by the inline
 * functions in this header) would bind to the uninitialised temporary.  Each
 * argument is still evaluated exactly once, and p is handed to the builtin
 * unchanged so it can remain an immediate. */
#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (p), \
                                          (__mmask8)-1))
1387
/* Generic compare over 16 x 32-bit lanes via __builtin_ia32_cmpd512_mask with
 * caller-supplied predicate p; m is cast to __mmask16 and passed as the
 * builtin's mask operand.
 *
 * The arguments are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with such temporaries, an argument expression
 * that itself mentions __a or __b (the parameter names used by the inline
 * functions in this header) would bind to the uninitialised temporary.  Each
 * argument is still evaluated exactly once, and p is handed to the builtin
 * unchanged so it can remain an immediate. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (p), \
                                          (__mmask16)(m)))
1393
/* Generic compare, epu32 variant, via __builtin_ia32_ucmpd512_mask with
 * caller-supplied predicate p; m is cast to __mmask16 and passed as the
 * builtin's mask operand.
 *
 * The arguments are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with such temporaries, an argument expression
 * that itself mentions __a or __b (the parameter names used by the inline
 * functions in this header) would bind to the uninitialised temporary.  Each
 * argument is still evaluated exactly once, and p is handed to the builtin
 * unchanged so it can remain an immediate. */
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (p), \
                                           (__mmask16)(m)))
1399
/* Generic compare over 8 x 64-bit lanes via __builtin_ia32_cmpq512_mask with
 * caller-supplied predicate p; m is cast to __mmask8 and passed as the
 * builtin's mask operand.
 *
 * The arguments are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with such temporaries, an argument expression
 * that itself mentions __a or __b (the parameter names used by the inline
 * functions in this header) would bind to the uninitialised temporary.  Each
 * argument is still evaluated exactly once, and p is handed to the builtin
 * unchanged so it can remain an immediate. */
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (p), \
                                         (__mmask8)(m)))
1405
/* Generic compare, epu64 variant, via __builtin_ia32_ucmpq512_mask with
 * caller-supplied predicate p; m is cast to __mmask8 and passed as the
 * builtin's mask operand.
 *
 * The arguments are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with such temporaries, an argument expression
 * that itself mentions __a or __b (the parameter names used by the inline
 * functions in this header) would bind to the uninitialised temporary.  Each
 * argument is still evaluated exactly once, and p is handed to the builtin
 * unchanged so it can remain an immediate. */
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (p), \
                                          (__mmask8)(m)))
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001411#endif // __AVX512FINTRIN_H