blob: 8aa6cecf9d37939eed499756be40325177a9a3f3 [file] [log] [blame]
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Internal 64-byte vector types, named by lane count and element kind. */
typedef int __v16si __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));

/* 64-byte vector types used in the intrinsic signatures. */
typedef long long __m512i __attribute__((__vector_size__(64)));
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));

/* Mask types: __mmask8 is an unsigned char, __mmask16 an unsigned short. */
typedef unsigned short __mmask16;
typedef unsigned char __mmask8;
41
/* Rounding mode selectors passed as the last argument of the rounding-aware
   builtins in this header.  The literal spellings are kept as-is so that an
   identical definition in another header is not a conflicting redefinition. */
#define _MM_FROUND_TO_NEAREST_INT 0x00 /* to nearest */
#define _MM_FROUND_TO_NEG_INF 0x01 /* toward negative infinity */
#define _MM_FROUND_TO_POS_INF 0x02 /* toward positive infinity */
#define _MM_FROUND_TO_ZERO 0x03 /* toward zero */
#define _MM_FROUND_CUR_DIRECTION 0x04 /* current direction */
48
Adam Nemet0d5bb552014-07-28 17:14:40 +000049/* Create vectors with repeated elements */
50
51static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
52_mm512_setzero_si512(void)
53{
54 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
55}
56
57static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
58_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
59{
60 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
61 (__v16si)
62 _mm512_setzero_si512 (),
63 __M);
64}
65
66static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
67_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
68{
69#ifdef __x86_64__
70 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
71 (__v8di)
72 _mm512_setzero_si512 (),
73 __M);
74#else
75 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
76 (__v8di)
77 _mm512_setzero_si512 (),
78 __M);
79#endif
80}
81
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000082static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000083_mm512_setzero_ps(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000084{
85 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
86 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
87}
88static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000089_mm512_setzero_pd(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000090{
91 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
92}
Adam Nemet0d5bb552014-07-28 17:14:40 +000093
Adam Nemetf42e7a22014-07-30 16:51:22 +000094static __inline __m512 __attribute__((__always_inline__, __nodebug__))
95_mm512_set1_ps(float __w)
96{
97 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
98 __w, __w, __w, __w, __w, __w, __w, __w };
99}
100
101static __inline __m512d __attribute__((__always_inline__, __nodebug__))
102_mm512_set1_pd(double __w)
103{
104 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
105}
106
107static __inline __m512i __attribute__((__always_inline__, __nodebug__))
108_mm512_set1_epi32(int __s)
109{
110 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
111 __s, __s, __s, __s, __s, __s, __s, __s };
112}
113
114static __inline __m512i __attribute__((__always_inline__, __nodebug__))
115_mm512_set1_epi64(long long __d)
116{
117 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
118}
119
Adam Nemet4abc07c2014-08-13 00:29:01 +0000120static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
121_mm512_broadcastss_ps(__m128 __X)
122{
123 float __f = __X[0];
124 return (__v16sf){ __f, __f, __f, __f,
125 __f, __f, __f, __f,
126 __f, __f, __f, __f,
127 __f, __f, __f, __f };
128}
129
130static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
131_mm512_broadcastsd_pd(__m128d __X)
132{
133 double __d = __X[0];
134 return (__v8df){ __d, __d, __d, __d,
135 __d, __d, __d, __d };
136}
137
Adam Nemetc871ff92014-07-30 16:51:24 +0000138/* Cast between vector types */
139
140static __inline __m512d __attribute__((__always_inline__, __nodebug__))
141_mm512_castpd256_pd512(__m256d __a)
142{
143 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
144}
145
146static __inline __m512 __attribute__((__always_inline__, __nodebug__))
147_mm512_castps256_ps512(__m256 __a)
148{
149 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
150 -1, -1, -1, -1, -1, -1, -1, -1);
151}
152
153static __inline __m128d __attribute__((__always_inline__, __nodebug__))
154_mm512_castpd512_pd128(__m512d __a)
155{
156 return __builtin_shufflevector(__a, __a, 0, 1);
157}
158
159static __inline __m128 __attribute__((__always_inline__, __nodebug__))
160_mm512_castps512_ps128(__m512 __a)
161{
162 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
163}
164
Adam Nemet0d5bb552014-07-28 17:14:40 +0000165/* Arithmetic */
166
Adam Nemeta3ebe622014-07-28 17:14:42 +0000167static __inline __m512d __attribute__((__always_inline__, __nodebug__))
168_mm512_add_pd(__m512d __a, __m512d __b)
169{
170 return __a + __b;
171}
172
173static __inline __m512 __attribute__((__always_inline__, __nodebug__))
174_mm512_add_ps(__m512 __a, __m512 __b)
175{
176 return __a + __b;
177}
178
179static __inline __m512d __attribute__((__always_inline__, __nodebug__))
180_mm512_mul_pd(__m512d __a, __m512d __b)
181{
182 return __a * __b;
183}
184
185static __inline __m512 __attribute__((__always_inline__, __nodebug__))
186_mm512_mul_ps(__m512 __a, __m512 __b)
187{
188 return __a * __b;
189}
190
191static __inline __m512d __attribute__((__always_inline__, __nodebug__))
192_mm512_sub_pd(__m512d __a, __m512d __b)
193{
194 return __a - __b;
195}
196
197static __inline __m512 __attribute__((__always_inline__, __nodebug__))
198_mm512_sub_ps(__m512 __a, __m512 __b)
199{
200 return __a - __b;
201}
202
Adam Nemet0d5bb552014-07-28 17:14:40 +0000203static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
204_mm512_max_pd(__m512d __A, __m512d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000205{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000206 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
207 (__v8df) __B,
208 (__v8df)
209 _mm512_setzero_pd (),
210 (__mmask8) -1,
211 _MM_FROUND_CUR_DIRECTION);
212}
213
214static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
215_mm512_max_ps(__m512 __A, __m512 __B)
216{
217 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
218 (__v16sf) __B,
219 (__v16sf)
220 _mm512_setzero_ps (),
221 (__mmask16) -1,
222 _MM_FROUND_CUR_DIRECTION);
223}
224
225static __inline __m512i
226__attribute__ ((__always_inline__, __nodebug__))
227_mm512_max_epi32(__m512i __A, __m512i __B)
228{
229 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
230 (__v16si) __B,
231 (__v16si)
232 _mm512_setzero_si512 (),
233 (__mmask16) -1);
234}
235
236static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
237_mm512_max_epu32(__m512i __A, __m512i __B)
238{
239 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
240 (__v16si) __B,
241 (__v16si)
242 _mm512_setzero_si512 (),
243 (__mmask16) -1);
244}
245
246static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
247_mm512_max_epi64(__m512i __A, __m512i __B)
248{
249 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
250 (__v8di) __B,
251 (__v8di)
252 _mm512_setzero_si512 (),
253 (__mmask8) -1);
254}
255
256static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
257_mm512_max_epu64(__m512i __A, __m512i __B)
258{
259 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
260 (__v8di) __B,
261 (__v8di)
262 _mm512_setzero_si512 (),
263 (__mmask8) -1);
264}
265
266static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
267_mm512_min_pd(__m512d __A, __m512d __B)
268{
269 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
270 (__v8df) __B,
271 (__v8df)
272 _mm512_setzero_pd (),
273 (__mmask8) -1,
274 _MM_FROUND_CUR_DIRECTION);
275}
276
277static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
278_mm512_min_ps(__m512 __A, __m512 __B)
279{
280 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
281 (__v16sf) __B,
282 (__v16sf)
283 _mm512_setzero_ps (),
284 (__mmask16) -1,
285 _MM_FROUND_CUR_DIRECTION);
286}
287
288static __inline __m512i
289__attribute__ ((__always_inline__, __nodebug__))
290_mm512_min_epi32(__m512i __A, __m512i __B)
291{
292 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
293 (__v16si) __B,
294 (__v16si)
295 _mm512_setzero_si512 (),
296 (__mmask16) -1);
297}
298
299static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
300_mm512_min_epu32(__m512i __A, __m512i __B)
301{
302 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
303 (__v16si) __B,
304 (__v16si)
305 _mm512_setzero_si512 (),
306 (__mmask16) -1);
307}
308
309static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
310_mm512_min_epi64(__m512i __A, __m512i __B)
311{
312 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
313 (__v8di) __B,
314 (__v8di)
315 _mm512_setzero_si512 (),
316 (__mmask8) -1);
317}
318
319static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
320_mm512_min_epu64(__m512i __A, __m512i __B)
321{
322 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
323 (__v8di) __B,
324 (__v8di)
325 _mm512_setzero_si512 (),
326 (__mmask8) -1);
327}
328
329static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
330_mm512_mul_epi32(__m512i __X, __m512i __Y)
331{
332 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
333 (__v16si) __Y,
334 (__v8di)
335 _mm512_setzero_si512 (),
336 (__mmask8) -1);
337}
338
339static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
340_mm512_mul_epu32(__m512i __X, __m512i __Y)
341{
342 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
343 (__v16si) __Y,
344 (__v8di)
345 _mm512_setzero_si512 (),
346 (__mmask8) -1);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000347}
348
349static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
350_mm512_sqrt_pd(__m512d a)
351{
352 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
353 (__v8df) _mm512_setzero_pd (),
354 (__mmask8) -1,
355 _MM_FROUND_CUR_DIRECTION);
356}
357
358static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
359_mm512_sqrt_ps(__m512 a)
360{
361 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
362 (__v16sf) _mm512_setzero_ps (),
363 (__mmask16) -1,
364 _MM_FROUND_CUR_DIRECTION);
365}
366
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000367static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
368_mm512_rsqrt14_pd(__m512d __A)
369{
370 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
371 (__v8df)
372 _mm512_setzero_pd (),
373 (__mmask8) -1);}
374
375static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
376_mm512_rsqrt14_ps(__m512 __A)
377{
378 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
379 (__v16sf)
380 _mm512_setzero_ps (),
381 (__mmask16) -1);
382}
383
384static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
385_mm_rsqrt14_ss(__m128 __A, __m128 __B)
386{
387 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
388 (__v4sf) __B,
389 (__v4sf)
390 _mm_setzero_ps (),
391 (__mmask8) -1);
392}
393
394static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
395_mm_rsqrt14_sd(__m128d __A, __m128d __B)
396{
397 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
398 (__v2df) __B,
399 (__v2df)
400 _mm_setzero_pd (),
401 (__mmask8) -1);
402}
403
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000404static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
405_mm512_rcp14_pd(__m512d __A)
406{
407 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
408 (__v8df)
409 _mm512_setzero_pd (),
410 (__mmask8) -1);
411}
412
413static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
414_mm512_rcp14_ps(__m512 __A)
415{
416 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
417 (__v16sf)
418 _mm512_setzero_ps (),
419 (__mmask16) -1);
420}
421static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000422_mm_rcp14_ss(__m128 __A, __m128 __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000423{
424 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
425 (__v4sf) __B,
426 (__v4sf)
427 _mm_setzero_ps (),
428 (__mmask8) -1);
429}
430
431static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000432_mm_rcp14_sd(__m128d __A, __m128d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000433{
434 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
435 (__v2df) __B,
436 (__v2df)
437 _mm_setzero_pd (),
438 (__mmask8) -1);
439}
440
Adam Nemet0d5bb552014-07-28 17:14:40 +0000441static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
442_mm512_floor_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000443{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000444 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
445 _MM_FROUND_FLOOR,
446 (__v16sf) __A, -1,
447 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000448}
449
Adam Nemet0d5bb552014-07-28 17:14:40 +0000450static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
451_mm512_floor_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000452{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000453 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
454 _MM_FROUND_FLOOR,
455 (__v8df) __A, -1,
456 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000457}
458
Adam Nemet0d5bb552014-07-28 17:14:40 +0000459static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
460_mm512_ceil_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000461{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000462 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
463 _MM_FROUND_CEIL,
464 (__v16sf) __A, -1,
465 _MM_FROUND_CUR_DIRECTION);
466}
467
468static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
469_mm512_ceil_pd(__m512d __A)
470{
471 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
472 _MM_FROUND_CEIL,
473 (__v8df) __A, -1,
474 _MM_FROUND_CUR_DIRECTION);
475}
476
477static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
478_mm512_abs_epi64(__m512i __A)
479{
480 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
481 (__v8di)
482 _mm512_setzero_si512 (),
483 (__mmask8) -1);
484}
485
486static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
487_mm512_abs_epi32(__m512i __A)
488{
489 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
490 (__v16si)
491 _mm512_setzero_si512 (),
492 (__mmask16) -1);
493}
494
/* _mm512_roundscale_ps(A, B): expands to the rndscaleps masked builtin with
   (A) as first and third operand, (B) as second, an all-ones mask and
   _MM_FROUND_CUR_DIRECTION.  Note that (A) appears twice in the expansion. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), \
                                         (B), \
                                         (__v16sf)(A), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })
498
/* _mm512_roundscale_pd(A, B): expands to the rndscalepd masked builtin with
   (A) as first and third operand, (B) as second, an all-ones mask and
   _MM_FROUND_CUR_DIRECTION.  Note that (A) appears twice in the expansion. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), \
                                          (B), \
                                          (__v8df)(A), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })
Adam Nemet0d5bb552014-07-28 17:14:40 +0000502
Adam Nemet2278fcb2014-08-14 17:17:57 +0000503static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
504_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
505{
506 return (__m512d)
507 __builtin_ia32_vfmaddpd512_mask(__A,
508 __B,
509 __C,
510 (__mmask8) -1,
511 _MM_FROUND_CUR_DIRECTION);
512}
513
514static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
515_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
516{
517 return (__m512d)
518 __builtin_ia32_vfmsubpd512_mask(__A,
519 __B,
520 __C,
521 (__mmask8) -1,
522 _MM_FROUND_CUR_DIRECTION);
523}
524
525static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
526_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
527{
528 return (__m512d)
529 __builtin_ia32_vfnmaddpd512_mask(__A,
530 __B,
531 __C,
532 (__mmask8) -1,
533 _MM_FROUND_CUR_DIRECTION);
534}
535
536static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
537_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
538{
539 return (__m512)
540 __builtin_ia32_vfmaddps512_mask(__A,
541 __B,
542 __C,
543 (__mmask16) -1,
544 _MM_FROUND_CUR_DIRECTION);
545}
546
547static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
548_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
549{
550 return (__m512)
551 __builtin_ia32_vfmsubps512_mask(__A,
552 __B,
553 __C,
554 (__mmask16) -1,
555 _MM_FROUND_CUR_DIRECTION);
556}
557
558static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
559_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
560{
561 return (__m512)
562 __builtin_ia32_vfnmaddps512_mask(__A,
563 __B,
564 __C,
565 (__mmask16) -1,
566 _MM_FROUND_CUR_DIRECTION);
567}
568
Adam Nemet0d5bb552014-07-28 17:14:40 +0000569/* Vector permutations */
570
571static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
572_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
573{
574 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
575 /* idx */ ,
576 (__v16si) __A,
577 (__v16si) __B,
578 (__mmask16) -1);
579}
580static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
581_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
582{
583 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
584 /* idx */ ,
585 (__v8di) __A,
586 (__v8di) __B,
587 (__mmask8) -1);
588}
589
590static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
591_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
592{
593 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
594 /* idx */ ,
595 (__v8df) __A,
596 (__v8df) __B,
597 (__mmask8) -1);
598}
599static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
600_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
601{
602 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
603 /* idx */ ,
604 (__v16sf) __A,
605 (__v16sf) __B,
606 (__mmask16) -1);
607}
608
Adam Nemet5bf7baa2014-08-05 17:28:23 +0000609static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
610_mm512_valign_epi64(__m512i __A, __m512i __B, const int __I)
611{
612 return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A,
613 (__v8di)__B,
614 __I,
615 (__v8di)_mm512_setzero_si512(),
616 (__mmask8) -1);
617}
618
619static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
620_mm512_valign_epi32(__m512i __A, __m512i __B, const int __I)
621{
622 return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A,
623 (__v16si)__B,
624 __I,
625 (__v16si)_mm512_setzero_si512(),
626 (__mmask16) -1);
627}
628
Adam Nemetf893ede2015-01-19 20:12:05 +0000629/* Vector Extract */
630
/* _mm512_extractf64x4_pd(A, I): expands to the extractf64x4 masked builtin
   on (A) with (I) as second argument, a zero 256-bit vector as third operand
   and an all-ones mask; the result is an __m256d.

   (A) is cast in place rather than copied into a block-local `__A`.  A local
   with that name shadows a caller variable called `__A` (the usual parameter
   name in this header), so `_mm512_extractf64x4_pd(__A, 1)` would expand to
   the self-initialisation `__m512d __A = (__A);`.  (A) is still evaluated
   exactly once. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
      (__m256d) \
        __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \
                                         (I), \
                                         (__v4df)_mm256_setzero_si256(), \
                                         (__mmask8) -1); })
638
/* _mm512_extractf32x4_ps(A, I): expands to the extractf32x4 masked builtin
   on (A) with (I) as second argument, a zero 128-bit vector as third operand
   and an all-ones mask; the result is an __m128.

   (A) is cast in place rather than copied into a block-local `__A`.  A local
   with that name shadows a caller variable called `__A` (the usual parameter
   name in this header), so `_mm512_extractf32x4_ps(__A, 1)` would expand to
   the self-initialisation `__m512 __A = (__A);`.  (A) is still evaluated
   exactly once. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
      (__m128) \
        __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), \
                                         (I), \
                                         (__v4sf)_mm_setzero_ps(), \
                                         (__mmask8) -1); })
646
Adam Nemet0d5bb552014-07-28 17:14:40 +0000647/* Vector Blend */
648
649static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
650_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
651{
652 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
653 (__v8df) __W,
654 (__mmask8) __U);
655}
656
657static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
658_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
659{
660 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
661 (__v16sf) __W,
662 (__mmask16) __U);
663}
664
665static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
666_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
667{
668 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
669 (__v8di) __W,
670 (__mmask8) __U);
671}
672
673static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
674_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
675{
676 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
677 (__v16si) __W,
678 (__mmask16) __U);
679}
680
681/* Compare */
682
/* _mm512_cmp_ps_mask(a, b, p): expands to the cmpps512 masked builtin on (a)
   and (b) with (p) as third argument, an all-ones mask and
   _MM_FROUND_CUR_DIRECTION; the result is an __mmask16.

   The operands are cast in place rather than copied into block-local
   `__a`/`__b`.  Locals with those names shadow caller variables of the same
   name, so `_mm512_cmp_ps_mask(__a, __b, p)` would expand to the
   self-initialisation `__m512 __a = (__a);`.  Each argument is still
   evaluated exactly once. */
#define _mm512_cmp_ps_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(a), \
                                          (__v16sf)(__m512)(b), \
                                          (p), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })
Adam Nemet0d5bb552014-07-28 17:14:40 +0000689
/* _mm512_cmp_pd_mask(a, b, p): expands to the cmppd512 masked builtin on (a)
   and (b) with (p) as third argument, an all-ones mask and
   _MM_FROUND_CUR_DIRECTION; the result is an __mmask8.

   The operands are treated as __m512d (double lanes), matching the __v8df
   the builtin is given; the earlier float-typed __m512 temporaries did not
   match.  They are also cast in place rather than copied into block-local
   `__a`/`__b`, which would shadow caller variables of the same name and
   turn `_mm512_cmp_pd_mask(__a, __b, p)` into a self-initialisation.  Each
   argument is still evaluated exactly once. */
#define _mm512_cmp_pd_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(a), \
                                         (__v8df)(__m512d)(b), \
                                         (p), \
                                         (__mmask8)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })
Adam Nemet0d5bb552014-07-28 17:14:40 +0000696
697/* Conversion */
698
699static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
700_mm512_cvttps_epu32(__m512 __A)
701{
702 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
703 (__v16si)
704 _mm512_setzero_si512 (),
705 (__mmask16) -1,
706 _MM_FROUND_CUR_DIRECTION);
707}
708
/* _mm512_cvt_roundepi32_ps(A, R): expands to the cvtdq2ps512 masked builtin
   on (A) with a zero vector as second operand, an all-ones mask and (R) as
   the final argument; the result is an __m512. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (R)); })
Adam Nemet0d5bb552014-07-28 17:14:40 +0000713
/* _mm512_cvt_roundepu32_ps(A, R): expands to the cvtudq2ps512 masked builtin
   on (A) with a zero vector as second operand, an all-ones mask and (R) as
   the final argument; the result is an __m512. */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (R)); })
Adam Nemet0d5bb552014-07-28 17:14:40 +0000718
719static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
720_mm512_cvtepi32_pd(__m256i __A)
721{
722 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
723 (__v8df)
724 _mm512_setzero_pd (),
725 (__mmask8) -1);
726}
727
728static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
729_mm512_cvtepu32_pd(__m256i __A)
730{
731 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
732 (__v8df)
733 _mm512_setzero_pd (),
734 (__mmask8) -1);
735}
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000736
/* _mm512_cvt_roundpd_ps(A, R): expands to the cvtpd2ps512 masked builtin on
   (A) with a zero 256-bit vector as second operand, an all-ones mask and (R)
   as the final argument; the result is an __m256. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, \
      (R)); })
741
/* _mm512_cvtps_ph(A, I): expands to the vcvtps2ph512 masked builtin on (A)
   with (I) as second argument, a zero 256-bit vector (viewed as __v16hi) as
   third operand and an all-ones mask; the result is an __m256i. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask( \
      (__v16sf)(A), \
      (I), \
      (__v16hi)_mm256_setzero_si256(), \
      (__mmask16)-1); })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000746
747static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000748_mm512_cvtph_ps(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000749{
750 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
751 (__v16sf)
752 _mm512_setzero_ps (),
753 (__mmask16) -1,
754 _MM_FROUND_CUR_DIRECTION);
755}
756
757static __inline __m512i __attribute__((__always_inline__, __nodebug__))
758_mm512_cvttps_epi32(__m512 a)
759{
760 return (__m512i)
761 __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
762 (__v16si) _mm512_setzero_si512 (),
763 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
764}
765
766static __inline __m256i __attribute__((__always_inline__, __nodebug__))
767_mm512_cvttpd_epi32(__m512d a)
768{
769 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
770 (__v8si)_mm256_setzero_si256(),
771 (__mmask8) -1,
772 _MM_FROUND_CUR_DIRECTION);
773}
774
/* _mm512_cvtt_roundpd_epi32(A, R): expands to the cvttpd2dq512 masked
   builtin on (A) with a zero 256-bit vector as second operand, an all-ones
   mask and (R) as the final argument; the result is an __m256i. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, \
      (R)); })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000779
/* _mm512_cvtt_roundps_epi32(A, R): expands to the cvttps2dq512 masked
   builtin on (A) with a zero vector as second operand, an all-ones mask and
   (R) as the final argument; the result is an __m512i. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (R)); })
784
/* _mm512_cvt_roundps_epi32(A, R): expands to the cvtps2dq512 masked builtin
   on (A) with a zero vector as second operand, an all-ones mask and (R) as
   the final argument; the result is an __m512i. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (R)); })
789
/* _mm512_cvt_roundpd_epi32(A, R): expands to the cvtpd2dq512 masked builtin
   on (A) with a zero 256-bit vector as second operand, an all-ones mask and
   (R) as the final argument; the result is an __m256i. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, \
      (R)); })
794
/* _mm512_cvt_roundps_epu32(A, R): expands to the cvtps2udq512 masked builtin
   on (A) with a zero vector as second operand, an all-ones mask and (R) as
   the final argument; the result is an __m512i. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (R)); })
799
/* _mm512_cvt_roundpd_epu32(A, R): expands to the cvtpd2udq512 masked builtin
   on (A) with a zero 256-bit vector as second operand, an all-ones mask and
   (R) as the final argument; the result is an __m256i. */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, \
      (R)); })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000804
Adam Nemet63a951e2015-01-14 01:31:17 +0000805/* Unpack and Interleave */
806static __inline __m512d __attribute__((__always_inline__, __nodebug__))
807_mm512_unpackhi_pd(__m512d __a, __m512d __b)
808{
809 return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
810}
811
812static __inline __m512d __attribute__((__always_inline__, __nodebug__))
813_mm512_unpacklo_pd(__m512d __a, __m512d __b)
814{
815 return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
816}
817
818static __inline __m512 __attribute__((__always_inline__, __nodebug__))
819_mm512_unpackhi_ps(__m512 __a, __m512 __b)
820{
821 return __builtin_shufflevector(__a, __b,
822 2, 18, 3, 19,
823 2+4, 18+4, 3+4, 19+4,
824 2+8, 18+8, 3+8, 19+8,
825 2+12, 18+12, 3+12, 19+12);
826}
827
828static __inline __m512 __attribute__((__always_inline__, __nodebug__))
829_mm512_unpacklo_ps(__m512 __a, __m512 __b)
830{
831 return __builtin_shufflevector(__a, __b,
832 0, 16, 1, 17,
833 0+4, 16+4, 1+4, 17+4,
834 0+8, 16+8, 1+8, 17+8,
835 0+12, 16+12, 1+12, 17+12);
836}
837
Adam Nemet0d5bb552014-07-28 17:14:40 +0000838/* Bit Test */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000839
840static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000841_mm512_test_epi32_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000842{
843 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
844 (__v16si) __B,
845 (__mmask16) -1);
846}
847
848static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000849_mm512_test_epi64_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000850{
851 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
852 (__v8di) __B,
853 (__mmask8) -1);
854}
855
Adam Nemet0d5bb552014-07-28 17:14:40 +0000856/* SIMD load ops */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000857
858static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000859_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000860{
861 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
862 (__v16si)
863 _mm512_setzero_si512 (),
864 (__mmask16) __U);
865}
866
867static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000868_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000869{
870 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
871 (__v8di)
872 _mm512_setzero_si512 (),
873 (__mmask8) __U);
874}
875
876static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000877_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000878{
879 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
880 (__v16sf)
881 _mm512_setzero_ps (),
882 (__mmask16) __U);
883}
884
885static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000886_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000887{
888 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
889 (__v8df)
890 _mm512_setzero_pd (),
891 (__mmask8) __U);
892}
893
Adam Nemetc0cff242015-01-16 18:51:50 +0000894static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
895_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
896{
897 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
898 (__v16sf)
899 _mm512_setzero_ps (),
900 (__mmask16) __U);
901}
902
903static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
904_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
905{
906 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
907 (__v8df)
908 _mm512_setzero_pd (),
909 (__mmask8) __U);
910}
911
Adam Nemetda82bcc2014-07-31 04:00:39 +0000912static __inline __m512d __attribute__((__always_inline__, __nodebug__))
913_mm512_loadu_pd(double const *__p)
914{
915 struct __loadu_pd {
916 __m512d __v;
917 } __attribute__((packed, may_alias));
918 return ((struct __loadu_pd*)__p)->__v;
919}
920
921static __inline __m512 __attribute__((__always_inline__, __nodebug__))
922_mm512_loadu_ps(float const *__p)
923{
924 struct __loadu_ps {
925 __m512 __v;
926 } __attribute__((packed, may_alias));
927 return ((struct __loadu_ps*)__p)->__v;
928}
929
Adam Nemetc0cff242015-01-16 18:51:50 +0000930static __inline __m512 __attribute__((__always_inline__, __nodebug__))
931_mm512_load_ps(double const *__p)
932{
933 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
934 (__v16sf)
935 _mm512_setzero_ps (),
936 (__mmask16) -1);
937}
938
939static __inline __m512d __attribute__((__always_inline__, __nodebug__))
940_mm512_load_pd(float const *__p)
941{
942 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
943 (__v8df)
944 _mm512_setzero_pd (),
945 (__mmask8) -1);
946}
947
/* SIMD store ops */
949
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000950static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000951_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000952{
953 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
954 (__mmask8) __U);
955}
956
957static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000958_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000959{
960 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
961 (__mmask16) __U);
962}
963
964static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000965_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000966{
967 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
968}
969
970static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemetfce1ad02014-07-28 17:14:45 +0000971_mm512_storeu_pd(void *__P, __m512d __A)
972{
973 __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
974}
975
976static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000977_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000978{
979 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
980 (__mmask16) __U);
981}
982
Adam Nemetfce1ad02014-07-28 17:14:45 +0000983static __inline void __attribute__ ((__always_inline__, __nodebug__))
984_mm512_storeu_ps(void *__P, __m512 __A)
985{
986 __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
987}
988
989static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemetc0cff242015-01-16 18:51:50 +0000990_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
Adam Nemetfce1ad02014-07-28 17:14:45 +0000991{
Adam Nemetc0cff242015-01-16 18:51:50 +0000992 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
Adam Nemetfce1ad02014-07-28 17:14:45 +0000993}
994
995static __inline void __attribute__ ((__always_inline__, __nodebug__))
996_mm512_store_pd(void *__P, __m512d __A)
997{
998 *(__m512d*)__P = __A;
999}
1000
Adam Nemetc0cff242015-01-16 18:51:50 +00001001static __inline void __attribute__ ((__always_inline__, __nodebug__))
1002_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
1003{
1004 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
1005 (__mmask16) __U);
1006}
1007
1008static __inline void __attribute__ ((__always_inline__, __nodebug__))
1009_mm512_store_ps(void *__P, __m512 __A)
1010{
1011 *(__m512*)__P = __A;
1012}
1013
/* Mask ops */
1015
1016static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
1017_mm512_knot(__mmask16 __M)
1018{
1019 return __builtin_ia32_knothi(__M);
1020}
1021
/* Integer compare */
1023
1024static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1025_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
1026 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
1027 (__mmask16)-1);
1028}
1029
1030static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1031_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1032 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
1033 __u);
1034}
1035
Craig Topper4cac1c22015-01-25 23:30:07 +00001036static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1037_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
1038 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
1039 (__mmask16)-1);
1040}
1041
1042static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1043_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1044 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
1045 __u);
1046}
1047
Robert Khasanovb9f3a912014-10-08 17:18:13 +00001048static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1049_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1050 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
1051 __u);
1052}
1053
1054static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1055_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
1056 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
1057 (__mmask8)-1);
1058}
1059
Craig Topper4cac1c22015-01-25 23:30:07 +00001060static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1061_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
1062 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
1063 (__mmask8)-1);
1064}
1065
1066static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1067_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1068 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
1069 __u);
1070}
1071
1072static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1073_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
1074 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
1075 (__mmask16)-1);
1076}
1077
1078static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1079_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1080 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
1081 __u);
1082}
1083
1084static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1085_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
1086 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
1087 (__mmask16)-1);
1088}
1089
1090static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1091_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1092 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
1093 __u);
1094}
1095
1096static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1097_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
1098 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
1099 (__mmask8)-1);
1100}
1101
1102static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1103_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1104 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
1105 __u);
1106}
1107
1108static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1109_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
1110 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
1111 (__mmask8)-1);
1112}
1113
1114static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1115_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1116 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
1117 __u);
1118}
1119
1120static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1121_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
1122 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
1123 (__mmask16)-1);
1124}
1125
1126static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1127_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1128 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
1129 __u);
1130}
1131
1132static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1133_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
1134 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
1135 (__mmask16)-1);
1136}
1137
1138static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1139_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1140 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
1141 __u);
1142}
1143
1144static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1145_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1146 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
1147 __u);
1148}
1149
1150static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1151_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
1152 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
1153 (__mmask8)-1);
1154}
1155
1156static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1157_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
1158 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
1159 (__mmask8)-1);
1160}
1161
1162static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1163_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1164 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
1165 __u);
1166}
1167
1168static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1169_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
1170 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
1171 (__mmask16)-1);
1172}
1173
1174static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1175_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1176 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
1177 __u);
1178}
1179
1180static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1181_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
1182 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
1183 (__mmask16)-1);
1184}
1185
1186static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1187_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1188 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
1189 __u);
1190}
1191
1192static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1193_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
1194 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
1195 (__mmask8)-1);
1196}
1197
1198static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1199_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1200 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
1201 __u);
1202}
1203
1204static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1205_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
1206 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
1207 (__mmask8)-1);
1208}
1209
1210static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1211_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1212 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
1213 __u);
1214}
1215
1216static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1217_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
1218 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
1219 (__mmask16)-1);
1220}
1221
1222static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1223_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1224 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
1225 __u);
1226}
1227
1228static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1229_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
1230 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
1231 (__mmask16)-1);
1232}
1233
1234static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1235_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1236 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
1237 __u);
1238}
1239
1240static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1241_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
1242 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
1243 (__mmask8)-1);
1244}
1245
1246static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1247_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1248 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
1249 __u);
1250}
1251
1252static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1253_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
1254 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
1255 (__mmask8)-1);
1256}
1257
1258static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1259_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1260 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
1261 __u);
1262}
1263
1264static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1265_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
1266 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
1267 (__mmask16)-1);
1268}
1269
1270static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1271_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1272 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
1273 __u);
1274}
1275
1276static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1277_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
1278 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
1279 (__mmask16)-1);
1280}
1281
1282static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
1283_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
1284 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
1285 __u);
1286}
1287
1288static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1289_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
1290 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
1291 (__mmask8)-1);
1292}
1293
1294static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1295_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1296 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
1297 __u);
1298}
1299
1300static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1301_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
1302 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
1303 (__mmask8)-1);
1304}
1305
1306static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
1307_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
1308 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
1309 __u);
1310}
1311
/* Compare `a` and `b` (each converted to __m512i, then viewed as __v16si)
 * with __builtin_ia32_cmpd512_mask, predicate `p`, and all mask bits set;
 * yields a __mmask16.
 *
 * Deliberately a plain expression rather than a statement expression with
 * __a/__b temporaries: such temporaries capture an argument that is itself
 * spelled __a or __b (the parameter names used by the inline functions in
 * this header), turning `__m512i __a = (a);` into a read of the uninitialized
 * temporary.  Each argument is still evaluated exactly once. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (p), \
                                          (__mmask16)-1))
1317
/* Compare `a` and `b` (each converted to __m512i, then viewed as __v16si)
 * with __builtin_ia32_ucmpd512_mask, predicate `p`, and all mask bits set;
 * yields a __mmask16.
 *
 * Deliberately a plain expression rather than a statement expression with
 * __a/__b temporaries: such temporaries capture an argument that is itself
 * spelled __a or __b (the parameter names used by the inline functions in
 * this header), turning `__m512i __a = (a);` into a read of the uninitialized
 * temporary.  Each argument is still evaluated exactly once. */
#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (p), \
                                           (__mmask16)-1))
1323
/* Compare `a` and `b` (each converted to __m512i, then viewed as __v8di)
 * with __builtin_ia32_cmpq512_mask, predicate `p`, and all mask bits set;
 * yields a __mmask8.
 *
 * Deliberately a plain expression rather than a statement expression with
 * __a/__b temporaries: such temporaries capture an argument that is itself
 * spelled __a or __b (the parameter names used by the inline functions in
 * this header), turning `__m512i __a = (a);` into a read of the uninitialized
 * temporary.  Each argument is still evaluated exactly once. */
#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (p), \
                                         (__mmask8)-1))
1329
/* Compare `a` and `b` (each converted to __m512i, then viewed as __v8di)
 * with __builtin_ia32_ucmpq512_mask, predicate `p`, and all mask bits set;
 * yields a __mmask8.
 *
 * Deliberately a plain expression rather than a statement expression with
 * __a/__b temporaries: such temporaries capture an argument that is itself
 * spelled __a or __b (the parameter names used by the inline functions in
 * this header), turning `__m512i __a = (a);` into a read of the uninitialized
 * temporary.  Each argument is still evaluated exactly once. */
#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (p), \
                                          (__mmask8)-1))
1335
/* Compare `a` and `b` (each converted to __m512i, then viewed as __v16si)
 * with __builtin_ia32_cmpd512_mask, predicate `p`, and `m` (converted to
 * __mmask16) as the mask operand; yields a __mmask16.
 *
 * Deliberately a plain expression rather than a statement expression with
 * __a/__b temporaries: such temporaries capture an argument that is itself
 * spelled __a or __b (the parameter names used by the inline functions in
 * this header), turning `__m512i __a = (a);` into a read of the uninitialized
 * temporary.  Each argument is still evaluated exactly once. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (p), \
                                          (__mmask16)(m)))
1341
/* Compare `a` and `b` (each converted to __m512i, then viewed as __v16si)
 * with __builtin_ia32_ucmpd512_mask, predicate `p`, and `m` (converted to
 * __mmask16) as the mask operand; yields a __mmask16.
 *
 * Deliberately a plain expression rather than a statement expression with
 * __a/__b temporaries: such temporaries capture an argument that is itself
 * spelled __a or __b (the parameter names used by the inline functions in
 * this header), turning `__m512i __a = (a);` into a read of the uninitialized
 * temporary.  Each argument is still evaluated exactly once. */
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (p), \
                                           (__mmask16)(m)))
1347
/* Compare `a` and `b` (each converted to __m512i, then viewed as __v8di)
 * with __builtin_ia32_cmpq512_mask, predicate `p`, and `m` (converted to
 * __mmask8) as the mask operand; yields a __mmask8.
 *
 * Deliberately a plain expression rather than a statement expression with
 * __a/__b temporaries: such temporaries capture an argument that is itself
 * spelled __a or __b (the parameter names used by the inline functions in
 * this header), turning `__m512i __a = (a);` into a read of the uninitialized
 * temporary.  Each argument is still evaluated exactly once. */
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (p), \
                                         (__mmask8)(m)))
1353
/* Compare `a` and `b` (each converted to __m512i, then viewed as __v8di)
 * with __builtin_ia32_ucmpq512_mask, predicate `p`, and `m` (converted to
 * __mmask8) as the mask operand; yields a __mmask8.
 *
 * Deliberately a plain expression rather than a statement expression with
 * __a/__b temporaries: such temporaries capture an argument that is itself
 * spelled __a or __b (the parameter names used by the inline functions in
 * this header), turning `__m512i __a = (a);` into a read of the uninitialized
 * temporary.  Each argument is still evaluated exactly once. */
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (p), \
                                          (__mmask8)(m)))
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001359#endif // __AVX512FINTRIN_H