/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Element-typed views of a 64-byte vector, used for casts inside this file. */
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* 64-byte vector types with float, double and long long elements. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Mask operand types (unsigned char and unsigned short). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
41
/* Rounding mode macros.  The replacement lists are kept exactly as written
   so that an identical definition in another header remains compatible. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04
48
/* Mantissa normalization interval selectors (values 0..3). */
typedef enum {
  _MM_MANT_NORM_1_2 = 0,     /* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,    /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,    /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3  /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
56
/* Mantissa sign-handling selectors (values 0..2). */
typedef enum {
  _MM_MANT_SIGN_src = 0,   /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,  /* sign = 0 */
  _MM_MANT_SIGN_nan = 2    /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
63
/* Attributes applied to every function in this file: always inlined, no
   debug info, and compiled for the "avx512f" target feature. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
66
67/* Create vectors with repeated elements */
68
69static __inline __m512i __DEFAULT_FN_ATTRS
70_mm512_setzero_si512(void)
71{
72 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
73}
74
75static __inline__ __m512d __DEFAULT_FN_ATTRS
76_mm512_undefined_pd()
77{
78 return (__m512d)__builtin_ia32_undef512();
79}
80
81static __inline__ __m512 __DEFAULT_FN_ATTRS
82_mm512_undefined()
83{
84 return (__m512)__builtin_ia32_undef512();
85}
86
87static __inline__ __m512 __DEFAULT_FN_ATTRS
88_mm512_undefined_ps()
89{
90 return (__m512)__builtin_ia32_undef512();
91}
92
93static __inline__ __m512i __DEFAULT_FN_ATTRS
94_mm512_undefined_epi32()
95{
96 return (__m512i)__builtin_ia32_undef512();
97}
Ben Murdoch097c5b22016-05-18 11:27:45 +010098
99static __inline __m512i __DEFAULT_FN_ATTRS
100_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
101{
102 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
103 (__v16si)
104 _mm512_setzero_si512 (),
105 __M);
106}
107
108static __inline __m512i __DEFAULT_FN_ATTRS
109_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
110{
111#ifdef __x86_64__
112 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
113 (__v8di)
114 _mm512_setzero_si512 (),
115 __M);
116#else
117 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
118 (__v8di)
119 _mm512_setzero_si512 (),
120 __M);
121#endif
122}
123
124static __inline __m512 __DEFAULT_FN_ATTRS
125_mm512_setzero_ps(void)
126{
127 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
128 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
129}
130static __inline __m512d __DEFAULT_FN_ATTRS
131_mm512_setzero_pd(void)
132{
133 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
134}
135
136static __inline __m512 __DEFAULT_FN_ATTRS
137_mm512_set1_ps(float __w)
138{
139 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
140 __w, __w, __w, __w, __w, __w, __w, __w };
141}
142
143static __inline __m512d __DEFAULT_FN_ATTRS
144_mm512_set1_pd(double __w)
145{
146 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
147}
148
149static __inline __m512i __DEFAULT_FN_ATTRS
150_mm512_set1_epi32(int __s)
151{
152 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
153 __s, __s, __s, __s, __s, __s, __s, __s };
154}
155
156static __inline __m512i __DEFAULT_FN_ATTRS
157_mm512_set1_epi64(long long __d)
158{
159 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
160}
161
162static __inline__ __m512 __DEFAULT_FN_ATTRS
163_mm512_broadcastss_ps(__m128 __X)
164{
165 float __f = __X[0];
166 return (__v16sf){ __f, __f, __f, __f,
167 __f, __f, __f, __f,
168 __f, __f, __f, __f,
169 __f, __f, __f, __f };
170}
171
172static __inline__ __m512d __DEFAULT_FN_ATTRS
173_mm512_broadcastsd_pd(__m128d __X)
174{
175 double __d = __X[0];
176 return (__v8df){ __d, __d, __d, __d,
177 __d, __d, __d, __d };
178}
179
180/* Cast between vector types */
181
182static __inline __m512d __DEFAULT_FN_ATTRS
183_mm512_castpd256_pd512(__m256d __a)
184{
185 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
186}
187
188static __inline __m512 __DEFAULT_FN_ATTRS
189_mm512_castps256_ps512(__m256 __a)
190{
191 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
192 -1, -1, -1, -1, -1, -1, -1, -1);
193}
194
195static __inline __m128d __DEFAULT_FN_ATTRS
196_mm512_castpd512_pd128(__m512d __a)
197{
198 return __builtin_shufflevector(__a, __a, 0, 1);
199}
200
201static __inline __m128 __DEFAULT_FN_ATTRS
202_mm512_castps512_ps128(__m512 __a)
203{
204 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
205}
206
/* Bitwise operators */
208static __inline__ __m512i __DEFAULT_FN_ATTRS
209_mm512_and_epi32(__m512i __a, __m512i __b)
210{
211 return __a & __b;
212}
213
214static __inline__ __m512i __DEFAULT_FN_ATTRS
215_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
216{
217 return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
218 (__v16si) __b,
219 (__v16si) __src,
220 (__mmask16) __k);
221}
222static __inline__ __m512i __DEFAULT_FN_ATTRS
223_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
224{
225 return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
226 (__v16si) __b,
227 (__v16si)
228 _mm512_setzero_si512 (),
229 (__mmask16) __k);
230}
231
232static __inline__ __m512i __DEFAULT_FN_ATTRS
233_mm512_and_epi64(__m512i __a, __m512i __b)
234{
235 return __a & __b;
236}
237
238static __inline__ __m512i __DEFAULT_FN_ATTRS
239_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
240{
241 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
242 (__v8di) __b,
243 (__v8di) __src,
244 (__mmask8) __k);
245}
246static __inline__ __m512i __DEFAULT_FN_ATTRS
247_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
248{
249 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
250 (__v8di) __b,
251 (__v8di)
252 _mm512_setzero_si512 (),
253 (__mmask8) __k);
254}
255
256static __inline__ __m512i __DEFAULT_FN_ATTRS
257_mm512_andnot_epi32 (__m512i __A, __m512i __B)
258{
259 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
260 (__v16si) __B,
261 (__v16si)
262 _mm512_setzero_si512 (),
263 (__mmask16) -1);
264}
265
266static __inline__ __m512i __DEFAULT_FN_ATTRS
267_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
268{
269 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
270 (__v16si) __B,
271 (__v16si) __W,
272 (__mmask16) __U);
273}
274
275static __inline__ __m512i __DEFAULT_FN_ATTRS
276_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
277{
278 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
279 (__v16si) __B,
280 (__v16si)
281 _mm512_setzero_si512 (),
282 (__mmask16) __U);
283}
284
285static __inline__ __m512i __DEFAULT_FN_ATTRS
286_mm512_andnot_epi64 (__m512i __A, __m512i __B)
287{
288 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
289 (__v8di) __B,
290 (__v8di)
291 _mm512_setzero_si512 (),
292 (__mmask8) -1);
293}
294
295static __inline__ __m512i __DEFAULT_FN_ATTRS
296_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
297{
298 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
299 (__v8di) __B,
300 (__v8di) __W, __U);
301}
302
303static __inline__ __m512i __DEFAULT_FN_ATTRS
304_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
305{
306 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
307 (__v8di) __B,
308 (__v8di)
309 _mm512_setzero_pd (),
310 __U);
311}
312static __inline__ __m512i __DEFAULT_FN_ATTRS
313_mm512_or_epi32(__m512i __a, __m512i __b)
314{
315 return __a | __b;
316}
317
318static __inline__ __m512i __DEFAULT_FN_ATTRS
319_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
320{
321 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
322 (__v16si) __b,
323 (__v16si) __src,
324 (__mmask16) __k);
325}
326static __inline__ __m512i __DEFAULT_FN_ATTRS
327_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
328{
329 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
330 (__v16si) __b,
331 (__v16si)
332 _mm512_setzero_si512 (),
333 (__mmask16) __k);
334}
335
336static __inline__ __m512i __DEFAULT_FN_ATTRS
337_mm512_or_epi64(__m512i __a, __m512i __b)
338{
339 return __a | __b;
340}
341
342static __inline__ __m512i __DEFAULT_FN_ATTRS
343_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
344{
345 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
346 (__v8di) __b,
347 (__v8di) __src,
348 (__mmask8) __k);
349}
350static __inline__ __m512i __DEFAULT_FN_ATTRS
351_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
352{
353 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
354 (__v8di) __b,
355 (__v8di)
356 _mm512_setzero_si512 (),
357 (__mmask8) __k);
358}
359
360static __inline__ __m512i __DEFAULT_FN_ATTRS
361_mm512_xor_epi32(__m512i __a, __m512i __b)
362{
363 return __a ^ __b;
364}
365
366static __inline__ __m512i __DEFAULT_FN_ATTRS
367_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
368{
369 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
370 (__v16si) __b,
371 (__v16si) __src,
372 (__mmask16) __k);
373}
374static __inline__ __m512i __DEFAULT_FN_ATTRS
375_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
376{
377 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
378 (__v16si) __b,
379 (__v16si)
380 _mm512_setzero_si512 (),
381 (__mmask16) __k);
382}
383
384static __inline__ __m512i __DEFAULT_FN_ATTRS
385_mm512_xor_epi64(__m512i __a, __m512i __b)
386{
387 return __a ^ __b;
388}
389
390static __inline__ __m512i __DEFAULT_FN_ATTRS
391_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
392{
393 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
394 (__v8di) __b,
395 (__v8di) __src,
396 (__mmask8) __k);
397}
398static __inline__ __m512i __DEFAULT_FN_ATTRS
399_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
400{
401 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
402 (__v8di) __b,
403 (__v8di)
404 _mm512_setzero_si512 (),
405 (__mmask8) __k);
406}
407
408static __inline__ __m512i __DEFAULT_FN_ATTRS
409_mm512_and_si512(__m512i __a, __m512i __b)
410{
411 return __a & __b;
412}
413
414static __inline__ __m512i __DEFAULT_FN_ATTRS
415_mm512_or_si512(__m512i __a, __m512i __b)
416{
417 return __a | __b;
418}
419
420static __inline__ __m512i __DEFAULT_FN_ATTRS
421_mm512_xor_si512(__m512i __a, __m512i __b)
422{
423 return __a ^ __b;
424}
425/* Arithmetic */
426
427static __inline __m512d __DEFAULT_FN_ATTRS
428_mm512_add_pd(__m512d __a, __m512d __b)
429{
430 return __a + __b;
431}
432
433static __inline __m512 __DEFAULT_FN_ATTRS
434_mm512_add_ps(__m512 __a, __m512 __b)
435{
436 return __a + __b;
437}
438
439static __inline __m512d __DEFAULT_FN_ATTRS
440_mm512_mul_pd(__m512d __a, __m512d __b)
441{
442 return __a * __b;
443}
444
445static __inline __m512 __DEFAULT_FN_ATTRS
446_mm512_mul_ps(__m512 __a, __m512 __b)
447{
448 return __a * __b;
449}
450
451static __inline __m512d __DEFAULT_FN_ATTRS
452_mm512_sub_pd(__m512d __a, __m512d __b)
453{
454 return __a - __b;
455}
456
457static __inline __m512 __DEFAULT_FN_ATTRS
458_mm512_sub_ps(__m512 __a, __m512 __b)
459{
460 return __a - __b;
461}
462
463static __inline__ __m512i __DEFAULT_FN_ATTRS
464_mm512_add_epi64 (__m512i __A, __m512i __B)
465{
466 return (__m512i) ((__v8di) __A + (__v8di) __B);
467}
468
469static __inline__ __m512i __DEFAULT_FN_ATTRS
470_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
471{
472 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
473 (__v8di) __B,
474 (__v8di) __W,
475 (__mmask8) __U);
476}
477
478static __inline__ __m512i __DEFAULT_FN_ATTRS
479_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
480{
481 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
482 (__v8di) __B,
483 (__v8di)
484 _mm512_setzero_si512 (),
485 (__mmask8) __U);
486}
487
488static __inline__ __m512i __DEFAULT_FN_ATTRS
489_mm512_sub_epi64 (__m512i __A, __m512i __B)
490{
491 return (__m512i) ((__v8di) __A - (__v8di) __B);
492}
493
494static __inline__ __m512i __DEFAULT_FN_ATTRS
495_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
496{
497 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
498 (__v8di) __B,
499 (__v8di) __W,
500 (__mmask8) __U);
501}
502
503static __inline__ __m512i __DEFAULT_FN_ATTRS
504_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
505{
506 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
507 (__v8di) __B,
508 (__v8di)
509 _mm512_setzero_si512 (),
510 (__mmask8) __U);
511}
512
513static __inline__ __m512i __DEFAULT_FN_ATTRS
514_mm512_add_epi32 (__m512i __A, __m512i __B)
515{
516 return (__m512i) ((__v16si) __A + (__v16si) __B);
517}
518
519static __inline__ __m512i __DEFAULT_FN_ATTRS
520_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
521{
522 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
523 (__v16si) __B,
524 (__v16si) __W,
525 (__mmask16) __U);
526}
527
528static __inline__ __m512i __DEFAULT_FN_ATTRS
529_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
530{
531 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
532 (__v16si) __B,
533 (__v16si)
534 _mm512_setzero_si512 (),
535 (__mmask16) __U);
536}
537
538static __inline__ __m512i __DEFAULT_FN_ATTRS
539_mm512_sub_epi32 (__m512i __A, __m512i __B)
540{
541 return (__m512i) ((__v16si) __A - (__v16si) __B);
542}
543
544static __inline__ __m512i __DEFAULT_FN_ATTRS
545_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
546{
547 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
548 (__v16si) __B,
549 (__v16si) __W,
550 (__mmask16) __U);
551}
552
553static __inline__ __m512i __DEFAULT_FN_ATTRS
554_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
555{
556 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
557 (__v16si) __B,
558 (__v16si)
559 _mm512_setzero_si512 (),
560 (__mmask16) __U);
561}
562
563static __inline__ __m512d __DEFAULT_FN_ATTRS
564_mm512_max_pd(__m512d __A, __m512d __B)
565{
566 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
567 (__v8df) __B,
568 (__v8df)
569 _mm512_setzero_pd (),
570 (__mmask8) -1,
571 _MM_FROUND_CUR_DIRECTION);
572}
573
574static __inline__ __m512 __DEFAULT_FN_ATTRS
575_mm512_max_ps(__m512 __A, __m512 __B)
576{
577 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
578 (__v16sf) __B,
579 (__v16sf)
580 _mm512_setzero_ps (),
581 (__mmask16) -1,
582 _MM_FROUND_CUR_DIRECTION);
583}
584
585static __inline__ __m128 __DEFAULT_FN_ATTRS
586_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
587 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
588 (__v4sf) __B,
589 (__v4sf) __W,
590 (__mmask8) __U,
591 _MM_FROUND_CUR_DIRECTION);
592}
593
594static __inline__ __m128 __DEFAULT_FN_ATTRS
595_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
596 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
597 (__v4sf) __B,
598 (__v4sf) _mm_setzero_ps (),
599 (__mmask8) __U,
600 _MM_FROUND_CUR_DIRECTION);
601}
602
/* Calls the maxss_round builtin on __A and __B with a zero vector as the
   third operand, an all-ones mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
               (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
606
/* Calls the maxss_round builtin on __A and __B, passing __W as the third
   operand, __U as the mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
               (__v4sf) (__W), (__mmask8) (__U), (__R)); })
610
/* Calls the maxss_round builtin on __A and __B, passing a zero vector as
   the third operand, __U as the mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
               (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
614
615static __inline__ __m128d __DEFAULT_FN_ATTRS
616_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
617 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
618 (__v2df) __B,
619 (__v2df) __W,
620 (__mmask8) __U,
621 _MM_FROUND_CUR_DIRECTION);
622}
623
624static __inline__ __m128d __DEFAULT_FN_ATTRS
625_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
626 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
627 (__v2df) __B,
628 (__v2df) _mm_setzero_pd (),
629 (__mmask8) __U,
630 _MM_FROUND_CUR_DIRECTION);
631}
632
/* Calls the maxsd_round builtin on __A and __B with a zero vector as the
   third operand, an all-ones mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
               (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
636
/* Calls the maxsd_round builtin on __A and __B, passing __W as the third
   operand, __U as the mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
               (__v2df) (__W), (__mmask8) (__U), (__R)); })
640
/* Calls the maxsd_round builtin on __A and __B, passing a zero vector as
   the third operand, __U as the mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
               (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
644
645static __inline __m512i
646__DEFAULT_FN_ATTRS
647_mm512_max_epi32(__m512i __A, __m512i __B)
648{
649 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
650 (__v16si) __B,
651 (__v16si)
652 _mm512_setzero_si512 (),
653 (__mmask16) -1);
654}
655
656static __inline __m512i __DEFAULT_FN_ATTRS
657_mm512_max_epu32(__m512i __A, __m512i __B)
658{
659 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
660 (__v16si) __B,
661 (__v16si)
662 _mm512_setzero_si512 (),
663 (__mmask16) -1);
664}
665
666static __inline __m512i __DEFAULT_FN_ATTRS
667_mm512_max_epi64(__m512i __A, __m512i __B)
668{
669 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
670 (__v8di) __B,
671 (__v8di)
672 _mm512_setzero_si512 (),
673 (__mmask8) -1);
674}
675
676static __inline __m512i __DEFAULT_FN_ATTRS
677_mm512_max_epu64(__m512i __A, __m512i __B)
678{
679 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
680 (__v8di) __B,
681 (__v8di)
682 _mm512_setzero_si512 (),
683 (__mmask8) -1);
684}
685
686static __inline__ __m512d __DEFAULT_FN_ATTRS
687_mm512_min_pd(__m512d __A, __m512d __B)
688{
689 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
690 (__v8df) __B,
691 (__v8df)
692 _mm512_setzero_pd (),
693 (__mmask8) -1,
694 _MM_FROUND_CUR_DIRECTION);
695}
696
697static __inline__ __m512 __DEFAULT_FN_ATTRS
698_mm512_min_ps(__m512 __A, __m512 __B)
699{
700 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
701 (__v16sf) __B,
702 (__v16sf)
703 _mm512_setzero_ps (),
704 (__mmask16) -1,
705 _MM_FROUND_CUR_DIRECTION);
706}
707
708static __inline__ __m128 __DEFAULT_FN_ATTRS
709_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
710 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
711 (__v4sf) __B,
712 (__v4sf) __W,
713 (__mmask8) __U,
714 _MM_FROUND_CUR_DIRECTION);
715}
716
717static __inline__ __m128 __DEFAULT_FN_ATTRS
718_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
719 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
720 (__v4sf) __B,
721 (__v4sf) _mm_setzero_ps (),
722 (__mmask8) __U,
723 _MM_FROUND_CUR_DIRECTION);
724}
725
/* Calls the minss_round builtin on __A and __B with a zero vector as the
   third operand, an all-ones mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
               (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
729
/* Calls the minss_round builtin on __A and __B, passing __W as the third
   operand, __U as the mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
               (__v4sf) (__W), (__mmask8) (__U), (__R)); })
733
/* Calls the minss_round builtin on __A and __B, passing a zero vector as
   the third operand, __U as the mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
               (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
737
738static __inline__ __m128d __DEFAULT_FN_ATTRS
739_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
740 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
741 (__v2df) __B,
742 (__v2df) __W,
743 (__mmask8) __U,
744 _MM_FROUND_CUR_DIRECTION);
745}
746
747static __inline__ __m128d __DEFAULT_FN_ATTRS
748_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
749 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
750 (__v2df) __B,
751 (__v2df) _mm_setzero_pd (),
752 (__mmask8) __U,
753 _MM_FROUND_CUR_DIRECTION);
754}
755
/* Calls the minsd_round builtin on __A and __B with a zero vector as the
   third operand, an all-ones mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
               (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
759
/* Calls the minsd_round builtin on __A and __B, passing __W as the third
   operand, __U as the mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
               (__v2df) (__W), (__mmask8) (__U), (__R)); })
763
/* Calls the minsd_round builtin on __A and __B, passing a zero vector as
   the third operand, __U as the mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
               (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
767
768static __inline __m512i
769__DEFAULT_FN_ATTRS
770_mm512_min_epi32(__m512i __A, __m512i __B)
771{
772 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
773 (__v16si) __B,
774 (__v16si)
775 _mm512_setzero_si512 (),
776 (__mmask16) -1);
777}
778
779static __inline __m512i __DEFAULT_FN_ATTRS
780_mm512_min_epu32(__m512i __A, __m512i __B)
781{
782 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
783 (__v16si) __B,
784 (__v16si)
785 _mm512_setzero_si512 (),
786 (__mmask16) -1);
787}
788
789static __inline __m512i __DEFAULT_FN_ATTRS
790_mm512_min_epi64(__m512i __A, __m512i __B)
791{
792 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
793 (__v8di) __B,
794 (__v8di)
795 _mm512_setzero_si512 (),
796 (__mmask8) -1);
797}
798
799static __inline __m512i __DEFAULT_FN_ATTRS
800_mm512_min_epu64(__m512i __A, __m512i __B)
801{
802 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
803 (__v8di) __B,
804 (__v8di)
805 _mm512_setzero_si512 (),
806 (__mmask8) -1);
807}
808
809static __inline __m512i __DEFAULT_FN_ATTRS
810_mm512_mul_epi32(__m512i __X, __m512i __Y)
811{
812 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
813 (__v16si) __Y,
814 (__v8di)
815 _mm512_setzero_si512 (),
816 (__mmask8) -1);
817}
818
819static __inline __m512i __DEFAULT_FN_ATTRS
820_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
821{
822 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
823 (__v16si) __Y,
824 (__v8di) __W, __M);
825}
826
827static __inline __m512i __DEFAULT_FN_ATTRS
828_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
829{
830 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
831 (__v16si) __Y,
832 (__v8di)
833 _mm512_setzero_si512 (),
834 __M);
835}
836
837static __inline __m512i __DEFAULT_FN_ATTRS
838_mm512_mul_epu32(__m512i __X, __m512i __Y)
839{
840 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
841 (__v16si) __Y,
842 (__v8di)
843 _mm512_setzero_si512 (),
844 (__mmask8) -1);
845}
846
847static __inline __m512i __DEFAULT_FN_ATTRS
848_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
849{
850 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
851 (__v16si) __Y,
852 (__v8di) __W, __M);
853}
854
855static __inline __m512i __DEFAULT_FN_ATTRS
856_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
857{
858 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
859 (__v16si) __Y,
860 (__v8di)
861 _mm512_setzero_si512 (),
862 __M);
863}
864
865static __inline __m512i __DEFAULT_FN_ATTRS
866_mm512_mullo_epi32 (__m512i __A, __m512i __B)
867{
868 return (__m512i) ((__v16si) __A * (__v16si) __B);
869}
870
871static __inline __m512i __DEFAULT_FN_ATTRS
872_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
873{
874 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
875 (__v16si) __B,
876 (__v16si)
877 _mm512_setzero_si512 (),
878 __M);
879}
880
881static __inline __m512i __DEFAULT_FN_ATTRS
882_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
883{
884 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
885 (__v16si) __B,
886 (__v16si) __W, __M);
887}
888
889static __inline__ __m512d __DEFAULT_FN_ATTRS
890_mm512_sqrt_pd(__m512d __a)
891{
892 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
893 (__v8df) _mm512_setzero_pd (),
894 (__mmask8) -1,
895 _MM_FROUND_CUR_DIRECTION);
896}
897
898static __inline__ __m512 __DEFAULT_FN_ATTRS
899_mm512_sqrt_ps(__m512 __a)
900{
901 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
902 (__v16sf) _mm512_setzero_ps (),
903 (__mmask16) -1,
904 _MM_FROUND_CUR_DIRECTION);
905}
906
907static __inline__ __m512d __DEFAULT_FN_ATTRS
908_mm512_rsqrt14_pd(__m512d __A)
909{
910 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
911 (__v8df)
912 _mm512_setzero_pd (),
913 (__mmask8) -1);}
914
915static __inline__ __m512 __DEFAULT_FN_ATTRS
916_mm512_rsqrt14_ps(__m512 __A)
917{
918 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
919 (__v16sf)
920 _mm512_setzero_ps (),
921 (__mmask16) -1);
922}
923
924static __inline__ __m128 __DEFAULT_FN_ATTRS
925_mm_rsqrt14_ss(__m128 __A, __m128 __B)
926{
927 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
928 (__v4sf) __B,
929 (__v4sf)
930 _mm_setzero_ps (),
931 (__mmask8) -1);
932}
933
Ben Murdoch097c5b22016-05-18 11:27:45 +0100934static __inline__ __m128d __DEFAULT_FN_ATTRS
935_mm_rsqrt14_sd(__m128d __A, __m128d __B)
936{
937 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
938 (__v2df) __B,
939 (__v2df)
940 _mm_setzero_pd (),
941 (__mmask8) -1);
942}
943
Ben Murdoch097c5b22016-05-18 11:27:45 +0100944static __inline__ __m512d __DEFAULT_FN_ATTRS
945_mm512_rcp14_pd(__m512d __A)
946{
947 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
948 (__v8df)
949 _mm512_setzero_pd (),
950 (__mmask8) -1);
951}
952
953static __inline__ __m512 __DEFAULT_FN_ATTRS
954_mm512_rcp14_ps(__m512 __A)
955{
956 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
957 (__v16sf)
958 _mm512_setzero_ps (),
959 (__mmask16) -1);
960}
961static __inline__ __m128 __DEFAULT_FN_ATTRS
962_mm_rcp14_ss(__m128 __A, __m128 __B)
963{
964 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
965 (__v4sf) __B,
966 (__v4sf)
967 _mm_setzero_ps (),
968 (__mmask8) -1);
969}
970
Ben Murdoch097c5b22016-05-18 11:27:45 +0100971static __inline__ __m128d __DEFAULT_FN_ATTRS
972_mm_rcp14_sd(__m128d __A, __m128d __B)
973{
974 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
975 (__v2df) __B,
976 (__v2df)
977 _mm_setzero_pd (),
978 (__mmask8) -1);
979}
980
Ben Murdoch097c5b22016-05-18 11:27:45 +0100981static __inline __m512 __DEFAULT_FN_ATTRS
982_mm512_floor_ps(__m512 __A)
983{
984 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
985 _MM_FROUND_FLOOR,
986 (__v16sf) __A, -1,
987 _MM_FROUND_CUR_DIRECTION);
988}
989
990static __inline __m512d __DEFAULT_FN_ATTRS
991_mm512_floor_pd(__m512d __A)
992{
993 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
994 _MM_FROUND_FLOOR,
995 (__v8df) __A, -1,
996 _MM_FROUND_CUR_DIRECTION);
997}
998
999static __inline __m512 __DEFAULT_FN_ATTRS
1000_mm512_ceil_ps(__m512 __A)
1001{
1002 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1003 _MM_FROUND_CEIL,
1004 (__v16sf) __A, -1,
1005 _MM_FROUND_CUR_DIRECTION);
1006}
1007
1008static __inline __m512d __DEFAULT_FN_ATTRS
1009_mm512_ceil_pd(__m512d __A)
1010{
1011 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1012 _MM_FROUND_CEIL,
1013 (__v8df) __A, -1,
1014 _MM_FROUND_CUR_DIRECTION);
1015}
1016
1017static __inline __m512i __DEFAULT_FN_ATTRS
1018_mm512_abs_epi64(__m512i __A)
1019{
1020 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1021 (__v8di)
1022 _mm512_setzero_si512 (),
1023 (__mmask8) -1);
1024}
1025
1026static __inline __m512i __DEFAULT_FN_ATTRS
1027_mm512_abs_epi32(__m512i __A)
1028{
1029 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1030 (__v16si)
1031 _mm512_setzero_si512 (),
1032 (__mmask16) -1);
1033}
1034
1035static __inline__ __m128 __DEFAULT_FN_ATTRS
1036_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1037 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
1038 (__v4sf) __B,
1039 (__v4sf) __W,
1040 (__mmask8) __U,
1041 _MM_FROUND_CUR_DIRECTION);
1042}
1043
1044static __inline__ __m128 __DEFAULT_FN_ATTRS
1045_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1046 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
1047 (__v4sf) __B,
1048 (__v4sf) _mm_setzero_ps (),
1049 (__mmask8) __U,
1050 _MM_FROUND_CUR_DIRECTION);
1051}
1052
/* Calls the addss_round builtin on __A and __B with a zero vector as the
   third operand, an all-ones mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_addss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
               (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
1056
/* Calls the addss_round builtin on __A and __B, passing __W as the third
   operand, __U as the mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_addss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
               (__v4sf) (__W), (__mmask8) (__U), (__R)); })
1060
/* Calls the addss_round builtin on __A and __B, passing a zero vector as
   the third operand, __U as the mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_addss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
               (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
1064
1065static __inline__ __m128d __DEFAULT_FN_ATTRS
1066_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1067 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
1068 (__v2df) __B,
1069 (__v2df) __W,
1070 (__mmask8) __U,
1071 _MM_FROUND_CUR_DIRECTION);
1072}
1073
1074static __inline__ __m128d __DEFAULT_FN_ATTRS
1075_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1076 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
1077 (__v2df) __B,
1078 (__v2df) _mm_setzero_pd (),
1079 (__mmask8) __U,
1080 _MM_FROUND_CUR_DIRECTION);
1081}
/* Calls the addsd_round builtin on __A and __B with a zero vector as the
   third operand, an all-ones mask and rounding argument __R.
   Every macro argument is parenthesized so that a compound expression is
   cast as a whole rather than only its first operand. */
#define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
               (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
1085
1086#define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
1087 (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A, (__v2df) __B, \
1088 (__v2df) __W, (__mmask8) __U,__R); })
1089
1090#define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \
1091 (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A, (__v2df) __B, \
1092 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
1093
1094static __inline__ __m512d __DEFAULT_FN_ATTRS
1095_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1096 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1097 (__v8df) __B,
1098 (__v8df) __W,
1099 (__mmask8) __U,
1100 _MM_FROUND_CUR_DIRECTION);
1101}
1102
1103static __inline__ __m512d __DEFAULT_FN_ATTRS
1104_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1105 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1106 (__v8df) __B,
1107 (__v8df) _mm512_setzero_pd (),
1108 (__mmask8) __U,
1109 _MM_FROUND_CUR_DIRECTION);
1110}
1111
1112static __inline__ __m512 __DEFAULT_FN_ATTRS
1113_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1114 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
1115 (__v16sf) __B,
1116 (__v16sf) __W,
1117 (__mmask16) __U,
1118 _MM_FROUND_CUR_DIRECTION);
1119}
1120
1121static __inline__ __m512 __DEFAULT_FN_ATTRS
1122_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1123 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
1124 (__v16sf) __B,
1125 (__v16sf) _mm512_setzero_ps (),
1126 (__mmask16) __U,
1127 _MM_FROUND_CUR_DIRECTION);
1128}
1129
/* 512-bit add macros taking an explicit rounding argument __R, which is
 * forwarded as the last operand of the addpd512/addps512 mask builtins.
 * Every macro argument is parenthesized so that a compound expression
 * (e.g. a + b) is cast as a whole instead of only its first operand. */

/* Double precision: all-ones mask, zero vector as source operand. */
#define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__A), (__v8df) (__B), \
      (__v8df) _mm512_setzero_pd(), (__mmask8) -1, (__R)); })

/* Double precision: __W as source operand, __U as mask. */
#define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__A), (__v8df) (__B), \
      (__v8df) (__W), (__mmask8) (__U), (__R)); })

/* Double precision: zero vector as source operand, __U as mask. */
#define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__A), (__v8df) (__B), \
      (__v8df) _mm512_setzero_pd(), (__mmask8) (__U), (__R)); })

/* Single precision: all-ones mask, zero vector as source operand. */
#define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
      (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, (__R)); })

/* Single precision: __W as source operand, __U as mask. */
#define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
      (__v16sf) (__W), (__mmask16) (__U), (__R)); })

/* Single precision: zero vector as source operand, __U as mask. */
#define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
      (__v16sf) _mm512_setzero_ps(), (__mmask16) (__U), (__R)); })
1153
1154static __inline__ __m128 __DEFAULT_FN_ATTRS
1155_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1156 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
1157 (__v4sf) __B,
1158 (__v4sf) __W,
1159 (__mmask8) __U,
1160 _MM_FROUND_CUR_DIRECTION);
1161}
1162
1163static __inline__ __m128 __DEFAULT_FN_ATTRS
1164_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1165 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
1166 (__v4sf) __B,
1167 (__v4sf) _mm_setzero_ps (),
1168 (__mmask8) __U,
1169 _MM_FROUND_CUR_DIRECTION);
1170}
/* Scalar single-precision subtract with an explicit rounding argument __R,
 * forwarded as the last operand of __builtin_ia32_subss_round_mask.
 * Every macro argument is parenthesized so that a compound expression
 * (e.g. a + b) is cast as a whole instead of only its first operand. */
#define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_subss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
      (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

/* Same builtin, with __W as the source operand and __U as the mask. */
#define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_subss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
      (__v4sf) (__W), (__mmask8) (__U), (__R)); })

/* Same builtin, with a zero vector as the source operand and __U as mask. */
#define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_subss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
      (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
1182
1183static __inline__ __m128d __DEFAULT_FN_ATTRS
1184_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1185 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
1186 (__v2df) __B,
1187 (__v2df) __W,
1188 (__mmask8) __U,
1189 _MM_FROUND_CUR_DIRECTION);
1190}
1191
1192static __inline__ __m128d __DEFAULT_FN_ATTRS
1193_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1194 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
1195 (__v2df) __B,
1196 (__v2df) _mm_setzero_pd (),
1197 (__mmask8) __U,
1198 _MM_FROUND_CUR_DIRECTION);
1199}
1200
/* Scalar double-precision subtract with an explicit rounding argument __R,
 * forwarded as the last operand of __builtin_ia32_subsd_round_mask.
 * Every macro argument is parenthesized so that a compound expression
 * (e.g. a + b) is cast as a whole instead of only its first operand. */
#define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
      (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

/* Same builtin, with __W as the source operand and __U as the mask. */
#define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
      (__v2df) (__W), (__mmask8) (__U), (__R)); })

/* Same builtin, with a zero vector as the source operand and __U as mask. */
#define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
      (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
1212
1213static __inline__ __m512d __DEFAULT_FN_ATTRS
1214_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1215 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
1216 (__v8df) __B,
1217 (__v8df) __W,
1218 (__mmask8) __U,
1219 _MM_FROUND_CUR_DIRECTION);
1220}
1221
1222static __inline__ __m512d __DEFAULT_FN_ATTRS
1223_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1224 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
1225 (__v8df) __B,
1226 (__v8df)
1227 _mm512_setzero_pd (),
1228 (__mmask8) __U,
1229 _MM_FROUND_CUR_DIRECTION);
1230}
1231
1232static __inline__ __m512 __DEFAULT_FN_ATTRS
1233_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1234 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
1235 (__v16sf) __B,
1236 (__v16sf) __W,
1237 (__mmask16) __U,
1238 _MM_FROUND_CUR_DIRECTION);
1239}
1240
1241static __inline__ __m512 __DEFAULT_FN_ATTRS
1242_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1243 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
1244 (__v16sf) __B,
1245 (__v16sf)
1246 _mm512_setzero_ps (),
1247 (__mmask16) __U,
1248 _MM_FROUND_CUR_DIRECTION);
1249}
1250
/* 512-bit subtract macros taking an explicit rounding argument __R, which is
 * forwarded as the last operand of the subpd512/subps512 mask builtins.
 * Every macro argument is parenthesized so that a compound expression is
 * cast as a whole, and no macro ends in a stray ';', so each one can be used
 * inside a larger expression or as a function argument. */

/* Double precision: all-ones mask, zero vector as source operand. */
#define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__A), (__v8df) (__B), \
      (__v8df) _mm512_setzero_pd(), (__mmask8) -1, (__R)); })

/* Double precision: __W as source operand, __U as mask. */
#define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__A), (__v8df) (__B), \
      (__v8df) (__W), (__mmask8) (__U), (__R)); })

/* Double precision: zero vector as source operand, __U as mask. */
#define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__A), (__v8df) (__B), \
      (__v8df) _mm512_setzero_pd(), (__mmask8) (__U), (__R)); })

/* Single precision: all-ones mask, zero vector as source operand. */
#define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_subps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, (__R)); })

/* Single precision: __W as source operand, __U as mask. */
#define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_subps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
      (__v16sf) (__W), (__mmask16) (__U), (__R)); })

/* Single precision: zero vector as source operand, __U as mask. */
#define _mm512_maskz_sub_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_subps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (__U), (__R)); })
1274
1275static __inline__ __m128 __DEFAULT_FN_ATTRS
1276_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1277 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
1278 (__v4sf) __B,
1279 (__v4sf) __W,
1280 (__mmask8) __U,
1281 _MM_FROUND_CUR_DIRECTION);
1282}
1283
1284static __inline__ __m128 __DEFAULT_FN_ATTRS
1285_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1286 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
1287 (__v4sf) __B,
1288 (__v4sf) _mm_setzero_ps (),
1289 (__mmask8) __U,
1290 _MM_FROUND_CUR_DIRECTION);
1291}
/* Scalar single-precision multiply with an explicit rounding argument __R,
 * forwarded as the last operand of __builtin_ia32_mulss_round_mask.
 * Every macro argument is parenthesized so that a compound expression
 * (e.g. a + b) is cast as a whole instead of only its first operand. */
#define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
      (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

/* Same builtin, with __W as the source operand and __U as the mask. */
#define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
      (__v4sf) (__W), (__mmask8) (__U), (__R)); })

/* Same builtin, with a zero vector as the source operand and __U as mask. */
#define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
      (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
1303
1304static __inline__ __m128d __DEFAULT_FN_ATTRS
1305_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1306 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
1307 (__v2df) __B,
1308 (__v2df) __W,
1309 (__mmask8) __U,
1310 _MM_FROUND_CUR_DIRECTION);
1311}
1312
1313static __inline__ __m128d __DEFAULT_FN_ATTRS
1314_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1315 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
1316 (__v2df) __B,
1317 (__v2df) _mm_setzero_pd (),
1318 (__mmask8) __U,
1319 _MM_FROUND_CUR_DIRECTION);
1320}
1321
/* Scalar double-precision multiply with an explicit rounding argument __R,
 * forwarded as the last operand of __builtin_ia32_mulsd_round_mask.
 * Every macro argument is parenthesized so that a compound expression
 * (e.g. a + b) is cast as a whole instead of only its first operand. */
#define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
      (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

/* Same builtin, with __W as the source operand and __U as the mask. */
#define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
      (__v2df) (__W), (__mmask8) (__U), (__R)); })

/* Same builtin, with a zero vector as the source operand and __U as mask. */
#define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
      (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
1333
1334static __inline__ __m512d __DEFAULT_FN_ATTRS
1335_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1336 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
1337 (__v8df) __B,
1338 (__v8df) __W,
1339 (__mmask8) __U,
1340 _MM_FROUND_CUR_DIRECTION);
1341}
1342
1343static __inline__ __m512d __DEFAULT_FN_ATTRS
1344_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1345 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
1346 (__v8df) __B,
1347 (__v8df)
1348 _mm512_setzero_pd (),
1349 (__mmask8) __U,
1350 _MM_FROUND_CUR_DIRECTION);
1351}
1352
1353static __inline__ __m512 __DEFAULT_FN_ATTRS
1354_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1355 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
1356 (__v16sf) __B,
1357 (__v16sf) __W,
1358 (__mmask16) __U,
1359 _MM_FROUND_CUR_DIRECTION);
1360}
1361
1362static __inline__ __m512 __DEFAULT_FN_ATTRS
1363_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1364 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
1365 (__v16sf) __B,
1366 (__v16sf)
1367 _mm512_setzero_ps (),
1368 (__mmask16) __U,
1369 _MM_FROUND_CUR_DIRECTION);
1370}
1371
/* 512-bit multiply macros taking an explicit rounding argument __R, which is
 * forwarded as the last operand of the mulpd512/mulps512 mask builtins.
 * Every macro argument is parenthesized so that a compound expression is
 * cast as a whole, and no macro ends in a stray ';', so each one can be used
 * inside a larger expression or as a function argument. */

/* Double precision: all-ones mask, zero vector as source operand. */
#define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) (__A), (__v8df) (__B), \
      (__v8df) _mm512_setzero_pd(), (__mmask8) -1, (__R)); })

/* Double precision: __W as source operand, __U as mask. */
#define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) (__A), (__v8df) (__B), \
      (__v8df) (__W), (__mmask8) (__U), (__R)); })

/* Double precision: zero vector as source operand, __U as mask. */
#define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) (__A), (__v8df) (__B), \
      (__v8df) _mm512_setzero_pd(), (__mmask8) (__U), (__R)); })

/* Single precision: all-ones mask, zero vector as source operand. */
#define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_mulps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, (__R)); })

/* Single precision: __W as source operand, __U as mask. */
#define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_mulps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
      (__v16sf) (__W), (__mmask16) (__U), (__R)); })

/* Single precision: zero vector as source operand, __U as mask. */
#define _mm512_maskz_mul_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_mulps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (__U), (__R)); })
1395
1396static __inline__ __m128 __DEFAULT_FN_ATTRS
1397_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1398 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
1399 (__v4sf) __B,
1400 (__v4sf) __W,
1401 (__mmask8) __U,
1402 _MM_FROUND_CUR_DIRECTION);
1403}
1404
1405static __inline__ __m128 __DEFAULT_FN_ATTRS
1406_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1407 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
1408 (__v4sf) __B,
1409 (__v4sf) _mm_setzero_ps (),
1410 (__mmask8) __U,
1411 _MM_FROUND_CUR_DIRECTION);
1412}
1413
/* Scalar single-precision divide with an explicit rounding argument __R,
 * forwarded as the last operand of __builtin_ia32_divss_round_mask.
 * Every macro argument is parenthesized so that a compound expression
 * (e.g. a + b) is cast as a whole instead of only its first operand. */
#define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_divss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
      (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

/* Same builtin, with __W as the source operand and __U as the mask. */
#define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_divss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
      (__v4sf) (__W), (__mmask8) (__U), (__R)); })

/* Same builtin, with a zero vector as the source operand and __U as mask. */
#define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_divss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
      (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
1425
1426static __inline__ __m128d __DEFAULT_FN_ATTRS
1427_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1428 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
1429 (__v2df) __B,
1430 (__v2df) __W,
1431 (__mmask8) __U,
1432 _MM_FROUND_CUR_DIRECTION);
1433}
1434
1435static __inline__ __m128d __DEFAULT_FN_ATTRS
1436_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1437 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
1438 (__v2df) __B,
1439 (__v2df) _mm_setzero_pd (),
1440 (__mmask8) __U,
1441 _MM_FROUND_CUR_DIRECTION);
1442}
1443
/* Scalar double-precision divide with an explicit rounding argument __R,
 * forwarded as the last operand of __builtin_ia32_divsd_round_mask.
 * Every macro argument is parenthesized so that a compound expression
 * (e.g. a + b) is cast as a whole instead of only its first operand. */
#define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
      (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

/* Same builtin, with __W as the source operand and __U as the mask. */
#define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
      (__v2df) (__W), (__mmask8) (__U), (__R)); })

/* Same builtin, with a zero vector as the source operand and __U as mask. */
#define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
      (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
1455
1456static __inline__ __m512d __DEFAULT_FN_ATTRS
1457_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1458 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
1459 (__v8df) __B,
1460 (__v8df) __W,
1461 (__mmask8) __U,
1462 _MM_FROUND_CUR_DIRECTION);
1463}
1464
1465static __inline__ __m512d __DEFAULT_FN_ATTRS
1466_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1467 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
1468 (__v8df) __B,
1469 (__v8df)
1470 _mm512_setzero_pd (),
1471 (__mmask8) __U,
1472 _MM_FROUND_CUR_DIRECTION);
1473}
1474
1475static __inline__ __m512 __DEFAULT_FN_ATTRS
1476_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1477 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
1478 (__v16sf) __B,
1479 (__v16sf) __W,
1480 (__mmask16) __U,
1481 _MM_FROUND_CUR_DIRECTION);
1482}
1483
1484static __inline__ __m512 __DEFAULT_FN_ATTRS
1485_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1486 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
1487 (__v16sf) __B,
1488 (__v16sf)
1489 _mm512_setzero_ps (),
1490 (__mmask16) __U,
1491 _MM_FROUND_CUR_DIRECTION);
1492}
1493
/* 512-bit divide macros taking an explicit rounding argument __R, which is
 * forwarded as the last operand of the divpd512/divps512 mask builtins.
 * Every macro argument is parenthesized so that a compound expression is
 * cast as a whole, and no macro ends in a stray ';', so each one can be used
 * inside a larger expression or as a function argument. */

/* Double precision: all-ones mask, zero vector as source operand. */
#define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_divpd512_mask ((__v8df) (__A), (__v8df) (__B), \
      (__v8df) _mm512_setzero_pd(), (__mmask8) -1, (__R)); })

/* Double precision: __W as source operand, __U as mask. */
#define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_divpd512_mask ((__v8df) (__A), (__v8df) (__B), \
      (__v8df) (__W), (__mmask8) (__U), (__R)); })

/* Double precision: zero vector as source operand, __U as mask. */
#define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_divpd512_mask ((__v8df) (__A), (__v8df) (__B), \
      (__v8df) _mm512_setzero_pd(), (__mmask8) (__U), (__R)); })

/* Single precision: all-ones mask, zero vector as source operand. */
#define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_divps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, (__R)); })

/* Single precision: __W as source operand, __U as mask. */
#define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_divps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
      (__v16sf) (__W), (__mmask16) (__U), (__R)); })

/* Single precision: zero vector as source operand, __U as mask. */
#define _mm512_maskz_div_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_divps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (__U), (__R)); })
1517
/* Calls __builtin_ia32_rndscaleps_mask on A with immediate B, a mask of -1
 * and the current rounding direction; A is also passed as the source
 * operand.  A is captured in a local first so that an argument with side
 * effects is evaluated exactly once even though the builtin receives it
 * twice. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  __v16sf __rndscale_src = (__v16sf)(A); \
  (__m512)__builtin_ia32_rndscaleps_mask(__rndscale_src, (B), \
                                         __rndscale_src, \
                                         -1, _MM_FROUND_CUR_DIRECTION); })
1521
/* Calls __builtin_ia32_rndscalepd_mask on A with immediate B, a mask of -1
 * and the current rounding direction; A is also passed as the source
 * operand.  A is captured in a local first so that an argument with side
 * effects is evaluated exactly once even though the builtin receives it
 * twice. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  __v8df __rndscale_src = (__v8df)(A); \
  (__m512d)__builtin_ia32_rndscalepd_mask(__rndscale_src, (B), \
                                          __rndscale_src, \
                                          -1, _MM_FROUND_CUR_DIRECTION); })
1525
/* Double-precision fmadd macros with an explicit rounding argument R.
 * Each forwards A, B, C unchanged to one of the vfmaddpd512 builtins. */

/* _mask builtin with an all-ones mask. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                           (__v8df)(C), (__mmask8)-1, \
                                           (R)); })

/* _mask builtin with mask U. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                           (__v8df)(C), (__mmask8)(U), \
                                           (R)); })

/* _mask3 builtin with mask U. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), \
                                            (R)); })

/* _maskz builtin with mask U. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), \
                                            (R)); })
1548
1549
/* Double-precision fmsub macros with an explicit rounding argument R.
 * Each reuses a vfmaddpd512 builtin and passes C negated. */

/* _mask builtin with an all-ones mask. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                           -(__v8df)(C), (__mmask8)-1, \
                                           (R)); })

/* _mask builtin with mask U. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                           -(__v8df)(C), (__mmask8)(U), \
                                           (R)); })

/* _maskz builtin with mask U. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(A), (__v8df)(B), \
                                            -(__v8df)(C), (__mmask8)(U), \
                                            (R)); })
1566
1567
/* Double-precision fnmadd macros with an explicit rounding argument R.
 * Each reuses a vfmaddpd512 builtin and passes A negated. */

/* _mask builtin with an all-ones mask. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(A), (__v8df)(B), \
                                           (__v8df)(C), (__mmask8)-1, \
                                           (R)); })

/* _mask3 builtin with mask U. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), \
                                            (R)); })

/* _maskz builtin with mask U. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), \
                                            (R)); })
1584
1585
/* Double-precision fnmsub macros with an explicit rounding argument R.
 * Each reuses a vfmaddpd512 builtin and passes both A and C negated. */

/* _mask builtin with an all-ones mask. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(A), (__v8df)(B), \
                                           -(__v8df)(C), (__mmask8)-1, \
                                           (R)); })

/* _maskz builtin with mask U. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(A), (__v8df)(B), \
                                            -(__v8df)(C), (__mmask8)(U), \
                                            (R)); })
1596
1597
1598static __inline__ __m512d __DEFAULT_FN_ATTRS
1599_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
1600{
1601 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1602 (__v8df) __B,
1603 (__v8df) __C,
1604 (__mmask8) -1,
1605 _MM_FROUND_CUR_DIRECTION);
1606}
1607
1608static __inline__ __m512d __DEFAULT_FN_ATTRS
1609_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1610{
1611 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1612 (__v8df) __B,
1613 (__v8df) __C,
1614 (__mmask8) __U,
1615 _MM_FROUND_CUR_DIRECTION);
1616}
1617
1618static __inline__ __m512d __DEFAULT_FN_ATTRS
1619_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1620{
1621 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
1622 (__v8df) __B,
1623 (__v8df) __C,
1624 (__mmask8) __U,
1625 _MM_FROUND_CUR_DIRECTION);
1626}
1627
1628static __inline__ __m512d __DEFAULT_FN_ATTRS
1629_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1630{
1631 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
1632 (__v8df) __B,
1633 (__v8df) __C,
1634 (__mmask8) __U,
1635 _MM_FROUND_CUR_DIRECTION);
1636}
1637
1638static __inline__ __m512d __DEFAULT_FN_ATTRS
1639_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
1640{
1641 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1642 (__v8df) __B,
1643 -(__v8df) __C,
1644 (__mmask8) -1,
1645 _MM_FROUND_CUR_DIRECTION);
1646}
1647
1648static __inline__ __m512d __DEFAULT_FN_ATTRS
1649_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1650{
1651 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1652 (__v8df) __B,
1653 -(__v8df) __C,
1654 (__mmask8) __U,
1655 _MM_FROUND_CUR_DIRECTION);
1656}
1657
1658static __inline__ __m512d __DEFAULT_FN_ATTRS
1659_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1660{
1661 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
1662 (__v8df) __B,
1663 -(__v8df) __C,
1664 (__mmask8) __U,
1665 _MM_FROUND_CUR_DIRECTION);
1666}
1667
1668static __inline__ __m512d __DEFAULT_FN_ATTRS
1669_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
1670{
1671 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1672 (__v8df) __B,
1673 (__v8df) __C,
1674 (__mmask8) -1,
1675 _MM_FROUND_CUR_DIRECTION);
1676}
1677
1678static __inline__ __m512d __DEFAULT_FN_ATTRS
1679_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1680{
1681 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
1682 (__v8df) __B,
1683 (__v8df) __C,
1684 (__mmask8) __U,
1685 _MM_FROUND_CUR_DIRECTION);
1686}
1687
1688static __inline__ __m512d __DEFAULT_FN_ATTRS
1689_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1690{
1691 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1692 (__v8df) __B,
1693 (__v8df) __C,
1694 (__mmask8) __U,
1695 _MM_FROUND_CUR_DIRECTION);
1696}
1697
1698static __inline__ __m512d __DEFAULT_FN_ATTRS
1699_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
1700{
1701 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1702 (__v8df) __B,
1703 -(__v8df) __C,
1704 (__mmask8) -1,
1705 _MM_FROUND_CUR_DIRECTION);
1706}
1707
1708static __inline__ __m512d __DEFAULT_FN_ATTRS
1709_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1710{
1711 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1712 (__v8df) __B,
1713 -(__v8df) __C,
1714 (__mmask8) __U,
1715 _MM_FROUND_CUR_DIRECTION);
1716}
1717
/* Single-precision fmadd macros with an explicit rounding argument R.
 * Each forwards A, B, C unchanged to one of the vfmaddps512 builtins. */

/* _mask builtin with an all-ones mask. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(A), (__v16sf)(B), \
                                          (__v16sf)(C), (__mmask16)-1, \
                                          (R)); })

/* _mask builtin with mask U. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(A), (__v16sf)(B), \
                                          (__v16sf)(C), (__mmask16)(U), \
                                          (R)); })

/* _mask3 builtin with mask U. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), \
                                           (R)); })

/* _maskz builtin with mask U. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), \
                                           (R)); })
1740
1741
/* Single-precision fmsub macros with an explicit rounding argument R.
 * Each reuses a vfmaddps512 builtin and passes C negated. */

/* _mask builtin with an all-ones mask. */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(A), (__v16sf)(B), \
                                          -(__v16sf)(C), (__mmask16)-1, \
                                          (R)); })

/* _mask builtin with mask U. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(A), (__v16sf)(B), \
                                          -(__v16sf)(C), (__mmask16)(U), \
                                          (R)); })

/* _maskz builtin with mask U. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(A), (__v16sf)(B), \
                                           -(__v16sf)(C), (__mmask16)(U), \
                                           (R)); })
1758
1759
/* Single-precision fnmadd macros with an explicit rounding argument R.
 * Each reuses a vfmaddps512 builtin and passes A negated. */

/* _mask builtin with an all-ones mask. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(A), (__v16sf)(B), \
                                          (__v16sf)(C), (__mmask16)-1, \
                                          (R)); })

/* _mask3 builtin with mask U. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), \
                                           (R)); })

/* _maskz builtin with mask U. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), \
                                           (R)); })
1776
1777
/* Single-precision fnmsub macros with an explicit rounding argument R.
 * Each reuses a vfmaddps512 builtin and passes both A and C negated. */

/* _mask builtin with an all-ones mask. */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(A), (__v16sf)(B), \
                                          -(__v16sf)(C), (__mmask16)-1, \
                                          (R)); })

/* _maskz builtin with mask U. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(A), (__v16sf)(B), \
                                           -(__v16sf)(C), (__mmask16)(U), \
                                           (R)); })
1788
1789
1790static __inline__ __m512 __DEFAULT_FN_ATTRS
1791_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
1792{
1793 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1794 (__v16sf) __B,
1795 (__v16sf) __C,
1796 (__mmask16) -1,
1797 _MM_FROUND_CUR_DIRECTION);
1798}
1799
1800static __inline__ __m512 __DEFAULT_FN_ATTRS
1801_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1802{
1803 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1804 (__v16sf) __B,
1805 (__v16sf) __C,
1806 (__mmask16) __U,
1807 _MM_FROUND_CUR_DIRECTION);
1808}
1809
1810static __inline__ __m512 __DEFAULT_FN_ATTRS
1811_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1812{
1813 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
1814 (__v16sf) __B,
1815 (__v16sf) __C,
1816 (__mmask16) __U,
1817 _MM_FROUND_CUR_DIRECTION);
1818}
1819
1820static __inline__ __m512 __DEFAULT_FN_ATTRS
1821_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1822{
1823 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
1824 (__v16sf) __B,
1825 (__v16sf) __C,
1826 (__mmask16) __U,
1827 _MM_FROUND_CUR_DIRECTION);
1828}
1829
1830static __inline__ __m512 __DEFAULT_FN_ATTRS
1831_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
1832{
1833 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1834 (__v16sf) __B,
1835 -(__v16sf) __C,
1836 (__mmask16) -1,
1837 _MM_FROUND_CUR_DIRECTION);
1838}
1839
1840static __inline__ __m512 __DEFAULT_FN_ATTRS
1841_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1842{
1843 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1844 (__v16sf) __B,
1845 -(__v16sf) __C,
1846 (__mmask16) __U,
1847 _MM_FROUND_CUR_DIRECTION);
1848}
1849
1850static __inline__ __m512 __DEFAULT_FN_ATTRS
1851_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1852{
1853 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
1854 (__v16sf) __B,
1855 -(__v16sf) __C,
1856 (__mmask16) __U,
1857 _MM_FROUND_CUR_DIRECTION);
1858}
1859
1860static __inline__ __m512 __DEFAULT_FN_ATTRS
1861_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
1862{
1863 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
1864 (__v16sf) __B,
1865 (__v16sf) __C,
1866 (__mmask16) -1,
1867 _MM_FROUND_CUR_DIRECTION);
1868}
1869
1870static __inline__ __m512 __DEFAULT_FN_ATTRS
1871_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1872{
1873 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
1874 (__v16sf) __B,
1875 (__v16sf) __C,
1876 (__mmask16) __U,
1877 _MM_FROUND_CUR_DIRECTION);
1878}
1879
1880static __inline__ __m512 __DEFAULT_FN_ATTRS
1881_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1882{
1883 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
1884 (__v16sf) __B,
1885 (__v16sf) __C,
1886 (__mmask16) __U,
1887 _MM_FROUND_CUR_DIRECTION);
1888}
1889
1890static __inline__ __m512 __DEFAULT_FN_ATTRS
1891_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
1892{
1893 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
1894 (__v16sf) __B,
1895 -(__v16sf) __C,
1896 (__mmask16) -1,
1897 _MM_FROUND_CUR_DIRECTION);
1898}
1899
1900static __inline__ __m512 __DEFAULT_FN_ATTRS
1901_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1902{
1903 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
1904 (__v16sf) __B,
1905 -(__v16sf) __C,
1906 (__mmask16) __U,
1907 _MM_FROUND_CUR_DIRECTION);
1908}
1909
/* Double-precision fmaddsub macros with an explicit rounding argument R.
 * Each forwards A, B, C unchanged to one of the vfmaddsubpd512 builtins. */

/* _mask builtin with an all-ones mask. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(A), (__v8df)(B), \
                                              (__v8df)(C), (__mmask8)-1, \
                                              (R)); })

/* _mask builtin with mask U. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(A), (__v8df)(B), \
                                              (__v8df)(C), (__mmask8)(U), \
                                              (R)); })

/* _mask3 builtin with mask U. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(A), (__v8df)(B), \
                                               (__v8df)(C), (__mmask8)(U), \
                                               (R)); })

/* _maskz builtin with mask U. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(A), (__v8df)(B), \
                                               (__v8df)(C), (__mmask8)(U), \
                                               (R)); })
1932
1933
/* Double-precision fmsubadd macros with an explicit rounding argument R.
 * Each reuses a vfmaddsubpd512 builtin and passes C negated. */

/* _mask builtin with an all-ones mask. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(A), (__v8df)(B), \
                                              -(__v8df)(C), (__mmask8)-1, \
                                              (R)); })

/* _mask builtin with mask U. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(A), (__v8df)(B), \
                                              -(__v8df)(C), (__mmask8)(U), \
                                              (R)); })

/* _maskz builtin with mask U. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(A), (__v8df)(B), \
                                               -(__v8df)(C), (__mmask8)(U), \
                                               (R)); })
1950
1951
1952static __inline__ __m512d __DEFAULT_FN_ATTRS
1953_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
1954{
1955 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1956 (__v8df) __B,
1957 (__v8df) __C,
1958 (__mmask8) -1,
1959 _MM_FROUND_CUR_DIRECTION);
1960}
1961
1962static __inline__ __m512d __DEFAULT_FN_ATTRS
1963_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1964{
1965 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1966 (__v8df) __B,
1967 (__v8df) __C,
1968 (__mmask8) __U,
1969 _MM_FROUND_CUR_DIRECTION);
1970}
1971
1972static __inline__ __m512d __DEFAULT_FN_ATTRS
1973_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1974{
1975 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
1976 (__v8df) __B,
1977 (__v8df) __C,
1978 (__mmask8) __U,
1979 _MM_FROUND_CUR_DIRECTION);
1980}
1981
1982static __inline__ __m512d __DEFAULT_FN_ATTRS
1983_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1984{
1985 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
1986 (__v8df) __B,
1987 (__v8df) __C,
1988 (__mmask8) __U,
1989 _MM_FROUND_CUR_DIRECTION);
1990}
1991
1992static __inline__ __m512d __DEFAULT_FN_ATTRS
1993_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
1994{
1995 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1996 (__v8df) __B,
1997 -(__v8df) __C,
1998 (__mmask8) -1,
1999 _MM_FROUND_CUR_DIRECTION);
2000}
2001
2002static __inline__ __m512d __DEFAULT_FN_ATTRS
2003_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2004{
2005 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2006 (__v8df) __B,
2007 -(__v8df) __C,
2008 (__mmask8) __U,
2009 _MM_FROUND_CUR_DIRECTION);
2010}
2011
2012static __inline__ __m512d __DEFAULT_FN_ATTRS
2013_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2014{
2015 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2016 (__v8df) __B,
2017 -(__v8df) __C,
2018 (__mmask8) __U,
2019 _MM_FROUND_CUR_DIRECTION);
2020}
2021
/* Explicit-rounding fmaddsub/fmsubadd wrappers for 16 x float vectors.
 * Each macro forwards A, B, C and the rounding argument R to one of the
 * __builtin_ia32_vfmaddsubps512_{mask,mask3,maskz} builtins.  The unmasked
 * forms pass an all-ones __mmask16; the fmsubadd forms reuse the fmaddsub
 * builtins with C negated. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf) (A), (__v16sf) (B), (__v16sf) (C), (__mmask16) -1, (R)); })

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf) (A), (__v16sf) (B), (__v16sf) (C), (__mmask16) (U), (R)); })

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask3( \
      (__v16sf) (A), (__v16sf) (B), (__v16sf) (C), (__mmask16) (U), (R)); })

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf) (A), (__v16sf) (B), (__v16sf) (C), (__mmask16) (U), (R)); })

#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf) (A), (__v16sf) (B), -(__v16sf) (C), (__mmask16) -1, (R)); })

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf) (A), (__v16sf) (B), -(__v16sf) (C), (__mmask16) (U), (R)); })

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf) (A), (__v16sf) (B), -(__v16sf) (C), (__mmask16) (U), (R)); })
2062
2063
2064static __inline__ __m512 __DEFAULT_FN_ATTRS
2065_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2066{
2067 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2068 (__v16sf) __B,
2069 (__v16sf) __C,
2070 (__mmask16) -1,
2071 _MM_FROUND_CUR_DIRECTION);
2072}
2073
2074static __inline__ __m512 __DEFAULT_FN_ATTRS
2075_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2076{
2077 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2078 (__v16sf) __B,
2079 (__v16sf) __C,
2080 (__mmask16) __U,
2081 _MM_FROUND_CUR_DIRECTION);
2082}
2083
2084static __inline__ __m512 __DEFAULT_FN_ATTRS
2085_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2086{
2087 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2088 (__v16sf) __B,
2089 (__v16sf) __C,
2090 (__mmask16) __U,
2091 _MM_FROUND_CUR_DIRECTION);
2092}
2093
2094static __inline__ __m512 __DEFAULT_FN_ATTRS
2095_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2096{
2097 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2098 (__v16sf) __B,
2099 (__v16sf) __C,
2100 (__mmask16) __U,
2101 _MM_FROUND_CUR_DIRECTION);
2102}
2103
2104static __inline__ __m512 __DEFAULT_FN_ATTRS
2105_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
2106{
2107 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2108 (__v16sf) __B,
2109 -(__v16sf) __C,
2110 (__mmask16) -1,
2111 _MM_FROUND_CUR_DIRECTION);
2112}
2113
2114static __inline__ __m512 __DEFAULT_FN_ATTRS
2115_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2116{
2117 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2118 (__v16sf) __B,
2119 -(__v16sf) __C,
2120 (__mmask16) __U,
2121 _MM_FROUND_CUR_DIRECTION);
2122}
2123
2124static __inline__ __m512 __DEFAULT_FN_ATTRS
2125_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2126{
2127 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2128 (__v16sf) __B,
2129 -(__v16sf) __C,
2130 (__mmask16) __U,
2131 _MM_FROUND_CUR_DIRECTION);
2132}
2133
2134#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
2135 (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \
2136 (__v8df) (B), (__v8df) (C), \
2137 (__mmask8) (U), (R)); })
2138
2139
2140static __inline__ __m512d __DEFAULT_FN_ATTRS
2141_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2142{
2143 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2144 (__v8df) __B,
2145 (__v8df) __C,
2146 (__mmask8) __U,
2147 _MM_FROUND_CUR_DIRECTION);
2148}
2149
2150#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
2151 (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \
2152 (__v16sf) (B), (__v16sf) (C), \
2153 (__mmask16) (U), (R)); })
2154
2155
2156static __inline__ __m512 __DEFAULT_FN_ATTRS
2157_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2158{
2159 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2160 (__v16sf) __B,
2161 (__v16sf) __C,
2162 (__mmask16) __U,
2163 _MM_FROUND_CUR_DIRECTION);
2164}
2165
2166#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
2167 (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \
2168 (__v8df) (B), (__v8df) (C), \
2169 (__mmask8) (U), (R)); })
2170
2171
2172static __inline__ __m512d __DEFAULT_FN_ATTRS
2173_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2174{
2175 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2176 (__v8df) __B,
2177 (__v8df) __C,
2178 (__mmask8) __U,
2179 _MM_FROUND_CUR_DIRECTION);
2180}
2181
2182#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
2183 (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), \
2184 (__v16sf) (B), (__v16sf) (C), \
2185 (__mmask16) (U), (R)); })
2186
2187
2188static __inline__ __m512 __DEFAULT_FN_ATTRS
2189_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2190{
2191 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2192 (__v16sf) __B,
2193 (__v16sf) __C,
2194 (__mmask16) __U,
2195 _MM_FROUND_CUR_DIRECTION);
2196}
2197
2198#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
2199 (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) (A), \
2200 (__v8df) (B), (__v8df) (C), \
2201 (__mmask8) (U), (R)); })
2202
2203
2204static __inline__ __m512d __DEFAULT_FN_ATTRS
2205_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2206{
2207 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
2208 (__v8df) __B,
2209 (__v8df) __C,
2210 (__mmask8) __U,
2211 _MM_FROUND_CUR_DIRECTION);
2212}
2213
2214#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
2215 (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) (A), \
2216 (__v16sf) (B), (__v16sf) (C), \
2217 (__mmask16) (U), (R)); })
2218
2219
2220static __inline__ __m512 __DEFAULT_FN_ATTRS
2221_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2222{
2223 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
2224 (__v16sf) __B,
2225 (__v16sf) __C,
2226 (__mmask16) __U,
2227 _MM_FROUND_CUR_DIRECTION);
2228}
2229
2230#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
2231 (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) (A), \
2232 (__v8df) (B), (__v8df) (C), \
2233 (__mmask8) (U), (R)); })
2234
2235
2236#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
2237 (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) (A), \
2238 (__v8df) (B), (__v8df) (C), \
2239 (__mmask8) (U), (R)); })
2240
2241
2242static __inline__ __m512d __DEFAULT_FN_ATTRS
2243_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2244{
2245 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
2246 (__v8df) __B,
2247 (__v8df) __C,
2248 (__mmask8) __U,
2249 _MM_FROUND_CUR_DIRECTION);
2250}
2251
2252static __inline__ __m512d __DEFAULT_FN_ATTRS
2253_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2254{
2255 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
2256 (__v8df) __B,
2257 (__v8df) __C,
2258 (__mmask8) __U,
2259 _MM_FROUND_CUR_DIRECTION);
2260}
2261
2262#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
2263 (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) (A), \
2264 (__v16sf) (B), (__v16sf) (C), \
2265 (__mmask16) (U), (R)); })
2266
2267
2268#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
2269 (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) (A), \
2270 (__v16sf) (B), (__v16sf) (C), \
2271 (__mmask16) (U), (R)); })
2272
2273
2274static __inline__ __m512 __DEFAULT_FN_ATTRS
2275_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2276{
2277 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
2278 (__v16sf) __B,
2279 (__v16sf) __C,
2280 (__mmask16) __U,
2281 _MM_FROUND_CUR_DIRECTION);
2282}
2283
2284static __inline__ __m512 __DEFAULT_FN_ATTRS
2285_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2286{
2287 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
2288 (__v16sf) __B,
2289 (__v16sf) __C,
2290 (__mmask16) __U,
2291 _MM_FROUND_CUR_DIRECTION);
2292}
2293
2294
2295
2296/* Vector permutations */
2297
2298static __inline __m512i __DEFAULT_FN_ATTRS
2299_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
2300{
2301 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
2302 /* idx */ ,
2303 (__v16si) __A,
2304 (__v16si) __B,
2305 (__mmask16) -1);
2306}
2307static __inline __m512i __DEFAULT_FN_ATTRS
2308_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
2309{
2310 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
2311 /* idx */ ,
2312 (__v8di) __A,
2313 (__v8di) __B,
2314 (__mmask8) -1);
2315}
2316
2317static __inline __m512d __DEFAULT_FN_ATTRS
2318_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
2319{
2320 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
2321 /* idx */ ,
2322 (__v8df) __A,
2323 (__v8df) __B,
2324 (__mmask8) -1);
2325}
2326static __inline __m512 __DEFAULT_FN_ATTRS
2327_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
2328{
2329 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
2330 /* idx */ ,
2331 (__v16sf) __A,
2332 (__v16sf) __B,
2333 (__mmask16) -1);
2334}
2335
/* alignr wrappers: forward A, B and the immediate I to the alignq/alignd
 * builtins with a zero pass-through vector and an all-ones mask. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i) __builtin_ia32_alignq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (I), \
      (__v8di)_mm512_setzero_si512(), (__mmask8)-1); })

#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i) __builtin_ia32_alignd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (I), \
      (__v16si)_mm512_setzero_si512(), (__mmask16)-1); })

/* Vector Extract */
2349
/* Extracts a 4 x double group from A, selected by the immediate I, via
 * __builtin_ia32_extractf64x4_mask with an all-ones mask.  The pass-through
 * operand is a double-precision zero vector (_mm256_setzero_pd) so its type
 * matches the __v4df result instead of relying on a bit-cast from the
 * integer zero vector; the value (all-zero bits) is unchanged. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
  (__m256d) \
    __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \
                                     (I), \
                                     (__v4df)_mm256_setzero_pd(), \
                                     (__mmask8) -1); })
2356
/* Extracts a 4 x float group from A, selected by the immediate I, via
 * __builtin_ia32_extractf32x4_mask with a zero pass-through vector and an
 * all-ones mask. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128) __builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (I), (__v4sf)_mm_setzero_ps(), (__mmask8) -1); })
2363
2364/* Vector Blend */
2365
2366static __inline __m512d __DEFAULT_FN_ATTRS
2367_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
2368{
2369 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
2370 (__v8df) __W,
2371 (__mmask8) __U);
2372}
2373
2374static __inline __m512 __DEFAULT_FN_ATTRS
2375_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
2376{
2377 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
2378 (__v16sf) __W,
2379 (__mmask16) __U);
2380}
2381
2382static __inline __m512i __DEFAULT_FN_ATTRS
2383_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
2384{
2385 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
2386 (__v8di) __W,
2387 (__mmask8) __U);
2388}
2389
2390static __inline __m512i __DEFAULT_FN_ATTRS
2391_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
2392{
2393 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
2394 (__v16si) __W,
2395 (__mmask16) __U);
2396}
2397
2398/* Compare */
2399
/* 16 x float compares producing a __mmask16.  P is the comparison predicate
 * immediate and R the rounding/exception argument, both forwarded to
 * __builtin_ia32_cmpps512_mask.  The non-round variants expand to the round
 * variants with _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16) __builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (P), (__mmask16)-1, (R)); })

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16) __builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (P), (__mmask16)(U), (R)); })

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
2415
/* 8 x double compares producing a __mmask8.  P is the comparison predicate
 * immediate and R the rounding/exception argument, both forwarded to
 * __builtin_ia32_cmppd512_mask.  The non-round variants expand to the round
 * variants with _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8) __builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (P), (__mmask8)-1, (R)); })

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8) __builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (P), (__mmask8)(U), (R)); })

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
2431
2432/* Conversion */
2433
2434static __inline __m512i __DEFAULT_FN_ATTRS
2435_mm512_cvttps_epu32(__m512 __A)
2436{
2437 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
2438 (__v16si)
2439 _mm512_setzero_si512 (),
2440 (__mmask16) -1,
2441 _MM_FROUND_CUR_DIRECTION);
2442}
2443
/* 32-bit integer -> float conversions with explicit rounding argument R.
 * The epi32 form uses the cvtdq2ps builtin, the epu32 form the cvtudq2ps
 * builtin; both pass a zero pass-through vector and an all-ones mask. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512) __builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (R)); })

#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512) __builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (R)); })
2453
2454static __inline __m512d __DEFAULT_FN_ATTRS
2455_mm512_cvtepi32_pd(__m256i __A)
2456{
2457 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
2458 (__v8df)
2459 _mm512_setzero_pd (),
2460 (__mmask8) -1);
2461}
2462
2463static __inline __m512d __DEFAULT_FN_ATTRS
2464_mm512_cvtepu32_pd(__m256i __A)
2465{
2466 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
2467 (__v8df)
2468 _mm512_setzero_pd (),
2469 (__mmask8) -1);
2470}
2471
/* 8 x double -> 8 x float conversion with explicit rounding argument R,
 * via __builtin_ia32_cvtpd2ps512_mask with a zero pass-through vector and
 * an all-ones mask. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256) __builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, (R)); })
2476
/* 16 x float -> 16 x half conversion via __builtin_ia32_vcvtps2ph512_mask.
 * I is the immediate forwarded to the builtin.  The pass-through vector is
 * zero and the mask is an explicitly typed all-ones __mmask16, matching the
 * other unmasked wrappers in this header. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })
2481
2482static __inline __m512 __DEFAULT_FN_ATTRS
2483_mm512_cvtph_ps(__m256i __A)
2484{
2485 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
2486 (__v16sf)
2487 _mm512_setzero_ps (),
2488 (__mmask16) -1,
2489 _MM_FROUND_CUR_DIRECTION);
2490}
2491
2492static __inline __m512i __DEFAULT_FN_ATTRS
2493_mm512_cvttps_epi32(__m512 __a)
2494{
2495 return (__m512i)
2496 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
2497 (__v16si) _mm512_setzero_si512 (),
2498 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
2499}
2500
2501static __inline __m256i __DEFAULT_FN_ATTRS
2502_mm512_cvttpd_epi32(__m512d __a)
2503{
2504 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
2505 (__v8si)_mm256_setzero_si256(),
2506 (__mmask8) -1,
2507 _MM_FROUND_CUR_DIRECTION);
2508}
2509
/* Floating-point -> 32-bit integer conversions with explicit rounding
 * argument R.  Each macro forwards to the builtin named in its expansion
 * with a zero pass-through vector and an all-ones mask. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i) __builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1, (R)); })

#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i) __builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)-1, (R)); })

#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i) __builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)-1, (R)); })

#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i) __builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1, (R)); })

#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i) __builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)-1, (R)); })

#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i) __builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(A), (__v8si)_mm256_setzero_si256(), (__mmask8) -1, (R)); })
2539
2540/* Unpack and Interleave */
2541static __inline __m512d __DEFAULT_FN_ATTRS
2542_mm512_unpackhi_pd(__m512d __a, __m512d __b)
2543{
2544 return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
2545}
2546
2547static __inline __m512d __DEFAULT_FN_ATTRS
2548_mm512_unpacklo_pd(__m512d __a, __m512d __b)
2549{
2550 return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
2551}
2552
2553static __inline __m512 __DEFAULT_FN_ATTRS
2554_mm512_unpackhi_ps(__m512 __a, __m512 __b)
2555{
2556 return __builtin_shufflevector(__a, __b,
2557 2, 18, 3, 19,
2558 2+4, 18+4, 3+4, 19+4,
2559 2+8, 18+8, 3+8, 19+8,
2560 2+12, 18+12, 3+12, 19+12);
2561}
2562
2563static __inline __m512 __DEFAULT_FN_ATTRS
2564_mm512_unpacklo_ps(__m512 __a, __m512 __b)
2565{
2566 return __builtin_shufflevector(__a, __b,
2567 0, 16, 1, 17,
2568 0+4, 16+4, 1+4, 17+4,
2569 0+8, 16+8, 1+8, 17+8,
2570 0+12, 16+12, 1+12, 17+12);
2571}
2572
2573/* Bit Test */
2574
2575static __inline __mmask16 __DEFAULT_FN_ATTRS
2576_mm512_test_epi32_mask(__m512i __A, __m512i __B)
2577{
2578 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
2579 (__v16si) __B,
2580 (__mmask16) -1);
2581}
2582
2583static __inline __mmask8 __DEFAULT_FN_ATTRS
2584_mm512_test_epi64_mask(__m512i __A, __m512i __B)
2585{
2586 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
2587 (__v8di) __B,
2588 (__mmask8) -1);
2589}
2590
2591/* SIMD load ops */
2592
2593static __inline __m512i __DEFAULT_FN_ATTRS
2594_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
2595{
2596 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
2597 (__v16si)
2598 _mm512_setzero_si512 (),
2599 (__mmask16) __U);
2600}
2601
2602static __inline __m512i __DEFAULT_FN_ATTRS
2603_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
2604{
2605 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
2606 (__v8di)
2607 _mm512_setzero_si512 (),
2608 (__mmask8) __U);
2609}
2610
2611static __inline __m512 __DEFAULT_FN_ATTRS
2612_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
2613{
2614 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
2615 (__v16sf)
2616 _mm512_setzero_ps (),
2617 (__mmask16) __U);
2618}
2619
2620static __inline __m512d __DEFAULT_FN_ATTRS
2621_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
2622{
2623 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
2624 (__v8df)
2625 _mm512_setzero_pd (),
2626 (__mmask8) __U);
2627}
2628
2629static __inline __m512 __DEFAULT_FN_ATTRS
2630_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
2631{
2632 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
2633 (__v16sf)
2634 _mm512_setzero_ps (),
2635 (__mmask16) __U);
2636}
2637
2638static __inline __m512d __DEFAULT_FN_ATTRS
2639_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
2640{
2641 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
2642 (__v8df)
2643 _mm512_setzero_pd (),
2644 (__mmask8) __U);
2645}
2646
2647static __inline __m512d __DEFAULT_FN_ATTRS
2648_mm512_loadu_pd(double const *__p)
2649{
2650 struct __loadu_pd {
2651 __m512d __v;
2652 } __attribute__((__packed__, __may_alias__));
2653 return ((struct __loadu_pd*)__p)->__v;
2654}
2655
2656static __inline __m512 __DEFAULT_FN_ATTRS
2657_mm512_loadu_ps(float const *__p)
2658{
2659 struct __loadu_ps {
2660 __m512 __v;
2661 } __attribute__((__packed__, __may_alias__));
2662 return ((struct __loadu_ps*)__p)->__v;
2663}
2664
2665static __inline __m512 __DEFAULT_FN_ATTRS
2666_mm512_load_ps(float const *__p)
2667{
2668 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
2669 (__v16sf)
2670 _mm512_setzero_ps (),
2671 (__mmask16) -1);
2672}
2673
2674static __inline __m512d __DEFAULT_FN_ATTRS
2675_mm512_load_pd(double const *__p)
2676{
2677 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
2678 (__v8df)
2679 _mm512_setzero_pd (),
2680 (__mmask8) -1);
2681}
2682
2683/* SIMD store ops */
2684
2685static __inline void __DEFAULT_FN_ATTRS
2686_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
2687{
2688 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
2689 (__mmask8) __U);
2690}
2691
2692static __inline void __DEFAULT_FN_ATTRS
2693_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
2694{
2695 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
2696 (__mmask16) __U);
2697}
2698
2699static __inline void __DEFAULT_FN_ATTRS
2700_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
2701{
2702 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2703}
2704
2705static __inline void __DEFAULT_FN_ATTRS
2706_mm512_storeu_pd(void *__P, __m512d __A)
2707{
2708 __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
2709}
2710
2711static __inline void __DEFAULT_FN_ATTRS
2712_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
2713{
2714 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
2715 (__mmask16) __U);
2716}
2717
2718static __inline void __DEFAULT_FN_ATTRS
2719_mm512_storeu_ps(void *__P, __m512 __A)
2720{
2721 __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
2722}
2723
2724static __inline void __DEFAULT_FN_ATTRS
2725_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
2726{
2727 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2728}
2729
2730static __inline void __DEFAULT_FN_ATTRS
2731_mm512_store_pd(void *__P, __m512d __A)
2732{
2733 *(__m512d*)__P = __A;
2734}
2735
2736static __inline void __DEFAULT_FN_ATTRS
2737_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
2738{
2739 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
2740 (__mmask16) __U);
2741}
2742
2743static __inline void __DEFAULT_FN_ATTRS
2744_mm512_store_ps(void *__P, __m512 __A)
2745{
2746 *(__m512*)__P = __A;
2747}
2748
2749/* Mask ops */
2750
2751static __inline __mmask16 __DEFAULT_FN_ATTRS
2752_mm512_knot(__mmask16 __M)
2753{
2754 return __builtin_ia32_knothi(__M);
2755}
2756
2757/* Integer compare */
2758
2759static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2760_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
2761 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
2762 (__mmask16)-1);
2763}
2764
2765static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2766_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2767 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
2768 __u);
2769}
2770
2771static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2772_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
2773 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
2774 (__mmask16)-1);
2775}
2776
2777static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2778_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2779 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
2780 __u);
2781}
2782
2783static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2784_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2785 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
2786 __u);
2787}
2788
2789static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2790_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
2791 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
2792 (__mmask8)-1);
2793}
2794
2795static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2796_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
2797 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
2798 (__mmask8)-1);
2799}
2800
2801static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2802_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2803 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
2804 __u);
2805}
2806
2807static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2808_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
2809 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2810 (__mmask16)-1);
2811}
2812
2813static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2814_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2815 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2816 __u);
2817}
2818
2819static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2820_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
2821 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2822 (__mmask16)-1);
2823}
2824
2825static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2826_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2827 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2828 __u);
2829}
2830
2831static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2832_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
2833 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2834 (__mmask8)-1);
2835}
2836
2837static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2838_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2839 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2840 __u);
2841}
2842
2843static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2844_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
2845 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2846 (__mmask8)-1);
2847}
2848
2849static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2850_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2851 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2852 __u);
2853}
2854
2855static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2856_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
2857 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
2858 (__mmask16)-1);
2859}
2860
2861static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2862_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2863 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
2864 __u);
2865}
2866
2867static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2868_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
2869 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
2870 (__mmask16)-1);
2871}
2872
2873static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2874_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2875 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
2876 __u);
2877}
2878
2879static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2880_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2881 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
2882 __u);
2883}
2884
2885static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2886_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
2887 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
2888 (__mmask8)-1);
2889}
2890
2891static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2892_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
2893 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
2894 (__mmask8)-1);
2895}
2896
2897static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2898_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2899 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
2900 __u);
2901}
2902
2903static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2904_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
2905 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2906 (__mmask16)-1);
2907}
2908
2909static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2910_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2911 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2912 __u);
2913}
2914
2915static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2916_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
2917 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2918 (__mmask16)-1);
2919}
2920
2921static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2922_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2923 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2924 __u);
2925}
2926
2927static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2928_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
2929 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2930 (__mmask8)-1);
2931}
2932
2933static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2934_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2935 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2936 __u);
2937}
2938
2939static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2940_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
2941 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2942 (__mmask8)-1);
2943}
2944
2945static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2946_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2947 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2948 __u);
2949}
2950
2951static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2952_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
2953 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2954 (__mmask16)-1);
2955}
2956
2957static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2958_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2959 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2960 __u);
2961}
2962
2963static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2964_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
2965 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2966 (__mmask16)-1);
2967}
2968
2969static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2970_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2971 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2972 __u);
2973}
2974
2975static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2976_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
2977 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2978 (__mmask8)-1);
2979}
2980
2981static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2982_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2983 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2984 __u);
2985}
2986
2987static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2988_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
2989 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2990 (__mmask8)-1);
2991}
2992
2993static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2994_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2995 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2996 __u);
2997}
2998
2999static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3000_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
3001 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3002 (__mmask16)-1);
3003}
3004
3005static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3006_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3007 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3008 __u);
3009}
3010
3011static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3012_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
3013 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3014 (__mmask16)-1);
3015}
3016
3017static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3018_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3019 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3020 __u);
3021}
3022
3023static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3024_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
3025 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3026 (__mmask8)-1);
3027}
3028
3029static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3030_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3031 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3032 __u);
3033}
3034
3035static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3036_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
3037 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3038 (__mmask8)-1);
3039}
3040
3041static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3042_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3043 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3044 __u);
3045}
3046
3047static __inline__ __m512i __DEFAULT_FN_ATTRS
3048_mm512_cvtepi8_epi32 (__m128i __A)
3049{
3050 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
3051 (__v16si)
3052 _mm512_setzero_si512 (),
3053 (__mmask16) -1);
3054}
3055
3056static __inline__ __m512i __DEFAULT_FN_ATTRS
3057_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
3058{
3059 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
3060 (__v16si) __W,
3061 (__mmask16) __U);
3062}
3063
3064static __inline__ __m512i __DEFAULT_FN_ATTRS
3065_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
3066{
3067 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
3068 (__v16si)
3069 _mm512_setzero_si512 (),
3070 (__mmask16) __U);
3071}
3072
3073static __inline__ __m512i __DEFAULT_FN_ATTRS
3074_mm512_cvtepi8_epi64 (__m128i __A)
3075{
3076 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
3077 (__v8di)
3078 _mm512_setzero_si512 (),
3079 (__mmask8) -1);
3080}
3081
3082static __inline__ __m512i __DEFAULT_FN_ATTRS
3083_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
3084{
3085 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
3086 (__v8di) __W,
3087 (__mmask8) __U);
3088}
3089
3090static __inline__ __m512i __DEFAULT_FN_ATTRS
3091_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
3092{
3093 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
3094 (__v8di)
3095 _mm512_setzero_si512 (),
3096 (__mmask8) __U);
3097}
3098
3099static __inline__ __m512i __DEFAULT_FN_ATTRS
3100_mm512_cvtepi32_epi64 (__m256i __X)
3101{
3102 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
3103 (__v8di)
3104 _mm512_setzero_si512 (),
3105 (__mmask8) -1);
3106}
3107
3108static __inline__ __m512i __DEFAULT_FN_ATTRS
3109_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
3110{
3111 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
3112 (__v8di) __W,
3113 (__mmask8) __U);
3114}
3115
3116static __inline__ __m512i __DEFAULT_FN_ATTRS
3117_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
3118{
3119 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
3120 (__v8di)
3121 _mm512_setzero_si512 (),
3122 (__mmask8) __U);
3123}
3124
3125static __inline__ __m512i __DEFAULT_FN_ATTRS
3126_mm512_cvtepi16_epi32 (__m256i __A)
3127{
3128 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
3129 (__v16si)
3130 _mm512_setzero_si512 (),
3131 (__mmask16) -1);
3132}
3133
3134static __inline__ __m512i __DEFAULT_FN_ATTRS
3135_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
3136{
3137 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
3138 (__v16si) __W,
3139 (__mmask16) __U);
3140}
3141
3142static __inline__ __m512i __DEFAULT_FN_ATTRS
3143_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
3144{
3145 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
3146 (__v16si)
3147 _mm512_setzero_si512 (),
3148 (__mmask16) __U);
3149}
3150
3151static __inline__ __m512i __DEFAULT_FN_ATTRS
3152_mm512_cvtepi16_epi64 (__m128i __A)
3153{
3154 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
3155 (__v8di)
3156 _mm512_setzero_si512 (),
3157 (__mmask8) -1);
3158}
3159
3160static __inline__ __m512i __DEFAULT_FN_ATTRS
3161_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
3162{
3163 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
3164 (__v8di) __W,
3165 (__mmask8) __U);
3166}
3167
3168static __inline__ __m512i __DEFAULT_FN_ATTRS
3169_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
3170{
3171 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
3172 (__v8di)
3173 _mm512_setzero_si512 (),
3174 (__mmask8) __U);
3175}
3176
3177static __inline__ __m512i __DEFAULT_FN_ATTRS
3178_mm512_cvtepu8_epi32 (__m128i __A)
3179{
3180 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
3181 (__v16si)
3182 _mm512_setzero_si512 (),
3183 (__mmask16) -1);
3184}
3185
3186static __inline__ __m512i __DEFAULT_FN_ATTRS
3187_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
3188{
3189 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
3190 (__v16si) __W,
3191 (__mmask16) __U);
3192}
3193
3194static __inline__ __m512i __DEFAULT_FN_ATTRS
3195_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
3196{
3197 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
3198 (__v16si)
3199 _mm512_setzero_si512 (),
3200 (__mmask16) __U);
3201}
3202
3203static __inline__ __m512i __DEFAULT_FN_ATTRS
3204_mm512_cvtepu8_epi64 (__m128i __A)
3205{
3206 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
3207 (__v8di)
3208 _mm512_setzero_si512 (),
3209 (__mmask8) -1);
3210}
3211
3212static __inline__ __m512i __DEFAULT_FN_ATTRS
3213_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
3214{
3215 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
3216 (__v8di) __W,
3217 (__mmask8) __U);
3218}
3219
3220static __inline__ __m512i __DEFAULT_FN_ATTRS
3221_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3222{
3223 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
3224 (__v8di)
3225 _mm512_setzero_si512 (),
3226 (__mmask8) __U);
3227}
3228
3229static __inline__ __m512i __DEFAULT_FN_ATTRS
3230_mm512_cvtepu32_epi64 (__m256i __X)
3231{
3232 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
3233 (__v8di)
3234 _mm512_setzero_si512 (),
3235 (__mmask8) -1);
3236}
3237
3238static __inline__ __m512i __DEFAULT_FN_ATTRS
3239_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
3240{
3241 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
3242 (__v8di) __W,
3243 (__mmask8) __U);
3244}
3245
3246static __inline__ __m512i __DEFAULT_FN_ATTRS
3247_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
3248{
3249 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
3250 (__v8di)
3251 _mm512_setzero_si512 (),
3252 (__mmask8) __U);
3253}
3254
3255static __inline__ __m512i __DEFAULT_FN_ATTRS
3256_mm512_cvtepu16_epi32 (__m256i __A)
3257{
3258 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
3259 (__v16si)
3260 _mm512_setzero_si512 (),
3261 (__mmask16) -1);
3262}
3263
3264static __inline__ __m512i __DEFAULT_FN_ATTRS
3265_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
3266{
3267 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
3268 (__v16si) __W,
3269 (__mmask16) __U);
3270}
3271
3272static __inline__ __m512i __DEFAULT_FN_ATTRS
3273_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
3274{
3275 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
3276 (__v16si)
3277 _mm512_setzero_si512 (),
3278 (__mmask16) __U);
3279}
3280
3281static __inline__ __m512i __DEFAULT_FN_ATTRS
3282_mm512_cvtepu16_epi64 (__m128i __A)
3283{
3284 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
3285 (__v8di)
3286 _mm512_setzero_si512 (),
3287 (__mmask8) -1);
3288}
3289
3290static __inline__ __m512i __DEFAULT_FN_ATTRS
3291_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
3292{
3293 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
3294 (__v8di) __W,
3295 (__mmask8) __U);
3296}
3297
3298static __inline__ __m512i __DEFAULT_FN_ATTRS
3299_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3300{
3301 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
3302 (__v8di)
3303 _mm512_setzero_si512 (),
3304 (__mmask8) __U);
3305}
3306
3307static __inline__ __m512i __DEFAULT_FN_ATTRS
3308_mm512_rorv_epi32 (__m512i __A, __m512i __B)
3309{
3310 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
3311 (__v16si) __B,
3312 (__v16si)
3313 _mm512_setzero_si512 (),
3314 (__mmask16) -1);
3315}
3316
3317static __inline__ __m512i __DEFAULT_FN_ATTRS
3318_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
3319{
3320 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
3321 (__v16si) __B,
3322 (__v16si) __W,
3323 (__mmask16) __U);
3324}
3325
3326static __inline__ __m512i __DEFAULT_FN_ATTRS
3327_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
3328{
3329 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
3330 (__v16si) __B,
3331 (__v16si)
3332 _mm512_setzero_si512 (),
3333 (__mmask16) __U);
3334}
3335
3336static __inline__ __m512i __DEFAULT_FN_ATTRS
3337_mm512_rorv_epi64 (__m512i __A, __m512i __B)
3338{
3339 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
3340 (__v8di) __B,
3341 (__v8di)
3342 _mm512_setzero_si512 (),
3343 (__mmask8) -1);
3344}
3345
3346static __inline__ __m512i __DEFAULT_FN_ATTRS
3347_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
3348{
3349 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
3350 (__v8di) __B,
3351 (__v8di) __W,
3352 (__mmask8) __U);
3353}
3354
3355static __inline__ __m512i __DEFAULT_FN_ATTRS
3356_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
3357{
3358 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
3359 (__v8di) __B,
3360 (__v8di)
3361 _mm512_setzero_si512 (),
3362 (__mmask8) __U);
3363}
3364
3365
3366
/* Expands to __builtin_ia32_cmpd512_mask on A and B (each cast through
 * __m512i to __v16si) with caller-chosen predicate P and an all-ones mask. */
#define _mm512_cmp_epi32_mask(A, B, P) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), \
                                         (P), (__mmask16)-1); })
3371
/* Expands to __builtin_ia32_ucmpd512_mask on A and B (each cast through
 * __m512i to __v16si) with caller-chosen predicate P and an all-ones mask. */
#define _mm512_cmp_epu32_mask(A, B, P) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), \
                                          (P), (__mmask16)-1); })
3376
/* Expands to __builtin_ia32_cmpq512_mask on A and B (each cast through
 * __m512i to __v8di) with caller-chosen predicate P and an all-ones mask. */
#define _mm512_cmp_epi64_mask(A, B, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(A), \
                                        (__v8di)(__m512i)(B), \
                                        (P), (__mmask8)-1); })
3381
/* Expands to __builtin_ia32_ucmpq512_mask on A and B (each cast through
 * __m512i to __v8di) with caller-chosen predicate P and an all-ones mask. */
#define _mm512_cmp_epu64_mask(A, B, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), \
                                         (P), (__mmask8)-1); })
3386
/* Expands to __builtin_ia32_cmpd512_mask on A and B (each cast through
 * __m512i to __v16si) with predicate P and M (as __mmask16) as the mask. */
#define _mm512_mask_cmp_epi32_mask(M, A, B, P) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), \
                                         (P), (__mmask16)(M)); })
3391
/* Expands to __builtin_ia32_ucmpd512_mask on A and B (each cast through
 * __m512i to __v16si) with predicate P and M (as __mmask16) as the mask. */
#define _mm512_mask_cmp_epu32_mask(M, A, B, P) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), \
                                          (P), (__mmask16)(M)); })
3396
/* Expands to __builtin_ia32_cmpq512_mask on A and B (each cast through
 * __m512i to __v8di) with predicate P and M (as __mmask8) as the mask. */
#define _mm512_mask_cmp_epi64_mask(M, A, B, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(A), \
                                        (__v8di)(__m512i)(B), \
                                        (P), (__mmask8)(M)); })
3401
/* Expands to __builtin_ia32_ucmpq512_mask on A and B (each cast through
 * __m512i to __v8di) with predicate P and M (as __mmask8) as the mask. */
#define _mm512_mask_cmp_epu64_mask(M, A, B, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), \
                                         (P), (__mmask8)(M)); })
3406
/* Expands to __builtin_ia32_prold512_mask on A (as __v16si) and B, with a
 * zero vector as the third operand and an all-ones mask; yields __m512i. */
#define _mm512_rol_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(A), (B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })
3412
/* Expands to __builtin_ia32_prold512_mask on A (as __v16si) and B, with W
 * as the third operand and U as the mask; yields __m512i. */
#define _mm512_mask_rol_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(A), (B), \
                                        (__v16si)(W), \
                                        (__mmask16)(U)); })
3417
/* Expands to __builtin_ia32_prold512_mask on A (as __v16si) and B, with a
 * zero vector as the third operand and U as the mask; yields __m512i. */
#define _mm512_maskz_rol_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(A), (B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })
3423
/* Expands to __builtin_ia32_prolq512_mask on A (as __v8di) and B, with a
 * zero vector as the third operand and an all-ones mask; yields __m512i. */
#define _mm512_rol_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(A), (B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })
3429
/* Expands to __builtin_ia32_prolq512_mask on A (as __v8di) and B, with W
 * as the third operand and U as the mask; yields __m512i. */
#define _mm512_mask_rol_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(A), (B), \
                                        (__v8di)(W), \
                                        (__mmask8)(U)); })
3434
/* Expands to __builtin_ia32_prolq512_mask on A (as __v8di) and B, with a
 * zero vector as the third operand and U as the mask; yields __m512i. */
#define _mm512_maskz_rol_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(A), (B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
3440static __inline__ __m512i __DEFAULT_FN_ATTRS
3441_mm512_rolv_epi32 (__m512i __A, __m512i __B)
3442{
3443 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
3444 (__v16si) __B,
3445 (__v16si)
3446 _mm512_setzero_si512 (),
3447 (__mmask16) -1);
3448}
3449
3450static __inline__ __m512i __DEFAULT_FN_ATTRS
3451_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
3452{
3453 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
3454 (__v16si) __B,
3455 (__v16si) __W,
3456 (__mmask16) __U);
3457}
3458
3459static __inline__ __m512i __DEFAULT_FN_ATTRS
3460_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
3461{
3462 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
3463 (__v16si) __B,
3464 (__v16si)
3465 _mm512_setzero_si512 (),
3466 (__mmask16) __U);
3467}
3468
3469static __inline__ __m512i __DEFAULT_FN_ATTRS
3470_mm512_rolv_epi64 (__m512i __A, __m512i __B)
3471{
3472 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
3473 (__v8di) __B,
3474 (__v8di)
3475 _mm512_setzero_si512 (),
3476 (__mmask8) -1);
3477}
3478
3479static __inline__ __m512i __DEFAULT_FN_ATTRS
3480_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
3481{
3482 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
3483 (__v8di) __B,
3484 (__v8di) __W,
3485 (__mmask8) __U);
3486}
3487
3488static __inline__ __m512i __DEFAULT_FN_ATTRS
3489_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
3490{
3491 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
3492 (__v8di) __B,
3493 (__v8di)
3494 _mm512_setzero_si512 (),
3495 (__mmask8) __U);
3496}
3497
/* Expands to __builtin_ia32_prord512_mask on A (as __v16si) and B, with a
 * zero vector as the third operand and an all-ones mask.  The result is cast
 * to __m512i so the macro yields the same type as the _mm512_rol_epi32 macro
 * rather than whatever vector type the builtin returns. */
#define _mm512_ror_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(A), (B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })
3504
/* Expands to __builtin_ia32_prord512_mask on A (as __v16si) and B, with W as
 * the third operand and U as the mask.  The result is cast to __m512i so the
 * macro yields the same type as the _mm512_mask_rol_epi32 macro rather than
 * whatever vector type the builtin returns. */
#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(A), (B), \
                                        (__v16si)(W), \
                                        (__mmask16)(U)); })
3510
/* Expands to __builtin_ia32_prord512_mask on A (as __v16si) and B, with a
 * zero vector as the third operand and U as the mask.  The result is cast to
 * __m512i so the macro yields the same type as the _mm512_maskz_rol_epi32
 * macro rather than whatever vector type the builtin returns. */
#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(A), (B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })
3517
/* Expands to __builtin_ia32_prorq512_mask on A (as __v8di) and B, with a
 * zero vector as the third operand and an all-ones mask.  The result is
 * explicitly cast to __m512i, matching the _mm512_rol_epi64 macro. */
#define _mm512_ror_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(A), (B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })
3524
/* Expands to __builtin_ia32_prorq512_mask on A (as __v8di) and B, with W as
 * the third operand and U as the mask.  The result is explicitly cast to
 * __m512i, matching the _mm512_mask_rol_epi64 macro. */
#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(A), (B), \
                                        (__v8di)(W), \
                                        (__mmask8)(U)); })
3530
/* Expands to __builtin_ia32_prorq512_mask on A (as __v8di) and B, with a
 * zero vector as the third operand and U as the mask.  The result is
 * explicitly cast to __m512i, matching the _mm512_maskz_rol_epi64 macro. */
#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(A), (B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
3537
/* Expands to __builtin_ia32_pslldi512_mask on A (as __v16si) and B, with a
 * zero vector as the third operand and an all-ones mask.  The result is cast
 * to __m512i so the macro yields the header's public integer vector type
 * rather than whatever vector type the builtin returns. */
#define _mm512_slli_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(A), (B), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1); })
3544
/* Expands to __builtin_ia32_pslldi512_mask on A (as __v16si) and B, with W
 * as the third operand and U as the mask.  The result is cast to __m512i so
 * the macro yields the header's public integer vector type rather than
 * whatever vector type the builtin returns. */
#define _mm512_mask_slli_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(A), (B), \
                                         (__v16si)(W), \
                                         (__mmask16)(U)); })
3550
/* Expands to __builtin_ia32_pslldi512_mask on A (as __v16si) and B, with a
 * zero vector as the third operand and U as the mask.  The result is cast to
 * __m512i so the macro yields the header's public integer vector type rather
 * than whatever vector type the builtin returns. */
#define _mm512_maskz_slli_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(A), (B), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)(U)); })
3557
/* Expands to __builtin_ia32_psllqi512_mask on A (as __v8di) and B, with a
 * zero vector as the third operand and an all-ones mask.  The result is
 * explicitly cast to __m512i, the header's public integer vector type. */
#define _mm512_slli_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(A), (B), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1); })
3564
/* Expands to __builtin_ia32_psllqi512_mask on A (as __v8di) and B, with W
 * as the third operand and U as the mask.  The result is explicitly cast to
 * __m512i, the header's public integer vector type. */
#define _mm512_mask_slli_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(A), (B), \
                                         (__v8di)(W), \
                                         (__mmask8)(U)); })
3570
/* Expands to __builtin_ia32_psllqi512_mask on A (as __v8di) and B, with a
 * zero vector as the third operand and U as the mask.  The result is
 * explicitly cast to __m512i, the header's public integer vector type. */
#define _mm512_maskz_slli_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(A), (B), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)(U)); })
3577
3578
3579
/* Expands to __builtin_ia32_psrldi512_mask on A (as __v16si) and B, with a
 * zero vector as the third operand and an all-ones mask.  The result is cast
 * to __m512i so the macro yields the header's public integer vector type
 * rather than whatever vector type the builtin returns. */
#define _mm512_srli_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(A), (B), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1); })
3586
/* Expands to __builtin_ia32_psrldi512_mask on A (as __v16si) and B, with W
 * as the third operand and U as the mask.  The result is cast to __m512i so
 * the macro yields the header's public integer vector type rather than
 * whatever vector type the builtin returns. */
#define _mm512_mask_srli_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(A), (B), \
                                         (__v16si)(W), \
                                         (__mmask16)(U)); })
3592
/* Expands to __builtin_ia32_psrldi512_mask on A (as __v16si) and B, with a
 * zero vector as the third operand and U as the mask.  The result is cast to
 * __m512i so the macro yields the header's public integer vector type rather
 * than whatever vector type the builtin returns. */
#define _mm512_maskz_srli_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(A), (B), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)(U)); })
3599
/* Expands to __builtin_ia32_psrlqi512_mask on A (as __v8di) and B, with a
 * zero vector as the third operand and an all-ones mask.  The result is
 * explicitly cast to __m512i, the header's public integer vector type. */
#define _mm512_srli_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(A), (B), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1); })
3606
/* Expands to __builtin_ia32_psrlqi512_mask on A (as __v8di) and B, with W
 * as the third operand and U as the mask.  The result is explicitly cast to
 * __m512i, the header's public integer vector type. */
#define _mm512_mask_srli_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(A), (B), \
                                         (__v8di)(W), \
                                         (__mmask8)(U)); })
3612
/* Expands to __builtin_ia32_psrlqi512_mask on A (as __v8di) and B, with a
 * zero vector as the third operand and U as the mask.  The result is
 * explicitly cast to __m512i, the header's public integer vector type. */
#define _mm512_maskz_srli_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(A), (B), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)(U)); })
3619
3620static __inline__ __m512i __DEFAULT_FN_ATTRS
3621_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
3622{
3623 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
3624 (__v16si) __W,
3625 (__mmask16) __U);
3626}
3627
3628static __inline__ __m512i __DEFAULT_FN_ATTRS
3629_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
3630{
3631 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
3632 (__v16si)
3633 _mm512_setzero_si512 (),
3634 (__mmask16) __U);
3635}
3636
3637static __inline__ void __DEFAULT_FN_ATTRS
3638_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
3639{
3640 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
3641 (__mmask16) __U);
3642}
3643
3644static __inline__ __m512i __DEFAULT_FN_ATTRS
3645_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3646{
3647 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
3648 (__v8di) __W,
3649 (__mmask8) __U);
3650}
3651
3652static __inline__ __m512i __DEFAULT_FN_ATTRS
3653_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
3654{
3655 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
3656 (__v8di)
3657 _mm512_setzero_si512 (),
3658 (__mmask8) __U);
3659}
3660
3661static __inline__ __m512i __DEFAULT_FN_ATTRS
3662_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
3663{
3664 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
3665 (__v8di) __W,
3666 (__mmask8) __U);
3667}
3668
3669static __inline__ __m512i __DEFAULT_FN_ATTRS
3670_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
3671{
3672 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
3673 (__v8di)
3674 _mm512_setzero_si512 (),
3675 (__mmask8) __U);
3676}
3677
3678static __inline__ void __DEFAULT_FN_ATTRS
3679_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
3680{
3681 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
3682 (__mmask8) __U);
3683}
3684
3685
3686
3687static __inline__ __m512d __DEFAULT_FN_ATTRS
3688_mm512_movedup_pd (__m512d __A)
3689{
3690 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
3691 (__v8df)
3692 _mm512_undefined_pd (),
3693 (__mmask8) -1);
3694}
3695
3696static __inline__ __m512d __DEFAULT_FN_ATTRS
3697_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
3698{
3699 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
3700 (__v8df) __W,
3701 (__mmask8) __U);
3702}
3703
3704static __inline__ __m512d __DEFAULT_FN_ATTRS
3705_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
3706{
3707 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
3708 (__v8df)
3709 _mm512_setzero_pd (),
3710 (__mmask8) __U);
3711}
3712
/* Expands to __builtin_ia32_fixupimmpd512_mask on A, B (as __v8df) and C
 * (as __v8di) with immediate imm, an all-ones mask and rounding operand R. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_mask((__v8df)(A), (__v8df)(B), \
                                    (__v8di)(C), (imm), \
                                    (__mmask8)-1, (R)); })
3720
/* Expands to __builtin_ia32_fixupimmpd512_mask on A, B (as __v8df) and C
 * (as __v8di) with immediate imm, mask U and rounding operand R. */
#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_mask((__v8df)(A), (__v8df)(B), \
                                    (__v8di)(C), (imm), \
                                    (__mmask8)(U), (R)); })
3728
/* Expands to __builtin_ia32_fixupimmpd512_mask on A, B (as __v8df) and C
 * (as __v8di) with immediate imm, an all-ones mask and
 * _MM_FROUND_CUR_DIRECTION as the rounding operand. */
#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_mask((__v8df)(A), (__v8df)(B), \
                                    (__v8di)(C), (imm), \
                                    (__mmask8)-1, \
                                    _MM_FROUND_CUR_DIRECTION); })
3737
/* Expands to __builtin_ia32_fixupimmpd512_mask on A, B (as __v8df) and C
 * (as __v8di) with immediate imm, mask U and _MM_FROUND_CUR_DIRECTION as the
 * rounding operand. */
#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_mask((__v8df)(A), (__v8df)(B), \
                                    (__v8di)(C), (imm), \
                                    (__mmask8)(U), \
                                    _MM_FROUND_CUR_DIRECTION); })
3746
/* Expands to __builtin_ia32_fixupimmpd512_maskz on A, B (as __v8df) and C
 * (as __v8di) with immediate imm, mask U and rounding operand R. */
#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_maskz((__v8df)(A), (__v8df)(B), \
                                     (__v8di)(C), (imm), \
                                     (__mmask8)(U), (R)); })
3754
/* Expands to __builtin_ia32_fixupimmpd512_maskz on A, B (as __v8df) and C
 * (as __v8di) with immediate imm, mask U and _MM_FROUND_CUR_DIRECTION as the
 * rounding operand. */
#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_maskz((__v8df)(A), (__v8df)(B), \
                                     (__v8di)(C), (imm), \
                                     (__mmask8)(U), \
                                     _MM_FROUND_CUR_DIRECTION); })
3763
/* Expands to __builtin_ia32_fixupimmps512_mask on A, B (as __v16sf) and C
 * (as __v16si) with immediate imm, an all-ones mask and rounding operand R. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmps512_mask((__v16sf)(A), (__v16sf)(B), \
                                    (__v16si)(C), (imm), \
                                    (__mmask16)-1, (R)); })
3771
/* Expands to __builtin_ia32_fixupimmps512_mask on A, B (as __v16sf) and C
 * (as __v16si) with immediate imm, mask U and rounding operand R. */
#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmps512_mask((__v16sf)(A), (__v16sf)(B), \
                                    (__v16si)(C), (imm), \
                                    (__mmask16)(U), (R)); })
3779
/* Expands to __builtin_ia32_fixupimmps512_mask on A, B (as __v16sf) and C
 * (as __v16si) with immediate imm, an all-ones mask and
 * _MM_FROUND_CUR_DIRECTION as the rounding operand. */
#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmps512_mask((__v16sf)(A), (__v16sf)(B), \
                                    (__v16si)(C), (imm), \
                                    (__mmask16)-1, \
                                    _MM_FROUND_CUR_DIRECTION); })
3788
/* Expands to __builtin_ia32_fixupimmps512_mask on A, B (as __v16sf) and C
 * (as __v16si) with immediate imm, mask U and _MM_FROUND_CUR_DIRECTION as
 * the rounding operand. */
#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmps512_mask((__v16sf)(A), (__v16sf)(B), \
                                    (__v16si)(C), (imm), \
                                    (__mmask16)(U), \
                                    _MM_FROUND_CUR_DIRECTION); })
3797
/* Expands to __builtin_ia32_fixupimmps512_maskz on A, B (as __v16sf) and C
 * (as __v16si) with immediate imm, mask U and rounding operand R. */
#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmps512_maskz((__v16sf)(A), (__v16sf)(B), \
                                     (__v16si)(C), (imm), \
                                     (__mmask16)(U), (R)); })
3805
/* Expands to __builtin_ia32_fixupimmps512_maskz on A, B (as __v16sf) and C
 * (as __v16si) with immediate imm, mask U and _MM_FROUND_CUR_DIRECTION as
 * the rounding operand. */
#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmps512_maskz((__v16sf)(A), (__v16sf)(B), \
                                     (__v16si)(C), (imm), \
                                     (__mmask16)(U), \
                                     _MM_FROUND_CUR_DIRECTION); })
3814
/* Expands to __builtin_ia32_fixupimmsd_mask on A, B (as __v2df) and C
 * (as __v2di) with immediate imm, an all-ones mask and rounding operand R.
 * Every macro argument, including imm, is parenthesized in the expansion. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask((__v2df)(A), (__v2df)(B), \
                                 (__v2di)(C), (imm), \
                                 (__mmask8)-1, (R)); })
3821
/* Expands to __builtin_ia32_fixupimmsd_mask on A, B (as __v2df) and C
 * (as __v2di) with immediate imm, mask U and rounding operand R.
 * Every macro argument, including imm, is parenthesized in the expansion. */
#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask((__v2df)(A), (__v2df)(B), \
                                 (__v2di)(C), (imm), \
                                 (__mmask8)(U), (R)); })
3828
/* Expands to __builtin_ia32_fixupimmsd_mask on A, B (as __v2df) and C
 * (as __v2di) with immediate imm, an all-ones mask and
 * _MM_FROUND_CUR_DIRECTION as the rounding operand. */
#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask((__v2df)(A), (__v2df)(B), \
                                 (__v2di)(C), (imm), \
                                 (__mmask8)-1, \
                                 _MM_FROUND_CUR_DIRECTION); })
3836
/* Expands to __builtin_ia32_fixupimmsd_mask on A, B (as __v2df) and C
 * (as __v2di) with immediate imm, mask U and _MM_FROUND_CUR_DIRECTION as the
 * rounding operand. */
#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask((__v2df)(A), (__v2df)(B), \
                                 (__v2di)(C), (imm), \
                                 (__mmask8)(U), \
                                 _MM_FROUND_CUR_DIRECTION); })
3844
/* Expands to __builtin_ia32_fixupimmsd_maskz on A, B (as __v2df) and C
 * (as __v2di) with immediate imm, mask U and rounding operand R.
 * Every macro argument, including imm, is parenthesized in the expansion. */
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmsd_maskz((__v2df)(A), (__v2df)(B), \
                                  (__v2di)(C), (imm), \
                                  (__mmask8)(U), (R)); })
3852
/* Expands to __builtin_ia32_fixupimmsd_maskz on A, B (as __v2df) and C
 * (as __v2di) with immediate imm, mask U and _MM_FROUND_CUR_DIRECTION as the
 * rounding operand. */
#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmsd_maskz((__v2df)(A), (__v2df)(B), \
                                  (__v2di)(C), (imm), \
                                  (__mmask8)(U), \
                                  _MM_FROUND_CUR_DIRECTION); })
3861
/* Expands to __builtin_ia32_fixupimmss_mask on A, B (as __v4sf) and C
 * (as __v4si) with immediate imm, an all-ones mask and rounding operand R. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmss_mask((__v4sf)(A), (__v4sf)(B), \
                                 (__v4si)(C), (imm), \
                                 (__mmask8)-1, (R)); })
3868
/* Expands to __builtin_ia32_fixupimmss_mask on A, B (as __v4sf) and C
 * (as __v4si) with immediate imm, mask U and rounding operand R. */
#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmss_mask((__v4sf)(A), (__v4sf)(B), \
                                 (__v4si)(C), (imm), \
                                 (__mmask8)(U), (R)); })
3875
/* Expands to __builtin_ia32_fixupimmss_mask on A, B (as __v4sf) and C
 * (as __v4si) with immediate imm, an all-ones mask and
 * _MM_FROUND_CUR_DIRECTION as the rounding operand. */
#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmss_mask((__v4sf)(A), (__v4sf)(B), \
                                 (__v4si)(C), (imm), \
                                 (__mmask8)-1, \
                                 _MM_FROUND_CUR_DIRECTION); })
3883
/* Expands to __builtin_ia32_fixupimmss_mask on A, B (as __v4sf) and C
 * (as __v4si) with immediate imm, mask U and _MM_FROUND_CUR_DIRECTION as the
 * rounding operand. */
#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmss_mask((__v4sf)(A), (__v4sf)(B), \
                                 (__v4si)(C), (imm), \
                                 (__mmask8)(U), \
                                 _MM_FROUND_CUR_DIRECTION); })
3891
/* Expands to __builtin_ia32_fixupimmss_maskz on A, B (as __v4sf) and C
 * (as __v4si) with immediate imm, mask U and rounding operand R. */
#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmss_maskz((__v4sf)(A), (__v4sf)(B), \
                                  (__v4si)(C), (imm), \
                                  (__mmask8)(U), (R)); })
3898
/* Expands to __builtin_ia32_fixupimmss_maskz on A, B (as __v4sf) and C
 * (as __v4si) with immediate imm, mask U and _MM_FROUND_CUR_DIRECTION as the
 * rounding operand. */
#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmss_maskz((__v4sf)(A), (__v4sf)(B), \
                                  (__v4si)(C), (imm), \
                                  (__mmask8)(U), \
                                  _MM_FROUND_CUR_DIRECTION); })
3906
/* Expands to __builtin_ia32_getexpsd128_round_mask on A and B (as __v2df)
 * with a zero vector as the third operand, an all-ones mask and rounding
 * operand R. */
#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
  __builtin_ia32_getexpsd128_round_mask((__v2df)(A), (__v2df)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (R)); })
3912
3913
3914static __inline__ __m128d __DEFAULT_FN_ATTRS
3915_mm_getexp_sd (__m128d __A, __m128d __B)
3916{
3917 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
3918 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
3919}
3920
/* Expands to __builtin_ia32_getexpss128_round_mask on A and B (as __v4sf)
 * with a zero vector as the third operand, an all-ones mask and rounding
 * operand R. */
#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
  __builtin_ia32_getexpss128_round_mask((__v4sf)(A), (__v4sf)(B), \
                                        (__v4sf)_mm_setzero_ps(), \
                                        (__mmask8)-1, (R)); })
3926
3927static __inline__ __m128 __DEFAULT_FN_ATTRS
3928_mm_getexp_ss (__m128 __A, __m128 __B)
3929{
3930 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
3931 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
3932}
3933
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B (as __v2df).
 * The immediate is built as (D << 2) | C; the remaining operands are a zero
 * vector, an all-ones mask and rounding operand R. */
#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(A), (__v2df)(B), \
                                      ((D) << 2) | (C), \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)-1, (R)); })
3940
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B (as __v2df).
 * The immediate is built as (D << 2) | C; the remaining operands are a zero
 * vector, an all-ones mask and _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_sd(A, B, C, D) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(A), (__v2df)(B), \
                                      ((D) << 2) | (C), \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })
3947
/* Expands to __builtin_ia32_getmantss_round_mask on A and B (as __v4sf).
 * The immediate is built as (D << 2) | C; the remaining operands are a zero
 * vector, an all-ones mask and rounding operand R. */
#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(A), (__v4sf)(B), \
                                      ((D) << 2) | (C), \
                                      (__v4sf)_mm_setzero_ps(), \
                                      (__mmask8)-1, (R)); })
3954
/* Expands to __builtin_ia32_getmantss_round_mask on A and B (as __v4sf).
 * The immediate is built as (D << 2) | C; the remaining operands are a zero
 * vector, an all-ones mask and _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(A), (__v4sf)(B), \
                                      ((D) << 2) | (C), \
                                      (__v4sf)_mm_setzero_ps(), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })
3961
Ben Murdoch097c5b22016-05-18 11:27:45 +01003962
3963static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3964_mm512_kmov (__mmask16 __A)
3965{
3966 return __A;
3967}
3968
/* Expands to __builtin_ia32_vcomisd on A and B (as __v2df) with predicate
 * operand P and rounding operand R. */
#define _mm_comi_round_sd(A, B, P, R) __extension__ ({ \
  __builtin_ia32_vcomisd((__v2df)(A), (__v2df)(B), (P), (R)); })
3972
/* Expands to __builtin_ia32_vcomiss on A and B (as __v4sf) with predicate
 * operand P and rounding operand R. */
#define _mm_comi_round_ss(A, B, P, R) __extension__ ({ \
  __builtin_ia32_vcomiss((__v4sf)(A), (__v4sf)(B), (P), (R)); })
3976
Ben Murdoch097c5b22016-05-18 11:27:45 +01003977
3978#undef __DEFAULT_FN_ATTRS
3979
3980#endif // __AVX512FINTRIN_H