blob: 6500bb5ef55f60cfa1b4866ddedc9b163e9516a0 [file] [log] [blame]
Ben Murdoch097c5b22016-05-18 11:27:45 +01001/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Element-typed 64-byte vectors used when casting operands for builtins. */
typedef double    __v8df  __attribute__((__vector_size__(64)));
typedef float     __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di  __attribute__((__vector_size__(64)));
typedef int       __v16si __attribute__((__vector_size__(64)));

/* Public 64-byte vector types: float, double and integer flavours. */
typedef float     __m512  __attribute__((__vector_size__(64)));
typedef double    __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Mask types passed as the mask operand of the masked builtins. */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
41
/* Rounding mode macros.  _MM_FROUND_CUR_DIRECTION is the value this header
 * passes as the rounding operand of the non-"round" intrinsics. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
48
/* Four-letter permutation selectors.  Each letter A..D encodes a 2-bit value
 * (A = 0, B = 1, C = 2, D = 3); the leftmost letter occupies bits 7:6 and the
 * rightmost letter bits 1:0.  Each row below holds the four enumerators that
 * share the same three leading letters. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
138
/* Mantissa normalization interval selectors (values 0..3 in order). */
typedef enum
{
  _MM_MANT_NORM_1_2     = 0, /* interval [1, 2) */
  _MM_MANT_NORM_p5_2    = 1, /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1    = 2, /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3  /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
146
/* Mantissa sign-control selectors (values 0..2 in order). */
typedef enum
{
  _MM_MANT_SIGN_src  = 0, /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1, /* sign = 0 */
  _MM_MANT_SIGN_nan  = 2  /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
153
/* Attribute set applied to every intrinsic function defined in this file:
 * always inlined, no debug info, compiled for the "avx512f" target. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
156
157/* Create vectors with repeated elements */
158
159static __inline __m512i __DEFAULT_FN_ATTRS
160_mm512_setzero_si512(void)
161{
162 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
163}
164
165static __inline__ __m512d __DEFAULT_FN_ATTRS
166_mm512_undefined_pd()
167{
168 return (__m512d)__builtin_ia32_undef512();
169}
170
171static __inline__ __m512 __DEFAULT_FN_ATTRS
172_mm512_undefined()
173{
174 return (__m512)__builtin_ia32_undef512();
175}
176
177static __inline__ __m512 __DEFAULT_FN_ATTRS
178_mm512_undefined_ps()
179{
180 return (__m512)__builtin_ia32_undef512();
181}
182
183static __inline__ __m512i __DEFAULT_FN_ATTRS
184_mm512_undefined_epi32()
185{
186 return (__m512i)__builtin_ia32_undef512();
187}
Ben Murdoch61f157c2016-09-16 13:49:30 +0100188static __inline__ __m512i __DEFAULT_FN_ATTRS
189_mm512_broadcastd_epi32 (__m128i __A)
190{
191 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
192 (__v16si)
193 _mm512_undefined_epi32 (),
194 (__mmask16) -1);
195}
196
197static __inline__ __m512i __DEFAULT_FN_ATTRS
198_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
199{
200 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
201 (__v16si) __O, __M);
202}
203
204static __inline__ __m512i __DEFAULT_FN_ATTRS
205_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
206{
207 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
208 (__v16si)
209 _mm512_setzero_si512 (),
210 __M);
211}
212
213static __inline__ __m512i __DEFAULT_FN_ATTRS
214_mm512_broadcastq_epi64 (__m128i __A)
215{
216 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
217 (__v8di)
218 _mm512_undefined_pd (),
219 (__mmask8) -1);
220}
221
222static __inline__ __m512i __DEFAULT_FN_ATTRS
223_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
224{
225 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
226 (__v8di) __O, __M);
227}
228
229static __inline__ __m512i __DEFAULT_FN_ATTRS
230_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
231{
232 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
233 (__v8di)
234 _mm512_setzero_si512 (),
235 __M);
236}
Ben Murdoch097c5b22016-05-18 11:27:45 +0100237
238static __inline __m512i __DEFAULT_FN_ATTRS
239_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
240{
241 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
242 (__v16si)
243 _mm512_setzero_si512 (),
244 __M);
245}
246
247static __inline __m512i __DEFAULT_FN_ATTRS
248_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
249{
250#ifdef __x86_64__
251 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
252 (__v8di)
253 _mm512_setzero_si512 (),
254 __M);
255#else
256 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
257 (__v8di)
258 _mm512_setzero_si512 (),
259 __M);
260#endif
261}
262
263static __inline __m512 __DEFAULT_FN_ATTRS
264_mm512_setzero_ps(void)
265{
266 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
267 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
268}
269static __inline __m512d __DEFAULT_FN_ATTRS
270_mm512_setzero_pd(void)
271{
272 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
273}
274
275static __inline __m512 __DEFAULT_FN_ATTRS
276_mm512_set1_ps(float __w)
277{
278 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
279 __w, __w, __w, __w, __w, __w, __w, __w };
280}
281
282static __inline __m512d __DEFAULT_FN_ATTRS
283_mm512_set1_pd(double __w)
284{
285 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
286}
287
288static __inline __m512i __DEFAULT_FN_ATTRS
289_mm512_set1_epi32(int __s)
290{
291 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
292 __s, __s, __s, __s, __s, __s, __s, __s };
293}
294
295static __inline __m512i __DEFAULT_FN_ATTRS
296_mm512_set1_epi64(long long __d)
297{
298 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
299}
300
301static __inline__ __m512 __DEFAULT_FN_ATTRS
302_mm512_broadcastss_ps(__m128 __X)
303{
304 float __f = __X[0];
305 return (__v16sf){ __f, __f, __f, __f,
306 __f, __f, __f, __f,
307 __f, __f, __f, __f,
308 __f, __f, __f, __f };
309}
310
311static __inline__ __m512d __DEFAULT_FN_ATTRS
312_mm512_broadcastsd_pd(__m128d __X)
313{
314 double __d = __X[0];
315 return (__v8df){ __d, __d, __d, __d,
316 __d, __d, __d, __d };
317}
318
319/* Cast between vector types */
320
321static __inline __m512d __DEFAULT_FN_ATTRS
322_mm512_castpd256_pd512(__m256d __a)
323{
324 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
325}
326
327static __inline __m512 __DEFAULT_FN_ATTRS
328_mm512_castps256_ps512(__m256 __a)
329{
330 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
331 -1, -1, -1, -1, -1, -1, -1, -1);
332}
333
334static __inline __m128d __DEFAULT_FN_ATTRS
335_mm512_castpd512_pd128(__m512d __a)
336{
337 return __builtin_shufflevector(__a, __a, 0, 1);
338}
339
340static __inline __m128 __DEFAULT_FN_ATTRS
341_mm512_castps512_ps128(__m512 __a)
342{
343 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
344}
345
Ben Murdoch61f157c2016-09-16 13:49:30 +0100346
347static __inline__ __m512d __DEFAULT_FN_ATTRS
348_mm512_castpd128_pd512 (__m128d __A)
349{
350 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
351}
352
353static __inline__ __m512 __DEFAULT_FN_ATTRS
354_mm512_castps128_ps512 (__m128 __A)
355{
356 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
357}
358
359static __inline__ __m512i __DEFAULT_FN_ATTRS
360_mm512_castsi128_si512 (__m128i __A)
361{
362 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
363}
364
365static __inline__ __m512i __DEFAULT_FN_ATTRS
366_mm512_castsi256_si512 (__m256i __A)
367{
368 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
369}
370
Ben Murdoch097c5b22016-05-18 11:27:45 +0100371/* Bitwise operators */
372static __inline__ __m512i __DEFAULT_FN_ATTRS
373_mm512_and_epi32(__m512i __a, __m512i __b)
374{
375 return __a & __b;
376}
377
378static __inline__ __m512i __DEFAULT_FN_ATTRS
379_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
380{
381 return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
382 (__v16si) __b,
383 (__v16si) __src,
384 (__mmask16) __k);
385}
386static __inline__ __m512i __DEFAULT_FN_ATTRS
387_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
388{
389 return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
390 (__v16si) __b,
391 (__v16si)
392 _mm512_setzero_si512 (),
393 (__mmask16) __k);
394}
395
396static __inline__ __m512i __DEFAULT_FN_ATTRS
397_mm512_and_epi64(__m512i __a, __m512i __b)
398{
399 return __a & __b;
400}
401
402static __inline__ __m512i __DEFAULT_FN_ATTRS
403_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
404{
405 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
406 (__v8di) __b,
407 (__v8di) __src,
408 (__mmask8) __k);
409}
410static __inline__ __m512i __DEFAULT_FN_ATTRS
411_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
412{
413 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
414 (__v8di) __b,
415 (__v8di)
416 _mm512_setzero_si512 (),
417 (__mmask8) __k);
418}
419
420static __inline__ __m512i __DEFAULT_FN_ATTRS
421_mm512_andnot_epi32 (__m512i __A, __m512i __B)
422{
423 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
424 (__v16si) __B,
425 (__v16si)
426 _mm512_setzero_si512 (),
427 (__mmask16) -1);
428}
429
430static __inline__ __m512i __DEFAULT_FN_ATTRS
431_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
432{
433 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
434 (__v16si) __B,
435 (__v16si) __W,
436 (__mmask16) __U);
437}
438
439static __inline__ __m512i __DEFAULT_FN_ATTRS
440_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
441{
442 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
443 (__v16si) __B,
444 (__v16si)
445 _mm512_setzero_si512 (),
446 (__mmask16) __U);
447}
448
449static __inline__ __m512i __DEFAULT_FN_ATTRS
450_mm512_andnot_epi64 (__m512i __A, __m512i __B)
451{
452 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
453 (__v8di) __B,
454 (__v8di)
455 _mm512_setzero_si512 (),
456 (__mmask8) -1);
457}
458
459static __inline__ __m512i __DEFAULT_FN_ATTRS
460_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
461{
462 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
463 (__v8di) __B,
464 (__v8di) __W, __U);
465}
466
467static __inline__ __m512i __DEFAULT_FN_ATTRS
468_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
469{
470 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
471 (__v8di) __B,
472 (__v8di)
473 _mm512_setzero_pd (),
474 __U);
475}
476static __inline__ __m512i __DEFAULT_FN_ATTRS
477_mm512_or_epi32(__m512i __a, __m512i __b)
478{
479 return __a | __b;
480}
481
482static __inline__ __m512i __DEFAULT_FN_ATTRS
483_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
484{
485 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
486 (__v16si) __b,
487 (__v16si) __src,
488 (__mmask16) __k);
489}
490static __inline__ __m512i __DEFAULT_FN_ATTRS
491_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
492{
493 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
494 (__v16si) __b,
495 (__v16si)
496 _mm512_setzero_si512 (),
497 (__mmask16) __k);
498}
499
500static __inline__ __m512i __DEFAULT_FN_ATTRS
501_mm512_or_epi64(__m512i __a, __m512i __b)
502{
503 return __a | __b;
504}
505
506static __inline__ __m512i __DEFAULT_FN_ATTRS
507_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
508{
509 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
510 (__v8di) __b,
511 (__v8di) __src,
512 (__mmask8) __k);
513}
514static __inline__ __m512i __DEFAULT_FN_ATTRS
515_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
516{
517 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
518 (__v8di) __b,
519 (__v8di)
520 _mm512_setzero_si512 (),
521 (__mmask8) __k);
522}
523
524static __inline__ __m512i __DEFAULT_FN_ATTRS
525_mm512_xor_epi32(__m512i __a, __m512i __b)
526{
527 return __a ^ __b;
528}
529
530static __inline__ __m512i __DEFAULT_FN_ATTRS
531_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
532{
533 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
534 (__v16si) __b,
535 (__v16si) __src,
536 (__mmask16) __k);
537}
538static __inline__ __m512i __DEFAULT_FN_ATTRS
539_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
540{
541 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
542 (__v16si) __b,
543 (__v16si)
544 _mm512_setzero_si512 (),
545 (__mmask16) __k);
546}
547
548static __inline__ __m512i __DEFAULT_FN_ATTRS
549_mm512_xor_epi64(__m512i __a, __m512i __b)
550{
551 return __a ^ __b;
552}
553
554static __inline__ __m512i __DEFAULT_FN_ATTRS
555_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
556{
557 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
558 (__v8di) __b,
559 (__v8di) __src,
560 (__mmask8) __k);
561}
562static __inline__ __m512i __DEFAULT_FN_ATTRS
563_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
564{
565 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
566 (__v8di) __b,
567 (__v8di)
568 _mm512_setzero_si512 (),
569 (__mmask8) __k);
570}
571
572static __inline__ __m512i __DEFAULT_FN_ATTRS
573_mm512_and_si512(__m512i __a, __m512i __b)
574{
575 return __a & __b;
576}
577
578static __inline__ __m512i __DEFAULT_FN_ATTRS
579_mm512_or_si512(__m512i __a, __m512i __b)
580{
581 return __a | __b;
582}
583
584static __inline__ __m512i __DEFAULT_FN_ATTRS
585_mm512_xor_si512(__m512i __a, __m512i __b)
586{
587 return __a ^ __b;
588}
589/* Arithmetic */
590
591static __inline __m512d __DEFAULT_FN_ATTRS
592_mm512_add_pd(__m512d __a, __m512d __b)
593{
594 return __a + __b;
595}
596
597static __inline __m512 __DEFAULT_FN_ATTRS
598_mm512_add_ps(__m512 __a, __m512 __b)
599{
600 return __a + __b;
601}
602
603static __inline __m512d __DEFAULT_FN_ATTRS
604_mm512_mul_pd(__m512d __a, __m512d __b)
605{
606 return __a * __b;
607}
608
609static __inline __m512 __DEFAULT_FN_ATTRS
610_mm512_mul_ps(__m512 __a, __m512 __b)
611{
612 return __a * __b;
613}
614
615static __inline __m512d __DEFAULT_FN_ATTRS
616_mm512_sub_pd(__m512d __a, __m512d __b)
617{
618 return __a - __b;
619}
620
621static __inline __m512 __DEFAULT_FN_ATTRS
622_mm512_sub_ps(__m512 __a, __m512 __b)
623{
624 return __a - __b;
625}
626
627static __inline__ __m512i __DEFAULT_FN_ATTRS
628_mm512_add_epi64 (__m512i __A, __m512i __B)
629{
630 return (__m512i) ((__v8di) __A + (__v8di) __B);
631}
632
633static __inline__ __m512i __DEFAULT_FN_ATTRS
634_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
635{
636 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
637 (__v8di) __B,
638 (__v8di) __W,
639 (__mmask8) __U);
640}
641
642static __inline__ __m512i __DEFAULT_FN_ATTRS
643_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
644{
645 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
646 (__v8di) __B,
647 (__v8di)
648 _mm512_setzero_si512 (),
649 (__mmask8) __U);
650}
651
652static __inline__ __m512i __DEFAULT_FN_ATTRS
653_mm512_sub_epi64 (__m512i __A, __m512i __B)
654{
655 return (__m512i) ((__v8di) __A - (__v8di) __B);
656}
657
658static __inline__ __m512i __DEFAULT_FN_ATTRS
659_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
660{
661 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
662 (__v8di) __B,
663 (__v8di) __W,
664 (__mmask8) __U);
665}
666
667static __inline__ __m512i __DEFAULT_FN_ATTRS
668_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
669{
670 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
671 (__v8di) __B,
672 (__v8di)
673 _mm512_setzero_si512 (),
674 (__mmask8) __U);
675}
676
677static __inline__ __m512i __DEFAULT_FN_ATTRS
678_mm512_add_epi32 (__m512i __A, __m512i __B)
679{
680 return (__m512i) ((__v16si) __A + (__v16si) __B);
681}
682
683static __inline__ __m512i __DEFAULT_FN_ATTRS
684_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
685{
686 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
687 (__v16si) __B,
688 (__v16si) __W,
689 (__mmask16) __U);
690}
691
692static __inline__ __m512i __DEFAULT_FN_ATTRS
693_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
694{
695 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
696 (__v16si) __B,
697 (__v16si)
698 _mm512_setzero_si512 (),
699 (__mmask16) __U);
700}
701
702static __inline__ __m512i __DEFAULT_FN_ATTRS
703_mm512_sub_epi32 (__m512i __A, __m512i __B)
704{
705 return (__m512i) ((__v16si) __A - (__v16si) __B);
706}
707
708static __inline__ __m512i __DEFAULT_FN_ATTRS
709_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
710{
711 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
712 (__v16si) __B,
713 (__v16si) __W,
714 (__mmask16) __U);
715}
716
717static __inline__ __m512i __DEFAULT_FN_ATTRS
718_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
719{
720 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
721 (__v16si) __B,
722 (__v16si)
723 _mm512_setzero_si512 (),
724 (__mmask16) __U);
725}
726
727static __inline__ __m512d __DEFAULT_FN_ATTRS
728_mm512_max_pd(__m512d __A, __m512d __B)
729{
730 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
731 (__v8df) __B,
732 (__v8df)
733 _mm512_setzero_pd (),
734 (__mmask8) -1,
735 _MM_FROUND_CUR_DIRECTION);
736}
737
738static __inline__ __m512 __DEFAULT_FN_ATTRS
739_mm512_max_ps(__m512 __A, __m512 __B)
740{
741 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
742 (__v16sf) __B,
743 (__v16sf)
744 _mm512_setzero_ps (),
745 (__mmask16) -1,
746 _MM_FROUND_CUR_DIRECTION);
747}
748
749static __inline__ __m128 __DEFAULT_FN_ATTRS
750_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
751 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
752 (__v4sf) __B,
753 (__v4sf) __W,
754 (__mmask8) __U,
755 _MM_FROUND_CUR_DIRECTION);
756}
757
758static __inline__ __m128 __DEFAULT_FN_ATTRS
759_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
760 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
761 (__v4sf) __B,
762 (__v4sf) _mm_setzero_ps (),
763 (__mmask8) __U,
764 _MM_FROUND_CUR_DIRECTION);
765}
766
/* Macro forms of the scalar single-precision max that forward an explicit
 * rounding operand __R to __builtin_ia32_maxss_round_mask:
 *   _mm_max_round_ss       - zero vector as third operand, all-ones mask
 *   _mm_mask_max_round_ss  - __W as third operand, __U as mask
 *   _mm_maskz_max_round_ss - zero vector as third operand, __U as mask
 * Every macro argument is parenthesized so that the casts apply to the whole
 * argument expression (e.g. a conditional expression), not just its first
 * operand. */
#define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round_mask ((__v4sf)(__A), (__v4sf)(__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

#define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round_mask ((__v4sf)(__A), (__v4sf)(__B), \
                (__v4sf)(__W), (__mmask8)(__U), (__R)); })

#define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round_mask ((__v4sf)(__A), (__v4sf)(__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8)(__U), (__R)); })
778
779static __inline__ __m128d __DEFAULT_FN_ATTRS
780_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
781 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
782 (__v2df) __B,
783 (__v2df) __W,
784 (__mmask8) __U,
785 _MM_FROUND_CUR_DIRECTION);
786}
787
788static __inline__ __m128d __DEFAULT_FN_ATTRS
789_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
790 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
791 (__v2df) __B,
792 (__v2df) _mm_setzero_pd (),
793 (__mmask8) __U,
794 _MM_FROUND_CUR_DIRECTION);
795}
796
/* Macro forms of the scalar double-precision max that forward an explicit
 * rounding operand __R to __builtin_ia32_maxsd_round_mask:
 *   _mm_max_round_sd       - zero vector as third operand, all-ones mask
 *   _mm_mask_max_round_sd  - __W as third operand, __U as mask
 *   _mm_maskz_max_round_sd - zero vector as third operand, __U as mask
 * Every macro argument is parenthesized so that the casts apply to the whole
 * argument expression (e.g. a conditional expression), not just its first
 * operand. */
#define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df)(__A), (__v2df)(__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df)(__A), (__v2df)(__B), \
                (__v2df)(__W), (__mmask8)(__U), (__R)); })

#define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df)(__A), (__v2df)(__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8)(__U), (__R)); })
808
809static __inline __m512i
810__DEFAULT_FN_ATTRS
811_mm512_max_epi32(__m512i __A, __m512i __B)
812{
813 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
814 (__v16si) __B,
815 (__v16si)
816 _mm512_setzero_si512 (),
817 (__mmask16) -1);
818}
819
820static __inline __m512i __DEFAULT_FN_ATTRS
821_mm512_max_epu32(__m512i __A, __m512i __B)
822{
823 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
824 (__v16si) __B,
825 (__v16si)
826 _mm512_setzero_si512 (),
827 (__mmask16) -1);
828}
829
830static __inline __m512i __DEFAULT_FN_ATTRS
831_mm512_max_epi64(__m512i __A, __m512i __B)
832{
833 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
834 (__v8di) __B,
835 (__v8di)
836 _mm512_setzero_si512 (),
837 (__mmask8) -1);
838}
839
840static __inline __m512i __DEFAULT_FN_ATTRS
841_mm512_max_epu64(__m512i __A, __m512i __B)
842{
843 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
844 (__v8di) __B,
845 (__v8di)
846 _mm512_setzero_si512 (),
847 (__mmask8) -1);
848}
849
850static __inline__ __m512d __DEFAULT_FN_ATTRS
851_mm512_min_pd(__m512d __A, __m512d __B)
852{
853 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
854 (__v8df) __B,
855 (__v8df)
856 _mm512_setzero_pd (),
857 (__mmask8) -1,
858 _MM_FROUND_CUR_DIRECTION);
859}
860
861static __inline__ __m512 __DEFAULT_FN_ATTRS
862_mm512_min_ps(__m512 __A, __m512 __B)
863{
864 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
865 (__v16sf) __B,
866 (__v16sf)
867 _mm512_setzero_ps (),
868 (__mmask16) -1,
869 _MM_FROUND_CUR_DIRECTION);
870}
871
872static __inline__ __m128 __DEFAULT_FN_ATTRS
873_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
874 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
875 (__v4sf) __B,
876 (__v4sf) __W,
877 (__mmask8) __U,
878 _MM_FROUND_CUR_DIRECTION);
879}
880
881static __inline__ __m128 __DEFAULT_FN_ATTRS
882_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
883 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
884 (__v4sf) __B,
885 (__v4sf) _mm_setzero_ps (),
886 (__mmask8) __U,
887 _MM_FROUND_CUR_DIRECTION);
888}
889
/* Macro forms of the scalar single-precision min that forward an explicit
 * rounding operand __R to __builtin_ia32_minss_round_mask:
 *   _mm_min_round_ss       - zero vector as third operand, all-ones mask
 *   _mm_mask_min_round_ss  - __W as third operand, __U as mask
 *   _mm_maskz_min_round_ss - zero vector as third operand, __U as mask
 * Every macro argument is parenthesized so that the casts apply to the whole
 * argument expression (e.g. a conditional expression), not just its first
 * operand. */
#define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round_mask ((__v4sf)(__A), (__v4sf)(__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

#define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round_mask ((__v4sf)(__A), (__v4sf)(__B), \
                (__v4sf)(__W), (__mmask8)(__U), (__R)); })

#define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round_mask ((__v4sf)(__A), (__v4sf)(__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8)(__U), (__R)); })
901
902static __inline__ __m128d __DEFAULT_FN_ATTRS
903_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
904 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
905 (__v2df) __B,
906 (__v2df) __W,
907 (__mmask8) __U,
908 _MM_FROUND_CUR_DIRECTION);
909}
910
911static __inline__ __m128d __DEFAULT_FN_ATTRS
912_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
913 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
914 (__v2df) __B,
915 (__v2df) _mm_setzero_pd (),
916 (__mmask8) __U,
917 _MM_FROUND_CUR_DIRECTION);
918}
919
/* Macro forms of the scalar double-precision min that forward an explicit
 * rounding operand __R to __builtin_ia32_minsd_round_mask:
 *   _mm_min_round_sd       - zero vector as third operand, all-ones mask
 *   _mm_mask_min_round_sd  - __W as third operand, __U as mask
 *   _mm_maskz_min_round_sd - zero vector as third operand, __U as mask
 * Every macro argument is parenthesized so that the casts apply to the whole
 * argument expression (e.g. a conditional expression), not just its first
 * operand. */
#define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round_mask ((__v2df)(__A), (__v2df)(__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round_mask ((__v2df)(__A), (__v2df)(__B), \
                (__v2df)(__W), (__mmask8)(__U), (__R)); })

#define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round_mask ((__v2df)(__A), (__v2df)(__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8)(__U), (__R)); })
931
932static __inline __m512i
933__DEFAULT_FN_ATTRS
934_mm512_min_epi32(__m512i __A, __m512i __B)
935{
936 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
937 (__v16si) __B,
938 (__v16si)
939 _mm512_setzero_si512 (),
940 (__mmask16) -1);
941}
942
943static __inline __m512i __DEFAULT_FN_ATTRS
944_mm512_min_epu32(__m512i __A, __m512i __B)
945{
946 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
947 (__v16si) __B,
948 (__v16si)
949 _mm512_setzero_si512 (),
950 (__mmask16) -1);
951}
952
953static __inline __m512i __DEFAULT_FN_ATTRS
954_mm512_min_epi64(__m512i __A, __m512i __B)
955{
956 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
957 (__v8di) __B,
958 (__v8di)
959 _mm512_setzero_si512 (),
960 (__mmask8) -1);
961}
962
963static __inline __m512i __DEFAULT_FN_ATTRS
964_mm512_min_epu64(__m512i __A, __m512i __B)
965{
966 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
967 (__v8di) __B,
968 (__v8di)
969 _mm512_setzero_si512 (),
970 (__mmask8) -1);
971}
972
973static __inline __m512i __DEFAULT_FN_ATTRS
974_mm512_mul_epi32(__m512i __X, __m512i __Y)
975{
976 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
977 (__v16si) __Y,
978 (__v8di)
979 _mm512_setzero_si512 (),
980 (__mmask8) -1);
981}
982
983static __inline __m512i __DEFAULT_FN_ATTRS
984_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
985{
986 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
987 (__v16si) __Y,
988 (__v8di) __W, __M);
989}
990
991static __inline __m512i __DEFAULT_FN_ATTRS
992_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
993{
994 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
995 (__v16si) __Y,
996 (__v8di)
997 _mm512_setzero_si512 (),
998 __M);
999}
1000
1001static __inline __m512i __DEFAULT_FN_ATTRS
1002_mm512_mul_epu32(__m512i __X, __m512i __Y)
1003{
1004 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1005 (__v16si) __Y,
1006 (__v8di)
1007 _mm512_setzero_si512 (),
1008 (__mmask8) -1);
1009}
1010
1011static __inline __m512i __DEFAULT_FN_ATTRS
1012_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1013{
1014 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1015 (__v16si) __Y,
1016 (__v8di) __W, __M);
1017}
1018
1019static __inline __m512i __DEFAULT_FN_ATTRS
1020_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
1021{
1022 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1023 (__v16si) __Y,
1024 (__v8di)
1025 _mm512_setzero_si512 (),
1026 __M);
1027}
1028
1029static __inline __m512i __DEFAULT_FN_ATTRS
1030_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1031{
1032 return (__m512i) ((__v16si) __A * (__v16si) __B);
1033}
1034
1035static __inline __m512i __DEFAULT_FN_ATTRS
1036_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1037{
1038 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
1039 (__v16si) __B,
1040 (__v16si)
1041 _mm512_setzero_si512 (),
1042 __M);
1043}
1044
1045static __inline __m512i __DEFAULT_FN_ATTRS
1046_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1047{
1048 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
1049 (__v16si) __B,
1050 (__v16si) __W, __M);
1051}
1052
1053static __inline__ __m512d __DEFAULT_FN_ATTRS
1054_mm512_sqrt_pd(__m512d __a)
1055{
1056 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
1057 (__v8df) _mm512_setzero_pd (),
1058 (__mmask8) -1,
1059 _MM_FROUND_CUR_DIRECTION);
1060}
1061
1062static __inline__ __m512 __DEFAULT_FN_ATTRS
1063_mm512_sqrt_ps(__m512 __a)
1064{
1065 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
1066 (__v16sf) _mm512_setzero_ps (),
1067 (__mmask16) -1,
1068 _MM_FROUND_CUR_DIRECTION);
1069}
1070
1071static __inline__ __m512d __DEFAULT_FN_ATTRS
1072_mm512_rsqrt14_pd(__m512d __A)
1073{
1074 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1075 (__v8df)
1076 _mm512_setzero_pd (),
1077 (__mmask8) -1);}
1078
1079static __inline__ __m512 __DEFAULT_FN_ATTRS
1080_mm512_rsqrt14_ps(__m512 __A)
1081{
1082 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1083 (__v16sf)
1084 _mm512_setzero_ps (),
1085 (__mmask16) -1);
1086}
1087
1088static __inline__ __m128 __DEFAULT_FN_ATTRS
1089_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1090{
1091 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1092 (__v4sf) __B,
1093 (__v4sf)
1094 _mm_setzero_ps (),
1095 (__mmask8) -1);
1096}
1097
Ben Murdoch61f157c2016-09-16 13:49:30 +01001098static __inline__ __m128 __DEFAULT_FN_ATTRS
1099_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1100{
1101 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1102 (__v4sf) __B,
1103 (__v4sf) __W,
1104 (__mmask8) __U);
1105}
1106
1107static __inline__ __m128 __DEFAULT_FN_ATTRS
1108_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1109{
1110 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1111 (__v4sf) __B,
1112 (__v4sf) _mm_setzero_ps (),
1113 (__mmask8) __U);
1114}
1115
Ben Murdoch097c5b22016-05-18 11:27:45 +01001116static __inline__ __m128d __DEFAULT_FN_ATTRS
1117_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1118{
1119 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1120 (__v2df) __B,
1121 (__v2df)
1122 _mm_setzero_pd (),
1123 (__mmask8) -1);
1124}
1125
Ben Murdoch61f157c2016-09-16 13:49:30 +01001126static __inline__ __m128d __DEFAULT_FN_ATTRS
1127_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1128{
1129 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1130 (__v2df) __B,
1131 (__v2df) __W,
1132 (__mmask8) __U);
1133}
1134
1135static __inline__ __m128d __DEFAULT_FN_ATTRS
1136_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1137{
1138 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1139 (__v2df) __B,
1140 (__v2df) _mm_setzero_pd (),
1141 (__mmask8) __U);
1142}
1143
Ben Murdoch097c5b22016-05-18 11:27:45 +01001144static __inline__ __m512d __DEFAULT_FN_ATTRS
1145_mm512_rcp14_pd(__m512d __A)
1146{
1147 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1148 (__v8df)
1149 _mm512_setzero_pd (),
1150 (__mmask8) -1);
1151}
1152
1153static __inline__ __m512 __DEFAULT_FN_ATTRS
1154_mm512_rcp14_ps(__m512 __A)
1155{
1156 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1157 (__v16sf)
1158 _mm512_setzero_ps (),
1159 (__mmask16) -1);
1160}
1161static __inline__ __m128 __DEFAULT_FN_ATTRS
1162_mm_rcp14_ss(__m128 __A, __m128 __B)
1163{
1164 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1165 (__v4sf) __B,
1166 (__v4sf)
1167 _mm_setzero_ps (),
1168 (__mmask8) -1);
1169}
1170
Ben Murdoch61f157c2016-09-16 13:49:30 +01001171static __inline__ __m128 __DEFAULT_FN_ATTRS
1172_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1173{
1174 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1175 (__v4sf) __B,
1176 (__v4sf) __W,
1177 (__mmask8) __U);
1178}
1179
1180static __inline__ __m128 __DEFAULT_FN_ATTRS
1181_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1182{
1183 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1184 (__v4sf) __B,
1185 (__v4sf) _mm_setzero_ps (),
1186 (__mmask8) __U);
1187}
1188
Ben Murdoch097c5b22016-05-18 11:27:45 +01001189static __inline__ __m128d __DEFAULT_FN_ATTRS
1190_mm_rcp14_sd(__m128d __A, __m128d __B)
1191{
1192 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1193 (__v2df) __B,
1194 (__v2df)
1195 _mm_setzero_pd (),
1196 (__mmask8) -1);
1197}
1198
Ben Murdoch61f157c2016-09-16 13:49:30 +01001199static __inline__ __m128d __DEFAULT_FN_ATTRS
1200_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1201{
1202 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1203 (__v2df) __B,
1204 (__v2df) __W,
1205 (__mmask8) __U);
1206}
1207
1208static __inline__ __m128d __DEFAULT_FN_ATTRS
1209_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1210{
1211 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1212 (__v2df) __B,
1213 (__v2df) _mm_setzero_pd (),
1214 (__mmask8) __U);
1215}
1216
Ben Murdoch097c5b22016-05-18 11:27:45 +01001217static __inline __m512 __DEFAULT_FN_ATTRS
1218_mm512_floor_ps(__m512 __A)
1219{
1220 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1221 _MM_FROUND_FLOOR,
1222 (__v16sf) __A, -1,
1223 _MM_FROUND_CUR_DIRECTION);
1224}
1225
1226static __inline __m512d __DEFAULT_FN_ATTRS
1227_mm512_floor_pd(__m512d __A)
1228{
1229 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1230 _MM_FROUND_FLOOR,
1231 (__v8df) __A, -1,
1232 _MM_FROUND_CUR_DIRECTION);
1233}
1234
1235static __inline __m512 __DEFAULT_FN_ATTRS
1236_mm512_ceil_ps(__m512 __A)
1237{
1238 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1239 _MM_FROUND_CEIL,
1240 (__v16sf) __A, -1,
1241 _MM_FROUND_CUR_DIRECTION);
1242}
1243
1244static __inline __m512d __DEFAULT_FN_ATTRS
1245_mm512_ceil_pd(__m512d __A)
1246{
1247 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1248 _MM_FROUND_CEIL,
1249 (__v8df) __A, -1,
1250 _MM_FROUND_CUR_DIRECTION);
1251}
1252
1253static __inline __m512i __DEFAULT_FN_ATTRS
1254_mm512_abs_epi64(__m512i __A)
1255{
1256 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1257 (__v8di)
1258 _mm512_setzero_si512 (),
1259 (__mmask8) -1);
1260}
1261
1262static __inline __m512i __DEFAULT_FN_ATTRS
1263_mm512_abs_epi32(__m512i __A)
1264{
1265 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1266 (__v16si)
1267 _mm512_setzero_si512 (),
1268 (__mmask16) -1);
1269}
1270
1271static __inline__ __m128 __DEFAULT_FN_ATTRS
1272_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1273 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
1274 (__v4sf) __B,
1275 (__v4sf) __W,
1276 (__mmask8) __U,
1277 _MM_FROUND_CUR_DIRECTION);
1278}
1279
1280static __inline__ __m128 __DEFAULT_FN_ATTRS
1281_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1282 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
1283 (__v4sf) __B,
1284 (__v4sf) _mm_setzero_ps (),
1285 (__mmask8) __U,
1286 _MM_FROUND_CUR_DIRECTION);
1287}
1288
/* Expands to __builtin_ia32_addss_round_mask with a _mm_setzero_ps() third
 * operand, mask (__mmask8)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (__R)); })
1292
/* Expands to __builtin_ia32_addss_round_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
      (__v4sf)(__W), (__mmask8)(__U), (__R)); })
1296
/* Expands to __builtin_ia32_addss_round_mask with a _mm_setzero_ps() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(__U), (__R)); })
1300
1301static __inline__ __m128d __DEFAULT_FN_ATTRS
1302_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1303 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
1304 (__v2df) __B,
1305 (__v2df) __W,
1306 (__mmask8) __U,
1307 _MM_FROUND_CUR_DIRECTION);
1308}
1309
1310static __inline__ __m128d __DEFAULT_FN_ATTRS
1311_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1312 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
1313 (__v2df) __B,
1314 (__v2df) _mm_setzero_pd (),
1315 (__mmask8) __U,
1316 _MM_FROUND_CUR_DIRECTION);
1317}
/* Expands to __builtin_ia32_addsd_round_mask with a _mm_setzero_pd() third
 * operand, mask (__mmask8)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__A), (__v2df)(__B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (__R)); })
1321
/* Expands to __builtin_ia32_addsd_round_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__A), (__v2df)(__B), \
      (__v2df)(__W), (__mmask8)(__U), (__R)); })
1325
/* Expands to __builtin_ia32_addsd_round_mask with a _mm_setzero_pd() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__A), (__v2df)(__B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(__U), (__R)); })
1329
1330static __inline__ __m512d __DEFAULT_FN_ATTRS
1331_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1332 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1333 (__v8df) __B,
1334 (__v8df) __W,
1335 (__mmask8) __U,
1336 _MM_FROUND_CUR_DIRECTION);
1337}
1338
1339static __inline__ __m512d __DEFAULT_FN_ATTRS
1340_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1341 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1342 (__v8df) __B,
1343 (__v8df) _mm512_setzero_pd (),
1344 (__mmask8) __U,
1345 _MM_FROUND_CUR_DIRECTION);
1346}
1347
1348static __inline__ __m512 __DEFAULT_FN_ATTRS
1349_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1350 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
1351 (__v16sf) __B,
1352 (__v16sf) __W,
1353 (__mmask16) __U,
1354 _MM_FROUND_CUR_DIRECTION);
1355}
1356
1357static __inline__ __m512 __DEFAULT_FN_ATTRS
1358_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1359 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
1360 (__v16sf) __B,
1361 (__v16sf) _mm512_setzero_ps (),
1362 (__mmask16) __U,
1363 _MM_FROUND_CUR_DIRECTION);
1364}
1365
/* Expands to __builtin_ia32_addpd512_mask with a _mm512_setzero_pd() third
 * operand, mask (__mmask8)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__A), (__v8df)(__B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (__R)); })
1369
/* Expands to __builtin_ia32_addpd512_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__A), (__v8df)(__B), \
      (__v8df)(__W), (__mmask8)(__U), (__R)); })
1373
/* Expands to __builtin_ia32_addpd512_mask with a _mm512_setzero_pd() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__A), (__v8df)(__B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(__U), (__R)); })
1377
/* Expands to __builtin_ia32_addps512_mask with a _mm512_setzero_ps() third
 * operand, mask (__mmask16)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__A), (__v16sf)(__B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (__R)); })
1381
/* Expands to __builtin_ia32_addps512_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__A), (__v16sf)(__B), \
      (__v16sf)(__W), (__mmask16)(__U), (__R)); })
1385
/* Expands to __builtin_ia32_addps512_mask with a _mm512_setzero_ps() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__A), (__v16sf)(__B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(__U), (__R)); })
1389
1390static __inline__ __m128 __DEFAULT_FN_ATTRS
1391_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1392 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
1393 (__v4sf) __B,
1394 (__v4sf) __W,
1395 (__mmask8) __U,
1396 _MM_FROUND_CUR_DIRECTION);
1397}
1398
1399static __inline__ __m128 __DEFAULT_FN_ATTRS
1400_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1401 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
1402 (__v4sf) __B,
1403 (__v4sf) _mm_setzero_ps (),
1404 (__mmask8) __U,
1405 _MM_FROUND_CUR_DIRECTION);
1406}
/* Expands to __builtin_ia32_subss_round_mask with a _mm_setzero_ps() third
 * operand, mask (__mmask8)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (__R)); })
1410
/* Expands to __builtin_ia32_subss_round_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
      (__v4sf)(__W), (__mmask8)(__U), (__R)); })
1414
/* Expands to __builtin_ia32_subss_round_mask with a _mm_setzero_ps() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(__U), (__R)); })
1418
1419static __inline__ __m128d __DEFAULT_FN_ATTRS
1420_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1421 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
1422 (__v2df) __B,
1423 (__v2df) __W,
1424 (__mmask8) __U,
1425 _MM_FROUND_CUR_DIRECTION);
1426}
1427
1428static __inline__ __m128d __DEFAULT_FN_ATTRS
1429_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1430 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
1431 (__v2df) __B,
1432 (__v2df) _mm_setzero_pd (),
1433 (__mmask8) __U,
1434 _MM_FROUND_CUR_DIRECTION);
1435}
1436
/* Expands to __builtin_ia32_subsd_round_mask with a _mm_setzero_pd() third
 * operand, mask (__mmask8)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__A), (__v2df)(__B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (__R)); })
1440
/* Expands to __builtin_ia32_subsd_round_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__A), (__v2df)(__B), \
      (__v2df)(__W), (__mmask8)(__U), (__R)); })
1444
/* Expands to __builtin_ia32_subsd_round_mask with a _mm_setzero_pd() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__A), (__v2df)(__B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(__U), (__R)); })
1448
1449static __inline__ __m512d __DEFAULT_FN_ATTRS
1450_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1451 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
1452 (__v8df) __B,
1453 (__v8df) __W,
1454 (__mmask8) __U,
1455 _MM_FROUND_CUR_DIRECTION);
1456}
1457
1458static __inline__ __m512d __DEFAULT_FN_ATTRS
1459_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1460 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
1461 (__v8df) __B,
1462 (__v8df)
1463 _mm512_setzero_pd (),
1464 (__mmask8) __U,
1465 _MM_FROUND_CUR_DIRECTION);
1466}
1467
1468static __inline__ __m512 __DEFAULT_FN_ATTRS
1469_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1470 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
1471 (__v16sf) __B,
1472 (__v16sf) __W,
1473 (__mmask16) __U,
1474 _MM_FROUND_CUR_DIRECTION);
1475}
1476
1477static __inline__ __m512 __DEFAULT_FN_ATTRS
1478_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1479 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
1480 (__v16sf) __B,
1481 (__v16sf)
1482 _mm512_setzero_ps (),
1483 (__mmask16) __U,
1484 _MM_FROUND_CUR_DIRECTION);
1485}
1486
/* Expands to __builtin_ia32_subpd512_mask with a _mm512_setzero_pd() third
 * operand, mask (__mmask8)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__A), (__v8df)(__B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (__R)); })
1490
/* Expands to __builtin_ia32_subpd512_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__A), (__v8df)(__B), \
      (__v8df)(__W), (__mmask8)(__U), (__R)); })
1494
/* Expands to __builtin_ia32_subpd512_mask with a _mm512_setzero_pd() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__A), (__v8df)(__B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(__U), (__R)); })
1498
/* Expands to __builtin_ia32_subps512_mask with a _mm512_setzero_ps() third
 * operand, mask (__mmask16)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__A), (__v16sf)(__B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (__R)); })
1502
/* Expands to __builtin_ia32_subps512_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  The definition carries no
 * trailing ';' so the macro can appear inside a larger expression, and each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__A), (__v16sf)(__B), \
      (__v16sf)(__W), (__mmask16)(__U), (__R)); })
1506
/* Expands to __builtin_ia32_subps512_mask with a _mm512_setzero_ps() third
 * operand, mask __U and caller-supplied rounding __R.  The definition
 * carries no trailing ';' so the macro can appear inside a larger
 * expression, and each parameter is parenthesized so a compound argument is
 * cast as a whole. */
#define _mm512_maskz_sub_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__A), (__v16sf)(__B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(__U), (__R)); })
1510
1511static __inline__ __m128 __DEFAULT_FN_ATTRS
1512_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1513 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
1514 (__v4sf) __B,
1515 (__v4sf) __W,
1516 (__mmask8) __U,
1517 _MM_FROUND_CUR_DIRECTION);
1518}
1519
1520static __inline__ __m128 __DEFAULT_FN_ATTRS
1521_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1522 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
1523 (__v4sf) __B,
1524 (__v4sf) _mm_setzero_ps (),
1525 (__mmask8) __U,
1526 _MM_FROUND_CUR_DIRECTION);
1527}
/* Expands to __builtin_ia32_mulss_round_mask with a _mm_setzero_ps() third
 * operand, mask (__mmask8)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (__R)); })
1531
/* Expands to __builtin_ia32_mulss_round_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
      (__v4sf)(__W), (__mmask8)(__U), (__R)); })
1535
/* Expands to __builtin_ia32_mulss_round_mask with a _mm_setzero_ps() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(__U), (__R)); })
1539
1540static __inline__ __m128d __DEFAULT_FN_ATTRS
1541_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1542 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
1543 (__v2df) __B,
1544 (__v2df) __W,
1545 (__mmask8) __U,
1546 _MM_FROUND_CUR_DIRECTION);
1547}
1548
1549static __inline__ __m128d __DEFAULT_FN_ATTRS
1550_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1551 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
1552 (__v2df) __B,
1553 (__v2df) _mm_setzero_pd (),
1554 (__mmask8) __U,
1555 _MM_FROUND_CUR_DIRECTION);
1556}
1557
/* Expands to __builtin_ia32_mulsd_round_mask with a _mm_setzero_pd() third
 * operand, mask (__mmask8)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__A), (__v2df)(__B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (__R)); })
1561
/* Expands to __builtin_ia32_mulsd_round_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__A), (__v2df)(__B), \
      (__v2df)(__W), (__mmask8)(__U), (__R)); })
1565
/* Expands to __builtin_ia32_mulsd_round_mask with a _mm_setzero_pd() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__A), (__v2df)(__B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(__U), (__R)); })
1569
1570static __inline__ __m512d __DEFAULT_FN_ATTRS
1571_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1572 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
1573 (__v8df) __B,
1574 (__v8df) __W,
1575 (__mmask8) __U,
1576 _MM_FROUND_CUR_DIRECTION);
1577}
1578
1579static __inline__ __m512d __DEFAULT_FN_ATTRS
1580_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1581 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
1582 (__v8df) __B,
1583 (__v8df)
1584 _mm512_setzero_pd (),
1585 (__mmask8) __U,
1586 _MM_FROUND_CUR_DIRECTION);
1587}
1588
1589static __inline__ __m512 __DEFAULT_FN_ATTRS
1590_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1591 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
1592 (__v16sf) __B,
1593 (__v16sf) __W,
1594 (__mmask16) __U,
1595 _MM_FROUND_CUR_DIRECTION);
1596}
1597
1598static __inline__ __m512 __DEFAULT_FN_ATTRS
1599_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1600 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
1601 (__v16sf) __B,
1602 (__v16sf)
1603 _mm512_setzero_ps (),
1604 (__mmask16) __U,
1605 _MM_FROUND_CUR_DIRECTION);
1606}
1607
/* Expands to __builtin_ia32_mulpd512_mask with a _mm512_setzero_pd() third
 * operand, mask (__mmask8)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__A), (__v8df)(__B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (__R)); })
1611
/* Expands to __builtin_ia32_mulpd512_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__A), (__v8df)(__B), \
      (__v8df)(__W), (__mmask8)(__U), (__R)); })
1615
/* Expands to __builtin_ia32_mulpd512_mask with a _mm512_setzero_pd() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__A), (__v8df)(__B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(__U), (__R)); })
1619
/* Expands to __builtin_ia32_mulps512_mask with a _mm512_setzero_ps() third
 * operand, mask (__mmask16)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__A), (__v16sf)(__B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (__R)); })
1623
/* Expands to __builtin_ia32_mulps512_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  The definition carries no
 * trailing ';' so the macro can appear inside a larger expression, and each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__A), (__v16sf)(__B), \
      (__v16sf)(__W), (__mmask16)(__U), (__R)); })
1627
/* Expands to __builtin_ia32_mulps512_mask with a _mm512_setzero_ps() third
 * operand, mask __U and caller-supplied rounding __R.  The definition
 * carries no trailing ';' so the macro can appear inside a larger
 * expression, and each parameter is parenthesized so a compound argument is
 * cast as a whole. */
#define _mm512_maskz_mul_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__A), (__v16sf)(__B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(__U), (__R)); })
1631
1632static __inline__ __m128 __DEFAULT_FN_ATTRS
1633_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1634 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
1635 (__v4sf) __B,
1636 (__v4sf) __W,
1637 (__mmask8) __U,
1638 _MM_FROUND_CUR_DIRECTION);
1639}
1640
1641static __inline__ __m128 __DEFAULT_FN_ATTRS
1642_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1643 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
1644 (__v4sf) __B,
1645 (__v4sf) _mm_setzero_ps (),
1646 (__mmask8) __U,
1647 _MM_FROUND_CUR_DIRECTION);
1648}
1649
/* Expands to __builtin_ia32_divss_round_mask with a _mm_setzero_ps() third
 * operand, mask (__mmask8)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (__R)); })
1653
/* Expands to __builtin_ia32_divss_round_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
      (__v4sf)(__W), (__mmask8)(__U), (__R)); })
1657
/* Expands to __builtin_ia32_divss_round_mask with a _mm_setzero_ps() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(__U), (__R)); })
1661
1662static __inline__ __m128d __DEFAULT_FN_ATTRS
1663_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1664 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
1665 (__v2df) __B,
1666 (__v2df) __W,
1667 (__mmask8) __U,
1668 _MM_FROUND_CUR_DIRECTION);
1669}
1670
1671static __inline__ __m128d __DEFAULT_FN_ATTRS
1672_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1673 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
1674 (__v2df) __B,
1675 (__v2df) _mm_setzero_pd (),
1676 (__mmask8) __U,
1677 _MM_FROUND_CUR_DIRECTION);
1678}
1679
/* Expands to __builtin_ia32_divsd_round_mask with a _mm_setzero_pd() third
 * operand, mask (__mmask8)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__A), (__v2df)(__B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (__R)); })
1683
/* Expands to __builtin_ia32_divsd_round_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__A), (__v2df)(__B), \
      (__v2df)(__W), (__mmask8)(__U), (__R)); })
1687
/* Expands to __builtin_ia32_divsd_round_mask with a _mm_setzero_pd() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__A), (__v2df)(__B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(__U), (__R)); })
1691
1692static __inline__ __m512d __DEFAULT_FN_ATTRS
1693_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1694 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
1695 (__v8df) __B,
1696 (__v8df) __W,
1697 (__mmask8) __U,
1698 _MM_FROUND_CUR_DIRECTION);
1699}
1700
1701static __inline__ __m512d __DEFAULT_FN_ATTRS
1702_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1703 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
1704 (__v8df) __B,
1705 (__v8df)
1706 _mm512_setzero_pd (),
1707 (__mmask8) __U,
1708 _MM_FROUND_CUR_DIRECTION);
1709}
1710
1711static __inline__ __m512 __DEFAULT_FN_ATTRS
1712_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1713 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
1714 (__v16sf) __B,
1715 (__v16sf) __W,
1716 (__mmask16) __U,
1717 _MM_FROUND_CUR_DIRECTION);
1718}
1719
1720static __inline__ __m512 __DEFAULT_FN_ATTRS
1721_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1722 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
1723 (__v16sf) __B,
1724 (__v16sf)
1725 _mm512_setzero_ps (),
1726 (__mmask16) __U,
1727 _MM_FROUND_CUR_DIRECTION);
1728}
1729
/* Expands to __builtin_ia32_divpd512_mask with a _mm512_setzero_pd() third
 * operand, mask (__mmask8)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__A), (__v8df)(__B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (__R)); })
1733
/* Expands to __builtin_ia32_divpd512_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__A), (__v8df)(__B), \
      (__v8df)(__W), (__mmask8)(__U), (__R)); })
1737
/* Expands to __builtin_ia32_divpd512_mask with a _mm512_setzero_pd() third
 * operand, mask __U and caller-supplied rounding __R.  Each parameter is
 * parenthesized so a compound argument is cast as a whole. */
#define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__A), (__v8df)(__B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(__U), (__R)); })
1741
/* Expands to __builtin_ia32_divps512_mask with a _mm512_setzero_ps() third
 * operand, mask (__mmask16)-1 and caller-supplied rounding __R.  Each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__A), (__v16sf)(__B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (__R)); })
1745
/* Expands to __builtin_ia32_divps512_mask with __W as the third operand,
 * mask __U and caller-supplied rounding __R.  The definition carries no
 * trailing ';' so the macro can appear inside a larger expression, and each
 * parameter is parenthesized so a compound argument is cast as a whole. */
#define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__A), (__v16sf)(__B), \
      (__v16sf)(__W), (__mmask16)(__U), (__R)); })
1749
/* Expands to __builtin_ia32_divps512_mask with a _mm512_setzero_ps() third
 * operand, mask __U and caller-supplied rounding __R.  The definition
 * carries no trailing ';' so the macro can appear inside a larger
 * expression, and each parameter is parenthesized so a compound argument is
 * cast as a whole. */
#define _mm512_maskz_div_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__A), (__v16sf)(__B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(__U), (__R)); })
1753
/* Expands to __builtin_ia32_rndscaleps_mask with A as both the first and the
 * third operand, immediate B, mask -1 and rounding
 * _MM_FROUND_CUR_DIRECTION.  A is captured once in a local of its own type
 * so an argument with side effects is evaluated a single time (__typeof__
 * does not evaluate its operand). */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  __typeof__(A) __rndscale_src = (A); \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)__rndscale_src, (B), \
                                         (__v16sf)__rndscale_src, \
                                         -1, _MM_FROUND_CUR_DIRECTION); })
1757
/* Expands to __builtin_ia32_rndscalepd_mask with A as both the first and the
 * third operand, immediate B, mask -1 and rounding
 * _MM_FROUND_CUR_DIRECTION.  A is captured once in a local of its own type
 * so an argument with side effects is evaluated a single time (__typeof__
 * does not evaluate its operand). */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  __typeof__(A) __rndscale_src = (A); \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)__rndscale_src, (B), \
                                          (__v8df)__rndscale_src, \
                                          -1, _MM_FROUND_CUR_DIRECTION); })
1761
/* Expands to __builtin_ia32_vfmaddpd512_mask on (A, B, C) with mask
 * (__mmask8)-1 and caller-supplied rounding R. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), (__mmask8)-1, (R)); })
1766
1767
/* Expands to __builtin_ia32_vfmaddpd512_mask on (A, B, C) with mask U and
 * caller-supplied rounding R. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), (__mmask8)(U), (R)); })
1772
1773
/* Expands to __builtin_ia32_vfmaddpd512_mask3 on (A, B, C) with mask U and
 * caller-supplied rounding R. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), (__mmask8)(U), (R)); })
1778
1779
/* Expands to __builtin_ia32_vfmaddpd512_maskz on (A, B, C) with mask U and
 * caller-supplied rounding R. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), (__mmask8)(U), (R)); })
1784
1785
/* Expands to __builtin_ia32_vfmaddpd512_mask on (A, B, -C) with mask
 * (__mmask8)-1 and caller-supplied rounding R; C is negated after the cast
 * to __v8df. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(A), (__v8df)(B), -(__v8df)(C), (__mmask8)-1, (R)); })
1790
1791
/* Expands to __builtin_ia32_vfmaddpd512_mask on (A, B, -C) with mask U and
 * caller-supplied rounding R; C is negated after the cast to __v8df. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(A), (__v8df)(B), -(__v8df)(C), (__mmask8)(U), (R)); })
1796
1797
/* Expands to __builtin_ia32_vfmaddpd512_maskz on (A, B, -C) with mask U and
 * caller-supplied rounding R; C is negated after the cast to __v8df. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(A), (__v8df)(B), -(__v8df)(C), (__mmask8)(U), (R)); })
1802
1803
/* Expands to __builtin_ia32_vfmaddpd512_mask on (-A, B, C) with mask
 * (__mmask8)-1 and caller-supplied rounding R; A is negated after the cast
 * to __v8df. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(A), (__v8df)(B), (__v8df)(C), (__mmask8)-1, (R)); })
1808
1809
/* Expands to __builtin_ia32_vfmaddpd512_mask3 on (-A, B, C) with mask U and
 * caller-supplied rounding R; A is negated after the cast to __v8df. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      -(__v8df)(A), (__v8df)(B), (__v8df)(C), (__mmask8)(U), (R)); })
1814
1815
/* Expands to __builtin_ia32_vfmaddpd512_maskz on (-A, B, C) with mask U and
 * caller-supplied rounding R; A is negated after the cast to __v8df. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(A), (__v8df)(B), (__v8df)(C), (__mmask8)(U), (R)); })
1820
1821
/* Expands to __builtin_ia32_vfmaddpd512_mask on (-A, B, -C) with mask
 * (__mmask8)-1 and caller-supplied rounding R; A and C are negated after
 * the cast to __v8df. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(A), (__v8df)(B), -(__v8df)(C), (__mmask8)-1, (R)); })
1826
1827
/* Expands to __builtin_ia32_vfmaddpd512_maskz on (-A, B, -C) with mask U and
 * caller-supplied rounding R; A and C are negated after the cast to
 * __v8df. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(A), (__v8df)(B), -(__v8df)(C), (__mmask8)(U), (R)); })
1832
1833
1834static __inline__ __m512d __DEFAULT_FN_ATTRS
1835_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
1836{
1837 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1838 (__v8df) __B,
1839 (__v8df) __C,
1840 (__mmask8) -1,
1841 _MM_FROUND_CUR_DIRECTION);
1842}
1843
1844static __inline__ __m512d __DEFAULT_FN_ATTRS
1845_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1846{
1847 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1848 (__v8df) __B,
1849 (__v8df) __C,
1850 (__mmask8) __U,
1851 _MM_FROUND_CUR_DIRECTION);
1852}
1853
1854static __inline__ __m512d __DEFAULT_FN_ATTRS
1855_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1856{
1857 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
1858 (__v8df) __B,
1859 (__v8df) __C,
1860 (__mmask8) __U,
1861 _MM_FROUND_CUR_DIRECTION);
1862}
1863
1864static __inline__ __m512d __DEFAULT_FN_ATTRS
1865_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1866{
1867 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
1868 (__v8df) __B,
1869 (__v8df) __C,
1870 (__mmask8) __U,
1871 _MM_FROUND_CUR_DIRECTION);
1872}
1873
1874static __inline__ __m512d __DEFAULT_FN_ATTRS
1875_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
1876{
1877 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1878 (__v8df) __B,
1879 -(__v8df) __C,
1880 (__mmask8) -1,
1881 _MM_FROUND_CUR_DIRECTION);
1882}
1883
1884static __inline__ __m512d __DEFAULT_FN_ATTRS
1885_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1886{
1887 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1888 (__v8df) __B,
1889 -(__v8df) __C,
1890 (__mmask8) __U,
1891 _MM_FROUND_CUR_DIRECTION);
1892}
1893
1894static __inline__ __m512d __DEFAULT_FN_ATTRS
1895_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1896{
1897 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
1898 (__v8df) __B,
1899 -(__v8df) __C,
1900 (__mmask8) __U,
1901 _MM_FROUND_CUR_DIRECTION);
1902}
1903
1904static __inline__ __m512d __DEFAULT_FN_ATTRS
1905_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
1906{
1907 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1908 (__v8df) __B,
1909 (__v8df) __C,
1910 (__mmask8) -1,
1911 _MM_FROUND_CUR_DIRECTION);
1912}
1913
1914static __inline__ __m512d __DEFAULT_FN_ATTRS
1915_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1916{
1917 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
1918 (__v8df) __B,
1919 (__v8df) __C,
1920 (__mmask8) __U,
1921 _MM_FROUND_CUR_DIRECTION);
1922}
1923
1924static __inline__ __m512d __DEFAULT_FN_ATTRS
1925_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1926{
1927 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1928 (__v8df) __B,
1929 (__v8df) __C,
1930 (__mmask8) __U,
1931 _MM_FROUND_CUR_DIRECTION);
1932}
1933
1934static __inline__ __m512d __DEFAULT_FN_ATTRS
1935_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
1936{
1937 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1938 (__v8df) __B,
1939 -(__v8df) __C,
1940 (__mmask8) -1,
1941 _MM_FROUND_CUR_DIRECTION);
1942}
1943
1944static __inline__ __m512d __DEFAULT_FN_ATTRS
1945_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1946{
1947 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1948 (__v8df) __B,
1949 -(__v8df) __C,
1950 (__mmask8) __U,
1951 _MM_FROUND_CUR_DIRECTION);
1952}
1953
/* Single-precision FMA variants that take an explicit rounding argument R,
 * forwarded as the builtin's last argument.  Every macro calls a vfmaddps512
 * builtin: "fmsub" negates C, "fnmadd" negates A, "fnmsub" negates both.
 * Unmasked forms pass (__mmask16)-1; masked forms pass U to the builtin with
 * the same suffix. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), (__mmask16)-1, (R)); })


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), (__mmask16)(U), (R)); })


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), (__mmask16)(U), (R)); })


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), (__mmask16)(U), (R)); })


#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), (__mmask16)-1, (R)); })


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), (__mmask16)(U), (R)); })


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), (__mmask16)(U), (R)); })


#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      -(__v16sf)(A), (__v16sf)(B), (__v16sf)(C), (__mmask16)-1, (R)); })


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3( \
      -(__v16sf)(A), (__v16sf)(B), (__v16sf)(C), (__mmask16)(U), (R)); })


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(A), (__v16sf)(B), (__v16sf)(C), (__mmask16)(U), (R)); })


#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      -(__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), (__mmask16)-1, (R)); })


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), (__mmask16)(U), (R)); })
2024
2025
2026static __inline__ __m512 __DEFAULT_FN_ATTRS
2027_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2028{
2029 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2030 (__v16sf) __B,
2031 (__v16sf) __C,
2032 (__mmask16) -1,
2033 _MM_FROUND_CUR_DIRECTION);
2034}
2035
2036static __inline__ __m512 __DEFAULT_FN_ATTRS
2037_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2038{
2039 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2040 (__v16sf) __B,
2041 (__v16sf) __C,
2042 (__mmask16) __U,
2043 _MM_FROUND_CUR_DIRECTION);
2044}
2045
2046static __inline__ __m512 __DEFAULT_FN_ATTRS
2047_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2048{
2049 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2050 (__v16sf) __B,
2051 (__v16sf) __C,
2052 (__mmask16) __U,
2053 _MM_FROUND_CUR_DIRECTION);
2054}
2055
2056static __inline__ __m512 __DEFAULT_FN_ATTRS
2057_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2058{
2059 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2060 (__v16sf) __B,
2061 (__v16sf) __C,
2062 (__mmask16) __U,
2063 _MM_FROUND_CUR_DIRECTION);
2064}
2065
2066static __inline__ __m512 __DEFAULT_FN_ATTRS
2067_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2068{
2069 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2070 (__v16sf) __B,
2071 -(__v16sf) __C,
2072 (__mmask16) -1,
2073 _MM_FROUND_CUR_DIRECTION);
2074}
2075
2076static __inline__ __m512 __DEFAULT_FN_ATTRS
2077_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2078{
2079 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2080 (__v16sf) __B,
2081 -(__v16sf) __C,
2082 (__mmask16) __U,
2083 _MM_FROUND_CUR_DIRECTION);
2084}
2085
2086static __inline__ __m512 __DEFAULT_FN_ATTRS
2087_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2088{
2089 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2090 (__v16sf) __B,
2091 -(__v16sf) __C,
2092 (__mmask16) __U,
2093 _MM_FROUND_CUR_DIRECTION);
2094}
2095
2096static __inline__ __m512 __DEFAULT_FN_ATTRS
2097_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2098{
2099 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2100 (__v16sf) __B,
2101 (__v16sf) __C,
2102 (__mmask16) -1,
2103 _MM_FROUND_CUR_DIRECTION);
2104}
2105
2106static __inline__ __m512 __DEFAULT_FN_ATTRS
2107_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2108{
2109 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2110 (__v16sf) __B,
2111 (__v16sf) __C,
2112 (__mmask16) __U,
2113 _MM_FROUND_CUR_DIRECTION);
2114}
2115
2116static __inline__ __m512 __DEFAULT_FN_ATTRS
2117_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2118{
2119 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2120 (__v16sf) __B,
2121 (__v16sf) __C,
2122 (__mmask16) __U,
2123 _MM_FROUND_CUR_DIRECTION);
2124}
2125
2126static __inline__ __m512 __DEFAULT_FN_ATTRS
2127_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2128{
2129 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2130 (__v16sf) __B,
2131 -(__v16sf) __C,
2132 (__mmask16) -1,
2133 _MM_FROUND_CUR_DIRECTION);
2134}
2135
2136static __inline__ __m512 __DEFAULT_FN_ATTRS
2137_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2138{
2139 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2140 (__v16sf) __B,
2141 -(__v16sf) __C,
2142 (__mmask16) __U,
2143 _MM_FROUND_CUR_DIRECTION);
2144}
2145
/* Double-precision fmaddsub / fmsubadd variants with an explicit rounding
 * argument R.  All of them call a vfmaddsubpd512 builtin; the "fmsubadd"
 * forms are obtained by negating C.  Unmasked forms pass (__mmask8)-1,
 * masked forms pass U to the builtin with the same suffix. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), (__mmask8)-1, (R)); })


#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), (__mmask8)(U), (R)); })


#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), (__mmask8)(U), (R)); })


#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(A), (__v8df)(B), (__v8df)(C), (__mmask8)(U), (R)); })


#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(A), (__v8df)(B), -(__v8df)(C), (__mmask8)-1, (R)); })


#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(A), (__v8df)(B), -(__v8df)(C), (__mmask8)(U), (R)); })


#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(A), (__v8df)(B), -(__v8df)(C), (__mmask8)(U), (R)); })
2186
2187
2188static __inline__ __m512d __DEFAULT_FN_ATTRS
2189_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2190{
2191 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2192 (__v8df) __B,
2193 (__v8df) __C,
2194 (__mmask8) -1,
2195 _MM_FROUND_CUR_DIRECTION);
2196}
2197
2198static __inline__ __m512d __DEFAULT_FN_ATTRS
2199_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2200{
2201 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2202 (__v8df) __B,
2203 (__v8df) __C,
2204 (__mmask8) __U,
2205 _MM_FROUND_CUR_DIRECTION);
2206}
2207
2208static __inline__ __m512d __DEFAULT_FN_ATTRS
2209_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2210{
2211 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2212 (__v8df) __B,
2213 (__v8df) __C,
2214 (__mmask8) __U,
2215 _MM_FROUND_CUR_DIRECTION);
2216}
2217
2218static __inline__ __m512d __DEFAULT_FN_ATTRS
2219_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2220{
2221 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2222 (__v8df) __B,
2223 (__v8df) __C,
2224 (__mmask8) __U,
2225 _MM_FROUND_CUR_DIRECTION);
2226}
2227
2228static __inline__ __m512d __DEFAULT_FN_ATTRS
2229_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2230{
2231 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2232 (__v8df) __B,
2233 -(__v8df) __C,
2234 (__mmask8) -1,
2235 _MM_FROUND_CUR_DIRECTION);
2236}
2237
2238static __inline__ __m512d __DEFAULT_FN_ATTRS
2239_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2240{
2241 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2242 (__v8df) __B,
2243 -(__v8df) __C,
2244 (__mmask8) __U,
2245 _MM_FROUND_CUR_DIRECTION);
2246}
2247
2248static __inline__ __m512d __DEFAULT_FN_ATTRS
2249_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2250{
2251 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2252 (__v8df) __B,
2253 -(__v8df) __C,
2254 (__mmask8) __U,
2255 _MM_FROUND_CUR_DIRECTION);
2256}
2257
/* Single-precision fmaddsub / fmsubadd variants with an explicit rounding
 * argument R.  All of them call a vfmaddsubps512 builtin; the "fmsubadd"
 * forms are obtained by negating C.  Unmasked forms pass (__mmask16)-1,
 * masked forms pass U to the builtin with the same suffix. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), (__mmask16)-1, (R)); })


#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), (__mmask16)(U), (R)); })


#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), (__mmask16)(U), (R)); })


#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(A), (__v16sf)(B), (__v16sf)(C), (__mmask16)(U), (R)); })


#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), (__mmask16)-1, (R)); })


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), (__mmask16)(U), (R)); })


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(A), (__v16sf)(B), -(__v16sf)(C), (__mmask16)(U), (R)); })
2298
2299
2300static __inline__ __m512 __DEFAULT_FN_ATTRS
2301_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2302{
2303 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2304 (__v16sf) __B,
2305 (__v16sf) __C,
2306 (__mmask16) -1,
2307 _MM_FROUND_CUR_DIRECTION);
2308}
2309
2310static __inline__ __m512 __DEFAULT_FN_ATTRS
2311_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2312{
2313 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2314 (__v16sf) __B,
2315 (__v16sf) __C,
2316 (__mmask16) __U,
2317 _MM_FROUND_CUR_DIRECTION);
2318}
2319
2320static __inline__ __m512 __DEFAULT_FN_ATTRS
2321_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2322{
2323 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2324 (__v16sf) __B,
2325 (__v16sf) __C,
2326 (__mmask16) __U,
2327 _MM_FROUND_CUR_DIRECTION);
2328}
2329
2330static __inline__ __m512 __DEFAULT_FN_ATTRS
2331_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2332{
2333 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2334 (__v16sf) __B,
2335 (__v16sf) __C,
2336 (__mmask16) __U,
2337 _MM_FROUND_CUR_DIRECTION);
2338}
2339
2340static __inline__ __m512 __DEFAULT_FN_ATTRS
2341_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
2342{
2343 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2344 (__v16sf) __B,
2345 -(__v16sf) __C,
2346 (__mmask16) -1,
2347 _MM_FROUND_CUR_DIRECTION);
2348}
2349
2350static __inline__ __m512 __DEFAULT_FN_ATTRS
2351_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2352{
2353 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2354 (__v16sf) __B,
2355 -(__v16sf) __C,
2356 (__mmask16) __U,
2357 _MM_FROUND_CUR_DIRECTION);
2358}
2359
2360static __inline__ __m512 __DEFAULT_FN_ATTRS
2361_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2362{
2363 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2364 (__v16sf) __B,
2365 -(__v16sf) __C,
2366 (__mmask16) __U,
2367 _MM_FROUND_CUR_DIRECTION);
2368}
2369
2370#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
2371 (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \
2372 (__v8df) (B), (__v8df) (C), \
2373 (__mmask8) (U), (R)); })
2374
2375
2376static __inline__ __m512d __DEFAULT_FN_ATTRS
2377_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2378{
2379 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2380 (__v8df) __B,
2381 (__v8df) __C,
2382 (__mmask8) __U,
2383 _MM_FROUND_CUR_DIRECTION);
2384}
2385
2386#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
2387 (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \
2388 (__v16sf) (B), (__v16sf) (C), \
2389 (__mmask16) (U), (R)); })
2390
2391
2392static __inline__ __m512 __DEFAULT_FN_ATTRS
2393_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2394{
2395 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2396 (__v16sf) __B,
2397 (__v16sf) __C,
2398 (__mmask16) __U,
2399 _MM_FROUND_CUR_DIRECTION);
2400}
2401
2402#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
2403 (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \
2404 (__v8df) (B), (__v8df) (C), \
2405 (__mmask8) (U), (R)); })
2406
2407
2408static __inline__ __m512d __DEFAULT_FN_ATTRS
2409_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2410{
2411 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2412 (__v8df) __B,
2413 (__v8df) __C,
2414 (__mmask8) __U,
2415 _MM_FROUND_CUR_DIRECTION);
2416}
2417
2418#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
2419 (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), \
2420 (__v16sf) (B), (__v16sf) (C), \
2421 (__mmask16) (U), (R)); })
2422
2423
2424static __inline__ __m512 __DEFAULT_FN_ATTRS
2425_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2426{
2427 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2428 (__v16sf) __B,
2429 (__v16sf) __C,
2430 (__mmask16) __U,
2431 _MM_FROUND_CUR_DIRECTION);
2432}
2433
2434#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
2435 (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) (A), \
2436 (__v8df) (B), (__v8df) (C), \
2437 (__mmask8) (U), (R)); })
2438
2439
2440static __inline__ __m512d __DEFAULT_FN_ATTRS
2441_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2442{
2443 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
2444 (__v8df) __B,
2445 (__v8df) __C,
2446 (__mmask8) __U,
2447 _MM_FROUND_CUR_DIRECTION);
2448}
2449
2450#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
2451 (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) (A), \
2452 (__v16sf) (B), (__v16sf) (C), \
2453 (__mmask16) (U), (R)); })
2454
2455
2456static __inline__ __m512 __DEFAULT_FN_ATTRS
2457_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2458{
2459 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
2460 (__v16sf) __B,
2461 (__v16sf) __C,
2462 (__mmask16) __U,
2463 _MM_FROUND_CUR_DIRECTION);
2464}
2465
2466#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
2467 (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) (A), \
2468 (__v8df) (B), (__v8df) (C), \
2469 (__mmask8) (U), (R)); })
2470
2471
2472#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
2473 (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) (A), \
2474 (__v8df) (B), (__v8df) (C), \
2475 (__mmask8) (U), (R)); })
2476
2477
2478static __inline__ __m512d __DEFAULT_FN_ATTRS
2479_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2480{
2481 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
2482 (__v8df) __B,
2483 (__v8df) __C,
2484 (__mmask8) __U,
2485 _MM_FROUND_CUR_DIRECTION);
2486}
2487
2488static __inline__ __m512d __DEFAULT_FN_ATTRS
2489_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2490{
2491 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
2492 (__v8df) __B,
2493 (__v8df) __C,
2494 (__mmask8) __U,
2495 _MM_FROUND_CUR_DIRECTION);
2496}
2497
2498#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
2499 (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) (A), \
2500 (__v16sf) (B), (__v16sf) (C), \
2501 (__mmask16) (U), (R)); })
2502
2503
2504#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
2505 (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) (A), \
2506 (__v16sf) (B), (__v16sf) (C), \
2507 (__mmask16) (U), (R)); })
2508
2509
2510static __inline__ __m512 __DEFAULT_FN_ATTRS
2511_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2512{
2513 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
2514 (__v16sf) __B,
2515 (__v16sf) __C,
2516 (__mmask16) __U,
2517 _MM_FROUND_CUR_DIRECTION);
2518}
2519
2520static __inline__ __m512 __DEFAULT_FN_ATTRS
2521_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2522{
2523 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
2524 (__v16sf) __B,
2525 (__v16sf) __C,
2526 (__mmask16) __U,
2527 _MM_FROUND_CUR_DIRECTION);
2528}
2529
2530
2531
2532/* Vector permutations */
2533
2534static __inline __m512i __DEFAULT_FN_ATTRS
2535_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
2536{
2537 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
2538 /* idx */ ,
2539 (__v16si) __A,
2540 (__v16si) __B,
2541 (__mmask16) -1);
2542}
2543static __inline __m512i __DEFAULT_FN_ATTRS
2544_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
2545{
2546 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
2547 /* idx */ ,
2548 (__v8di) __A,
2549 (__v8di) __B,
2550 (__mmask8) -1);
2551}
2552
2553static __inline __m512d __DEFAULT_FN_ATTRS
2554_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
2555{
2556 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
2557 /* idx */ ,
2558 (__v8df) __A,
2559 (__v8df) __B,
2560 (__mmask8) -1);
2561}
2562static __inline __m512 __DEFAULT_FN_ATTRS
2563_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
2564{
2565 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
2566 /* idx */ ,
2567 (__v16sf) __A,
2568 (__v16sf) __B,
2569 (__mmask16) -1);
2570}
2571
/* Unmasked 64-bit-element alignr: forwards A, B and the immediate I to the
 * alignq512 builtin with a zero vector as the pass-through operand and an
 * all-ones mask. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (I), \
      (__v8di)_mm512_setzero_si512(), (__mmask8)-1); })
2577
/* Masked 64-bit-element alignr: forwards A, B and the immediate imm to the
 * alignq512 builtin with W as the pass-through operand and U as the mask.
 * Every macro argument is parenthesized before it is cast, so an expression
 * argument (e.g. `c ? x : y`) is cast as a whole rather than only its first
 * operand. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (imm), \
      (__v8di)(__m512i)(W), (__mmask8)(U)); })
2584
/* Zero-masked 64-bit-element alignr: forwards A, B and the immediate imm to
 * the alignq512 builtin with a zero vector as the pass-through operand and U
 * as the mask.  Every macro argument is parenthesized before it is cast, so
 * an expression argument is cast as a whole rather than only its first
 * operand. */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (imm), \
      (__v8di)_mm512_setzero_si512(), (__mmask8)(U)); })
2591
/* Unmasked 32-bit-element alignr: forwards A, B and the immediate I to the
 * alignd512 builtin with a zero vector as the pass-through operand and an
 * all-ones mask. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (I), \
      (__v16si)_mm512_setzero_si512(), (__mmask16)-1); })
2598
/* Masked 32-bit-element alignr: forwards A, B and the immediate imm to the
 * alignd512 builtin with W as the pass-through operand and U as the mask.
 * Every macro argument is parenthesized before it is cast, so an expression
 * argument is cast as a whole rather than only its first operand. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (imm), \
      (__v16si)(__m512i)(W), (__mmask16)(U)); })
Ben Murdoch097c5b22016-05-18 11:27:45 +01002605
/* Zero-masked 32-bit-element alignr: forwards A, B and the immediate imm to
 * the alignd512 builtin with a zero vector as the pass-through operand and U
 * as the mask.  Every macro argument is parenthesized before it is cast, so
 * an expression argument is cast as a whole rather than only its first
 * operand. */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (imm), \
      (__v16si)_mm512_setzero_si512(), (__mmask16)(U)); })
Ben Murdoch097c5b22016-05-18 11:27:45 +01002612/* Vector Extract */
2613
/* Unmasked extract of a 256-bit double vector from A, selected by the
 * immediate I.  The pass-through operand is a zero vector and the mask is
 * all ones.  The zero vector is built with _mm256_setzero_pd() so it already
 * has a double element type, instead of bit-casting an integer zero vector
 * (same bits, no cross-domain cast). */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
  (__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (I), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1); })
2620
/* Unmasked extract of a 128-bit float vector from A, selected by the
 * immediate I.  The pass-through operand is a zero vector and the mask is
 * all ones. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (I), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1); })
2627
2628/* Vector Blend */
2629
2630static __inline __m512d __DEFAULT_FN_ATTRS
2631_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
2632{
2633 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
2634 (__v8df) __W,
2635 (__mmask8) __U);
2636}
2637
2638static __inline __m512 __DEFAULT_FN_ATTRS
2639_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
2640{
2641 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
2642 (__v16sf) __W,
2643 (__mmask16) __U);
2644}
2645
2646static __inline __m512i __DEFAULT_FN_ATTRS
2647_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
2648{
2649 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
2650 (__v8di) __W,
2651 (__mmask8) __U);
2652}
2653
2654static __inline __m512i __DEFAULT_FN_ATTRS
2655_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
2656{
2657 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
2658 (__v16si) __W,
2659 (__mmask16) __U);
2660}
2661
2662/* Compare */
2663
/* Packed floating-point compares producing a mask.  P is the comparison
 * predicate and R the rounding argument, both forwarded to the cmpps512 /
 * cmppd512 builtin.  The unmasked forms pass an all-ones mask, the "mask_"
 * forms pass U.  The non-"round" forms are the "round" forms with
 * _MM_FROUND_CUR_DIRECTION supplied for R. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (P), \
      (__mmask16)-1, (R)); })

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (P), \
      (__mmask16)(U), (R)); })

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (P), \
      (__mmask8)-1, (R)); })

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (P), \
      (__mmask8)(U), (R)); })

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
2695
2696/* Conversion */
2697
2698static __inline __m512i __DEFAULT_FN_ATTRS
2699_mm512_cvttps_epu32(__m512 __A)
2700{
2701 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
2702 (__v16si)
2703 _mm512_setzero_si512 (),
2704 (__mmask16) -1,
2705 _MM_FROUND_CUR_DIRECTION);
2706}
2707
2708#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
2709 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \
2710 (__v16sf)_mm512_setzero_ps(), \
2711 (__mmask16)-1, (R)); })
2712
2713#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
2714 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \
2715 (__v16sf)_mm512_setzero_ps(), \
2716 (__mmask16)-1, (R)); })
2717
2718static __inline __m512d __DEFAULT_FN_ATTRS
2719_mm512_cvtepi32_pd(__m256i __A)
2720{
2721 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
2722 (__v8df)
2723 _mm512_setzero_pd (),
2724 (__mmask8) -1);
2725}
2726
2727static __inline __m512d __DEFAULT_FN_ATTRS
2728_mm512_cvtepu32_pd(__m256i __A)
2729{
2730 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
2731 (__v8df)
2732 _mm512_setzero_pd (),
2733 (__mmask8) -1);
2734}
2735
2736#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
2737 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \
2738 (__v8sf)_mm256_setzero_ps(), \
2739 (__mmask8)-1, (R)); })
2740
/* Unmasked float -> half conversion of A; I is the immediate forwarded as
 * the builtin's second argument.  The pass-through operand is a zero vector.
 * The all-ones mask is cast to __mmask16 explicitly, matching the other
 * 16-lane wrappers in this file instead of relying on an implicit
 * conversion of a bare -1. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask( \
      (__v16sf)(A), (I), (__v16hi)_mm256_setzero_si256(), \
      (__mmask16)-1); })
2745
2746static __inline __m512 __DEFAULT_FN_ATTRS
2747_mm512_cvtph_ps(__m256i __A)
2748{
2749 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
2750 (__v16sf)
2751 _mm512_setzero_ps (),
2752 (__mmask16) -1,
2753 _MM_FROUND_CUR_DIRECTION);
2754}
2755
2756static __inline __m512i __DEFAULT_FN_ATTRS
2757_mm512_cvttps_epi32(__m512 __a)
2758{
2759 return (__m512i)
2760 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
2761 (__v16si) _mm512_setzero_si512 (),
2762 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
2763}
2764
2765static __inline __m256i __DEFAULT_FN_ATTRS
2766_mm512_cvttpd_epi32(__m512d __a)
2767{
2768 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
2769 (__v8si)_mm256_setzero_si256(),
2770 (__mmask8) -1,
2771 _MM_FROUND_CUR_DIRECTION);
2772}
2773
2774#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
2775 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \
2776 (__v8si)_mm256_setzero_si256(), \
2777 (__mmask8)-1, (R)); })
2778
2779#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
2780 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \
2781 (__v16si)_mm512_setzero_si512(), \
2782 (__mmask16)-1, (R)); })
2783
2784#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
2785 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \
2786 (__v16si)_mm512_setzero_si512(), \
2787 (__mmask16)-1, (R)); })
2788
2789#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
2790 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \
2791 (__v8si)_mm256_setzero_si256(), \
2792 (__mmask8)-1, (R)); })
2793
2794#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
2795 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \
2796 (__v16si)_mm512_setzero_si512(), \
2797 (__mmask16)-1, (R)); })
2798
2799#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
2800 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \
2801 (__v8si)_mm256_setzero_si256(), \
2802 (__mmask8) -1, (R)); })
2803
2804/* Unpack and Interleave */
2805static __inline __m512d __DEFAULT_FN_ATTRS
2806_mm512_unpackhi_pd(__m512d __a, __m512d __b)
2807{
2808 return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
2809}
2810
2811static __inline __m512d __DEFAULT_FN_ATTRS
2812_mm512_unpacklo_pd(__m512d __a, __m512d __b)
2813{
2814 return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
2815}
2816
2817static __inline __m512 __DEFAULT_FN_ATTRS
2818_mm512_unpackhi_ps(__m512 __a, __m512 __b)
2819{
2820 return __builtin_shufflevector(__a, __b,
2821 2, 18, 3, 19,
2822 2+4, 18+4, 3+4, 19+4,
2823 2+8, 18+8, 3+8, 19+8,
2824 2+12, 18+12, 3+12, 19+12);
2825}
2826
2827static __inline __m512 __DEFAULT_FN_ATTRS
2828_mm512_unpacklo_ps(__m512 __a, __m512 __b)
2829{
2830 return __builtin_shufflevector(__a, __b,
2831 0, 16, 1, 17,
2832 0+4, 16+4, 1+4, 17+4,
2833 0+8, 16+8, 1+8, 17+8,
2834 0+12, 16+12, 1+12, 17+12);
2835}
2836
2837/* Bit Test */
2838
2839static __inline __mmask16 __DEFAULT_FN_ATTRS
2840_mm512_test_epi32_mask(__m512i __A, __m512i __B)
2841{
2842 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
2843 (__v16si) __B,
2844 (__mmask16) -1);
2845}
2846
2847static __inline __mmask8 __DEFAULT_FN_ATTRS
2848_mm512_test_epi64_mask(__m512i __A, __m512i __B)
2849{
2850 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
2851 (__v8di) __B,
2852 (__mmask8) -1);
2853}
2854
2855/* SIMD load ops */
2856
2857static __inline __m512i __DEFAULT_FN_ATTRS
2858_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
2859{
2860 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
2861 (__v16si)
2862 _mm512_setzero_si512 (),
2863 (__mmask16) __U);
2864}
2865
2866static __inline __m512i __DEFAULT_FN_ATTRS
2867_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
2868{
2869 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
2870 (__v8di)
2871 _mm512_setzero_si512 (),
2872 (__mmask8) __U);
2873}
2874
2875static __inline __m512 __DEFAULT_FN_ATTRS
2876_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
2877{
2878 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
2879 (__v16sf)
2880 _mm512_setzero_ps (),
2881 (__mmask16) __U);
2882}
2883
2884static __inline __m512d __DEFAULT_FN_ATTRS
2885_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
2886{
2887 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
2888 (__v8df)
2889 _mm512_setzero_pd (),
2890 (__mmask8) __U);
2891}
2892
2893static __inline __m512 __DEFAULT_FN_ATTRS
2894_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
2895{
2896 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
2897 (__v16sf)
2898 _mm512_setzero_ps (),
2899 (__mmask16) __U);
2900}
2901
2902static __inline __m512d __DEFAULT_FN_ATTRS
2903_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
2904{
2905 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
2906 (__v8df)
2907 _mm512_setzero_pd (),
2908 (__mmask8) __U);
2909}
2910
2911static __inline __m512d __DEFAULT_FN_ATTRS
2912_mm512_loadu_pd(double const *__p)
2913{
2914 struct __loadu_pd {
2915 __m512d __v;
2916 } __attribute__((__packed__, __may_alias__));
2917 return ((struct __loadu_pd*)__p)->__v;
2918}
2919
2920static __inline __m512 __DEFAULT_FN_ATTRS
2921_mm512_loadu_ps(float const *__p)
2922{
2923 struct __loadu_ps {
2924 __m512 __v;
2925 } __attribute__((__packed__, __may_alias__));
2926 return ((struct __loadu_ps*)__p)->__v;
2927}
2928
2929static __inline __m512 __DEFAULT_FN_ATTRS
2930_mm512_load_ps(float const *__p)
2931{
2932 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
2933 (__v16sf)
2934 _mm512_setzero_ps (),
2935 (__mmask16) -1);
2936}
2937
2938static __inline __m512d __DEFAULT_FN_ATTRS
2939_mm512_load_pd(double const *__p)
2940{
2941 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
2942 (__v8df)
2943 _mm512_setzero_pd (),
2944 (__mmask8) -1);
2945}
2946
2947/* SIMD store ops */
2948
2949static __inline void __DEFAULT_FN_ATTRS
2950_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
2951{
2952 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
2953 (__mmask8) __U);
2954}
2955
2956static __inline void __DEFAULT_FN_ATTRS
2957_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
2958{
2959 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
2960 (__mmask16) __U);
2961}
2962
2963static __inline void __DEFAULT_FN_ATTRS
2964_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
2965{
2966 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2967}
2968
2969static __inline void __DEFAULT_FN_ATTRS
2970_mm512_storeu_pd(void *__P, __m512d __A)
2971{
2972 __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
2973}
2974
2975static __inline void __DEFAULT_FN_ATTRS
2976_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
2977{
2978 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
2979 (__mmask16) __U);
2980}
2981
2982static __inline void __DEFAULT_FN_ATTRS
2983_mm512_storeu_ps(void *__P, __m512 __A)
2984{
2985 __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
2986}
2987
2988static __inline void __DEFAULT_FN_ATTRS
2989_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
2990{
2991 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2992}
2993
2994static __inline void __DEFAULT_FN_ATTRS
2995_mm512_store_pd(void *__P, __m512d __A)
2996{
2997 *(__m512d*)__P = __A;
2998}
2999
3000static __inline void __DEFAULT_FN_ATTRS
3001_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
3002{
3003 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
3004 (__mmask16) __U);
3005}
3006
3007static __inline void __DEFAULT_FN_ATTRS
3008_mm512_store_ps(void *__P, __m512 __A)
3009{
3010 *(__m512*)__P = __A;
3011}
3012
3013/* Mask ops */
3014
3015static __inline __mmask16 __DEFAULT_FN_ATTRS
3016_mm512_knot(__mmask16 __M)
3017{
3018 return __builtin_ia32_knothi(__M);
3019}
3020
3021/* Integer compare */
3022
3023static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3024_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
3025 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
3026 (__mmask16)-1);
3027}
3028
3029static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3030_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3031 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
3032 __u);
3033}
3034
3035static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3036_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
3037 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
3038 (__mmask16)-1);
3039}
3040
3041static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3042_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3043 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
3044 __u);
3045}
3046
3047static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3048_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3049 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
3050 __u);
3051}
3052
3053static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3054_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
3055 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
3056 (__mmask8)-1);
3057}
3058
3059static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3060_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
3061 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
3062 (__mmask8)-1);
3063}
3064
3065static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3066_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3067 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
3068 __u);
3069}
3070
3071static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3072_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
3073 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
3074 (__mmask16)-1);
3075}
3076
3077static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3078_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3079 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
3080 __u);
3081}
3082
3083static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3084_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
3085 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
3086 (__mmask16)-1);
3087}
3088
3089static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3090_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3091 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
3092 __u);
3093}
3094
3095static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3096_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
3097 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
3098 (__mmask8)-1);
3099}
3100
3101static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3102_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3103 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
3104 __u);
3105}
3106
3107static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3108_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
3109 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
3110 (__mmask8)-1);
3111}
3112
3113static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3114_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3115 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
3116 __u);
3117}
3118
3119static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3120_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
3121 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
3122 (__mmask16)-1);
3123}
3124
3125static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3126_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3127 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
3128 __u);
3129}
3130
3131static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3132_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
3133 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
3134 (__mmask16)-1);
3135}
3136
3137static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3138_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3139 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
3140 __u);
3141}
3142
3143static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3144_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3145 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
3146 __u);
3147}
3148
3149static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3150_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
3151 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
3152 (__mmask8)-1);
3153}
3154
3155static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3156_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
3157 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
3158 (__mmask8)-1);
3159}
3160
3161static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3162_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3163 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
3164 __u);
3165}
3166
3167static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3168_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
3169 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
3170 (__mmask16)-1);
3171}
3172
3173static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3174_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3175 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
3176 __u);
3177}
3178
3179static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3180_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
3181 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
3182 (__mmask16)-1);
3183}
3184
3185static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3186_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3187 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
3188 __u);
3189}
3190
3191static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3192_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
3193 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
3194 (__mmask8)-1);
3195}
3196
3197static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3198_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3199 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
3200 __u);
3201}
3202
3203static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3204_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
3205 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
3206 (__mmask8)-1);
3207}
3208
3209static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3210_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3211 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
3212 __u);
3213}
3214
3215static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3216_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
3217 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
3218 (__mmask16)-1);
3219}
3220
3221static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3222_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3223 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
3224 __u);
3225}
3226
3227static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3228_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
3229 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
3230 (__mmask16)-1);
3231}
3232
3233static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3234_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3235 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
3236 __u);
3237}
3238
3239static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3240_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
3241 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
3242 (__mmask8)-1);
3243}
3244
3245static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3246_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3247 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
3248 __u);
3249}
3250
3251static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3252_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
3253 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
3254 (__mmask8)-1);
3255}
3256
3257static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3258_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3259 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
3260 __u);
3261}
3262
3263static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3264_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
3265 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3266 (__mmask16)-1);
3267}
3268
3269static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3270_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3271 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3272 __u);
3273}
3274
3275static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3276_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
3277 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3278 (__mmask16)-1);
3279}
3280
3281static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3282_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3283 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3284 __u);
3285}
3286
3287static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3288_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
3289 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3290 (__mmask8)-1);
3291}
3292
3293static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3294_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3295 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3296 __u);
3297}
3298
3299static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3300_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
3301 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3302 (__mmask8)-1);
3303}
3304
3305static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3306_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3307 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3308 __u);
3309}
3310
3311static __inline__ __m512i __DEFAULT_FN_ATTRS
3312_mm512_cvtepi8_epi32 (__m128i __A)
3313{
3314 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
3315 (__v16si)
3316 _mm512_setzero_si512 (),
3317 (__mmask16) -1);
3318}
3319
3320static __inline__ __m512i __DEFAULT_FN_ATTRS
3321_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
3322{
3323 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
3324 (__v16si) __W,
3325 (__mmask16) __U);
3326}
3327
3328static __inline__ __m512i __DEFAULT_FN_ATTRS
3329_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
3330{
3331 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
3332 (__v16si)
3333 _mm512_setzero_si512 (),
3334 (__mmask16) __U);
3335}
3336
3337static __inline__ __m512i __DEFAULT_FN_ATTRS
3338_mm512_cvtepi8_epi64 (__m128i __A)
3339{
3340 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
3341 (__v8di)
3342 _mm512_setzero_si512 (),
3343 (__mmask8) -1);
3344}
3345
3346static __inline__ __m512i __DEFAULT_FN_ATTRS
3347_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
3348{
3349 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
3350 (__v8di) __W,
3351 (__mmask8) __U);
3352}
3353
3354static __inline__ __m512i __DEFAULT_FN_ATTRS
3355_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
3356{
3357 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
3358 (__v8di)
3359 _mm512_setzero_si512 (),
3360 (__mmask8) __U);
3361}
3362
3363static __inline__ __m512i __DEFAULT_FN_ATTRS
3364_mm512_cvtepi32_epi64 (__m256i __X)
3365{
3366 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
3367 (__v8di)
3368 _mm512_setzero_si512 (),
3369 (__mmask8) -1);
3370}
3371
3372static __inline__ __m512i __DEFAULT_FN_ATTRS
3373_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
3374{
3375 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
3376 (__v8di) __W,
3377 (__mmask8) __U);
3378}
3379
3380static __inline__ __m512i __DEFAULT_FN_ATTRS
3381_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
3382{
3383 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
3384 (__v8di)
3385 _mm512_setzero_si512 (),
3386 (__mmask8) __U);
3387}
3388
3389static __inline__ __m512i __DEFAULT_FN_ATTRS
3390_mm512_cvtepi16_epi32 (__m256i __A)
3391{
3392 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
3393 (__v16si)
3394 _mm512_setzero_si512 (),
3395 (__mmask16) -1);
3396}
3397
3398static __inline__ __m512i __DEFAULT_FN_ATTRS
3399_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
3400{
3401 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
3402 (__v16si) __W,
3403 (__mmask16) __U);
3404}
3405
3406static __inline__ __m512i __DEFAULT_FN_ATTRS
3407_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
3408{
3409 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
3410 (__v16si)
3411 _mm512_setzero_si512 (),
3412 (__mmask16) __U);
3413}
3414
3415static __inline__ __m512i __DEFAULT_FN_ATTRS
3416_mm512_cvtepi16_epi64 (__m128i __A)
3417{
3418 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
3419 (__v8di)
3420 _mm512_setzero_si512 (),
3421 (__mmask8) -1);
3422}
3423
3424static __inline__ __m512i __DEFAULT_FN_ATTRS
3425_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
3426{
3427 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
3428 (__v8di) __W,
3429 (__mmask8) __U);
3430}
3431
3432static __inline__ __m512i __DEFAULT_FN_ATTRS
3433_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
3434{
3435 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
3436 (__v8di)
3437 _mm512_setzero_si512 (),
3438 (__mmask8) __U);
3439}
3440
3441static __inline__ __m512i __DEFAULT_FN_ATTRS
3442_mm512_cvtepu8_epi32 (__m128i __A)
3443{
3444 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
3445 (__v16si)
3446 _mm512_setzero_si512 (),
3447 (__mmask16) -1);
3448}
3449
3450static __inline__ __m512i __DEFAULT_FN_ATTRS
3451_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
3452{
3453 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
3454 (__v16si) __W,
3455 (__mmask16) __U);
3456}
3457
3458static __inline__ __m512i __DEFAULT_FN_ATTRS
3459_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
3460{
3461 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
3462 (__v16si)
3463 _mm512_setzero_si512 (),
3464 (__mmask16) __U);
3465}
3466
3467static __inline__ __m512i __DEFAULT_FN_ATTRS
3468_mm512_cvtepu8_epi64 (__m128i __A)
3469{
3470 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
3471 (__v8di)
3472 _mm512_setzero_si512 (),
3473 (__mmask8) -1);
3474}
3475
3476static __inline__ __m512i __DEFAULT_FN_ATTRS
3477_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
3478{
3479 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
3480 (__v8di) __W,
3481 (__mmask8) __U);
3482}
3483
3484static __inline__ __m512i __DEFAULT_FN_ATTRS
3485_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3486{
3487 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
3488 (__v8di)
3489 _mm512_setzero_si512 (),
3490 (__mmask8) __U);
3491}
3492
3493static __inline__ __m512i __DEFAULT_FN_ATTRS
3494_mm512_cvtepu32_epi64 (__m256i __X)
3495{
3496 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
3497 (__v8di)
3498 _mm512_setzero_si512 (),
3499 (__mmask8) -1);
3500}
3501
3502static __inline__ __m512i __DEFAULT_FN_ATTRS
3503_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
3504{
3505 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
3506 (__v8di) __W,
3507 (__mmask8) __U);
3508}
3509
3510static __inline__ __m512i __DEFAULT_FN_ATTRS
3511_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
3512{
3513 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
3514 (__v8di)
3515 _mm512_setzero_si512 (),
3516 (__mmask8) __U);
3517}
3518
3519static __inline__ __m512i __DEFAULT_FN_ATTRS
3520_mm512_cvtepu16_epi32 (__m256i __A)
3521{
3522 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
3523 (__v16si)
3524 _mm512_setzero_si512 (),
3525 (__mmask16) -1);
3526}
3527
3528static __inline__ __m512i __DEFAULT_FN_ATTRS
3529_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
3530{
3531 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
3532 (__v16si) __W,
3533 (__mmask16) __U);
3534}
3535
3536static __inline__ __m512i __DEFAULT_FN_ATTRS
3537_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
3538{
3539 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
3540 (__v16si)
3541 _mm512_setzero_si512 (),
3542 (__mmask16) __U);
3543}
3544
3545static __inline__ __m512i __DEFAULT_FN_ATTRS
3546_mm512_cvtepu16_epi64 (__m128i __A)
3547{
3548 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
3549 (__v8di)
3550 _mm512_setzero_si512 (),
3551 (__mmask8) -1);
3552}
3553
3554static __inline__ __m512i __DEFAULT_FN_ATTRS
3555_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
3556{
3557 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
3558 (__v8di) __W,
3559 (__mmask8) __U);
3560}
3561
3562static __inline__ __m512i __DEFAULT_FN_ATTRS
3563_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3564{
3565 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
3566 (__v8di)
3567 _mm512_setzero_si512 (),
3568 (__mmask8) __U);
3569}
3570
3571static __inline__ __m512i __DEFAULT_FN_ATTRS
3572_mm512_rorv_epi32 (__m512i __A, __m512i __B)
3573{
3574 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
3575 (__v16si) __B,
3576 (__v16si)
3577 _mm512_setzero_si512 (),
3578 (__mmask16) -1);
3579}
3580
3581static __inline__ __m512i __DEFAULT_FN_ATTRS
3582_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
3583{
3584 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
3585 (__v16si) __B,
3586 (__v16si) __W,
3587 (__mmask16) __U);
3588}
3589
3590static __inline__ __m512i __DEFAULT_FN_ATTRS
3591_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
3592{
3593 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
3594 (__v16si) __B,
3595 (__v16si)
3596 _mm512_setzero_si512 (),
3597 (__mmask16) __U);
3598}
3599
3600static __inline__ __m512i __DEFAULT_FN_ATTRS
3601_mm512_rorv_epi64 (__m512i __A, __m512i __B)
3602{
3603 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
3604 (__v8di) __B,
3605 (__v8di)
3606 _mm512_setzero_si512 (),
3607 (__mmask8) -1);
3608}
3609
3610static __inline__ __m512i __DEFAULT_FN_ATTRS
3611_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
3612{
3613 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
3614 (__v8di) __B,
3615 (__v8di) __W,
3616 (__mmask8) __U);
3617}
3618
3619static __inline__ __m512i __DEFAULT_FN_ATTRS
3620_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
3621{
3622 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
3623 (__v8di) __B,
3624 (__v8di)
3625 _mm512_setzero_si512 (),
3626 (__mmask8) __U);
3627}
3628
3629
3630
/* Signed 32-bit compare of a and b with constant predicate p; all 16 lanes
 * enabled. */
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask( \
      (__v16si)(__m512i)(a), \
      (__v16si)(__m512i)(b), \
      (p), \
      (__mmask16)-1); })
3635
/* Unsigned 32-bit compare of a and b with constant predicate p; all 16 lanes
 * enabled. */
#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask( \
      (__v16si)(__m512i)(a), \
      (__v16si)(__m512i)(b), \
      (p), \
      (__mmask16)-1); })
3640
/* Signed 64-bit compare of a and b with constant predicate p; all 8 lanes
 * enabled. */
#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask( \
      (__v8di)(__m512i)(a), \
      (__v8di)(__m512i)(b), \
      (p), \
      (__mmask8)-1); })
3645
/* Unsigned 64-bit compare of a and b with constant predicate p; all 8 lanes
 * enabled. */
#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask( \
      (__v8di)(__m512i)(a), \
      (__v8di)(__m512i)(b), \
      (p), \
      (__mmask8)-1); })
3650
/* Signed 32-bit compare of a and b with constant predicate p; m is forwarded
 * as the lane mask. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask( \
      (__v16si)(__m512i)(a), \
      (__v16si)(__m512i)(b), \
      (p), \
      (__mmask16)(m)); })
3655
/* Unsigned 32-bit compare of a and b with constant predicate p; m is
 * forwarded as the lane mask. */
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask( \
      (__v16si)(__m512i)(a), \
      (__v16si)(__m512i)(b), \
      (p), \
      (__mmask16)(m)); })
3660
/* Signed 64-bit compare of a and b with constant predicate p; m is forwarded
 * as the lane mask. */
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask( \
      (__v8di)(__m512i)(a), \
      (__v8di)(__m512i)(b), \
      (p), \
      (__mmask8)(m)); })
3665
/* Unsigned 64-bit compare of a and b with constant predicate p; m is
 * forwarded as the lane mask. */
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask( \
      (__v8di)(__m512i)(a), \
      (__v8di)(__m512i)(b), \
      (p), \
      (__mmask8)(m)); })
3670
/* Rotates each 32-bit lane of a left by the constant b (unmasked; zero
 * pass-through). */
#define _mm512_rol_epi32(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })
3676
/* Left rotate of 32-bit lanes by the constant b; W is the pass-through
 * operand and U the lane mask. */
#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (b), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })
3681
/* Left rotate of 32-bit lanes by the constant b; a zero vector is the
 * pass-through operand and U the lane mask. */
#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })
3687
/* Rotates each 64-bit lane of a left by the constant b (unmasked; zero
 * pass-through). */
#define _mm512_rol_epi64(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })
3693
/* Left rotate of 64-bit lanes by the constant b; W is the pass-through
 * operand and U the lane mask. */
#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (b), \
                                        (__v8di)(__m512i)(W), \
                                        (__mmask8)(U)); })
3698
/* Left rotate of 64-bit lanes by the constant b; a zero vector is the
 * pass-through operand and U the lane mask. */
#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
3704static __inline__ __m512i __DEFAULT_FN_ATTRS
3705_mm512_rolv_epi32 (__m512i __A, __m512i __B)
3706{
3707 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
3708 (__v16si) __B,
3709 (__v16si)
3710 _mm512_setzero_si512 (),
3711 (__mmask16) -1);
3712}
3713
3714static __inline__ __m512i __DEFAULT_FN_ATTRS
3715_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
3716{
3717 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
3718 (__v16si) __B,
3719 (__v16si) __W,
3720 (__mmask16) __U);
3721}
3722
3723static __inline__ __m512i __DEFAULT_FN_ATTRS
3724_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
3725{
3726 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
3727 (__v16si) __B,
3728 (__v16si)
3729 _mm512_setzero_si512 (),
3730 (__mmask16) __U);
3731}
3732
3733static __inline__ __m512i __DEFAULT_FN_ATTRS
3734_mm512_rolv_epi64 (__m512i __A, __m512i __B)
3735{
3736 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
3737 (__v8di) __B,
3738 (__v8di)
3739 _mm512_setzero_si512 (),
3740 (__mmask8) -1);
3741}
3742
3743static __inline__ __m512i __DEFAULT_FN_ATTRS
3744_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
3745{
3746 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
3747 (__v8di) __B,
3748 (__v8di) __W,
3749 (__mmask8) __U);
3750}
3751
3752static __inline__ __m512i __DEFAULT_FN_ATTRS
3753_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
3754{
3755 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
3756 (__v8di) __B,
3757 (__v8di)
3758 _mm512_setzero_si512 (),
3759 (__mmask8) __U);
3760}
3761
/* Rotates each 32-bit lane of __A right by the constant __B (unmasked; zero
 * pass-through). The result is cast to __m512i so the macro has the same type
 * as the _mm512_rol_* macros instead of the builtin's own vector type. */
#define _mm512_ror_epi32(__A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(__A), (__B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })
3768
/* Right rotate of 32-bit lanes by the constant __B; __W is the pass-through
 * operand and __U the lane mask. The result is cast to __m512i so the macro
 * has the same type as the _mm512_rol_* macros. */
#define _mm512_mask_ror_epi32(__W, __U, __A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(__A), (__B), \
                                        (__v16si)(__m512i)(__W), \
                                        (__mmask16)(__U)); })
3774
/* Right rotate of 32-bit lanes by the constant __B; a zero vector is the
 * pass-through operand and __U the lane mask. The result is cast to __m512i
 * so the macro has the same type as the _mm512_rol_* macros. */
#define _mm512_maskz_ror_epi32(__U, __A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(__A), (__B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(__U)); })
3781
/* Rotates each 64-bit lane of __A right by the constant __B (unmasked; zero
 * pass-through). The result is cast to __m512i so the macro has the same type
 * as the _mm512_rol_* macros instead of the builtin's own vector type. */
#define _mm512_ror_epi64(__A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(__A), (__B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })
3788
/* Right rotate of 64-bit lanes by the constant __B; __W is the pass-through
 * operand and __U the lane mask. The result is cast to __m512i so the macro
 * has the same type as the _mm512_rol_* macros. */
#define _mm512_mask_ror_epi64(__W, __U, __A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(__A), (__B), \
                                        (__v8di)(__m512i)(__W), \
                                        (__mmask8)(__U)); })
3794
/* Right rotate of 64-bit lanes by the constant __B; a zero vector is the
 * pass-through operand and __U the lane mask. The result is cast to __m512i
 * so the macro has the same type as the _mm512_rol_* macros. */
#define _mm512_maskz_ror_epi64(__U, __A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(__A), (__B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(__U)); })
3801
/* Shifts each 32-bit lane of __A left by the constant __B (unmasked; zero
 * pass-through). The result is cast to __m512i so the macro has the same type
 * as the _mm512_rol_* macros instead of the builtin's own vector type. */
#define _mm512_slli_epi32(__A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(__A), (__B), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1); })
3808
/* Left shift of 32-bit lanes by the constant __B; __W is the pass-through
 * operand and __U the lane mask. The result is cast to __m512i so the macro
 * has the same type as the _mm512_rol_* macros. */
#define _mm512_mask_slli_epi32(__W, __U, __A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(__A), (__B), \
                                         (__v16si)(__m512i)(__W), \
                                         (__mmask16)(__U)); })
3814
/* Left shift of 32-bit lanes by the constant __B; a zero vector is the
 * pass-through operand and __U the lane mask. The result is cast to __m512i
 * so the macro has the same type as the _mm512_rol_* macros. */
#define _mm512_maskz_slli_epi32(__U, __A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(__A), (__B), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)(__U)); })
3821
/* Shifts each 64-bit lane of __A left by the constant __B (unmasked; zero
 * pass-through). The result is cast to __m512i so the macro has the same type
 * as the _mm512_rol_* macros instead of the builtin's own vector type. */
#define _mm512_slli_epi64(__A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(__A), (__B), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1); })
3828
/* Left shift of 64-bit lanes by the constant __B; __W is the pass-through
 * operand and __U the lane mask. The result is cast to __m512i so the macro
 * has the same type as the _mm512_rol_* macros. */
#define _mm512_mask_slli_epi64(__W, __U, __A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(__A), (__B), \
                                         (__v8di)(__m512i)(__W), \
                                         (__mmask8)(__U)); })
3834
/* Left shift of 64-bit lanes by the constant __B; a zero vector is the
 * pass-through operand and __U the lane mask. The result is cast to __m512i
 * so the macro has the same type as the _mm512_rol_* macros. */
#define _mm512_maskz_slli_epi64(__U, __A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(__A), (__B), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)(__U)); })
3841
3842
3843
/* Logical right shift of each 32-bit lane of __A by the constant __B
 * (unmasked; zero pass-through). The result is cast to __m512i so the macro
 * has the same type as the _mm512_rol_* macros instead of the builtin's own
 * vector type. */
#define _mm512_srli_epi32(__A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(__A), (__B), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1); })
3850
/* Logical right shift of 32-bit lanes by the constant __B; __W is the
 * pass-through operand and __U the lane mask. The result is cast to __m512i
 * so the macro has the same type as the _mm512_rol_* macros. */
#define _mm512_mask_srli_epi32(__W, __U, __A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(__A), (__B), \
                                         (__v16si)(__m512i)(__W), \
                                         (__mmask16)(__U)); })
3856
/* _mm512_maskz_srli_epi32: psrldi512 builtin on __A with immediate count __B,
   a zero third operand and __U as the 16-bit mask. */
#define _mm512_maskz_srli_epi32(__U, __A, __B) __extension__ ({ \
  __builtin_ia32_psrldi512_mask((__v16si)(__A), (__B), \
                                (__v16si)_mm512_setzero_si512(), \
                                (__mmask16)(__U)); \
})
3863
/* _mm512_srli_epi64: psrlqi512 builtin on __A with immediate count __B,
   a zero third operand and an all-ones 8-bit mask. */
#define _mm512_srli_epi64(__A, __B) __extension__ ({ \
  __builtin_ia32_psrlqi512_mask((__v8di)(__A), (__B), \
                                (__v8di)_mm512_setzero_si512(), \
                                (__mmask8)-1); \
})
3870
/* _mm512_mask_srli_epi64: psrlqi512 builtin on __A with immediate count __B;
   __W is the third operand and __U the 8-bit mask. */
#define _mm512_mask_srli_epi64(__W, __U, __A, __B) __extension__ ({ \
  __builtin_ia32_psrlqi512_mask((__v8di)(__A), (__B), \
                                (__v8di)(__W), (__mmask8)(__U)); \
})
3876
/* _mm512_maskz_srli_epi64: psrlqi512 builtin on __A with immediate count __B,
   a zero third operand and __U as the 8-bit mask. */
#define _mm512_maskz_srli_epi64(__U, __A, __B) __extension__ ({ \
  __builtin_ia32_psrlqi512_mask((__v8di)(__A), (__B), \
                                (__v8di)_mm512_setzero_si512(), \
                                (__mmask8)(__U)); \
})
3883
3884static __inline__ __m512i __DEFAULT_FN_ATTRS
3885_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
3886{
3887 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
3888 (__v16si) __W,
3889 (__mmask16) __U);
3890}
3891
3892static __inline__ __m512i __DEFAULT_FN_ATTRS
3893_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
3894{
3895 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
3896 (__v16si)
3897 _mm512_setzero_si512 (),
3898 (__mmask16) __U);
3899}
3900
3901static __inline__ void __DEFAULT_FN_ATTRS
3902_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
3903{
3904 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
3905 (__mmask16) __U);
3906}
3907
3908static __inline__ __m512i __DEFAULT_FN_ATTRS
3909_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3910{
3911 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
3912 (__v8di) __W,
3913 (__mmask8) __U);
3914}
3915
3916static __inline__ __m512i __DEFAULT_FN_ATTRS
3917_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
3918{
3919 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
3920 (__v8di)
3921 _mm512_setzero_si512 (),
3922 (__mmask8) __U);
3923}
3924
3925static __inline__ __m512i __DEFAULT_FN_ATTRS
3926_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
3927{
3928 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
3929 (__v8di) __W,
3930 (__mmask8) __U);
3931}
3932
3933static __inline__ __m512i __DEFAULT_FN_ATTRS
3934_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
3935{
3936 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
3937 (__v8di)
3938 _mm512_setzero_si512 (),
3939 (__mmask8) __U);
3940}
3941
3942static __inline__ void __DEFAULT_FN_ATTRS
3943_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
3944{
3945 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
3946 (__mmask8) __U);
3947}
3948
3949
3950
3951static __inline__ __m512d __DEFAULT_FN_ATTRS
3952_mm512_movedup_pd (__m512d __A)
3953{
3954 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
3955 (__v8df)
3956 _mm512_undefined_pd (),
3957 (__mmask8) -1);
3958}
3959
3960static __inline__ __m512d __DEFAULT_FN_ATTRS
3961_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
3962{
3963 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
3964 (__v8df) __W,
3965 (__mmask8) __U);
3966}
3967
3968static __inline__ __m512d __DEFAULT_FN_ATTRS
3969_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
3970{
3971 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
3972 (__v8df)
3973 _mm512_setzero_pd (),
3974 (__mmask8) __U);
3975}
3976
/* fixupimmpd512_mask builtin on (__A, __B, __C) with immediate __imm,
   an all-ones 8-bit mask and rounding argument __R. */
#define _mm512_fixupimm_round_pd(__A, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_mask((__v8df)(__A), (__v8df)(__B), \
                                    (__v8di)(__C), (__imm), \
                                    (__mmask8)-1, (__R)); \
})
3984
/* fixupimmpd512_mask builtin on (__A, __B, __C) with immediate __imm,
   8-bit mask __U and rounding argument __R. */
#define _mm512_mask_fixupimm_round_pd(__A, __U, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_mask((__v8df)(__A), (__v8df)(__B), \
                                    (__v8di)(__C), (__imm), \
                                    (__mmask8)(__U), (__R)); \
})
3992
/* fixupimmpd512_mask builtin on (__A, __B, __C) with immediate __imm,
   an all-ones 8-bit mask and _MM_FROUND_CUR_DIRECTION. */
#define _mm512_fixupimm_pd(__A, __B, __C, __imm) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_mask((__v8df)(__A), (__v8df)(__B), \
                                    (__v8di)(__C), (__imm), \
                                    (__mmask8)-1, \
                                    _MM_FROUND_CUR_DIRECTION); \
})
4001
/* fixupimmpd512_mask builtin on (__A, __B, __C) with immediate __imm,
   8-bit mask __U and _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_fixupimm_pd(__A, __U, __B, __C, __imm) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_mask((__v8df)(__A), (__v8df)(__B), \
                                    (__v8di)(__C), (__imm), \
                                    (__mmask8)(__U), \
                                    _MM_FROUND_CUR_DIRECTION); \
})
4010
/* fixupimmpd512_maskz builtin on (__A, __B, __C) with immediate __imm,
   8-bit mask __U and rounding argument __R. */
#define _mm512_maskz_fixupimm_round_pd(__U, __A, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_maskz((__v8df)(__A), (__v8df)(__B), \
                                     (__v8di)(__C), (__imm), \
                                     (__mmask8)(__U), (__R)); \
})
4018
/* fixupimmpd512_maskz builtin on (__A, __B, __C) with immediate __imm,
   8-bit mask __U and _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_fixupimm_pd(__U, __A, __B, __C, __imm) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_maskz((__v8df)(__A), (__v8df)(__B), \
                                     (__v8di)(__C), (__imm), \
                                     (__mmask8)(__U), \
                                     _MM_FROUND_CUR_DIRECTION); \
})
4027
/* fixupimmps512_mask builtin on (__A, __B, __C) with immediate __imm,
   an all-ones 16-bit mask and rounding argument __R. */
#define _mm512_fixupimm_round_ps(__A, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmps512_mask((__v16sf)(__A), (__v16sf)(__B), \
                                    (__v16si)(__C), (__imm), \
                                    (__mmask16)-1, (__R)); \
})
4035
/* fixupimmps512_mask builtin on (__A, __B, __C) with immediate __imm,
   16-bit mask __U and rounding argument __R. */
#define _mm512_mask_fixupimm_round_ps(__A, __U, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmps512_mask((__v16sf)(__A), (__v16sf)(__B), \
                                    (__v16si)(__C), (__imm), \
                                    (__mmask16)(__U), (__R)); \
})
4043
/* fixupimmps512_mask builtin on (__A, __B, __C) with immediate __imm,
   an all-ones 16-bit mask and _MM_FROUND_CUR_DIRECTION. */
#define _mm512_fixupimm_ps(__A, __B, __C, __imm) __extension__ ({ \
  __builtin_ia32_fixupimmps512_mask((__v16sf)(__A), (__v16sf)(__B), \
                                    (__v16si)(__C), (__imm), \
                                    (__mmask16)-1, \
                                    _MM_FROUND_CUR_DIRECTION); \
})
4052
/* fixupimmps512_mask builtin on (__A, __B, __C) with immediate __imm,
   16-bit mask __U and _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_fixupimm_ps(__A, __U, __B, __C, __imm) __extension__ ({ \
  __builtin_ia32_fixupimmps512_mask((__v16sf)(__A), (__v16sf)(__B), \
                                    (__v16si)(__C), (__imm), \
                                    (__mmask16)(__U), \
                                    _MM_FROUND_CUR_DIRECTION); \
})
4061
/* fixupimmps512_maskz builtin on (__A, __B, __C) with immediate __imm,
   16-bit mask __U and rounding argument __R. */
#define _mm512_maskz_fixupimm_round_ps(__U, __A, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmps512_maskz((__v16sf)(__A), (__v16sf)(__B), \
                                     (__v16si)(__C), (__imm), \
                                     (__mmask16)(__U), (__R)); \
})
4069
/* fixupimmps512_maskz builtin on (__A, __B, __C) with immediate __imm,
   16-bit mask __U and _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_fixupimm_ps(__U, __A, __B, __C, __imm) __extension__ ({ \
  __builtin_ia32_fixupimmps512_maskz((__v16sf)(__A), (__v16sf)(__B), \
                                     (__v16si)(__C), (__imm), \
                                     (__mmask16)(__U), \
                                     _MM_FROUND_CUR_DIRECTION); \
})
4078
/* fixupimmsd_mask builtin on (__A, __B, __C) with immediate __imm,
   an all-ones 8-bit mask and rounding argument __R. */
#define _mm_fixupimm_round_sd(__A, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask((__v2df)(__A), (__v2df)(__B), \
                                 (__v2di)(__C), (__imm), \
                                 (__mmask8)-1, (__R)); \
})
4085
/* fixupimmsd_mask builtin on (__A, __B, __C) with immediate __imm,
   8-bit mask __U and rounding argument __R. */
#define _mm_mask_fixupimm_round_sd(__A, __U, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask((__v2df)(__A), (__v2df)(__B), \
                                 (__v2di)(__C), (__imm), \
                                 (__mmask8)(__U), (__R)); \
})
4092
/* fixupimmsd_mask builtin on (__A, __B, __C) with immediate __imm,
   an all-ones 8-bit mask and _MM_FROUND_CUR_DIRECTION. */
#define _mm_fixupimm_sd(__A, __B, __C, __imm) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask((__v2df)(__A), (__v2df)(__B), \
                                 (__v2di)(__C), (__imm), \
                                 (__mmask8)-1, \
                                 _MM_FROUND_CUR_DIRECTION); \
})
4100
/* fixupimmsd_mask builtin on (__A, __B, __C) with immediate __imm,
   8-bit mask __U and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_fixupimm_sd(__A, __U, __B, __C, __imm) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask((__v2df)(__A), (__v2df)(__B), \
                                 (__v2di)(__C), (__imm), \
                                 (__mmask8)(__U), \
                                 _MM_FROUND_CUR_DIRECTION); \
})
4108
/* fixupimmsd_maskz builtin on (__A, __B, __C) with immediate __imm,
   8-bit mask __U and rounding argument __R. */
#define _mm_maskz_fixupimm_round_sd(__U, __A, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmsd_maskz((__v2df)(__A), (__v2df)(__B), \
                                  (__v2di)(__C), (__imm), \
                                  (__mmask8)(__U), (__R)); \
})
4116
/* fixupimmsd_maskz builtin on (__A, __B, __C) with immediate __imm,
   8-bit mask __U and _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_fixupimm_sd(__U, __A, __B, __C, __imm) __extension__ ({ \
  __builtin_ia32_fixupimmsd_maskz((__v2df)(__A), (__v2df)(__B), \
                                  (__v2di)(__C), (__imm), \
                                  (__mmask8)(__U), \
                                  _MM_FROUND_CUR_DIRECTION); \
})
4125
/* fixupimmss_mask builtin on (__A, __B, __C) with immediate __imm,
   an all-ones 8-bit mask and rounding argument __R. */
#define _mm_fixupimm_round_ss(__A, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmss_mask((__v4sf)(__A), (__v4sf)(__B), \
                                 (__v4si)(__C), (__imm), \
                                 (__mmask8)-1, (__R)); \
})
4132
/* fixupimmss_mask builtin on (__A, __B, __C) with immediate __imm,
   8-bit mask __U and rounding argument __R. */
#define _mm_mask_fixupimm_round_ss(__A, __U, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmss_mask((__v4sf)(__A), (__v4sf)(__B), \
                                 (__v4si)(__C), (__imm), \
                                 (__mmask8)(__U), (__R)); \
})
4139
/* fixupimmss_mask builtin on (__A, __B, __C) with immediate __imm,
   an all-ones 8-bit mask and _MM_FROUND_CUR_DIRECTION. */
#define _mm_fixupimm_ss(__A, __B, __C, __imm) __extension__ ({ \
  __builtin_ia32_fixupimmss_mask((__v4sf)(__A), (__v4sf)(__B), \
                                 (__v4si)(__C), (__imm), \
                                 (__mmask8)-1, \
                                 _MM_FROUND_CUR_DIRECTION); \
})
4147
/* fixupimmss_mask builtin on (__A, __B, __C) with immediate __imm,
   8-bit mask __U and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_fixupimm_ss(__A, __U, __B, __C, __imm) __extension__ ({ \
  __builtin_ia32_fixupimmss_mask((__v4sf)(__A), (__v4sf)(__B), \
                                 (__v4si)(__C), (__imm), \
                                 (__mmask8)(__U), \
                                 _MM_FROUND_CUR_DIRECTION); \
})
4155
/* fixupimmss_maskz builtin on (__A, __B, __C) with immediate __imm,
   8-bit mask __U and rounding argument __R. */
#define _mm_maskz_fixupimm_round_ss(__U, __A, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmss_maskz((__v4sf)(__A), (__v4sf)(__B), \
                                  (__v4si)(__C), (__imm), \
                                  (__mmask8)(__U), (__R)); \
})
4162
/* fixupimmss_maskz builtin on (__A, __B, __C) with immediate __imm,
   8-bit mask __U and _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_fixupimm_ss(__U, __A, __B, __C, __imm) __extension__ ({ \
  __builtin_ia32_fixupimmss_maskz((__v4sf)(__A), (__v4sf)(__B), \
                                  (__v4si)(__C), (__imm), \
                                  (__mmask8)(__U), \
                                  _MM_FROUND_CUR_DIRECTION); \
})
4170
/* getexpsd128_round_mask builtin on (__A, __B) with a zero third operand,
   an all-ones 8-bit mask and rounding argument __R. */
#define _mm_getexp_round_sd(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_getexpsd128_round_mask((__v2df)(__A), (__v2df)(__B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (__R)); \
})
4176
4177
4178static __inline__ __m128d __DEFAULT_FN_ATTRS
4179_mm_getexp_sd (__m128d __A, __m128d __B)
4180{
4181 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
4182 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
4183}
4184
Ben Murdoch61f157c2016-09-16 13:49:30 +01004185static __inline__ __m128d __DEFAULT_FN_ATTRS
4186_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
4187{
4188 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
4189 (__v2df) __B,
4190 (__v2df) __W,
4191 (__mmask8) __U,
4192 _MM_FROUND_CUR_DIRECTION);
4193}
4194
/* getexpsd128_round_mask builtin on (__A, __B) with __W as the third operand,
   __U as the 8-bit mask and rounding argument __R.
   Every macro argument is parenthesised before it is cast, so an expression
   argument is converted as a whole rather than only its first operand. */
#define _mm_mask_getexp_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_getexpsd128_round_mask((__v2df)(__A), (__v2df)(__B), \
                                        (__v2df)(__W), (__mmask8)(__U), \
                                        (__R)); \
})
4202
4203static __inline__ __m128d __DEFAULT_FN_ATTRS
4204_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
4205{
4206 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
4207 (__v2df) __B,
4208 (__v2df) _mm_setzero_pd (),
4209 (__mmask8) __U,
4210 _MM_FROUND_CUR_DIRECTION);
4211}
4212
/* getexpsd128_round_mask builtin on (__A, __B) with a zero third operand,
   __U as the 8-bit mask and rounding argument __R.
   Every macro argument is parenthesised before it is cast, so an expression
   argument is converted as a whole rather than only its first operand. */
#define _mm_maskz_getexp_round_sd(__U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_getexpsd128_round_mask((__v2df)(__A), (__v2df)(__B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(__U), (__R)); \
})
4220
/* getexpss128_round_mask builtin on (__A, __B) with a zero third operand,
   an all-ones 8-bit mask and rounding argument __R. */
#define _mm_getexp_round_ss(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_getexpss128_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                        (__v4sf)_mm_setzero_ps(), \
                                        (__mmask8)-1, (__R)); \
})
4226
4227static __inline__ __m128 __DEFAULT_FN_ATTRS
4228_mm_getexp_ss (__m128 __A, __m128 __B)
4229{
4230 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
4231 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
4232}
4233
Ben Murdoch61f157c2016-09-16 13:49:30 +01004234static __inline__ __m128d __DEFAULT_FN_ATTRS
4235_mm_mask_getexp_ss (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
4236{
4237 return (__m128d) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
4238 (__v4sf) __B,
4239 (__v4sf) __W,
4240 (__mmask8) __U,
4241 _MM_FROUND_CUR_DIRECTION);
4242}
4243
/* getexpss128_round_mask builtin on (__A, __B) with __W as the third operand,
   __U as the 8-bit mask and rounding argument __R.
   Every macro argument is parenthesised before it is cast, so an expression
   argument is converted as a whole rather than only its first operand. */
#define _mm_mask_getexp_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_getexpss128_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                        (__v4sf)(__W), (__mmask8)(__U), \
                                        (__R)); \
})
4251
4252static __inline__ __m128d __DEFAULT_FN_ATTRS
4253_mm_maskz_getexp_ss (__mmask8 __U, __m128d __A, __m128d __B)
4254{
4255 return (__m128d) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
4256 (__v4sf) __B,
4257 (__v4sf) _mm_setzero_pd (),
4258 (__mmask8) __U,
4259 _MM_FROUND_CUR_DIRECTION);
4260}
4261
/* getexpss128_round_mask builtin on (__A, __B) with a zero third operand,
   __U as the 8-bit mask and rounding argument __R.
   Every macro argument is parenthesised before it is cast, so an expression
   argument is converted as a whole rather than only its first operand. */
#define _mm_maskz_getexp_round_ss(__U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_getexpss128_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                        (__v4sf)_mm_setzero_ps(), \
                                        (__mmask8)(__U), (__R)); \
})
4269
/* getmantsd_round_mask builtin on (__A, __B); the immediate is built as
   (__D << 2) | __C.  Zero fourth operand, all-ones 8-bit mask, rounding
   argument __R. */
#define _mm_getmant_round_sd(__A, __B, __C, __D, __R) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                      ((__D) << 2) | (__C), \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)-1, (__R)); \
})
4276
/* getmantsd_round_mask builtin on (__A, __B); the immediate is built as
   (__D << 2) | __C.  Zero fourth operand, all-ones 8-bit mask,
   _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_sd(__A, __B, __C, __D) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                      ((__D) << 2) | (__C), \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); \
})
4283
/* getmantsd_round_mask builtin on (__A, __B); the immediate is built as
   (__D << 2) | __C.  __W is the fourth operand, __U the 8-bit mask, and
   _MM_FROUND_CUR_DIRECTION is used.
   Every macro argument is parenthesised before it is cast, so an expression
   argument is converted as a whole rather than only its first operand. */
#define _mm_mask_getmant_sd(__W, __U, __A, __B, __C, __D) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                      ((__D) << 2) | (__C), \
                                      (__v2df)(__W), (__mmask8)(__U), \
                                      _MM_FROUND_CUR_DIRECTION); \
})
4292
/* getmantsd_round_mask builtin on (__A, __B); the immediate is built as
   (__D << 2) | __C.  __W is the fourth operand, __U the 8-bit mask, __R the
   rounding argument.
   The statement expression is marked __extension__ like the sibling macros,
   and every macro argument is parenthesised before it is cast. */
#define _mm_mask_getmant_round_sd(__W, __U, __A, __B, __C, __D, __R) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                      ((__D) << 2) | (__C), \
                                      (__v2df)(__W), (__mmask8)(__U), \
                                      (__R)); \
})
4301
/* getmantsd_round_mask builtin on (__A, __B); the immediate is built as
   (__D << 2) | __C.  Zero fourth operand, __U as the 8-bit mask,
   _MM_FROUND_CUR_DIRECTION.
   Every macro argument is parenthesised before it is cast, so an expression
   argument is converted as a whole rather than only its first operand. */
#define _mm_maskz_getmant_sd(__U, __A, __B, __C, __D) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                      ((__D) << 2) | (__C), \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)(__U), \
                                      _MM_FROUND_CUR_DIRECTION); \
})
4310
/* getmantsd_round_mask builtin on (__A, __B); the immediate is built as
   (__D << 2) | __C.  Zero fourth operand, __U as the 8-bit mask, __R as the
   rounding argument.
   Every macro argument is parenthesised before it is cast, so an expression
   argument is converted as a whole rather than only its first operand. */
#define _mm_maskz_getmant_round_sd(__U, __A, __B, __C, __D, __R) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                      ((__D) << 2) | (__C), \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)(__U), (__R)); \
})
4319
/* getmantss_round_mask builtin on (__A, __B); the immediate is built as
   (__D << 2) | __C.  Zero fourth operand, all-ones 8-bit mask, rounding
   argument __R. */
#define _mm_getmant_round_ss(__A, __B, __C, __D, __R) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                      ((__D) << 2) | (__C), \
                                      (__v4sf)_mm_setzero_ps(), \
                                      (__mmask8)-1, (__R)); \
})
4326
/* getmantss_round_mask builtin on (__A, __B); the immediate is built as
   (__D << 2) | __C.  Zero fourth operand, all-ones 8-bit mask,
   _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_ss(__A, __B, __C, __D) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                      ((__D) << 2) | (__C), \
                                      (__v4sf)_mm_setzero_ps(), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); \
})
4333
/* getmantss_round_mask builtin on (__A, __B); the immediate is built as
   (__D << 2) | __C.  __W is the fourth operand, __U the 8-bit mask, and
   _MM_FROUND_CUR_DIRECTION is used.
   Every macro argument is parenthesised before it is cast, so an expression
   argument is converted as a whole rather than only its first operand. */
#define _mm_mask_getmant_ss(__W, __U, __A, __B, __C, __D) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                      ((__D) << 2) | (__C), \
                                      (__v4sf)(__W), (__mmask8)(__U), \
                                      _MM_FROUND_CUR_DIRECTION); \
})
4342
/* getmantss_round_mask builtin on (__A, __B); the immediate is built as
   (__D << 2) | __C.  __W is the fourth operand, __U the 8-bit mask, __R the
   rounding argument.
   The statement expression is marked __extension__ like the sibling macros,
   and every macro argument is parenthesised before it is cast. */
#define _mm_mask_getmant_round_ss(__W, __U, __A, __B, __C, __D, __R) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                      ((__D) << 2) | (__C), \
                                      (__v4sf)(__W), (__mmask8)(__U), \
                                      (__R)); \
})
4351
/* getmantss_round_mask builtin on (__A, __B); the immediate is built as
   (__D << 2) | __C.  Zero fourth operand, __U as the 8-bit mask,
   _MM_FROUND_CUR_DIRECTION.
   The zero operand comes from _mm_setzero_ps () so that it matches the
   __v4sf operand type, and every macro argument is parenthesised before it
   is cast. */
#define _mm_maskz_getmant_ss(__U, __A, __B, __C, __D) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                      ((__D) << 2) | (__C), \
                                      (__v4sf)_mm_setzero_ps(), \
                                      (__mmask8)(__U), \
                                      _MM_FROUND_CUR_DIRECTION); \
})
4360
/* getmantss_round_mask builtin on (__A, __B); the immediate is built as
   (__D << 2) | __C.  Zero fourth operand, __U as the 8-bit mask, __R as the
   rounding argument.
   Every macro argument is parenthesised before it is cast, so an expression
   argument is converted as a whole rather than only its first operand. */
#define _mm_maskz_getmant_round_ss(__U, __A, __B, __C, __D, __R) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                      ((__D) << 2) | (__C), \
                                      (__v4sf)_mm_setzero_ps(), \
                                      (__mmask8)(__U), (__R)); \
})
Ben Murdoch097c5b22016-05-18 11:27:45 +01004369
4370static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4371_mm512_kmov (__mmask16 __A)
4372{
4373 return __A;
4374}
4375
/* vcomisd builtin on (__A, __B) with arguments __P and __R passed through. */
#define _mm_comi_round_sd(__A, __B, __P, __R) __extension__ ({ \
  __builtin_ia32_vcomisd((__v2df)(__A), (__v2df)(__B), (__P), (__R)); \
})
4379
/* vcomiss builtin on (__A, __B) with arguments __P and __R passed through. */
#define _mm_comi_round_ss(__A, __B, __P, __R) __extension__ ({ \
  __builtin_ia32_vcomiss((__v4sf)(__A), (__v4sf)(__B), (__P), (__R)); \
})
4383
Ben Murdoch61f157c2016-09-16 13:49:30 +01004384static __inline__ __m512d __DEFAULT_FN_ATTRS
4385_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4386{
4387 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
4388 (__v8df) __B,
4389 (__v8df) __W,
4390 (__mmask8) __U);
4391}
/* vcvtsd2si64 builtin on __A with rounding argument __R. */
#define _mm_cvt_roundsd_si64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtsd2si64((__v2df)(__A), (__R)); \
})
4395static __inline__ __m512i __DEFAULT_FN_ATTRS
4396_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
4397 __mmask16 __U, __m512i __B)
4398{
4399 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
4400 (__v16si) __I
4401 /* idx */ ,
4402 (__v16si) __B,
4403 (__mmask16) __U);
4404}
4405static __inline__ __m512i __DEFAULT_FN_ATTRS
4406_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
4407{
4408 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
4409 (__v16si) __B,
4410 (__v16si)
4411 _mm512_setzero_si512 (),
4412 (__mmask16) -1);
4413}
4414
4415static __inline__ __m512i __DEFAULT_FN_ATTRS
4416_mm512_sll_epi32 (__m512i __A, __m128i __B)
4417{
4418 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
4419 (__v4si) __B,
4420 (__v16si)
4421 _mm512_setzero_si512 (),
4422 (__mmask16) -1);
4423}
4424
4425static __inline__ __m512i __DEFAULT_FN_ATTRS
4426_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
4427{
4428 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
4429 (__v4si) __B,
4430 (__v16si) __W,
4431 (__mmask16) __U);
4432}
4433
4434static __inline__ __m512i __DEFAULT_FN_ATTRS
4435_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
4436{
4437 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
4438 (__v4si) __B,
4439 (__v16si)
4440 _mm512_setzero_si512 (),
4441 (__mmask16) __U);
4442}
4443
4444static __inline__ __m512i __DEFAULT_FN_ATTRS
4445_mm512_sll_epi64 (__m512i __A, __m128i __B)
4446{
4447 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
4448 (__v2di) __B,
4449 (__v8di)
4450 _mm512_setzero_si512 (),
4451 (__mmask8) -1);
4452}
4453
4454static __inline__ __m512i __DEFAULT_FN_ATTRS
4455_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
4456{
4457 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
4458 (__v2di) __B,
4459 (__v8di) __W,
4460 (__mmask8) __U);
4461}
4462
4463static __inline__ __m512i __DEFAULT_FN_ATTRS
4464_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
4465{
4466 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
4467 (__v2di) __B,
4468 (__v8di)
4469 _mm512_setzero_si512 (),
4470 (__mmask8) __U);
4471}
4472
4473static __inline__ __m512i __DEFAULT_FN_ATTRS
4474_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
4475{
4476 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
4477 (__v16si) __Y,
4478 (__v16si)
4479 _mm512_setzero_si512 (),
4480 (__mmask16) -1);
4481}
4482
4483static __inline__ __m512i __DEFAULT_FN_ATTRS
4484_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
4485{
4486 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
4487 (__v16si) __Y,
4488 (__v16si) __W,
4489 (__mmask16) __U);
4490}
4491
4492static __inline__ __m512i __DEFAULT_FN_ATTRS
4493_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
4494{
4495 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
4496 (__v16si) __Y,
4497 (__v16si)
4498 _mm512_setzero_si512 (),
4499 (__mmask16) __U);
4500}
4501
4502static __inline__ __m512i __DEFAULT_FN_ATTRS
4503_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
4504{
4505 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
4506 (__v8di) __Y,
4507 (__v8di)
4508 _mm512_undefined_pd (),
4509 (__mmask8) -1);
4510}
4511
4512static __inline__ __m512i __DEFAULT_FN_ATTRS
4513_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
4514{
4515 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
4516 (__v8di) __Y,
4517 (__v8di) __W,
4518 (__mmask8) __U);
4519}
4520
4521static __inline__ __m512i __DEFAULT_FN_ATTRS
4522_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
4523{
4524 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
4525 (__v8di) __Y,
4526 (__v8di)
4527 _mm512_setzero_si512 (),
4528 (__mmask8) __U);
4529}
4530
4531static __inline__ __m512i __DEFAULT_FN_ATTRS
4532_mm512_sra_epi32 (__m512i __A, __m128i __B)
4533{
4534 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
4535 (__v4si) __B,
4536 (__v16si)
4537 _mm512_setzero_si512 (),
4538 (__mmask16) -1);
4539}
4540
4541static __inline__ __m512i __DEFAULT_FN_ATTRS
4542_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
4543{
4544 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
4545 (__v4si) __B,
4546 (__v16si) __W,
4547 (__mmask16) __U);
4548}
4549
4550static __inline__ __m512i __DEFAULT_FN_ATTRS
4551_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
4552{
4553 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
4554 (__v4si) __B,
4555 (__v16si)
4556 _mm512_setzero_si512 (),
4557 (__mmask16) __U);
4558}
4559
4560static __inline__ __m512i __DEFAULT_FN_ATTRS
4561_mm512_sra_epi64 (__m512i __A, __m128i __B)
4562{
4563 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
4564 (__v2di) __B,
4565 (__v8di)
4566 _mm512_setzero_si512 (),
4567 (__mmask8) -1);
4568}
4569
4570static __inline__ __m512i __DEFAULT_FN_ATTRS
4571_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
4572{
4573 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
4574 (__v2di) __B,
4575 (__v8di) __W,
4576 (__mmask8) __U);
4577}
4578
4579static __inline__ __m512i __DEFAULT_FN_ATTRS
4580_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
4581{
4582 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
4583 (__v2di) __B,
4584 (__v8di)
4585 _mm512_setzero_si512 (),
4586 (__mmask8) __U);
4587}
4588
4589static __inline__ __m512i __DEFAULT_FN_ATTRS
4590_mm512_srav_epi32 (__m512i __X, __m512i __Y)
4591{
4592 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
4593 (__v16si) __Y,
4594 (__v16si)
4595 _mm512_setzero_si512 (),
4596 (__mmask16) -1);
4597}
4598
4599static __inline__ __m512i __DEFAULT_FN_ATTRS
4600_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
4601{
4602 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
4603 (__v16si) __Y,
4604 (__v16si) __W,
4605 (__mmask16) __U);
4606}
4607
4608static __inline__ __m512i __DEFAULT_FN_ATTRS
4609_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
4610{
4611 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
4612 (__v16si) __Y,
4613 (__v16si)
4614 _mm512_setzero_si512 (),
4615 (__mmask16) __U);
4616}
4617
4618static __inline__ __m512i __DEFAULT_FN_ATTRS
4619_mm512_srav_epi64 (__m512i __X, __m512i __Y)
4620{
4621 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
4622 (__v8di) __Y,
4623 (__v8di)
4624 _mm512_setzero_si512 (),
4625 (__mmask8) -1);
4626}
4627
4628static __inline__ __m512i __DEFAULT_FN_ATTRS
4629_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
4630{
4631 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
4632 (__v8di) __Y,
4633 (__v8di) __W,
4634 (__mmask8) __U);
4635}
4636
4637static __inline__ __m512i __DEFAULT_FN_ATTRS
4638_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
4639{
4640 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
4641 (__v8di) __Y,
4642 (__v8di)
4643 _mm512_setzero_si512 (),
4644 (__mmask8) __U);
4645}
4646
4647static __inline__ __m512i __DEFAULT_FN_ATTRS
4648_mm512_srl_epi32 (__m512i __A, __m128i __B)
4649{
4650 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
4651 (__v4si) __B,
4652 (__v16si)
4653 _mm512_setzero_si512 (),
4654 (__mmask16) -1);
4655}
4656
4657static __inline__ __m512i __DEFAULT_FN_ATTRS
4658_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
4659{
4660 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
4661 (__v4si) __B,
4662 (__v16si) __W,
4663 (__mmask16) __U);
4664}
4665
4666static __inline__ __m512i __DEFAULT_FN_ATTRS
4667_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
4668{
4669 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
4670 (__v4si) __B,
4671 (__v16si)
4672 _mm512_setzero_si512 (),
4673 (__mmask16) __U);
4674}
4675
4676static __inline__ __m512i __DEFAULT_FN_ATTRS
4677_mm512_srl_epi64 (__m512i __A, __m128i __B)
4678{
4679 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
4680 (__v2di) __B,
4681 (__v8di)
4682 _mm512_setzero_si512 (),
4683 (__mmask8) -1);
4684}
4685
4686static __inline__ __m512i __DEFAULT_FN_ATTRS
4687_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
4688{
4689 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
4690 (__v2di) __B,
4691 (__v8di) __W,
4692 (__mmask8) __U);
4693}
4694
4695static __inline__ __m512i __DEFAULT_FN_ATTRS
4696_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
4697{
4698 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
4699 (__v2di) __B,
4700 (__v8di)
4701 _mm512_setzero_si512 (),
4702 (__mmask8) __U);
4703}
4704
4705static __inline__ __m512i __DEFAULT_FN_ATTRS
4706_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
4707{
4708 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
4709 (__v16si) __Y,
4710 (__v16si)
4711 _mm512_setzero_si512 (),
4712 (__mmask16) -1);
4713}
4714
4715static __inline__ __m512i __DEFAULT_FN_ATTRS
4716_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
4717{
4718 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
4719 (__v16si) __Y,
4720 (__v16si) __W,
4721 (__mmask16) __U);
4722}
4723
4724static __inline__ __m512i __DEFAULT_FN_ATTRS
4725_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
4726{
4727 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
4728 (__v16si) __Y,
4729 (__v16si)
4730 _mm512_setzero_si512 (),
4731 (__mmask16) __U);
4732}
4733
4734static __inline__ __m512i __DEFAULT_FN_ATTRS
4735_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
4736{
4737 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
4738 (__v8di) __Y,
4739 (__v8di)
4740 _mm512_setzero_si512 (),
4741 (__mmask8) -1);
4742}
4743
4744static __inline__ __m512i __DEFAULT_FN_ATTRS
4745_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
4746{
4747 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
4748 (__v8di) __Y,
4749 (__v8di) __W,
4750 (__mmask8) __U);
4751}
4752
4753static __inline__ __m512i __DEFAULT_FN_ATTRS
4754_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
4755{
4756 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
4757 (__v8di) __Y,
4758 (__v8di)
4759 _mm512_setzero_si512 (),
4760 (__mmask8) __U);
4761}
4762
/* pternlogd512_mask builtin on (__A, __B, __C) with immediate imm and an
   all-ones 16-bit mask. */
#define _mm512_ternarylogic_epi32(__A, __B, __C, imm) __extension__ ({ \
  __builtin_ia32_pternlogd512_mask((__v16si)(__A), (__v16si)(__B), \
                                   (__v16si)(__C), (imm), \
                                   (__mmask16)-1); \
})
4769
/* pternlogd512_mask builtin on (__A, __B, __C) with immediate imm and __U
   as the 16-bit mask. */
#define _mm512_mask_ternarylogic_epi32(__A, __U, __B, __C, imm) __extension__ ({ \
  __builtin_ia32_pternlogd512_mask((__v16si)(__A), (__v16si)(__B), \
                                   (__v16si)(__C), (imm), \
                                   (__mmask16)(__U)); \
})
4776
/* pternlogd512_maskz builtin on (__A, __B, __C) with immediate imm and __U
   as the 16-bit mask. */
#define _mm512_maskz_ternarylogic_epi32(__U, __A, __B, __C, imm) __extension__ ({ \
  __builtin_ia32_pternlogd512_maskz((__v16si)(__A), (__v16si)(__B), \
                                    (__v16si)(__C), (imm), \
                                    (__mmask16)(__U)); \
})
4783
/* pternlogq512_mask builtin on (__A, __B, __C) with immediate imm and an
   all-ones 8-bit mask. */
#define _mm512_ternarylogic_epi64(__A, __B, __C, imm) __extension__ ({ \
  __builtin_ia32_pternlogq512_mask((__v8di)(__A), (__v8di)(__B), \
                                   (__v8di)(__C), (imm), \
                                   (__mmask8)-1); \
})
4790
/* pternlogq512_mask builtin on (__A, __B, __C) with immediate imm and __U
   as the 8-bit mask. */
#define _mm512_mask_ternarylogic_epi64(__A, __U, __B, __C, imm) __extension__ ({ \
  __builtin_ia32_pternlogq512_mask((__v8di)(__A), (__v8di)(__B), \
                                   (__v8di)(__C), (imm), \
                                   (__mmask8)(__U)); \
})
4797
/* pternlogq512_maskz builtin on (__A, __B, __C) with immediate imm and __U
   as the 8-bit mask. */
#define _mm512_maskz_ternarylogic_epi64(__U, __A, __B, __C, imm) __extension__ ({ \
  __builtin_ia32_pternlogq512_maskz((__v8di)(__A), (__v8di)(__B), \
                                    (__v8di)(__C), (imm), \
                                    (__mmask8)(__U)); \
})
4804
4805static __inline__ __m512d __DEFAULT_FN_ATTRS
4806_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
4807{
4808 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
4809 (__v8df) __B,
4810 (__v8df)
4811 _mm512_setzero_pd (),
4812 (__mmask8) __U);
4813}
4814
4815static __inline__ __m512 __DEFAULT_FN_ATTRS
4816_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4817{
4818 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
4819 (__v16sf) __B,
4820 (__v16sf) __W,
4821 (__mmask16) __U);
4822}
4823
4824static __inline__ __m512 __DEFAULT_FN_ATTRS
4825_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4826{
4827 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
4828 (__v16sf) __B,
4829 (__v16sf)
4830 _mm512_setzero_ps (),
4831 (__mmask16) __U);
4832}
4833
4834static __inline__ __m512d __DEFAULT_FN_ATTRS
4835_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4836{
4837 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
4838 (__v8df) __B,
4839 (__v8df) __W,
4840 (__mmask8) __U);
4841}
4842
4843static __inline__ __m512d __DEFAULT_FN_ATTRS
4844_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4845{
4846 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
4847 (__v8df) __B,
4848 (__v8df)
4849 _mm512_setzero_pd (),
4850 (__mmask8) __U);
4851}
4852
4853static __inline__ __m512 __DEFAULT_FN_ATTRS
4854_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4855{
4856 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
4857 (__v16sf) __B,
4858 (__v16sf) __W,
4859 (__mmask16) __U);
4860}
4861
4862static __inline__ __m512 __DEFAULT_FN_ATTRS
4863_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4864{
4865 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
4866 (__v16sf) __B,
4867 (__v16sf)
4868 _mm512_setzero_ps (),
4869 (__mmask16) __U);
4870}
4871
/* vcvtsd2si64 builtin on __A with rounding argument __R. */
#define _mm_cvt_roundsd_i64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtsd2si64((__v2df)(__A), (__R)); \
})
4875
/* vcvtsd2si32 builtin on __A with rounding argument __R. */
#define _mm_cvt_roundsd_si32(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtsd2si32((__v2df)(__A), (__R)); \
})
4879
/* vcvtsd2si32 builtin on __A with rounding argument __R. */
#define _mm_cvt_roundsd_i32(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtsd2si32((__v2df)(__A), (__R)); \
})
4883
/* vcvtsd2usi32 builtin on __A with rounding argument __R. */
#define _mm_cvt_roundsd_u32(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtsd2usi32((__v2df)(__A), (__R)); \
})
4887
4888static __inline__ unsigned __DEFAULT_FN_ATTRS
4889_mm_cvtsd_u32 (__m128d __A)
4890{
4891 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
4892 _MM_FROUND_CUR_DIRECTION);
4893}
4894
/* vcvtsd2usi64 builtin on __A with rounding argument __R. */
#define _mm_cvt_roundsd_u64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtsd2usi64((__v2df)(__A), (__R)); \
})
4898
4899static __inline__ unsigned long long __DEFAULT_FN_ATTRS
4900_mm_cvtsd_u64 (__m128d __A)
4901{
4902 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
4903 __A,
4904 _MM_FROUND_CUR_DIRECTION);
4905}
4906
/* Expands to __builtin_ia32_vcvtss2si32 on __A (viewed as __v4sf); __R is
   forwarded unchanged as the second argument. */
#define _mm_cvt_roundss_si32(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtss2si32((__v4sf)(__A), (__R)); \
})
4910
/* "_i32" spelling; expands to __builtin_ia32_vcvtss2si32 on __A (viewed as
   __v4sf) with __R forwarded as the second argument. */
#define _mm_cvt_roundss_i32(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtss2si32((__v4sf)(__A), (__R)); \
})
4914
/* Expands to __builtin_ia32_vcvtss2si64 on __A (viewed as __v4sf); __R is
   forwarded unchanged as the second argument. */
#define _mm_cvt_roundss_si64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtss2si64((__v4sf)(__A), (__R)); \
})
4918
/* "_i64" spelling; expands to __builtin_ia32_vcvtss2si64 on __A (viewed as
   __v4sf) with __R forwarded as the second argument. */
#define _mm_cvt_roundss_i64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtss2si64((__v4sf)(__A), (__R)); \
})
4922
/* Expands to __builtin_ia32_vcvtss2usi32 on __A (viewed as __v4sf); __R is
   forwarded unchanged as the second argument. */
#define _mm_cvt_roundss_u32(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtss2usi32((__v4sf)(__A), (__R)); \
})
4926
4927static __inline__ unsigned __DEFAULT_FN_ATTRS
4928_mm_cvtss_u32 (__m128 __A)
4929{
4930 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
4931 _MM_FROUND_CUR_DIRECTION);
4932}
4933
/* Expands to __builtin_ia32_vcvtss2usi64 on __A (viewed as __v4sf); __R is
   forwarded unchanged as the second argument. */
#define _mm_cvt_roundss_u64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtss2usi64((__v4sf)(__A), (__R)); \
})
4937
4938static __inline__ unsigned long long __DEFAULT_FN_ATTRS
4939_mm_cvtss_u64 (__m128 __A)
4940{
4941 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
4942 __A,
4943 _MM_FROUND_CUR_DIRECTION);
4944}
4945
/* "_i32" spelling; expands to __builtin_ia32_vcvttsd2si32 on __A (viewed as
   __v2df) with __R forwarded as the second argument. */
#define _mm_cvtt_roundsd_i32(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvttsd2si32((__v2df)(__A), (__R)); \
})
4949
/* Expands to __builtin_ia32_vcvttsd2si32 on __A (viewed as __v2df); __R is
   forwarded unchanged as the second argument. */
#define _mm_cvtt_roundsd_si32(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvttsd2si32((__v2df)(__A), (__R)); \
})
4953
4954static __inline__ int __DEFAULT_FN_ATTRS
4955_mm_cvttsd_i32 (__m128d __A)
4956{
4957 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
4958 _MM_FROUND_CUR_DIRECTION);
4959}
4960
/* Expands to __builtin_ia32_vcvttsd2si64 on __A (viewed as __v2df); __R is
   forwarded unchanged as the second argument. */
#define _mm_cvtt_roundsd_si64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvttsd2si64((__v2df)(__A), (__R)); \
})
4964
/* "_i64" spelling; expands to __builtin_ia32_vcvttsd2si64 on __A (viewed as
   __v2df) with __R forwarded as the second argument. */
#define _mm_cvtt_roundsd_i64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvttsd2si64((__v2df)(__A), (__R)); \
})
4968
4969static __inline__ long long __DEFAULT_FN_ATTRS
4970_mm_cvttsd_i64 (__m128d __A)
4971{
4972 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
4973 _MM_FROUND_CUR_DIRECTION);
4974}
4975
/* Expands to __builtin_ia32_vcvttsd2usi32 on __A (viewed as __v2df); __R is
   forwarded unchanged as the second argument. */
#define _mm_cvtt_roundsd_u32(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvttsd2usi32((__v2df)(__A), (__R)); \
})
4979
4980static __inline__ unsigned __DEFAULT_FN_ATTRS
4981_mm_cvttsd_u32 (__m128d __A)
4982{
4983 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
4984 _MM_FROUND_CUR_DIRECTION);
4985}
4986
/* Expands to __builtin_ia32_vcvttsd2usi64 on __A (viewed as __v2df); __R is
   forwarded unchanged as the second argument. */
#define _mm_cvtt_roundsd_u64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvttsd2usi64((__v2df)(__A), (__R)); \
})
4990
4991static __inline__ unsigned long long __DEFAULT_FN_ATTRS
4992_mm_cvttsd_u64 (__m128d __A)
4993{
4994 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
4995 __A,
4996 _MM_FROUND_CUR_DIRECTION);
4997}
4998
/* "_i32" spelling; expands to __builtin_ia32_vcvttss2si32 on __A (viewed as
   __v4sf) with __R forwarded as the second argument. */
#define _mm_cvtt_roundss_i32(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvttss2si32((__v4sf)(__A), (__R)); \
})
5002
/* Expands to __builtin_ia32_vcvttss2si32 on __A (viewed as __v4sf); __R is
   forwarded unchanged as the second argument. */
#define _mm_cvtt_roundss_si32(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvttss2si32((__v4sf)(__A), (__R)); \
})
5006
5007static __inline__ int __DEFAULT_FN_ATTRS
5008_mm_cvttss_i32 (__m128 __A)
5009{
5010 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
5011 _MM_FROUND_CUR_DIRECTION);
5012}
5013
/* "_i64" spelling; expands to __builtin_ia32_vcvttss2si64 on __A (viewed as
   __v4sf) with __R forwarded as the second argument. */
#define _mm_cvtt_roundss_i64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvttss2si64((__v4sf)(__A), (__R)); \
})
5017
/* Expands to __builtin_ia32_vcvttss2si64 on __A (viewed as __v4sf); __R is
   forwarded unchanged as the second argument. */
#define _mm_cvtt_roundss_si64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvttss2si64((__v4sf)(__A), (__R)); \
})
5021
5022static __inline__ long long __DEFAULT_FN_ATTRS
5023_mm_cvttss_i64 (__m128 __A)
5024{
5025 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
5026 _MM_FROUND_CUR_DIRECTION);
5027}
5028
/* Expands to __builtin_ia32_vcvttss2usi32 on __A (viewed as __v4sf); __R is
   forwarded unchanged as the second argument. */
#define _mm_cvtt_roundss_u32(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvttss2usi32((__v4sf)(__A), (__R)); \
})
5032
5033static __inline__ unsigned __DEFAULT_FN_ATTRS
5034_mm_cvttss_u32 (__m128 __A)
5035{
5036 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
5037 _MM_FROUND_CUR_DIRECTION);
5038}
5039
/* Expands to __builtin_ia32_vcvttss2usi64 on __A (viewed as __v4sf); __R is
   forwarded unchanged as the second argument. */
#define _mm_cvtt_roundss_u64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvttss2usi64((__v4sf)(__A), (__R)); \
})
5043
5044static __inline__ unsigned long long __DEFAULT_FN_ATTRS
5045_mm_cvttss_u64 (__m128 __A)
5046{
5047 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
5048 __A,
5049 _MM_FROUND_CUR_DIRECTION);
5050}
5051
5052static __inline__ __m512d __DEFAULT_FN_ATTRS
5053_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
5054 __m512d __B)
5055{
5056 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
5057 (__v8di) __I
5058 /* idx */ ,
5059 (__v8df) __B,
5060 (__mmask8) __U);
5061}
5062
5063static __inline__ __m512 __DEFAULT_FN_ATTRS
5064_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
5065 __m512 __B)
5066{
5067 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
5068 (__v16si) __I
5069 /* idx */ ,
5070 (__v16sf) __B,
5071 (__mmask16) __U);
5072}
5073
5074static __inline__ __m512i __DEFAULT_FN_ATTRS
5075_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5076 __mmask8 __U, __m512i __B)
5077{
5078 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5079 (__v8di) __I
5080 /* idx */ ,
5081 (__v8di) __B,
5082 (__mmask8) __U);
5083}
5084
/* Expands to __builtin_ia32_vpermilpd512_mask on __X with control __C, an
   undefined third operand (_mm512_undefined_pd) and mask (__mmask8)-1. */
#define _mm512_permute_pd(__X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd512_mask((__v8df)(__X), (__C), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   (__mmask8)-1); \
})
5091
/* Expands to __builtin_ia32_vpermilpd512_mask on __X with control __C,
   passing __W as the third operand and __U as the mask. */
#define _mm512_mask_permute_pd(__W, __U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd512_mask((__v8df)(__X), (__C), \
                                   (__v8df)(__W), (__mmask8)(__U)); \
})
5097
/* Expands to __builtin_ia32_vpermilpd512_mask on __X with control __C, a
   zero vector (_mm512_setzero_pd) as the third operand and __U as the mask. */
#define _mm512_maskz_permute_pd(__U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd512_mask((__v8df)(__X), (__C), \
                                   (__v8df)_mm512_setzero_pd(), \
                                   (__mmask8)(__U)); \
})
5104
/* Expands to __builtin_ia32_vpermilps512_mask on __X with control __C, an
   undefined third operand (_mm512_undefined_ps) and mask (__mmask16)-1. */
#define _mm512_permute_ps(__X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps512_mask((__v16sf)(__X), (__C), \
                                   (__v16sf)_mm512_undefined_ps(), \
                                   (__mmask16)-1); \
})
5111
/* Expands to __builtin_ia32_vpermilps512_mask on __X with control __C,
   passing __W as the third operand and __U as the mask. */
#define _mm512_mask_permute_ps(__W, __U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps512_mask((__v16sf)(__X), (__C), \
                                   (__v16sf)(__W), (__mmask16)(__U)); \
})
5117
/* Expands to __builtin_ia32_vpermilps512_mask on __X with control __C, a
   zero vector (_mm512_setzero_ps) as the third operand and __U as the mask. */
#define _mm512_maskz_permute_ps(__U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps512_mask((__v16sf)(__X), (__C), \
                                   (__v16sf)_mm512_setzero_ps(), \
                                   (__mmask16)(__U)); \
})
5124
5125static __inline__ __m512d __DEFAULT_FN_ATTRS
5126_mm512_permutevar_pd (__m512d __A, __m512i __C)
5127{
5128 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5129 (__v8di) __C,
5130 (__v8df)
5131 _mm512_undefined_pd (),
5132 (__mmask8) -1);
5133}
5134
5135static __inline__ __m512d __DEFAULT_FN_ATTRS
5136_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5137{
5138 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5139 (__v8di) __C,
5140 (__v8df) __W,
5141 (__mmask8) __U);
5142}
5143
5144static __inline__ __m512d __DEFAULT_FN_ATTRS
5145_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5146{
5147 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5148 (__v8di) __C,
5149 (__v8df)
5150 _mm512_setzero_pd (),
5151 (__mmask8) __U);
5152}
5153
5154static __inline__ __m512 __DEFAULT_FN_ATTRS
5155_mm512_permutevar_ps (__m512 __A, __m512i __C)
5156{
5157 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5158 (__v16si) __C,
5159 (__v16sf)
5160 _mm512_undefined_ps (),
5161 (__mmask16) -1);
5162}
5163
5164static __inline__ __m512 __DEFAULT_FN_ATTRS
5165_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5166{
5167 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5168 (__v16si) __C,
5169 (__v16sf) __W,
5170 (__mmask16) __U);
5171}
5172
5173static __inline__ __m512 __DEFAULT_FN_ATTRS
5174_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5175{
5176 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5177 (__v16si) __C,
5178 (__v16sf)
5179 _mm512_setzero_ps (),
5180 (__mmask16) __U);
5181}
5182
5183static __inline__ __m512i __DEFAULT_FN_ATTRS
5184_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5185 __m512i __I, __m512i __B)
5186{
5187 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5188 /* idx */ ,
5189 (__v16si) __A,
5190 (__v16si) __B,
5191 (__mmask16) __U);
5192}
5193
5194static __inline__ __m512d __DEFAULT_FN_ATTRS
5195_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
5196 __m512d __B)
5197{
5198 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
5199 /* idx */ ,
5200 (__v8df) __A,
5201 (__v8df) __B,
5202 (__mmask8) __U);
5203}
5204
5205static __inline__ __m512 __DEFAULT_FN_ATTRS
5206_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
5207 __m512 __B)
5208{
5209 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
5210 /* idx */ ,
5211 (__v16sf) __A,
5212 (__v16sf) __B,
5213 (__mmask16) __U);
5214}
5215
5216static __inline__ __m512i __DEFAULT_FN_ATTRS
5217_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5218 __m512i __I, __m512i __B)
5219{
5220 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5221 /* idx */ ,
5222 (__v8di) __A,
5223 (__v8di) __B,
5224 (__mmask8) __U);
5225}
5226
5227static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5228_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
5229{
5230 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
5231 (__v16si) __B,
5232 (__mmask16) -1);
5233}
5234
5235static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5236_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
5237{
5238 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
5239 (__v16si) __B, __U);
5240}
5241
5242static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5243_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
5244{
5245 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
5246 (__v8di) __B,
5247 (__mmask8) -1);
5248}
5249
5250static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5251_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
5252{
5253 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
5254 (__v8di) __B, __U);
5255}
5256
/* Expands to __builtin_ia32_cvttpd2udq512_mask on __A with an undefined
   256-bit second operand, mask (__mmask8)-1 and __R as the last argument.
   The result is cast to __m256i so the macro yields the same type as the
   function form _mm512_cvttpd_epu32 rather than the builtin's own vector
   type. */
#define _mm512_cvtt_roundpd_epu32(__A, __R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__A), \
                                             (__v8si)_mm256_undefined_si256(), \
                                             (__mmask8)-1, (__R)); \
})
5263
/* Expands to __builtin_ia32_cvttpd2udq512_mask on __A with __W as the second
   operand, __U as the mask and __R as the last argument.  The result is cast
   to __m256i so the macro yields the same type as the function form
   _mm512_mask_cvttpd_epu32 rather than the builtin's own vector type. */
#define _mm512_mask_cvtt_roundpd_epu32(__W, __U, __A, __R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__A), (__v8si)(__W), \
                                             (__mmask8)(__U), (__R)); \
})
5269
/* Expands to __builtin_ia32_cvttpd2udq512_mask on __A with a zero 256-bit
   second operand, __U as the mask and __R as the last argument.  The result
   is cast to __m256i so the macro yields the same type as the function form
   _mm512_maskz_cvttpd_epu32 rather than the builtin's own vector type. */
#define _mm512_maskz_cvtt_roundpd_epu32(__U, __A, __R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(__U), (__R)); \
})
5276
5277static __inline__ __m256i __DEFAULT_FN_ATTRS
5278_mm512_cvttpd_epu32 (__m512d __A)
5279{
5280 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5281 (__v8si)
5282 _mm256_undefined_si256 (),
5283 (__mmask8) -1,
5284 _MM_FROUND_CUR_DIRECTION);
5285}
5286
5287static __inline__ __m256i __DEFAULT_FN_ATTRS
5288_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
5289{
5290 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5291 (__v8si) __W,
5292 (__mmask8) __U,
5293 _MM_FROUND_CUR_DIRECTION);
5294}
5295
5296static __inline__ __m256i __DEFAULT_FN_ATTRS
5297_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
5298{
5299 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5300 (__v8si)
5301 _mm256_setzero_si256 (),
5302 (__mmask8) __U,
5303 _MM_FROUND_CUR_DIRECTION);
5304}
5305
5306static __inline__ __m512i __DEFAULT_FN_ATTRS
5307_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
5308 __m512i __B)
5309{
5310 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
5311 (__v16si) __B,
5312 (__v16si) __W,
5313 (__mmask16) __U);
5314}
5315
5316static __inline__ __m512i __DEFAULT_FN_ATTRS
5317_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5318{
5319 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
5320 (__v16si) __B,
5321 (__v16si)
5322 _mm512_setzero_si512 (),
5323 (__mmask16) __U);
5324}
5325
5326static __inline__ __m512i __DEFAULT_FN_ATTRS
5327_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
5328{
5329 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
5330 (__v8di) __B,
5331 (__v8di)
5332 _mm512_setzero_si512 (),
5333 (__mmask8) -1);
5334}
5335
5336static __inline__ __m512i __DEFAULT_FN_ATTRS
5337_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5338{
5339 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
5340 (__v8di) __B,
5341 (__v8di) __W,
5342 (__mmask8) __U);
5343}
5344
5345static __inline__ __m512i __DEFAULT_FN_ATTRS
5346_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5347{
5348 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
5349 (__v8di) __B,
5350 (__v8di)
5351 _mm512_setzero_si512 (),
5352 (__mmask8) __U);
5353}
5354
5355static __inline__ __m512i __DEFAULT_FN_ATTRS
5356_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
5357{
5358 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
5359 (__v16si) __B,
5360 (__v16si)
5361 _mm512_setzero_si512 (),
5362 (__mmask16) -1);
5363}
5364
5365static __inline__ __m512i __DEFAULT_FN_ATTRS
5366_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
5367 __m512i __B)
5368{
5369 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
5370 (__v16si) __B,
5371 (__v16si) __W,
5372 (__mmask16) __U);
5373}
5374
5375static __inline__ __m512i __DEFAULT_FN_ATTRS
5376_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5377{
5378 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
5379 (__v16si) __B,
5380 (__v16si)
5381 _mm512_setzero_si512 (),
5382 (__mmask16) __U);
5383}
5384
5385static __inline__ __m512i __DEFAULT_FN_ATTRS
5386_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
5387{
5388 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
5389 (__v8di) __B,
5390 (__v8di)
5391 _mm512_setzero_si512 (),
5392 (__mmask8) -1);
5393}
5394
5395static __inline__ __m512i __DEFAULT_FN_ATTRS
5396_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5397{
5398 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
5399 (__v8di) __B,
5400 (__v8di) __W,
5401 (__mmask8) __U);
5402}
5403
5404static __inline__ __m512i __DEFAULT_FN_ATTRS
5405_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5406{
5407 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
5408 (__v8di) __B,
5409 (__v8di)
5410 _mm512_setzero_si512 (),
5411 (__mmask8) __U);
5412}
5413
/* Expands to __builtin_ia32_rndscalesd_round_mask on __A and __B with a zero
   third operand, mask (__mmask8)-1, then __imm and __R. */
#define _mm_roundscale_round_sd(__A, __B, __imm, __R) __extension__ ({ \
  __builtin_ia32_rndscalesd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                       (__v2df)_mm_setzero_pd(), \
                                       (__mmask8)-1, (__imm), (__R)); \
})
5419
/* Expands to __builtin_ia32_rndscalesd_round_mask on __A and __B with a zero
   third operand, mask (__mmask8)-1, __imm and _MM_FROUND_CUR_DIRECTION. */
#define _mm_roundscale_sd(__A, __B, __imm) __extension__ ({ \
  __builtin_ia32_rndscalesd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                       (__v2df)_mm_setzero_pd(), \
                                       (__mmask8)-1, (__imm), \
                                       _MM_FROUND_CUR_DIRECTION); \
})
5426
/* Expands to __builtin_ia32_rndscalesd_round_mask on __A and __B with __W as
   the third operand, __U as the mask, __imm and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_roundscale_sd(__W, __U, __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_rndscalesd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                       (__v2df)(__W), (__mmask8)(__U), \
                                       (__imm), _MM_FROUND_CUR_DIRECTION); \
})
5435
/* Expands to __builtin_ia32_rndscalesd_round_mask on __A and __B with __W as
   the third operand, __U as the mask, then __I and __R. */
#define _mm_mask_roundscale_round_sd(__W, __U, __A, __B, __I, __R) __extension__ ({ \
  __builtin_ia32_rndscalesd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                       (__v2df)(__W), (__mmask8)(__U), \
                                       (__I), (__R)); \
})
5444
/* Expands to __builtin_ia32_rndscalesd_round_mask on __A and __B with a zero
   third operand, __U as the mask, __I and _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_roundscale_sd(__U, __A, __B, __I) __extension__ ({ \
  __builtin_ia32_rndscalesd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                       (__v2df)_mm_setzero_pd(), \
                                       (__mmask8)(__U), (__I), \
                                       _MM_FROUND_CUR_DIRECTION); \
})
5453
/* Expands to __builtin_ia32_rndscalesd_round_mask on __A and __B with a zero
   third operand, __U as the mask, then __I and __R. */
#define _mm_maskz_roundscale_round_sd(__U, __A, __B, __I, __R) __extension__ ({ \
  __builtin_ia32_rndscalesd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                       (__v2df)_mm_setzero_pd(), \
                                       (__mmask8)(__U), (__I), (__R)); \
})
5462
/* Expands to __builtin_ia32_rndscaless_round_mask on __A and __B with a zero
   third operand, mask (__mmask8)-1, then __imm and __R. */
#define _mm_roundscale_round_ss(__A, __B, __imm, __R) __extension__ ({ \
  __builtin_ia32_rndscaless_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)-1, (__imm), (__R)); \
})
5468
/* Expands to __builtin_ia32_rndscaless_round_mask on __A and __B with a zero
   third operand, mask (__mmask8)-1, __imm and _MM_FROUND_CUR_DIRECTION. */
#define _mm_roundscale_ss(__A, __B, __imm) __extension__ ({ \
  __builtin_ia32_rndscaless_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)-1, (__imm), \
                                       _MM_FROUND_CUR_DIRECTION); \
})
5475
/* Expands to __builtin_ia32_rndscaless_round_mask on __A and __B with __W as
   the third operand, __U as the mask, __I and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_roundscale_ss(__W, __U, __A, __B, __I) __extension__ ({ \
  __builtin_ia32_rndscaless_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                       (__v4sf)(__W), (__mmask8)(__U), \
                                       (__I), _MM_FROUND_CUR_DIRECTION); \
})
5484
/* Expands to __builtin_ia32_rndscaless_round_mask on __A and __B with __W as
   the third operand, __U as the mask, then __I and __R. */
#define _mm_mask_roundscale_round_ss(__W, __U, __A, __B, __I, __R) __extension__ ({ \
  __builtin_ia32_rndscaless_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                       (__v4sf)(__W), (__mmask8)(__U), \
                                       (__I), (__R)); \
})
5493
/* Expands to __builtin_ia32_rndscaless_round_mask on __A and __B with a zero
   third operand, __U as the mask, __I and _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_roundscale_ss(__U, __A, __B, __I) __extension__ ({ \
  __builtin_ia32_rndscaless_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(__U), (__I), \
                                       _MM_FROUND_CUR_DIRECTION); \
})
5502
/* Expands to __builtin_ia32_rndscaless_round_mask on __A and __B with a zero
   third operand, __U as the mask, then __I and __R. */
#define _mm_maskz_roundscale_round_ss(__U, __A, __B, __I, __R) __extension__ ({ \
  __builtin_ia32_rndscaless_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(__U), (__I), (__R)); \
})
5511
/* Expands to __builtin_ia32_scalefpd512_mask on __A and __B with an undefined
   third operand, mask (__mmask8)-1 and __R as the last argument. */
#define _mm512_scalef_round_pd(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_scalefpd512_mask((__v8df)(__A), (__v8df)(__B), \
                                  (__v8df)_mm512_undefined_pd(), \
                                  (__mmask8)-1, (__R)); \
})
5519
/* Expands to __builtin_ia32_scalefpd512_mask on __A and __B with __W as the
   third operand, __U as the mask and __R as the last argument. */
#define _mm512_mask_scalef_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_scalefpd512_mask((__v8df)(__A), (__v8df)(__B), \
                                  (__v8df)(__W), (__mmask8)(__U), (__R)); \
})
5526
/* Expands to __builtin_ia32_scalefpd512_mask on __A and __B with a zero third
   operand, __U as the mask and __R as the last argument. */
#define _mm512_maskz_scalef_round_pd(__U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_scalefpd512_mask((__v8df)(__A), (__v8df)(__B), \
                                  (__v8df)_mm512_setzero_pd(), \
                                  (__mmask8)(__U), (__R)); \
})
5534
5535static __inline__ __m512d __DEFAULT_FN_ATTRS
5536_mm512_scalef_pd (__m512d __A, __m512d __B)
5537{
5538 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
5539 (__v8df) __B,
5540 (__v8df)
5541 _mm512_undefined_pd (),
5542 (__mmask8) -1,
5543 _MM_FROUND_CUR_DIRECTION);
5544}
5545
5546static __inline__ __m512d __DEFAULT_FN_ATTRS
5547_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
5548{
5549 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
5550 (__v8df) __B,
5551 (__v8df) __W,
5552 (__mmask8) __U,
5553 _MM_FROUND_CUR_DIRECTION);
5554}
5555
5556static __inline__ __m512d __DEFAULT_FN_ATTRS
5557_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
5558{
5559 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
5560 (__v8df) __B,
5561 (__v8df)
5562 _mm512_setzero_pd (),
5563 (__mmask8) __U,
5564 _MM_FROUND_CUR_DIRECTION);
5565}
5566
/* Expands to __builtin_ia32_scalefps512_mask on __A and __B with an undefined
   third operand, mask (__mmask16)-1 and __R as the last argument. */
#define _mm512_scalef_round_ps(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_scalefps512_mask((__v16sf)(__A), (__v16sf)(__B), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  (__mmask16)-1, (__R)); \
})
5574
/* Expands to __builtin_ia32_scalefps512_mask on __A and __B with __W as the
   third operand, __U as the mask and __R as the last argument. */
#define _mm512_mask_scalef_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_scalefps512_mask((__v16sf)(__A), (__v16sf)(__B), \
                                  (__v16sf)(__W), (__mmask16)(__U), (__R)); \
})
5581
/* Expands to __builtin_ia32_scalefps512_mask on __A and __B with a zero third
   operand, __U as the mask and __R as the last argument. */
#define _mm512_maskz_scalef_round_ps(__U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_scalefps512_mask((__v16sf)(__A), (__v16sf)(__B), \
                                  (__v16sf)_mm512_setzero_ps(), \
                                  (__mmask16)(__U), (__R)); \
})
5589
5590static __inline__ __m512 __DEFAULT_FN_ATTRS
5591_mm512_scalef_ps (__m512 __A, __m512 __B)
5592{
5593 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
5594 (__v16sf) __B,
5595 (__v16sf)
5596 _mm512_undefined_ps (),
5597 (__mmask16) -1,
5598 _MM_FROUND_CUR_DIRECTION);
5599}
5600
5601static __inline__ __m512 __DEFAULT_FN_ATTRS
5602_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
5603{
5604 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
5605 (__v16sf) __B,
5606 (__v16sf) __W,
5607 (__mmask16) __U,
5608 _MM_FROUND_CUR_DIRECTION);
5609}
5610
5611static __inline__ __m512 __DEFAULT_FN_ATTRS
5612_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
5613{
5614 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
5615 (__v16sf) __B,
5616 (__v16sf)
5617 _mm512_setzero_ps (),
5618 (__mmask16) __U,
5619 _MM_FROUND_CUR_DIRECTION);
5620}
5621
/* Expands to __builtin_ia32_scalefsd_round_mask on __A and __B with a zero
   third operand, mask (__mmask8)-1 and __R as the last argument. */
#define _mm_scalef_round_sd(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_scalefsd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                     (__v2df)_mm_setzero_pd(), \
                                     (__mmask8)-1, (__R)); \
})
5628
5629static __inline__ __m128d __DEFAULT_FN_ATTRS
5630_mm_scalef_sd (__m128d __A, __m128d __B)
5631{
5632 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
5633 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
5634 (__mmask8) -1,
5635 _MM_FROUND_CUR_DIRECTION);
5636}
5637
5638static __inline__ __m128d __DEFAULT_FN_ATTRS
5639_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5640{
5641 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
5642 (__v2df) __B,
5643 (__v2df) __W,
5644 (__mmask8) __U,
5645 _MM_FROUND_CUR_DIRECTION);
5646}
5647
/* Expands to __builtin_ia32_scalefsd_round_mask on __A and __B with __W as
   the third operand, __U as the mask and __R as the last argument.
   __W and __U are parenthesized before being cast so that a compound
   argument expression (e.g. "c ? m1 : m2") is cast as a whole instead of
   having the cast bind to its first operand only. */
#define _mm_mask_scalef_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_scalefsd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                     (__v2df)(__W), (__mmask8)(__U), \
                                     (__R)); \
})
5654
5655static __inline__ __m128d __DEFAULT_FN_ATTRS
5656_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
5657{
5658 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
5659 (__v2df) __B,
5660 (__v2df) _mm_setzero_pd (),
5661 (__mmask8) __U,
5662 _MM_FROUND_CUR_DIRECTION);
5663}
5664
/* Expands to __builtin_ia32_scalefsd_round_mask on __A and __B with a zero
   third operand, __U as the mask and __R as the last argument.
   __U is parenthesized before the __mmask8 cast so that a compound mask
   expression is cast as a whole instead of having the cast bind to its
   first operand only. */
#define _mm_maskz_scalef_round_sd(__U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_scalefsd_round_mask((__v2df)(__A), (__v2df)(__B), \
                                     (__v2df)_mm_setzero_pd(), \
                                     (__mmask8)(__U), (__R)); \
})
5671
/* Expands to __builtin_ia32_scalefss_round_mask on __A and __B with a zero
   third operand, mask (__mmask8)-1 and __R as the last argument. */
#define _mm_scalef_round_ss(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_scalefss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)-1, (__R)); \
})
5678
5679static __inline__ __m128 __DEFAULT_FN_ATTRS
5680_mm_scalef_ss (__m128 __A, __m128 __B)
5681{
5682 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
5683 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
5684 (__mmask8) -1,
5685 _MM_FROUND_CUR_DIRECTION);
5686}
5687
5688static __inline__ __m128 __DEFAULT_FN_ATTRS
5689_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5690{
5691 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
5692 (__v4sf) __B,
5693 (__v4sf) __W,
5694 (__mmask8) __U,
5695 _MM_FROUND_CUR_DIRECTION);
5696}
5697
/* Expands to __builtin_ia32_scalefss_round_mask on __A and __B with __W as
   the third operand, __U as the mask and __R as the last argument.
   __W and __U are parenthesized before being cast so that a compound
   argument expression is cast as a whole instead of having the cast bind to
   its first operand only. */
#define _mm_mask_scalef_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_scalefss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                     (__v4sf)(__W), (__mmask8)(__U), \
                                     (__R)); \
})
5704
5705static __inline__ __m128 __DEFAULT_FN_ATTRS
5706_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
5707{
5708 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
5709 (__v4sf) __B,
5710 (__v4sf) _mm_setzero_ps (),
5711 (__mmask8) __U,
5712 _MM_FROUND_CUR_DIRECTION);
5713}
5714
/* Expands to __builtin_ia32_scalefss_round_mask on __A and __B with a zero
   third operand, __U as the mask and __R as the last argument.
   The caller's __R is forwarded to the builtin (rather than a fixed
   _MM_FROUND_CUR_DIRECTION), so the macro's rounding parameter actually takes
   effect, as it does in _mm_maskz_scalef_round_sd.  __U is parenthesized
   before the __mmask8 cast so a compound mask expression is cast as a
   whole. */
#define _mm_maskz_scalef_round_ss(__U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_scalefss_round_mask((__v4sf)(__A), (__v4sf)(__B), \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)(__U), (__R)); \
})
5721
5722static __inline__ __m512i __DEFAULT_FN_ATTRS
5723_mm512_srai_epi32 (__m512i __A, unsigned int __B)
5724{
5725 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
5726 (__v16si)
5727 _mm512_setzero_si512 (),
5728 (__mmask16) -1);
5729}
5730
/* Expands to __builtin_ia32_psradi512_mask on __A with count __B, __W as the
   third operand and __U as the mask.  The result is cast to __m512i (as the
   function form _mm512_srai_epi32 does) so the macro does not yield the
   int-lane vector type used for the builtin's operands. */
#define _mm512_mask_srai_epi32(__W, __U, __A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__A), (__B), \
                                         (__v16si)(__W), (__mmask16)(__U)); \
})
5736
/* Expands to __builtin_ia32_psradi512_mask on __A with count __B, a zero
   third operand and __U as the mask.  The result is cast to __m512i (as the
   function form _mm512_srai_epi32 does) so the macro does not yield the
   int-lane vector type used for the builtin's operands. */
#define _mm512_maskz_srai_epi32(__U, __A, __B) __extension__ ({ \
  (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__A), (__B), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)(__U)); \
})
5743
/* Expands to __builtin_ia32_psraqi512_mask on __A with count __B, a zero
   third operand and mask (__mmask8)-1. */
#define _mm512_srai_epi64(__A, __B) __extension__ ({ \
  __builtin_ia32_psraqi512_mask((__v8di)(__A), (__B), \
                                (__v8di)_mm512_setzero_si512(), \
                                (__mmask8)-1); \
})
5750
/* Expands to __builtin_ia32_psraqi512_mask on __A with count __B, __W as the
   third operand and __U as the mask. */
#define _mm512_mask_srai_epi64(__W, __U, __A, __B) __extension__ ({ \
  __builtin_ia32_psraqi512_mask((__v8di)(__A), (__B), \
                                (__v8di)(__W), (__mmask8)(__U)); \
})
5756
/* Expands to __builtin_ia32_psraqi512_mask on __A with count __B, a zero
   third operand and __U as the mask. */
#define _mm512_maskz_srai_epi64(__U, __A, __B) __extension__ ({ \
  __builtin_ia32_psraqi512_mask((__v8di)(__A), (__B), \
                                (__v8di)_mm512_setzero_si512(), \
                                (__mmask8)(__U)); \
})
5763
/* Expands to __builtin_ia32_shuf_f32x4_mask on __A and __B with control
   __imm, an undefined fourth operand and mask (__mmask16)-1. */
#define _mm512_shuffle_f32x4(__A, __B, __imm) __extension__ ({ \
  __builtin_ia32_shuf_f32x4_mask((__v16sf)(__A), (__v16sf)(__B), (__imm), \
                                 (__v16sf)_mm512_undefined_ps(), \
                                 (__mmask16)-1); \
})
5771
/* Expands to __builtin_ia32_shuf_f32x4_mask on __A and __B with control
   __imm, __W as the fourth operand and __U as the mask. */
#define _mm512_mask_shuffle_f32x4(__W, __U, __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_shuf_f32x4_mask((__v16sf)(__A), (__v16sf)(__B), (__imm), \
                                 (__v16sf)(__W), (__mmask16)(__U)); \
})
5778
/* Expands to __builtin_ia32_shuf_f32x4_mask on __A and __B with control
   __imm, a zero fourth operand and __U as the mask. */
#define _mm512_maskz_shuffle_f32x4(__U, __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_shuf_f32x4_mask((__v16sf)(__A), (__v16sf)(__B), (__imm), \
                                 (__v16sf)_mm512_setzero_ps(), \
                                 (__mmask16)(__U)); \
})
5786
/* Expands to __builtin_ia32_shuf_f64x2_mask on __A and __B with control
   __imm, an undefined fourth operand and mask (__mmask8)-1. */
#define _mm512_shuffle_f64x2(__A, __B, __imm) __extension__ ({ \
  __builtin_ia32_shuf_f64x2_mask((__v8df)(__A), (__v8df)(__B), (__imm), \
                                 (__v8df)_mm512_undefined_pd(), \
                                 (__mmask8)-1); \
})
5794
/* Expands to __builtin_ia32_shuf_f64x2_mask on __A and __B with control
   __imm, __W as the fourth operand and __U as the mask. */
#define _mm512_mask_shuffle_f64x2(__W, __U, __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_shuf_f64x2_mask((__v8df)(__A), (__v8df)(__B), (__imm), \
                                 (__v8df)(__W), (__mmask8)(__U)); \
})
5801
/* Expands to __builtin_ia32_shuf_f64x2_mask on __A and __B with control
   __imm, a zero fourth operand and __U as the mask. */
#define _mm512_maskz_shuffle_f64x2(__U, __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_shuf_f64x2_mask((__v8df)(__A), (__v8df)(__B), (__imm), \
                                 (__v8df)_mm512_setzero_pd(), \
                                 (__mmask8)(__U)); \
})
5809
/* Expands to __builtin_ia32_shuf_i32x4_mask on __A and __B with control
   __imm, a zero fourth operand and mask (__mmask16)-1.  The result is cast
   to __m512i so the macro yields the long-long-lane vector type used for
   integer intrinsics in this header rather than the int-lane type used for
   the builtin's operands. */
#define _mm512_shuffle_i32x4(__A, __B, __imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__A), (__v16si)(__B), \
                                          (__imm), \
                                          (__v16si)_mm512_setzero_si512(), \
                                          (__mmask16)-1); \
})
5818
/* Expands to __builtin_ia32_shuf_i32x4_mask on __A and __B with control
   __imm, __W as the fourth operand and __U as the mask.  The result is cast
   to __m512i so the macro yields the long-long-lane vector type used for
   integer intrinsics in this header rather than the int-lane type used for
   the builtin's operands. */
#define _mm512_mask_shuffle_i32x4(__W, __U, __A, __B, __imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__A), (__v16si)(__B), \
                                          (__imm), (__v16si)(__W), \
                                          (__mmask16)(__U)); \
})
5826
/* Expands to __builtin_ia32_shuf_i32x4_mask on __A and __B with control
   __imm, a zero fourth operand and __U as the mask.  The result is cast to
   __m512i so the macro yields the long-long-lane vector type used for
   integer intrinsics in this header rather than the int-lane type used for
   the builtin's operands. */
#define _mm512_maskz_shuffle_i32x4(__U, __A, __B, __imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__A), (__v16si)(__B), \
                                          (__imm), \
                                          (__v16si)_mm512_setzero_si512(), \
                                          (__mmask16)(__U)); \
})
5835
/* Expands to __builtin_ia32_shuf_i64x2_mask on __A and __B with control
   __imm, a zero fourth operand and mask (__mmask8)-1. */
#define _mm512_shuffle_i64x2(__A, __B, __imm) __extension__ ({ \
  __builtin_ia32_shuf_i64x2_mask((__v8di)(__A), (__v8di)(__B), (__imm), \
                                 (__v8di)_mm512_setzero_si512(), \
                                 (__mmask8)-1); \
})
5843
/* Expands to __builtin_ia32_shuf_i64x2_mask on __A and __B with control
   __imm, __W as the fourth operand and __U as the mask. */
#define _mm512_mask_shuffle_i64x2(__W, __U, __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_shuf_i64x2_mask((__v8di)(__A), (__v8di)(__B), (__imm), \
                                 (__v8di)(__W), (__mmask8)(__U)); \
})
5850
/* Expands to __builtin_ia32_shuf_i64x2_mask on __A and __B with control
   __imm, a zero fourth operand and __U as the mask. */
#define _mm512_maskz_shuffle_i64x2(__U, __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_shuf_i64x2_mask((__v8di)(__A), (__v8di)(__B), (__imm), \
                                 (__v8di)_mm512_setzero_si512(), \
                                 (__mmask8)(__U)); \
})
5858
/* Expands to __builtin_ia32_shufpd512_mask on __M and __V with control __imm,
   an undefined fourth operand and mask (__mmask8)-1. */
#define _mm512_shuffle_pd(__M, __V, __imm) __extension__ ({ \
  __builtin_ia32_shufpd512_mask((__v8df)(__M), (__v8df)(__V), (__imm), \
                                (__v8df)_mm512_undefined_pd(), \
                                (__mmask8)-1); \
})
5866
/* Expands to __builtin_ia32_shufpd512_mask on __M and __V with control __imm,
   __W as the fourth operand and __U as the mask. */
#define _mm512_mask_shuffle_pd(__W, __U, __M, __V, __imm) __extension__ ({ \
  __builtin_ia32_shufpd512_mask((__v8df)(__M), (__v8df)(__V), (__imm), \
                                (__v8df)(__W), (__mmask8)(__U)); \
})
5873
/* Expands to __builtin_ia32_shufpd512_mask on __M and __V with control __imm,
   a zero fourth operand and __U as the mask. */
#define _mm512_maskz_shuffle_pd(__U, __M, __V, __imm) __extension__ ({ \
  __builtin_ia32_shufpd512_mask((__v8df)(__M), (__v8df)(__V), (__imm), \
                                (__v8df)_mm512_setzero_pd(), \
                                (__mmask8)(__U)); \
})
5881
/* Expands to __builtin_ia32_shufps512_mask on __M and __V with control __imm,
   an undefined fourth operand and mask (__mmask16)-1. */
#define _mm512_shuffle_ps(__M, __V, __imm) __extension__ ({ \
  __builtin_ia32_shufps512_mask((__v16sf)(__M), (__v16sf)(__V), (__imm), \
                                (__v16sf)_mm512_undefined_ps(), \
                                (__mmask16)-1); \
})
5889
/* Expands to __builtin_ia32_shufps512_mask on __M and __V with control __imm,
   __W as the fourth operand and __U as the mask. */
#define _mm512_mask_shuffle_ps(__W, __U, __M, __V, __imm) __extension__ ({ \
  __builtin_ia32_shufps512_mask((__v16sf)(__M), (__v16sf)(__V), (__imm), \
                                (__v16sf)(__W), (__mmask16)(__U)); \
})
5896
/* Expands to __builtin_ia32_shufps512_mask on __M and __V with control __imm,
   a zero fourth operand and __U as the mask. */
#define _mm512_maskz_shuffle_ps(__U, __M, __V, __imm) __extension__ ({ \
  __builtin_ia32_shufps512_mask((__v16sf)(__M), (__v16sf)(__V), (__imm), \
                                (__v16sf)_mm512_setzero_ps(), \
                                (__mmask16)(__U)); \
})
5904
/* Expands to __builtin_ia32_sqrtsd_round_mask with the operands passed in the
   order (__B, __A), a zero third operand, mask (__mmask8)-1 and __R. */
#define _mm_sqrt_round_sd(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_sqrtsd_round_mask((__v2df)(__B), (__v2df)(__A), \
                                   (__v2df)_mm_setzero_pd(), \
                                   (__mmask8)-1, (__R)); \
})
5911
5912static __inline__ __m128d __DEFAULT_FN_ATTRS
5913_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5914{
5915 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __B,
5916 (__v2df) __A,
5917 (__v2df) __W,
5918 (__mmask8) __U,
5919 _MM_FROUND_CUR_DIRECTION);
5920}
5921
/* Expands to __builtin_ia32_sqrtsd_round_mask with the operands passed in the
   order (__B, __A), __W as the third operand, __U as the mask and __R.
   __W and __U are parenthesized before being cast so that a compound
   argument expression is cast as a whole instead of having the cast bind to
   its first operand only. */
#define _mm_mask_sqrt_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_sqrtsd_round_mask((__v2df)(__B), (__v2df)(__A), \
                                   (__v2df)(__W), (__mmask8)(__U), \
                                   (__R)); \
})
5928
5929static __inline__ __m128d __DEFAULT_FN_ATTRS
5930_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
5931{
5932 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __B,
5933 (__v2df) __A,
5934 (__v2df) _mm_setzero_pd (),
5935 (__mmask8) __U,
5936 _MM_FROUND_CUR_DIRECTION);
5937}
5938
/* Expands to __builtin_ia32_sqrtsd_round_mask with the operands passed in the
   order (__B, __A), a zero third operand, __U as the mask and __R.
   __U is parenthesized before the __mmask8 cast so that a compound mask
   expression is cast as a whole instead of having the cast bind to its
   first operand only. */
#define _mm_maskz_sqrt_round_sd(__U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_sqrtsd_round_mask((__v2df)(__B), (__v2df)(__A), \
                                   (__v2df)_mm_setzero_pd(), \
                                   (__mmask8)(__U), (__R)); \
})
5945
/* Expands to __builtin_ia32_sqrtss_round_mask with the operands passed in the
   order (__B, __A), a zero third operand, mask (__mmask8)-1 and __R. */
#define _mm_sqrt_round_ss(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_sqrtss_round_mask((__v4sf)(__B), (__v4sf)(__A), \
                                   (__v4sf)_mm_setzero_ps(), \
                                   (__mmask8)-1, (__R)); \
})
5952
5953static __inline__ __m128 __DEFAULT_FN_ATTRS
5954_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5955{
5956 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __B,
5957 (__v4sf) __A,
5958 (__v4sf) __W,
5959 (__mmask8) __U,
5960 _MM_FROUND_CUR_DIRECTION);
5961}
5962
/* Expands to __builtin_ia32_sqrtss_round_mask with operands (__B, __A), __W
   as the third operand, __U as the mask and __R as the rounding argument.
   Every macro argument is parenthesized before being cast so that a compound
   expression passed by the caller is converted as a whole. */
#define _mm_mask_sqrt_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_sqrtss_round_mask ((__v4sf) (__B), (__v4sf) (__A), \
                                    (__v4sf) (__W), \
                                    (__mmask8) (__U), (__R)); \
})
5969
5970static __inline__ __m128 __DEFAULT_FN_ATTRS
5971_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
5972{
5973 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
5974 (__v4sf) __B,
5975 (__v4sf) _mm_setzero_ps (),
5976 (__mmask8) __U,
5977 _MM_FROUND_CUR_DIRECTION);
5978}
5979
/* Expands to __builtin_ia32_sqrtss_round_mask with operands (__B, __A), a
   zero vector as the third operand, __U as the mask and __R as the rounding
   argument.  __U and __R are parenthesized so compound argument expressions
   expand as a unit. */
#define _mm_maskz_sqrt_round_ss(__U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_sqrtss_round_mask ((__v4sf) (__B), (__v4sf) (__A), \
                                    (__v4sf) _mm_setzero_ps (), \
                                    (__mmask8) (__U), (__R)); \
})
5986
5987static __inline__ __m512 __DEFAULT_FN_ATTRS
5988_mm512_broadcast_f32x4 (__m128 __A)
5989{
5990 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
5991 (__v16sf)
5992 _mm512_undefined_ps (),
5993 (__mmask16) -1);
5994}
5995
5996static __inline__ __m512 __DEFAULT_FN_ATTRS
5997_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
5998{
5999 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
6000 (__v16sf) __O,
6001 __M);
6002}
6003
6004static __inline__ __m512 __DEFAULT_FN_ATTRS
6005_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
6006{
6007 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
6008 (__v16sf)
6009 _mm512_setzero_ps (),
6010 __M);
6011}
6012
6013static __inline__ __m512d __DEFAULT_FN_ATTRS
6014_mm512_broadcast_f64x4 (__m256d __A)
6015{
6016 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
6017 (__v8df)
6018 _mm512_undefined_pd (),
6019 (__mmask8) -1);
6020}
6021
6022static __inline__ __m512d __DEFAULT_FN_ATTRS
6023_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
6024{
6025 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
6026 (__v8df) __O,
6027 __M);
6028}
6029
6030static __inline__ __m512d __DEFAULT_FN_ATTRS
6031_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
6032{
6033 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
6034 (__v8df)
6035 _mm512_setzero_pd (),
6036 __M);
6037}
6038
6039static __inline__ __m512i __DEFAULT_FN_ATTRS
6040_mm512_broadcast_i32x4 (__m128i __A)
6041{
6042 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
6043 (__v16si)
6044 _mm512_undefined_epi32 (),
6045 (__mmask16) -1);
6046}
6047
6048static __inline__ __m512i __DEFAULT_FN_ATTRS
6049_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
6050{
6051 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
6052 (__v16si) __O,
6053 __M);
6054}
6055
6056static __inline__ __m512i __DEFAULT_FN_ATTRS
6057_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
6058{
6059 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
6060 (__v16si)
6061 _mm512_setzero_si512 (),
6062 __M);
6063}
6064
6065static __inline__ __m512i __DEFAULT_FN_ATTRS
6066_mm512_broadcast_i64x4 (__m256i __A)
6067{
6068 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
6069 (__v8di)
6070 _mm512_undefined_epi32 (),
6071 (__mmask8) -1);
6072}
6073
6074static __inline__ __m512i __DEFAULT_FN_ATTRS
6075_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
6076{
6077 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
6078 (__v8di) __O,
6079 __M);
6080}
6081
6082static __inline__ __m512i __DEFAULT_FN_ATTRS
6083_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
6084{
6085 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
6086 (__v8di)
6087 _mm512_setzero_si512 (),
6088 __M);
6089}
6090
6091static __inline__ __m512d __DEFAULT_FN_ATTRS
6092_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
6093{
6094 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
6095 (__v8df) __O, __M);
6096}
6097
6098static __inline__ __m512d __DEFAULT_FN_ATTRS
6099_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6100{
6101 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
6102 (__v8df)
6103 _mm512_setzero_pd (),
6104 __M);
6105}
6106
6107static __inline__ __m512 __DEFAULT_FN_ATTRS
6108_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
6109{
6110 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
6111 (__v16sf) __O, __M);
6112}
6113
6114static __inline__ __m512 __DEFAULT_FN_ATTRS
6115_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
6116{
6117 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
6118 (__v16sf)
6119 _mm512_setzero_ps (),
6120 __M);
6121}
6122
6123static __inline__ __m128i __DEFAULT_FN_ATTRS
6124_mm512_cvtsepi32_epi8 (__m512i __A)
6125{
6126 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6127 (__v16qi) _mm_undefined_si128 (),
6128 (__mmask16) -1);
6129}
6130
6131static __inline__ __m128i __DEFAULT_FN_ATTRS
6132_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6133{
6134 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6135 (__v16qi) __O, __M);
6136}
6137
6138static __inline__ __m128i __DEFAULT_FN_ATTRS
6139_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
6140{
6141 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6142 (__v16qi) _mm_setzero_si128 (),
6143 __M);
6144}
6145
6146static __inline__ void __DEFAULT_FN_ATTRS
6147_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6148{
6149 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6150}
6151
6152static __inline__ __m256i __DEFAULT_FN_ATTRS
6153_mm512_cvtsepi32_epi16 (__m512i __A)
6154{
6155 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6156 (__v16hi) _mm256_undefined_si256 (),
6157 (__mmask16) -1);
6158}
6159
6160static __inline__ __m256i __DEFAULT_FN_ATTRS
6161_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6162{
6163 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6164 (__v16hi) __O, __M);
6165}
6166
6167static __inline__ __m256i __DEFAULT_FN_ATTRS
6168_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
6169{
6170 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6171 (__v16hi) _mm256_setzero_si256 (),
6172 __M);
6173}
6174
6175static __inline__ void __DEFAULT_FN_ATTRS
6176_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
6177{
6178 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6179}
6180
6181static __inline__ __m128i __DEFAULT_FN_ATTRS
6182_mm512_cvtsepi64_epi8 (__m512i __A)
6183{
6184 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6185 (__v16qi) _mm_undefined_si128 (),
6186 (__mmask8) -1);
6187}
6188
6189static __inline__ __m128i __DEFAULT_FN_ATTRS
6190_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6191{
6192 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6193 (__v16qi) __O, __M);
6194}
6195
6196static __inline__ __m128i __DEFAULT_FN_ATTRS
6197_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
6198{
6199 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6200 (__v16qi) _mm_setzero_si128 (),
6201 __M);
6202}
6203
6204static __inline__ void __DEFAULT_FN_ATTRS
6205_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
6206{
6207 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6208}
6209
6210static __inline__ __m256i __DEFAULT_FN_ATTRS
6211_mm512_cvtsepi64_epi32 (__m512i __A)
6212{
6213 __v8si __O;
6214 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6215 (__v8si) _mm256_undefined_si256 (),
6216 (__mmask8) -1);
6217}
6218
6219static __inline__ __m256i __DEFAULT_FN_ATTRS
6220_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6221{
6222 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6223 (__v8si) __O, __M);
6224}
6225
6226static __inline__ __m256i __DEFAULT_FN_ATTRS
6227_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
6228{
6229 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6230 (__v8si) _mm256_setzero_si256 (),
6231 __M);
6232}
6233
6234static __inline__ void __DEFAULT_FN_ATTRS
6235_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
6236{
6237 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6238}
6239
6240static __inline__ __m128i __DEFAULT_FN_ATTRS
6241_mm512_cvtsepi64_epi16 (__m512i __A)
6242{
6243 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6244 (__v8hi) _mm_undefined_si128 (),
6245 (__mmask8) -1);
6246}
6247
6248static __inline__ __m128i __DEFAULT_FN_ATTRS
6249_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6250{
6251 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6252 (__v8hi) __O, __M);
6253}
6254
6255static __inline__ __m128i __DEFAULT_FN_ATTRS
6256_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
6257{
6258 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6259 (__v8hi) _mm_setzero_si128 (),
6260 __M);
6261}
6262
6263static __inline__ void __DEFAULT_FN_ATTRS
6264_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
6265{
6266 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
6267}
6268
6269static __inline__ __m128i __DEFAULT_FN_ATTRS
6270_mm512_cvtusepi32_epi8 (__m512i __A)
6271{
6272 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6273 (__v16qi) _mm_undefined_si128 (),
6274 (__mmask16) -1);
6275}
6276
6277static __inline__ __m128i __DEFAULT_FN_ATTRS
6278_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6279{
6280 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6281 (__v16qi) __O,
6282 __M);
6283}
6284
6285static __inline__ __m128i __DEFAULT_FN_ATTRS
6286_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
6287{
6288 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6289 (__v16qi) _mm_setzero_si128 (),
6290 __M);
6291}
6292
6293static __inline__ void __DEFAULT_FN_ATTRS
6294_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6295{
6296 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6297}
6298
6299static __inline__ __m256i __DEFAULT_FN_ATTRS
6300_mm512_cvtusepi32_epi16 (__m512i __A)
6301{
6302 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6303 (__v16hi) _mm256_undefined_si256 (),
6304 (__mmask16) -1);
6305}
6306
6307static __inline__ __m256i __DEFAULT_FN_ATTRS
6308_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6309{
6310 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6311 (__v16hi) __O,
6312 __M);
6313}
6314
6315static __inline__ __m256i __DEFAULT_FN_ATTRS
6316_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
6317{
6318 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6319 (__v16hi) _mm256_setzero_si256 (),
6320 __M);
6321}
6322
6323static __inline__ void __DEFAULT_FN_ATTRS
6324_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
6325{
6326 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6327}
6328
6329static __inline__ __m128i __DEFAULT_FN_ATTRS
6330_mm512_cvtusepi64_epi8 (__m512i __A)
6331{
6332 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6333 (__v16qi) _mm_undefined_si128 (),
6334 (__mmask8) -1);
6335}
6336
6337static __inline__ __m128i __DEFAULT_FN_ATTRS
6338_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6339{
6340 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6341 (__v16qi) __O,
6342 __M);
6343}
6344
6345static __inline__ __m128i __DEFAULT_FN_ATTRS
6346_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
6347{
6348 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6349 (__v16qi) _mm_setzero_si128 (),
6350 __M);
6351}
6352
6353static __inline__ void __DEFAULT_FN_ATTRS
6354_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
6355{
6356 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6357}
6358
6359static __inline__ __m256i __DEFAULT_FN_ATTRS
6360_mm512_cvtusepi64_epi32 (__m512i __A)
6361{
6362 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6363 (__v8si) _mm256_undefined_si256 (),
6364 (__mmask8) -1);
6365}
6366
6367static __inline__ __m256i __DEFAULT_FN_ATTRS
6368_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6369{
6370 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6371 (__v8si) __O, __M);
6372}
6373
6374static __inline__ __m256i __DEFAULT_FN_ATTRS
6375_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
6376{
6377 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6378 (__v8si) _mm256_setzero_si256 (),
6379 __M);
6380}
6381
6382static __inline__ void __DEFAULT_FN_ATTRS
6383_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
6384{
6385 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
6386}
6387
6388static __inline__ __m128i __DEFAULT_FN_ATTRS
6389_mm512_cvtusepi64_epi16 (__m512i __A)
6390{
6391 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6392 (__v8hi) _mm_undefined_si128 (),
6393 (__mmask8) -1);
6394}
6395
6396static __inline__ __m128i __DEFAULT_FN_ATTRS
6397_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6398{
6399 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6400 (__v8hi) __O, __M);
6401}
6402
6403static __inline__ __m128i __DEFAULT_FN_ATTRS
6404_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
6405{
6406 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6407 (__v8hi) _mm_setzero_si128 (),
6408 __M);
6409}
6410
6411static __inline__ void __DEFAULT_FN_ATTRS
6412_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
6413{
6414 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
6415}
6416
6417static __inline__ __m128i __DEFAULT_FN_ATTRS
6418_mm512_cvtepi32_epi8 (__m512i __A)
6419{
6420 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6421 (__v16qi) _mm_undefined_si128 (),
6422 (__mmask16) -1);
6423}
6424
6425static __inline__ __m128i __DEFAULT_FN_ATTRS
6426_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6427{
6428 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6429 (__v16qi) __O, __M);
6430}
6431
6432static __inline__ __m128i __DEFAULT_FN_ATTRS
6433_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
6434{
6435 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6436 (__v16qi) _mm_setzero_si128 (),
6437 __M);
6438}
6439
6440static __inline__ void __DEFAULT_FN_ATTRS
6441_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6442{
6443 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6444}
6445
6446static __inline__ __m256i __DEFAULT_FN_ATTRS
6447_mm512_cvtepi32_epi16 (__m512i __A)
6448{
6449 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6450 (__v16hi) _mm256_undefined_si256 (),
6451 (__mmask16) -1);
6452}
6453
6454static __inline__ __m256i __DEFAULT_FN_ATTRS
6455_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6456{
6457 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6458 (__v16hi) __O, __M);
6459}
6460
6461static __inline__ __m256i __DEFAULT_FN_ATTRS
6462_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
6463{
6464 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6465 (__v16hi) _mm256_setzero_si256 (),
6466 __M);
6467}
6468
6469static __inline__ void __DEFAULT_FN_ATTRS
6470_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
6471{
6472 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
6473}
6474
6475static __inline__ __m128i __DEFAULT_FN_ATTRS
6476_mm512_cvtepi64_epi8 (__m512i __A)
6477{
6478 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6479 (__v16qi) _mm_undefined_si128 (),
6480 (__mmask8) -1);
6481}
6482
6483static __inline__ __m128i __DEFAULT_FN_ATTRS
6484_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6485{
6486 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6487 (__v16qi) __O, __M);
6488}
6489
6490static __inline__ __m128i __DEFAULT_FN_ATTRS
6491_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
6492{
6493 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6494 (__v16qi) _mm_setzero_si128 (),
6495 __M);
6496}
6497
6498static __inline__ void __DEFAULT_FN_ATTRS
6499_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
6500{
6501 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6502}
6503
6504static __inline__ __m256i __DEFAULT_FN_ATTRS
6505_mm512_cvtepi64_epi32 (__m512i __A)
6506{
6507 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6508 (__v8si) _mm256_undefined_si256 (),
6509 (__mmask8) -1);
6510}
6511
6512static __inline__ __m256i __DEFAULT_FN_ATTRS
6513_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6514{
6515 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6516 (__v8si) __O, __M);
6517}
6518
6519static __inline__ __m256i __DEFAULT_FN_ATTRS
6520_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
6521{
6522 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6523 (__v8si) _mm256_setzero_si256 (),
6524 __M);
6525}
6526
6527static __inline__ void __DEFAULT_FN_ATTRS
6528_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
6529{
6530 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6531}
6532
6533static __inline__ __m128i __DEFAULT_FN_ATTRS
6534_mm512_cvtepi64_epi16 (__m512i __A)
6535{
6536 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
6537 (__v8hi) _mm_undefined_si128 (),
6538 (__mmask8) -1);
6539}
6540
6541static __inline__ __m128i __DEFAULT_FN_ATTRS
6542_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6543{
6544 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
6545 (__v8hi) __O, __M);
6546}
6547
6548static __inline__ __m128i __DEFAULT_FN_ATTRS
6549_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
6550{
6551 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
6552 (__v8hi) _mm_setzero_si128 (),
6553 __M);
6554}
6555
6556static __inline__ void __DEFAULT_FN_ATTRS
6557_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
6558{
6559 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
6560}
6561
/* Unmasked form: expands to __builtin_ia32_extracti32x4_mask on __A with
   immediate __imm, an undefined 128-bit third operand and an all-ones
   __mmask8. */
#define _mm512_extracti32x4_epi32(__A, __imm) __extension__ ({ \
  __builtin_ia32_extracti32x4_mask ((__v16si) (__A), (__imm), \
                                    (__v4si) _mm_undefined_si128 (), \
                                    (__mmask8) -1); \
})
6568
/* Expands to __builtin_ia32_extracti32x4_mask on __A with immediate __imm,
   __W as the third operand and __U as the mask. */
#define _mm512_mask_extracti32x4_epi32(__W, __U, __A, __imm) __extension__ ({ \
  __builtin_ia32_extracti32x4_mask ((__v16si) (__A), (__imm), \
                                    (__v4si) (__W), \
                                    (__mmask8) (__U)); \
})
6575
/* Expands to __builtin_ia32_extracti32x4_mask on __A with immediate __imm, a
   zero vector as the third operand and __U as the mask. */
#define _mm512_maskz_extracti32x4_epi32(__U, __A, __imm) __extension__ ({ \
  __builtin_ia32_extracti32x4_mask ((__v16si) (__A), (__imm), \
                                    (__v4si) _mm_setzero_si128 (), \
                                    (__mmask8) (__U)); \
})
6582
/* Unmasked form: expands to __builtin_ia32_extracti64x4_mask on __A with
   immediate __imm, an undefined 256-bit third operand and an all-ones
   __mmask8. */
#define _mm512_extracti64x4_epi64(__A, __imm) __extension__ ({ \
  __builtin_ia32_extracti64x4_mask ((__v8di) (__A), (__imm), \
                                    (__v4di) _mm256_undefined_si256 (), \
                                    (__mmask8) -1); \
})
6589
/* Expands to __builtin_ia32_extracti64x4_mask on __A with immediate __imm,
   __W as the third operand and __U as the mask. */
#define _mm512_mask_extracti64x4_epi64(__W, __U, __A, __imm) __extension__ ({ \
  __builtin_ia32_extracti64x4_mask ((__v8di) (__A), (__imm), \
                                    (__v4di) (__W), \
                                    (__mmask8) (__U)); \
})
6596
/* Expands to __builtin_ia32_extracti64x4_mask on __A with immediate __imm, a
   zero vector as the third operand and __U as the mask. */
#define _mm512_maskz_extracti64x4_epi64(__U, __A, __imm) __extension__ ({ \
  __builtin_ia32_extracti64x4_mask ((__v8di) (__A), (__imm), \
                                    (__v4di) _mm256_setzero_si256 (), \
                                    (__mmask8) (__U)); \
})
6603
/* Unmasked form: expands to __builtin_ia32_insertf64x4_mask on __A and __B
   with immediate __imm, an undefined 512-bit fourth operand and an all-ones
   __mmask8. */
#define _mm512_insertf64x4(__A, __B, __imm) __extension__ ({ \
  __builtin_ia32_insertf64x4_mask ((__v8df) (__A), (__v4df) (__B), (__imm), \
                                   (__v8df) _mm512_undefined_pd (), \
                                   (__mmask8) -1); \
})
6611
/* Expands to __builtin_ia32_insertf64x4_mask on __A and __B with immediate
   __imm, __W as the fourth operand and __U as the mask. */
#define _mm512_mask_insertf64x4(__W, __U, __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_insertf64x4_mask ((__v8df) (__A), (__v4df) (__B), (__imm), \
                                   (__v8df) (__W), \
                                   (__mmask8) (__U)); \
})
6619
/* Expands to __builtin_ia32_insertf64x4_mask on __A and __B with immediate
   __imm, a zero vector as the fourth operand and __U as the mask. */
#define _mm512_maskz_insertf64x4(__U, __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_insertf64x4_mask ((__v8df) (__A), (__v4df) (__B), (__imm), \
                                   (__v8df) _mm512_setzero_pd (), \
                                   (__mmask8) (__U)); \
})
6627
/* Unmasked form: expands to __builtin_ia32_inserti64x4_mask on __A and __B
   with immediate __imm, a zero vector as the fourth operand and an all-ones
   __mmask8. */
#define _mm512_inserti64x4(__A, __B, __imm) __extension__ ({ \
  __builtin_ia32_inserti64x4_mask ((__v8di) (__A), (__v4di) (__B), (__imm), \
                                   (__v8di) _mm512_setzero_si512 (), \
                                   (__mmask8) -1); \
})
6635
/* Expands to __builtin_ia32_inserti64x4_mask on __A and __B with immediate
   __imm, __W as the fourth operand and __U as the mask. */
#define _mm512_mask_inserti64x4(__W, __U, __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_inserti64x4_mask ((__v8di) (__A), (__v4di) (__B), (__imm), \
                                   (__v8di) (__W), \
                                   (__mmask8) (__U)); \
})
6643
/* Expands to __builtin_ia32_inserti64x4_mask on __A and __B with immediate
   __imm, a zero vector as the fourth operand and __U as the mask. */
#define _mm512_maskz_inserti64x4(__U, __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_inserti64x4_mask ((__v8di) (__A), (__v4di) (__B), (__imm), \
                                   (__v8di) _mm512_setzero_si512 (), \
                                   (__mmask8) (__U)); \
})
6651
/* Unmasked form: expands to __builtin_ia32_getmantpd512_mask on __A with an
   undefined third operand, an all-ones __mmask8 and rounding argument __R.
   The immediate is ((__C) << 2) | (__B); __C is parenthesized so that a
   compound argument (e.g. x | y) is shifted as a whole instead of only its
   last term. */
#define _mm512_getmant_round_pd(__A, __B, __C, __R) __extension__ ({ \
  __builtin_ia32_getmantpd512_mask ((__v8df) (__A), \
                                    ((__C) << 2) | (__B), \
                                    (__v8df) _mm512_undefined_pd (), \
                                    (__mmask8) -1, (__R)); \
})
6658
/* Expands to __builtin_ia32_getmantpd512_mask on __A with __W as the third
   operand, __U as the mask and rounding argument __R.
   The immediate is ((__C) << 2) | (__B); __C is parenthesized so that a
   compound argument is shifted as a whole. */
#define _mm512_mask_getmant_round_pd(__W, __U, __A, __B, __C, __R) __extension__ ({ \
  __builtin_ia32_getmantpd512_mask ((__v8df) (__A), \
                                    ((__C) << 2) | (__B), \
                                    (__v8df) (__W), (__mmask8) (__U), \
                                    (__R)); \
})
6665
/* Expands to __builtin_ia32_getmantpd512_mask on __A with a zero vector as
   the third operand, __U as the mask and rounding argument __R.
   The immediate is ((__C) << 2) | (__B); __C is parenthesized so that a
   compound argument is shifted as a whole. */
#define _mm512_maskz_getmant_round_pd(__U, __A, __B, __C, __R) __extension__ ({ \
  __builtin_ia32_getmantpd512_mask ((__v8df) (__A), \
                                    ((__C) << 2) | (__B), \
                                    (__v8df) _mm512_setzero_pd (), \
                                    (__mmask8) (__U), (__R)); \
})
6672
/* Unmasked form: expands to __builtin_ia32_getmantpd512_mask on __A with a
   zero vector as the third operand, an all-ones __mmask8 and
   _MM_FROUND_CUR_DIRECTION as rounding.
   The immediate is ((__C) << 2) | (__B); __C is parenthesized so that a
   compound argument is shifted as a whole. */
#define _mm512_getmant_pd(__A, __B, __C) __extension__ ({ \
  __builtin_ia32_getmantpd512_mask ((__v8df) (__A), \
                                    ((__C) << 2) | (__B), \
                                    (__v8df) _mm512_setzero_pd (), \
                                    (__mmask8) -1, \
                                    _MM_FROUND_CUR_DIRECTION); \
})
6679
/* Expands to __builtin_ia32_getmantpd512_mask on __A with __W as the third
   operand, __U as the mask and _MM_FROUND_CUR_DIRECTION as rounding.
   The immediate is ((__C) << 2) | (__B); __C is parenthesized so that a
   compound argument is shifted as a whole. */
#define _mm512_mask_getmant_pd(__W, __U, __A, __B, __C) __extension__ ({ \
  __builtin_ia32_getmantpd512_mask ((__v8df) (__A), \
                                    ((__C) << 2) | (__B), \
                                    (__v8df) (__W), (__mmask8) (__U), \
                                    _MM_FROUND_CUR_DIRECTION); \
})
6685
/* Expands to __builtin_ia32_getmantpd512_mask on __A with a zero vector as
   the third operand, __U as the mask and _MM_FROUND_CUR_DIRECTION as
   rounding.
   The immediate is ((__C) << 2) | (__B); __C is parenthesized so that a
   compound argument is shifted as a whole. */
#define _mm512_maskz_getmant_pd(__U, __A, __B, __C) __extension__ ({ \
  __builtin_ia32_getmantpd512_mask ((__v8df) (__A), \
                                    ((__C) << 2) | (__B), \
                                    (__v8df) _mm512_setzero_pd (), \
                                    (__mmask8) (__U), \
                                    _MM_FROUND_CUR_DIRECTION); \
})
6692
/* Unmasked form: expands to __builtin_ia32_getmantps512_mask on __A with an
   undefined third operand, an all-ones __mmask16 and rounding argument __R.
   The immediate is ((__C) << 2) | (__B); __C is parenthesized so that a
   compound argument is shifted as a whole. */
#define _mm512_getmant_round_ps(__A, __B, __C, __R) __extension__ ({ \
  __builtin_ia32_getmantps512_mask ((__v16sf) (__A), \
                                    ((__C) << 2) | (__B), \
                                    (__v16sf) _mm512_undefined_ps (), \
                                    (__mmask16) -1, (__R)); \
})
6699
/* Expands to __builtin_ia32_getmantps512_mask on __A with __W as the third
   operand, __U as the mask and rounding argument __R.
   The immediate is ((__C) << 2) | (__B); __C is parenthesized so that a
   compound argument is shifted as a whole. */
#define _mm512_mask_getmant_round_ps(__W, __U, __A, __B, __C, __R) __extension__ ({ \
  __builtin_ia32_getmantps512_mask ((__v16sf) (__A), \
                                    ((__C) << 2) | (__B), \
                                    (__v16sf) (__W), (__mmask16) (__U), \
                                    (__R)); \
})
6706
/* Expands to __builtin_ia32_getmantps512_mask on __A with a zero vector as
   the third operand, __U as the mask and rounding argument __R.
   The immediate is ((__C) << 2) | (__B); __C is parenthesized so that a
   compound argument is shifted as a whole.  __U is cast to __mmask16 like
   in the other getmant_ps macros. */
#define _mm512_maskz_getmant_round_ps(__U, __A, __B, __C, __R) __extension__ ({ \
  __builtin_ia32_getmantps512_mask ((__v16sf) (__A), \
                                    ((__C) << 2) | (__B), \
                                    (__v16sf) _mm512_setzero_ps (), \
                                    (__mmask16) (__U), (__R)); \
})
6713
/* Unmasked form: expands to __builtin_ia32_getmantps512_mask on __A with an
   undefined third operand, an all-ones __mmask16 and
   _MM_FROUND_CUR_DIRECTION as rounding.
   The immediate is ((__C) << 2) | (__B); __C is parenthesized so that a
   compound argument is shifted as a whole. */
#define _mm512_getmant_ps(__A, __B, __C) __extension__ ({ \
  __builtin_ia32_getmantps512_mask ((__v16sf) (__A), \
                                    ((__C) << 2) | (__B), \
                                    (__v16sf) _mm512_undefined_ps (), \
                                    (__mmask16) -1, \
                                    _MM_FROUND_CUR_DIRECTION); \
})
6720
/* Expands to __builtin_ia32_getmantps512_mask on __A with __W as the third
   operand, __U as the mask and _MM_FROUND_CUR_DIRECTION as rounding.
   The immediate is ((__C) << 2) | (__B); __C is parenthesized so that a
   compound argument is shifted as a whole. */
#define _mm512_mask_getmant_ps(__W, __U, __A, __B, __C) __extension__ ({ \
  __builtin_ia32_getmantps512_mask ((__v16sf) (__A), \
                                    ((__C) << 2) | (__B), \
                                    (__v16sf) (__W), (__mmask16) (__U), \
                                    _MM_FROUND_CUR_DIRECTION); \
})
6727
/* Expands to __builtin_ia32_getmantps512_mask on __A with a zero vector as
   the third operand, __U as the mask and _MM_FROUND_CUR_DIRECTION as
   rounding.
   The immediate is ((__C) << 2) | (__B); __C is parenthesized so that a
   compound argument is shifted as a whole. */
#define _mm512_maskz_getmant_ps(__U, __A, __B, __C) __extension__ ({ \
  __builtin_ia32_getmantps512_mask ((__v16sf) (__A), \
                                    ((__C) << 2) | (__B), \
                                    (__v16sf) _mm512_setzero_ps (), \
                                    (__mmask16) (__U), \
                                    _MM_FROUND_CUR_DIRECTION); \
})
6734
/* Unmasked form: expands to __builtin_ia32_getexppd512_mask on __A with an
   undefined second operand, an all-ones __mmask8 and rounding argument
   __R. */
#define _mm512_getexp_round_pd(__A, __R) __extension__ ({ \
  __builtin_ia32_getexppd512_mask ((__v8df) (__A), \
                                   (__v8df) _mm512_undefined_pd (), \
                                   (__mmask8) -1, (__R)); \
})
6740
/* Expands to __builtin_ia32_getexppd512_mask on __A with __W as the second
   operand, __U as the mask and rounding argument __R. */
#define _mm512_mask_getexp_round_pd(__W, __U, __A, __R) __extension__ ({ \
  __builtin_ia32_getexppd512_mask ((__v8df) (__A), \
                                   (__v8df) (__W), \
                                   (__mmask8) (__U), (__R)); \
})
6746
/* Expands to __builtin_ia32_getexppd512_mask on __A with a zero vector as the
   second operand, __U as the mask and rounding argument __R. */
#define _mm512_maskz_getexp_round_pd(__U, __A, __R) __extension__ ({ \
  __builtin_ia32_getexppd512_mask ((__v8df) (__A), \
                                   (__v8df) _mm512_setzero_pd (), \
                                   (__mmask8) (__U), (__R)); \
})
6752
6753static __inline__ __m512d __DEFAULT_FN_ATTRS
6754_mm512_getexp_pd (__m512d __A)
6755{
6756 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
6757 (__v8df) _mm512_undefined_pd (),
6758 (__mmask8) -1,
6759 _MM_FROUND_CUR_DIRECTION);
6760}
6761
6762static __inline__ __m512d __DEFAULT_FN_ATTRS
6763_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
6764{
6765 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
6766 (__v8df) __W,
6767 (__mmask8) __U,
6768 _MM_FROUND_CUR_DIRECTION);
6769}
6770
6771static __inline__ __m512d __DEFAULT_FN_ATTRS
6772_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
6773{
6774 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
6775 (__v8df) _mm512_setzero_pd (),
6776 (__mmask8) __U,
6777 _MM_FROUND_CUR_DIRECTION);
6778}
6779
/* Unmasked form: expands to __builtin_ia32_getexpps512_mask on __A with an
   undefined second operand, an all-ones __mmask16 and rounding argument
   __R. */
#define _mm512_getexp_round_ps(__A, __R) __extension__ ({ \
  __builtin_ia32_getexpps512_mask ((__v16sf) (__A), \
                                   (__v16sf) _mm512_undefined_ps (), \
                                   (__mmask16) -1, (__R)); \
})
6785
/* Expands to __builtin_ia32_getexpps512_mask on __A with __W as the second
   operand, __U as the mask and rounding argument __R. */
#define _mm512_mask_getexp_round_ps(__W, __U, __A, __R) __extension__ ({ \
  __builtin_ia32_getexpps512_mask ((__v16sf) (__A), \
                                   (__v16sf) (__W), \
                                   (__mmask16) (__U), (__R)); \
})
6791
/* Expands to __builtin_ia32_getexpps512_mask on __A with a zero vector as the
   second operand, __U as the mask and rounding argument __R. */
#define _mm512_maskz_getexp_round_ps(__U, __A, __R) __extension__ ({ \
  __builtin_ia32_getexpps512_mask ((__v16sf) (__A), \
                                   (__v16sf) _mm512_setzero_ps (), \
                                   (__mmask16) (__U), (__R)); \
})
6797
6798static __inline__ __m512 __DEFAULT_FN_ATTRS
6799_mm512_getexp_ps (__m512 __A)
6800{
6801 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
6802 (__v16sf) _mm512_undefined_ps (),
6803 (__mmask16) -1,
6804 _MM_FROUND_CUR_DIRECTION);
6805}
6806
6807static __inline__ __m512 __DEFAULT_FN_ATTRS
6808_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
6809{
6810 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
6811 (__v16sf) __W,
6812 (__mmask16) __U,
6813 _MM_FROUND_CUR_DIRECTION);
6814}
6815
6816static __inline__ __m512 __DEFAULT_FN_ATTRS
6817_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
6818{
6819 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
6820 (__v16sf) _mm512_setzero_ps (),
6821 (__mmask16) __U,
6822 _MM_FROUND_CUR_DIRECTION);
6823}
6824
/* Unmasked form: expands to __builtin_ia32_gatherdiv16sf with an undefined
   256-bit first operand, base address __addr, __index cast to __v8di, an
   all-ones __mmask8 and scale __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_i64gather_ps(__index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gatherdiv16sf ((__v8sf) _mm256_undefined_ps (), \
                                (__addr), (__v8di) (__index), \
                                (__mmask8) -1, (__scale)); \
})
6829
/* Expands to __builtin_ia32_gatherdiv16sf with __v1_old as the first operand,
   base address __addr, __index cast to __v8di, mask __mask and scale
   __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_mask_i64gather_ps(__v1_old, __mask, __index, \
                                 __addr, __scale) __extension__ ({ \
  __builtin_ia32_gatherdiv16sf ((__v8sf) (__v1_old), \
                                (__addr), (__v8di) (__index), \
                                (__mask), (__scale)); \
})
6835
/* Unmasked form: expands to __builtin_ia32_gatherdiv16si with an undefined
   256-bit integer first operand, base address __addr, __index cast to
   __v8di, an all-ones __mmask8 and scale __scale.
   The undefined placeholder now comes from _mm256_undefined_si256 () rather
   than the float-typed _mm256_undefined_ps (), and every macro argument is
   parenthesized so compound expressions are cast/used as a whole. */
#define _mm512_i64gather_epi32(__index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gatherdiv16si ((__v8si) _mm256_undefined_si256 (), \
                                (__addr), (__v8di) (__index), \
                                (__mmask8) -1, (__scale)); \
})
6840
/* Expands to __builtin_ia32_gatherdiv16si with __v1_old as the first operand,
   base address __addr, __index cast to __v8di, mask __mask and scale
   __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gatherdiv16si ((__v8si) (__v1_old), \
                                (__addr), (__v8di) (__index), \
                                (__mask), (__scale)); \
})
6845
/* Unmasked form: expands to __builtin_ia32_gatherdiv8df with an undefined
   512-bit first operand, base address __addr, __index cast to __v8di, an
   all-ones __mmask8 and scale __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_i64gather_pd(__index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gatherdiv8df ((__v8df) _mm512_undefined_pd (), \
                               (__addr), (__v8di) (__index), \
                               (__mmask8) -1, (__scale)); \
})
6850
/* Expands to __builtin_ia32_gatherdiv8df with __v1_old as the first operand,
   base address __addr, __index cast to __v8di, mask __mask and scale
   __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gatherdiv8df ((__v8df) (__v1_old), \
                               (__addr), (__v8di) (__index), \
                               (__mask), (__scale)); \
})
6855
/* Unmasked form: expands to __builtin_ia32_gatherdiv8di with an undefined
   512-bit integer first operand, base address __addr, __index cast to
   __v8di, an all-ones __mmask8 and scale __scale.
   The undefined placeholder now comes from _mm512_undefined_epi32 () rather
   than the double-typed _mm512_undefined_pd (), and every macro argument is
   parenthesized so compound expressions are cast/used as a whole. */
#define _mm512_i64gather_epi64(__index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gatherdiv8di ((__v8di) _mm512_undefined_epi32 (), \
                               (__addr), (__v8di) (__index), \
                               (__mmask8) -1, (__scale)); \
})
6860
/* Expands to __builtin_ia32_gatherdiv8di with __v1_old as the first operand,
   base address __addr, __index cast to __v8di, mask __mask and scale
   __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gatherdiv8di ((__v8di) (__v1_old), \
                               (__addr), (__v8di) (__index), \
                               (__mask), (__scale)); \
})
6865
/* Unmasked form: expands to __builtin_ia32_gathersiv16sf with an undefined
   512-bit first operand, base address __addr, __index cast to __v16si, an
   all-ones mask and scale __scale.
   The mask is (__mmask16) -1 because the operands are 16-element vectors
   (__v16sf / __v16si); the previous (__mmask8) -1 only set the low 8 mask
   bits.  Every macro argument is parenthesized so compound expressions are
   cast/used as a whole. */
#define _mm512_i32gather_ps(__index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gathersiv16sf ((__v16sf) _mm512_undefined_ps (), \
                                (__addr), (__v16si) (__index), \
                                (__mmask16) -1, (__scale)); \
})
6870
/* Expands to __builtin_ia32_gathersiv16sf with __v1_old as the first operand,
   base address __addr, __index cast to __v16si, mask __mask and scale
   __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_mask_i32gather_ps(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gathersiv16sf ((__v16sf) (__v1_old), \
                                (__addr), (__v16si) (__index), \
                                (__mask), (__scale)); \
})
6875
/* Unmasked form: expands to __builtin_ia32_gathersiv16si with an undefined
   512-bit integer first operand, base address __addr, __index cast to
   __v16si, an all-ones mask and scale __scale.
   The mask is (__mmask16) -1 because the operands are 16-element vectors;
   the previous (__mmask8) -1 only set the low 8 mask bits.  The undefined
   placeholder is cast to __v16si (it was cast to the float type __v16sf),
   and every macro argument is parenthesized. */
#define _mm512_i32gather_epi32(__index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gathersiv16si ((__v16si) _mm512_undefined_epi32 (), \
                                (__addr), (__v16si) (__index), \
                                (__mmask16) -1, (__scale)); \
})
6880
/* Expands to __builtin_ia32_gathersiv16si with __v1_old as the first operand,
   base address __addr, __index cast to __v16si, mask __mask and scale
   __scale.
   __v1_old is cast to the integer type __v16si (it was cast to the float
   type __v16sf), and every macro argument is parenthesized so compound
   expressions are cast/used as a whole. */
#define _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gathersiv16si ((__v16si) (__v1_old), \
                                (__addr), (__v16si) (__index), \
                                (__mask), (__scale)); \
})
6885
/* Unmasked form: expands to __builtin_ia32_gathersiv8df with an undefined
   512-bit first operand, base address __addr, __index cast to __v8si, an
   all-ones __mmask8 and scale __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_i32gather_pd(__index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gathersiv8df ((__v8df) _mm512_undefined_pd (), \
                               (__addr), (__v8si) (__index), \
                               (__mmask8) -1, (__scale)); \
})
6890
/* Expands to __builtin_ia32_gathersiv8df with __v1_old as the first operand,
   base address __addr, __index cast to __v8si, mask __mask and scale
   __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gathersiv8df ((__v8df) (__v1_old), \
                               (__addr), (__v8si) (__index), \
                               (__mask), (__scale)); \
})
6895
/* Unmasked form: expands to __builtin_ia32_gathersiv8di with an undefined
   512-bit integer first operand, base address __addr, __index cast to
   __v8si, an all-ones __mmask8 and scale __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_i32gather_epi64(__index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gathersiv8di ((__v8di) _mm512_undefined_epi32 (), \
                               (__addr), (__v8si) (__index), \
                               (__mmask8) -1, (__scale)); \
})
6900
/* Expands to __builtin_ia32_gathersiv8di with __v1_old as the first operand,
   base address __addr, __index cast to __v8si, mask __mask and scale
   __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  __builtin_ia32_gathersiv8di ((__v8di) (__v1_old), \
                               (__addr), (__v8si) (__index), \
                               (__mask), (__scale)); \
})
6905
/* Unmasked form: expands to __builtin_ia32_scatterdiv16sf with base address
   __addr, an all-ones __mmask8, __index cast to __v8di, __v1 cast to __v8sf
   and scale __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_i64scatter_ps(__addr, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16sf ((__addr), (__mmask8) -1, \
                                 (__v8di) (__index), (__v8sf) (__v1), \
                                 (__scale)); \
})
6910
/* Expands to __builtin_ia32_scatterdiv16sf with base address __addr, mask
   __mask, __index cast to __v8di, __v1 cast to __v8sf and scale __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_mask_i64scatter_ps(__addr, __mask, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16sf ((__addr), (__mask), \
                                 (__v8di) (__index), (__v8sf) (__v1), \
                                 (__scale)); \
})
6915
/* Unmasked form: expands to __builtin_ia32_scatterdiv16si with base address
   __addr, an all-ones __mmask8, __index cast to __v8di, __v1 cast to __v8si
   and scale __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_i64scatter_epi32(__addr, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16si ((__addr), (__mmask8) -1, \
                                 (__v8di) (__index), (__v8si) (__v1), \
                                 (__scale)); \
})
6920
/* Expands to __builtin_ia32_scatterdiv16si with base address __addr, mask
   __mask, __index cast to __v8di, __v1 cast to __v8si and scale __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16si ((__addr), (__mask), \
                                 (__v8di) (__index), (__v8si) (__v1), \
                                 (__scale)); \
})
6925
/* Unmasked form: expands to __builtin_ia32_scatterdiv8df with base address
   __addr, an all-ones __mmask8, __index cast to __v8di, __v1 cast to __v8df
   and scale __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_i64scatter_pd(__addr, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8df ((__addr), (__mmask8) -1, \
                                (__v8di) (__index), (__v8df) (__v1), \
                                (__scale)); \
})
6930
/* Expands to __builtin_ia32_scatterdiv8df with base address __addr, mask
   __mask, __index cast to __v8di, __v1 cast to __v8df and scale __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_mask_i64scatter_pd(__addr, __mask, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8df ((__addr), (__mask), \
                                (__v8di) (__index), (__v8df) (__v1), \
                                (__scale)); \
})
6935
/* Unmasked form: expands to __builtin_ia32_scatterdiv8di with base address
   __addr, an all-ones __mmask8, __index cast to __v8di, __v1 cast to __v8di
   and scale __scale.
   Every macro argument is parenthesized so a compound expression passed by
   the caller is cast/used as a whole. */
#define _mm512_i64scatter_epi64(__addr, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8di ((__addr), (__mmask8) -1, \
                                (__v8di) (__index), (__v8di) (__v1), \
                                (__scale)); \
})
6940
/* Masked scatter wrapper: forwards to __builtin_ia32_scatterdiv8di with the
 * caller's __mask.  Arguments are parenthesized so a compound expression is
 * cast as a whole. */
#define _mm512_mask_i64scatter_epi64(__addr, __mask, __index, __v1, __scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di ((__addr), (__mask),\
                                (__v8di)(__index), (__v8di)(__v1), (__scale));\
})
6945
/* Unmasked scatter wrapper: forwards to __builtin_ia32_scattersiv16sf with an
 * all-ones __mmask16.  Arguments are parenthesized so a compound expression
 * is cast as a whole. */
#define _mm512_i32scatter_ps(__addr, __index, __v1, __scale) __extension__ ({\
  __builtin_ia32_scattersiv16sf ((__addr), (__mmask16) -1,\
                                 (__v16si)(__index), (__v16sf)(__v1), (__scale));\
})
6950
/* Masked scatter wrapper: forwards to __builtin_ia32_scattersiv16sf with the
 * caller's __mask.  Arguments are parenthesized so a compound expression is
 * cast as a whole. */
#define _mm512_mask_i32scatter_ps(__addr, __mask, __index, __v1, __scale) __extension__ ({\
  __builtin_ia32_scattersiv16sf ((__addr), (__mask),\
                                 (__v16si)(__index), (__v16sf)(__v1), (__scale));\
})
6955
/* Unmasked scatter wrapper: forwards to __builtin_ia32_scattersiv16si with an
 * all-ones __mmask16.  Arguments are parenthesized so a compound expression
 * is cast as a whole. */
#define _mm512_i32scatter_epi32(__addr, __index, __v1, __scale) __extension__ ({\
  __builtin_ia32_scattersiv16si ((__addr), (__mmask16) -1,\
                                 (__v16si)(__index), (__v16si)(__v1), (__scale));\
})
6960
/* Masked scatter wrapper: forwards to __builtin_ia32_scattersiv16si with the
 * caller's __mask.  Arguments are parenthesized so a compound expression is
 * cast as a whole. */
#define _mm512_mask_i32scatter_epi32(__addr, __mask, __index, __v1, __scale) __extension__ ({\
  __builtin_ia32_scattersiv16si ((__addr), (__mask),\
                                 (__v16si)(__index), (__v16si)(__v1), (__scale));\
})
6965
/* Unmasked scatter wrapper: forwards to __builtin_ia32_scattersiv8df with an
 * all-ones __mmask8.  Arguments are parenthesized so a compound expression is
 * cast as a whole. */
#define _mm512_i32scatter_pd(__addr, __index, __v1, __scale) __extension__ ({\
  __builtin_ia32_scattersiv8df ((__addr), (__mmask8) -1,\
                                (__v8si)(__index), (__v8df)(__v1), (__scale));\
})
6970
/* Masked scatter wrapper: forwards to __builtin_ia32_scattersiv8df with the
 * caller's __mask.  Arguments are parenthesized so a compound expression is
 * cast as a whole. */
#define _mm512_mask_i32scatter_pd(__addr, __mask, __index, __v1, __scale) __extension__ ({\
  __builtin_ia32_scattersiv8df ((__addr), (__mask),\
                                (__v8si)(__index), (__v8df)(__v1), (__scale));\
})
6975
/* Unmasked scatter wrapper: forwards to __builtin_ia32_scattersiv8di with an
 * all-ones __mmask8.  Arguments are parenthesized so a compound expression is
 * cast as a whole. */
#define _mm512_i32scatter_epi64(__addr, __index, __v1, __scale) __extension__ ({\
  __builtin_ia32_scattersiv8di ((__addr), (__mmask8) -1,\
                                (__v8si)(__index), (__v8di)(__v1), (__scale));\
})
6980
/* Masked scatter wrapper: forwards to __builtin_ia32_scattersiv8di with the
 * caller's __mask.  Arguments are parenthesized so a compound expression is
 * cast as a whole. */
#define _mm512_mask_i32scatter_epi64(__addr, __mask, __index, __v1, __scale) __extension__ ({\
  __builtin_ia32_scattersiv8di ((__addr), (__mask),\
                                (__v8si)(__index), (__v8di)(__v1), (__scale));\
})
6985
6986static __inline__ __m128 __DEFAULT_FN_ATTRS
6987_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6988{
6989 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A,
6990 (__v4sf) __B,
6991 (__v4sf) __W,
6992 (__mmask8) __U,
6993 _MM_FROUND_CUR_DIRECTION);
6994}
6995
/* Merge-masked scalar single-precision fused multiply-add with explicit
 * rounding __R.  Operands are passed as (__W, __A, __B) so the product is
 * __W * __A, the addend is __B and the merge source is __W, per the Intel
 * definition; the earlier (__A, __B, __W) order did not match it.  Arguments
 * are parenthesized so compound expressions are cast as a whole. */
#define _mm_mask_fmadd_round_ss(__W, __U, __A, __B, __R) __extension__ ({\
  __builtin_ia32_vfmaddss3_mask ((__v4sf)(__W),\
                                 (__v4sf)(__A),\
                                 (__v4sf)(__B),\
                                 (__mmask8)(__U),\
                                 (__R));\
})
7003
7004static __inline__ __m128 __DEFAULT_FN_ATTRS
7005_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7006{
7007 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
7008 (__v4sf) __B,
7009 (__v4sf) __C,
7010 (__mmask8) __U,
7011 _MM_FROUND_CUR_DIRECTION);
7012}
7013
/* Zero-masked scalar single-precision fused multiply-add with explicit
 * rounding.  The rounding argument __R is now forwarded to the builtin; it
 * was previously ignored in favour of _MM_FROUND_CUR_DIRECTION.  Arguments
 * are parenthesized so compound expressions are cast as a whole. */
#define _mm_maskz_fmadd_round_ss(__U, __A, __B, __C, __R) __extension__ ({\
  __builtin_ia32_vfmaddss3_maskz ((__v4sf)(__A),\
                                  (__v4sf)(__B),\
                                  (__v4sf)(__C),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7021
7022static __inline__ __m128 __DEFAULT_FN_ATTRS
7023_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7024{
7025 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
7026 (__v4sf) __X,
7027 (__v4sf) __Y,
7028 (__mmask8) __U,
7029 _MM_FROUND_CUR_DIRECTION);
7030}
7031
/* mask3 scalar single-precision fused multiply-add with explicit rounding
 * __R: forwards (__W, __X, __Y, __U, __R) to __builtin_ia32_vfmaddss3_mask3.
 * Arguments are parenthesized so compound expressions are cast as a whole. */
#define _mm_mask3_fmadd_round_ss(__W, __X, __Y, __U, __R) __extension__ ({\
  __builtin_ia32_vfmaddss3_mask3 ((__v4sf)(__W),\
                                  (__v4sf)(__X),\
                                  (__v4sf)(__Y),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7039
7040static __inline__ __m128 __DEFAULT_FN_ATTRS
7041_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7042{
7043 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A,
7044 (__v4sf) -(__B),
7045 (__v4sf) __W,
7046 (__mmask8) __U,
7047 _MM_FROUND_CUR_DIRECTION);
7048}
7049
/* Merge-masked scalar single-precision fused multiply-subtract with explicit
 * rounding __R.  Operands are passed as (__W, __A, -__B) so the product is
 * __W * __A, __B is subtracted and the merge source is __W, per the Intel
 * definition; the earlier form negated a multiplicand instead.  Arguments
 * are parenthesized so compound expressions are cast as a whole. */
#define _mm_mask_fmsub_round_ss(__W, __U, __A, __B, __R) __extension__ ({\
  __builtin_ia32_vfmaddss3_mask ((__v4sf)(__W),\
                                 (__v4sf)(__A),\
                                 (__v4sf) -(__B),\
                                 (__mmask8)(__U),\
                                 (__R));\
})
7057
7058static __inline__ __m128 __DEFAULT_FN_ATTRS
7059_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7060{
7061 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
7062 (__v4sf) __B,
7063 (__v4sf) -(__C),
7064 (__mmask8) __U,
7065 _MM_FROUND_CUR_DIRECTION);
7066}
7067
/* Zero-masked scalar single-precision fused multiply-subtract with explicit
 * rounding __R: forwards (__A, __B, -__C, __U, __R) to
 * __builtin_ia32_vfmaddss3_maskz.  Arguments are parenthesized so compound
 * expressions are cast as a whole. */
#define _mm_maskz_fmsub_round_ss(__U, __A, __B, __C, __R) __extension__ ({\
  __builtin_ia32_vfmaddss3_maskz ((__v4sf)(__A),\
                                  (__v4sf)(__B),\
                                  (__v4sf) -(__C),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7075
7076static __inline__ __m128 __DEFAULT_FN_ATTRS
7077_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7078{
7079 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
7080 (__v4sf) __X,
7081 (__v4sf) -(__Y),
7082 (__mmask8) __U,
7083 _MM_FROUND_CUR_DIRECTION);
7084}
7085
/* mask3 scalar single-precision fused multiply-subtract with explicit
 * rounding __R: forwards (__W, __X, -__Y, __U, __R) to
 * __builtin_ia32_vfmaddss3_mask3.  Arguments are parenthesized so compound
 * expressions are cast as a whole.
 *
 * NOTE(review): negating the whole of __Y presumably also affects whatever
 * the builtin merges or copies from its third operand -- confirm. */
#define _mm_mask3_fmsub_round_ss(__W, __X, __Y, __U, __R) __extension__ ({\
  __builtin_ia32_vfmaddss3_mask3 ((__v4sf)(__W),\
                                  (__v4sf)(__X),\
                                  (__v4sf) -(__Y),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7093
7094static __inline__ __m128 __DEFAULT_FN_ATTRS
7095_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7096{
7097 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) -(__A),
7098 (__v4sf) __B,
7099 (__v4sf) __W,
7100 (__mmask8) __U,
7101 _MM_FROUND_CUR_DIRECTION);
7102}
7103
/* Merge-masked scalar single-precision fused negated-multiply-add with
 * explicit rounding __R.  Operands are passed as (__W, -__A, __B) so the
 * product is -(__W * __A), the addend is __B and the merge source is __W,
 * per the Intel definition; the earlier (-__A, __B, __W) order did not match
 * it.  Arguments are parenthesized so compound expressions are cast whole. */
#define _mm_mask_fnmadd_round_ss(__W, __U, __A, __B, __R) __extension__ ({\
  __builtin_ia32_vfmaddss3_mask ((__v4sf)(__W),\
                                 (__v4sf) -(__A),\
                                 (__v4sf)(__B),\
                                 (__mmask8)(__U),\
                                 (__R));\
})
7111
7112static __inline__ __m128 __DEFAULT_FN_ATTRS
7113_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7114{
7115 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) -(__A),
7116 (__v4sf) __B,
7117 (__v4sf) __C,
7118 (__mmask8) __U,
7119 _MM_FROUND_CUR_DIRECTION);
7120}
7121
/* Zero-masked scalar single-precision fused negated-multiply-add with
 * explicit rounding __R.  The negation is applied to __B rather than to the
 * whole of __A, because Intel defines the upper result elements as a copy
 * of __A's; the low product is still -(__A * __B).  Arguments are
 * parenthesized so compound expressions are cast as a whole. */
#define _mm_maskz_fnmadd_round_ss(__U, __A, __B, __C, __R) __extension__ ({\
  __builtin_ia32_vfmaddss3_maskz ((__v4sf)(__A),\
                                  (__v4sf) -(__B),\
                                  (__v4sf)(__C),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7129
7130static __inline__ __m128 __DEFAULT_FN_ATTRS
7131_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7132{
7133 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) -(__W),
7134 (__v4sf) __X,
7135 (__v4sf) __Y,
7136 (__mmask8) __U,
7137 _MM_FROUND_CUR_DIRECTION);
7138}
7139
/* mask3 scalar single-precision fused negated-multiply-add with explicit
 * rounding __R: forwards (-__W, __X, __Y, __U, __R) to
 * __builtin_ia32_vfmaddss3_mask3.  Arguments are parenthesized so compound
 * expressions are cast as a whole. */
#define _mm_mask3_fnmadd_round_ss(__W, __X, __Y, __U, __R) __extension__ ({\
  __builtin_ia32_vfmaddss3_mask3 ((__v4sf) -(__W),\
                                  (__v4sf)(__X),\
                                  (__v4sf)(__Y),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7147
7148static __inline__ __m128 __DEFAULT_FN_ATTRS
7149_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7150{
7151 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) -(__A),
7152 (__v4sf) -(__B),
7153 (__v4sf) __W,
7154 (__mmask8) __U,
7155 _MM_FROUND_CUR_DIRECTION);
7156}
7157
/* Merge-masked scalar single-precision fused negated-multiply-subtract with
 * explicit rounding __R.  Operands are passed as (__W, -__A, -__B) so the
 * result is -(__W * __A) - __B merged with __W, per the Intel definition;
 * the earlier form negated both multiplicands, which cancels out.  Arguments
 * are parenthesized so compound expressions are cast as a whole. */
#define _mm_mask_fnmsub_round_ss(__W, __U, __A, __B, __R) __extension__ ({\
  __builtin_ia32_vfmaddss3_mask ((__v4sf)(__W),\
                                 (__v4sf) -(__A),\
                                 (__v4sf) -(__B),\
                                 (__mmask8)(__U),\
                                 (__R));\
})
7165
7166static __inline__ __m128 __DEFAULT_FN_ATTRS
7167_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7168{
7169 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) -(__A),
7170 (__v4sf) __B,
7171 (__v4sf) -(__C),
7172 (__mmask8) __U,
7173 _MM_FROUND_CUR_DIRECTION);
7174}
7175
/* Zero-masked scalar single-precision fused negated-multiply-subtract with
 * explicit rounding.  Two fixes: the rounding argument __R is forwarded (it
 * was ignored in favour of _MM_FROUND_CUR_DIRECTION), and the product is
 * negated through __B instead of the whole of __A, because Intel defines the
 * upper result elements as a copy of __A's.  Arguments are parenthesized so
 * compound expressions are cast as a whole. */
#define _mm_maskz_fnmsub_round_ss(__U, __A, __B, __C, __R) __extension__ ({\
  __builtin_ia32_vfmaddss3_maskz ((__v4sf)(__A),\
                                  (__v4sf) -(__B),\
                                  (__v4sf) -(__C),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7183
7184static __inline__ __m128 __DEFAULT_FN_ATTRS
7185_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7186{
7187 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) -(__W),
7188 (__v4sf) __X,
7189 (__v4sf) -(__Y),
7190 (__mmask8) __U,
7191 _MM_FROUND_CUR_DIRECTION);
7192}
7193
/* mask3 scalar single-precision fused negated-multiply-subtract with
 * explicit rounding __R: forwards (-__W, __X, -__Y, __U, __R) to
 * __builtin_ia32_vfmaddss3_mask3.  Arguments are parenthesized so compound
 * expressions are cast as a whole.
 *
 * NOTE(review): negating the whole of __Y presumably also affects whatever
 * the builtin merges or copies from its third operand -- confirm. */
#define _mm_mask3_fnmsub_round_ss(__W, __X, __Y, __U, __R) __extension__ ({\
  __builtin_ia32_vfmaddss3_mask3 ((__v4sf) -(__W),\
                                  (__v4sf)(__X),\
                                  (__v4sf) -(__Y),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7201
7202static __inline__ __m128 __DEFAULT_FN_ATTRS
7203_mm_mask_fmadd_sd (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7204{
7205 return (__m128) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A,
7206 (__v2df) __B,
7207 (__v2df) __W,
7208 (__mmask8) __U,
7209 _MM_FROUND_CUR_DIRECTION);
7210}
7211
/* Merge-masked scalar double-precision fused multiply-add with explicit
 * rounding __R.  Operands are passed as (__W, __A, __B) so the product is
 * __W * __A, the addend is __B and the merge source is __W, per the Intel
 * definition; the earlier (__A, __B, __W) order did not match it.  Arguments
 * are parenthesized so compound expressions are cast as a whole. */
#define _mm_mask_fmadd_round_sd(__W, __U, __A, __B, __R) __extension__ ({\
  __builtin_ia32_vfmaddsd3_mask ((__v2df)(__W),\
                                 (__v2df)(__A),\
                                 (__v2df)(__B),\
                                 (__mmask8)(__U),\
                                 (__R));\
})
7219
7220static __inline__ __m128 __DEFAULT_FN_ATTRS
7221_mm_maskz_fmadd_sd (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7222{
7223 return (__m128) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
7224 (__v2df) __B,
7225 (__v2df) __C,
7226 (__mmask8) __U,
7227 _MM_FROUND_CUR_DIRECTION);
7228}
7229
/* Zero-masked scalar double-precision fused multiply-add with explicit
 * rounding.  The rounding argument __R is now forwarded to the builtin; it
 * was previously ignored in favour of _MM_FROUND_CUR_DIRECTION.  Arguments
 * are parenthesized so compound expressions are cast as a whole. */
#define _mm_maskz_fmadd_round_sd(__U, __A, __B, __C, __R) __extension__ ({\
  __builtin_ia32_vfmaddsd3_maskz ((__v2df)(__A),\
                                  (__v2df)(__B),\
                                  (__v2df)(__C),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7237
7238static __inline__ __m128 __DEFAULT_FN_ATTRS
7239_mm_mask3_fmadd_sd (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7240{
7241 return (__m128) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
7242 (__v2df) __X,
7243 (__v2df) __Y,
7244 (__mmask8) __U,
7245 _MM_FROUND_CUR_DIRECTION);
7246}
7247
/* mask3 scalar double-precision fused multiply-add with explicit rounding
 * __R: forwards (__W, __X, __Y, __U, __R) to __builtin_ia32_vfmaddsd3_mask3.
 * Arguments are parenthesized so compound expressions are cast as a whole. */
#define _mm_mask3_fmadd_round_sd(__W, __X, __Y, __U, __R) __extension__ ({\
  __builtin_ia32_vfmaddsd3_mask3 ((__v2df)(__W),\
                                  (__v2df)(__X),\
                                  (__v2df)(__Y),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7255
7256static __inline__ __m128 __DEFAULT_FN_ATTRS
7257_mm_mask_fmsub_sd (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7258{
7259 return (__m128) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A,
7260 (__v2df) -(__B),
7261 (__v2df) __W,
7262 (__mmask8) __U,
7263 _MM_FROUND_CUR_DIRECTION);
7264}
7265
/* Merge-masked scalar double-precision fused multiply-subtract with explicit
 * rounding __R.  Operands are passed as (__W, __A, -__B) so the product is
 * __W * __A, __B is subtracted and the merge source is __W, per the Intel
 * definition; the earlier form negated a multiplicand instead.  Arguments
 * are parenthesized so compound expressions are cast as a whole. */
#define _mm_mask_fmsub_round_sd(__W, __U, __A, __B, __R) __extension__ ({\
  __builtin_ia32_vfmaddsd3_mask ((__v2df)(__W),\
                                 (__v2df)(__A),\
                                 (__v2df) -(__B),\
                                 (__mmask8)(__U),\
                                 (__R));\
})
7273
7274static __inline__ __m128 __DEFAULT_FN_ATTRS
7275_mm_maskz_fmsub_sd (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7276{
7277 return (__m128) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
7278 (__v2df) __B,
7279 (__v2df) -(__C),
7280 (__mmask8) __U,
7281 _MM_FROUND_CUR_DIRECTION);
7282}
7283
/* Zero-masked scalar double-precision fused multiply-subtract with explicit
 * rounding __R: forwards (__A, __B, -__C, __U, __R) to
 * __builtin_ia32_vfmaddsd3_maskz.  Arguments are parenthesized so compound
 * expressions are cast as a whole. */
#define _mm_maskz_fmsub_round_sd(__U, __A, __B, __C, __R) __extension__ ({\
  __builtin_ia32_vfmaddsd3_maskz ((__v2df)(__A),\
                                  (__v2df)(__B),\
                                  (__v2df) -(__C),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7291
7292static __inline__ __m128 __DEFAULT_FN_ATTRS
7293_mm_mask3_fmsub_sd (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7294{
7295 return (__m128) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
7296 (__v2df) __X,
7297 (__v2df) -(__Y),
7298 (__mmask8) __U,
7299 _MM_FROUND_CUR_DIRECTION);
7300}
7301
/* mask3 scalar double-precision fused multiply-subtract with explicit
 * rounding __R: forwards (__W, __X, -__Y, __U, __R) to
 * __builtin_ia32_vfmaddsd3_mask3.  Arguments are parenthesized so compound
 * expressions are cast as a whole.
 *
 * NOTE(review): negating the whole of __Y presumably also affects whatever
 * the builtin merges or copies from its third operand -- confirm. */
#define _mm_mask3_fmsub_round_sd(__W, __X, __Y, __U, __R) __extension__ ({\
  __builtin_ia32_vfmaddsd3_mask3 ((__v2df)(__W),\
                                  (__v2df)(__X),\
                                  (__v2df) -(__Y),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7308
7309static __inline__ __m128 __DEFAULT_FN_ATTRS
7310_mm_mask_fnmadd_sd (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7311{
7312 return (__m128) __builtin_ia32_vfmaddsd3_mask ( (__v2df) -(__A),
7313 (__v2df) __B,
7314 (__v2df) __W,
7315 (__mmask8) __U,
7316 _MM_FROUND_CUR_DIRECTION);
7317}
7318
/* Merge-masked scalar double-precision fused negated-multiply-add with
 * explicit rounding __R.  Operands are passed as (__W, -__A, __B) so the
 * product is -(__W * __A), the addend is __B and the merge source is __W,
 * per the Intel definition; the earlier (-__A, __B, __W) order did not match
 * it.  Arguments are parenthesized so compound expressions are cast whole. */
#define _mm_mask_fnmadd_round_sd(__W, __U, __A, __B, __R) __extension__ ({\
  __builtin_ia32_vfmaddsd3_mask ((__v2df)(__W),\
                                 (__v2df) -(__A),\
                                 (__v2df)(__B),\
                                 (__mmask8)(__U),\
                                 (__R));\
})
7326
7327static __inline__ __m128 __DEFAULT_FN_ATTRS
7328_mm_maskz_fnmadd_sd (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7329{
7330 return (__m128) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) -(__A),
7331 (__v2df) __B,
7332 (__v2df) __C,
7333 (__mmask8) __U,
7334 _MM_FROUND_CUR_DIRECTION);
7335}
7336
/* Zero-masked scalar double-precision fused negated-multiply-add with
 * explicit rounding __R.  The negation is applied to __B rather than to the
 * whole of __A, because Intel defines the upper result element as a copy of
 * __A's; the low product is still -(__A * __B).  Arguments are parenthesized
 * so compound expressions are cast as a whole. */
#define _mm_maskz_fnmadd_round_sd(__U, __A, __B, __C, __R) __extension__ ({\
  __builtin_ia32_vfmaddsd3_maskz ((__v2df)(__A),\
                                  (__v2df) -(__B),\
                                  (__v2df)(__C),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7344
7345static __inline__ __m128 __DEFAULT_FN_ATTRS
7346_mm_mask3_fnmadd_sd (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7347{
7348 return (__m128) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) -(__W),
7349 (__v2df) __X,
7350 (__v2df) __Y,
7351 (__mmask8) __U,
7352 _MM_FROUND_CUR_DIRECTION);
7353}
7354
/* mask3 scalar double-precision fused negated-multiply-add with explicit
 * rounding __R: forwards (-__W, __X, __Y, __U, __R) to
 * __builtin_ia32_vfmaddsd3_mask3.  Arguments are parenthesized so compound
 * expressions are cast as a whole. */
#define _mm_mask3_fnmadd_round_sd(__W, __X, __Y, __U, __R) __extension__ ({\
  __builtin_ia32_vfmaddsd3_mask3 ((__v2df) -(__W),\
                                  (__v2df)(__X),\
                                  (__v2df)(__Y),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7362
7363static __inline__ __m128 __DEFAULT_FN_ATTRS
7364_mm_mask_fnmsub_sd (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7365{
7366 return (__m128) __builtin_ia32_vfmaddsd3_mask ( (__v2df) -(__A),
7367 (__v2df) -(__B),
7368 (__v2df) __W,
7369 (__mmask8) __U,
7370 _MM_FROUND_CUR_DIRECTION);
7371}
7372
/* Merge-masked scalar double-precision fused negated-multiply-subtract with
 * explicit rounding __R.  Operands are passed as (__W, -__A, -__B) so the
 * result is -(__W * __A) - __B merged with __W, per the Intel definition;
 * the earlier form negated both multiplicands, which cancels out.  Arguments
 * are parenthesized so compound expressions are cast as a whole. */
#define _mm_mask_fnmsub_round_sd(__W, __U, __A, __B, __R) __extension__ ({\
  __builtin_ia32_vfmaddsd3_mask ((__v2df)(__W),\
                                 (__v2df) -(__A),\
                                 (__v2df) -(__B),\
                                 (__mmask8)(__U),\
                                 (__R));\
})
7380
7381static __inline__ __m128 __DEFAULT_FN_ATTRS
7382_mm_maskz_fnmsub_sd (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7383{
7384 return (__m128) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) -(__A),
7385 (__v2df) __B,
7386 (__v2df) -(__C),
7387 (__mmask8) __U,
7388 _MM_FROUND_CUR_DIRECTION);
7389}
7390
/* Zero-masked scalar double-precision fused negated-multiply-subtract with
 * explicit rounding.  Two fixes: the rounding argument __R is forwarded (it
 * was ignored in favour of _MM_FROUND_CUR_DIRECTION), and the product is
 * negated through __B instead of the whole of __A, because Intel defines the
 * upper result element as a copy of __A's.  Arguments are parenthesized so
 * compound expressions are cast as a whole. */
#define _mm_maskz_fnmsub_round_sd(__U, __A, __B, __C, __R) __extension__ ({\
  __builtin_ia32_vfmaddsd3_maskz ((__v2df)(__A),\
                                  (__v2df) -(__B),\
                                  (__v2df) -(__C),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7398
7399static __inline__ __m128 __DEFAULT_FN_ATTRS
7400_mm_mask3_fnmsub_sd (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7401{
7402 return (__m128) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) -(__W),
7403 (__v2df) __X,
7404 (__v2df) -(__Y),
7405 (__mmask8) __U,
7406 _MM_FROUND_CUR_DIRECTION);
7407}
7408
/* mask3 scalar double-precision fused negated-multiply-subtract with
 * explicit rounding __R: forwards (-__W, __X, -__Y, __U, __R) to
 * __builtin_ia32_vfmaddsd3_mask3.  Arguments are parenthesized so compound
 * expressions are cast as a whole.
 *
 * NOTE(review): negating the whole of __Y presumably also affects whatever
 * the builtin merges or copies from its third operand -- confirm. */
#define _mm_mask3_fnmsub_round_sd(__W, __X, __Y, __U, __R) __extension__ ({\
  __builtin_ia32_vfmaddsd3_mask3 ((__v2df) -(__W),\
                                  (__v2df)(__X),\
                                  (__v2df) -(__Y),\
                                  (__mmask8)(__U),\
                                  (__R));\
})
7416
/* Unmasked form: passes __X and the immediate __M to
 * __builtin_ia32_permdf512_mask with an undefined pass-through vector and an
 * all-ones __mmask8. */
#define _mm512_permutex_pd(__X, __M) __extension__ ({ \
  __builtin_ia32_permdf512_mask ((__v8df)(__X), (__M), \
                                 (__v8df) _mm512_undefined_pd (), \
                                 (__mmask8) -1); \
})
7422
/* Merge-masked form: passes __X and the immediate __M to
 * __builtin_ia32_permdf512_mask with __W as the pass-through vector and __U
 * as the mask. */
#define _mm512_mask_permutex_pd(__W, __U, __X, __M) __extension__ ({ \
  __builtin_ia32_permdf512_mask ((__v8df)(__X), (__M), \
                                 (__v8df)(__W), (__mmask8)(__U)); \
})
7428
/* Zero-masked form: passes __X and the immediate __M to
 * __builtin_ia32_permdf512_mask with a zero vector as the pass-through
 * operand and __U as the mask. */
#define _mm512_maskz_permutex_pd(__U, __X, __M) __extension__ ({ \
  __builtin_ia32_permdf512_mask ((__v8df)(__X), (__M), \
                                 (__v8df) _mm512_setzero_pd (), \
                                 (__mmask8)(__U)); \
})
7434
/* Unmasked form: passes __X and the immediate __I to
 * __builtin_ia32_permdi512_mask with an undefined pass-through vector and an
 * all-ones __mmask8. */
#define _mm512_permutex_epi64(__X, __I) __extension__ ({ \
  __builtin_ia32_permdi512_mask ((__v8di)(__X), (__I), \
                                 (__v8di) _mm512_undefined_epi32 (), \
                                 (__mmask8)(-1)); \
})
7440
/* Merge-masked form: passes __X and the immediate __I to
 * __builtin_ia32_permdi512_mask with __W as the pass-through vector and __M
 * as the mask. */
#define _mm512_mask_permutex_epi64(__W, __M, __X, __I) __extension__ ({ \
  __builtin_ia32_permdi512_mask ((__v8di)(__X), (__I), \
                                 (__v8di)(__W), (__mmask8)(__M)); \
})
7446
/* Zero-masked form: passes __X and the immediate __I to
 * __builtin_ia32_permdi512_mask with a zero vector as the pass-through
 * operand and __M as the mask. */
#define _mm512_maskz_permutex_epi64(__M, __X, __I) __extension__ ({ \
  __builtin_ia32_permdi512_mask ((__v8di)(__X), (__I), \
                                 (__v8di) _mm512_setzero_si512 (), \
                                 (__mmask8)(__M)); \
})
7452
7453static __inline__ __m512d __DEFAULT_FN_ATTRS
7454_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
7455{
7456 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
7457 (__v8di) __X,
7458 (__v8df) _mm512_undefined_pd (),
7459 (__mmask8) -1);
7460}
7461
7462static __inline__ __m512d __DEFAULT_FN_ATTRS
7463_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
7464{
7465 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
7466 (__v8di) __X,
7467 (__v8df) __W,
7468 (__mmask8) __U);
7469}
7470
7471static __inline__ __m512d __DEFAULT_FN_ATTRS
7472_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
7473{
7474 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
7475 (__v8di) __X,
7476 (__v8df) _mm512_setzero_pd (),
7477 (__mmask8) __U);
7478}
7479
7480static __inline__ __m512i __DEFAULT_FN_ATTRS
7481_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
7482{
7483 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
7484 (__v8di) __X,
7485 (__v8di) _mm512_setzero_si512 (),
7486 __M);
7487}
7488
7489static __inline__ __m512i __DEFAULT_FN_ATTRS
7490_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
7491{
7492 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
7493 (__v8di) __X,
7494 (__v8di) _mm512_undefined_epi32 (),
7495 (__mmask8) -1);
7496}
7497
7498static __inline__ __m512i __DEFAULT_FN_ATTRS
7499_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
7500 __m512i __Y)
7501{
7502 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
7503 (__v8di) __X,
7504 (__v8di) __W,
7505 __M);
7506}
7507
7508static __inline__ __m512 __DEFAULT_FN_ATTRS
7509_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
7510{
7511 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7512 (__v16si) __X,
7513 (__v16sf) _mm512_undefined_ps (),
7514 (__mmask16) -1);
7515}
7516
7517static __inline__ __m512 __DEFAULT_FN_ATTRS
7518_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
7519{
7520 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7521 (__v16si) __X,
7522 (__v16sf) __W,
7523 (__mmask16) __U);
7524}
7525
7526static __inline__ __m512 __DEFAULT_FN_ATTRS
7527_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
7528{
7529 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7530 (__v16si) __X,
7531 (__v16sf) _mm512_setzero_ps (),
7532 (__mmask16) __U);
7533}
7534
7535static __inline__ __m512i __DEFAULT_FN_ATTRS
7536_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
7537{
7538 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
7539 (__v16si) __X,
7540 (__v16si) _mm512_setzero_si512 (),
7541 __M);
7542}
7543
7544static __inline__ __m512i __DEFAULT_FN_ATTRS
7545_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
7546{
7547 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
7548 (__v16si) __X,
7549 (__v16si) _mm512_undefined_epi32 (),
7550 (__mmask16) -1);
7551}
7552
7553static __inline__ __m512i __DEFAULT_FN_ATTRS
7554_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
7555 __m512i __Y)
7556{
7557 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
7558 (__v16si) __X,
7559 (__v16si) __W,
7560 __M);
7561}
7562
7563static __inline__ __mmask16 __DEFAULT_FN_ATTRS
7564_mm512_kand (__mmask16 __A, __mmask16 __B)
7565{
7566 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
7567}
7568
7569static __inline__ __mmask16 __DEFAULT_FN_ATTRS
7570_mm512_kandn (__mmask16 __A, __mmask16 __B)
7571{
7572 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
7573}
7574
7575static __inline__ __mmask16 __DEFAULT_FN_ATTRS
7576_mm512_kor (__mmask16 __A, __mmask16 __B)
7577{
7578 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
7579}
7580
7581static __inline__ int __DEFAULT_FN_ATTRS
7582_mm512_kortestc (__mmask16 __A, __mmask16 __B)
7583{
7584 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
7585 (__mmask16) __B);
7586}
7587
7588static __inline__ int __DEFAULT_FN_ATTRS
7589_mm512_kortestz (__mmask16 __A, __mmask16 __B)
7590{
7591 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
7592 (__mmask16) __B);
7593}
7594
7595static __inline__ __mmask16 __DEFAULT_FN_ATTRS
7596_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
7597{
7598 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
7599}
7600
7601static __inline__ __mmask16 __DEFAULT_FN_ATTRS
7602_mm512_kxnor (__mmask16 __A, __mmask16 __B)
7603{
7604 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
7605}
7606
7607static __inline__ __mmask16 __DEFAULT_FN_ATTRS
7608_mm512_kxor (__mmask16 __A, __mmask16 __B)
7609{
7610 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
7611}
7612
7613static __inline__ void __DEFAULT_FN_ATTRS
7614_mm512_stream_si512 (__m512i * __P, __m512i __A)
7615{
7616 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
7617}
7618
7619static __inline__ __m512i __DEFAULT_FN_ATTRS
7620_mm512_stream_load_si512 (void *__P)
7621{
7622 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
7623}
7624
7625static __inline__ void __DEFAULT_FN_ATTRS
7626_mm512_stream_pd (double *__P, __m512d __A)
7627{
7628 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
7629}
7630
7631static __inline__ void __DEFAULT_FN_ATTRS
7632_mm512_stream_ps (float *__P, __m512 __A)
7633{
7634 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
7635}
7636
7637static __inline__ __m512d __DEFAULT_FN_ATTRS
7638_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
7639{
7640 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
7641 (__v8df) __W,
7642 (__mmask8) __U);
7643}
7644
7645static __inline__ __m512d __DEFAULT_FN_ATTRS
7646_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
7647{
7648 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
7649 (__v8df)
7650 _mm512_setzero_pd (),
7651 (__mmask8) __U);
7652}
7653
7654static __inline__ __m512i __DEFAULT_FN_ATTRS
7655_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
7656{
7657 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
7658 (__v8di) __W,
7659 (__mmask8) __U);
7660}
7661
7662static __inline__ __m512i __DEFAULT_FN_ATTRS
7663_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
7664{
7665 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
7666 (__v8di)
7667 _mm512_setzero_si512 (),
7668 (__mmask8) __U);
7669}
7670
7671static __inline__ __m512 __DEFAULT_FN_ATTRS
7672_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
7673{
7674 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
7675 (__v16sf) __W,
7676 (__mmask16) __U);
7677}
7678
7679static __inline__ __m512 __DEFAULT_FN_ATTRS
7680_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
7681{
7682 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
7683 (__v16sf)
7684 _mm512_setzero_ps (),
7685 (__mmask16) __U);
7686}
7687
7688static __inline__ __m512i __DEFAULT_FN_ATTRS
7689_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
7690{
7691 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
7692 (__v16si) __W,
7693 (__mmask16) __U);
7694}
7695
7696static __inline__ __m512i __DEFAULT_FN_ATTRS
7697_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
7698{
7699 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
7700 (__v16si)
7701 _mm512_setzero_si512 (),
7702 (__mmask16) __U);
7703}
7704
/* Unmasked form: passes __X, __Y, the predicate __P and the rounding
 * argument __R to __builtin_ia32_cmpss_mask with an all-ones __mmask8. */
#define _mm_cmp_round_ss_mask(__X, __Y, __P, __R) __extension__ ({ \
  __builtin_ia32_cmpss_mask ((__v4sf)(__X), (__v4sf)(__Y), __P, \
                             (__mmask8) -1, __R); \
})
7710
/* Masked form: passes __X, __Y, the predicate __P and the rounding argument
 * __R to __builtin_ia32_cmpss_mask with __M as the mask. */
#define _mm_mask_cmp_round_ss_mask(__M, __X, __Y, __P, __R) __extension__ ({ \
  __builtin_ia32_cmpss_mask ((__v4sf)(__X), (__v4sf)(__Y), __P, \
                             (__mmask8)(__M), __R); \
})
7716
/* Unmasked form: passes __X, __Y and the predicate __P to
 * __builtin_ia32_cmpss_mask with an all-ones __mmask8 and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_cmp_ss_mask(__X, __Y, __P) __extension__ ({ \
  __builtin_ia32_cmpss_mask ((__v4sf)(__X), (__v4sf)(__Y), (__P), \
                             (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); \
})
7723
/* Masked form: passes __X, __Y and the predicate __P to
 * __builtin_ia32_cmpss_mask with __M as the mask and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_cmp_ss_mask(__M, __X, __Y, __P) __extension__ ({ \
  __builtin_ia32_cmpss_mask ((__v4sf)(__X), (__v4sf)(__Y), (__P), \
                             (__mmask8)(__M), _MM_FROUND_CUR_DIRECTION); \
})
7730
/* Unmasked form: passes __X, __Y, the predicate __P and the rounding
 * argument __R to __builtin_ia32_cmpsd_mask with an all-ones __mmask8. */
#define _mm_cmp_round_sd_mask(__X, __Y, __P, __R) __extension__ ({ \
  __builtin_ia32_cmpsd_mask ((__v2df)(__X), (__v2df)(__Y), __P, \
                             (__mmask8) -1, __R); \
})
7736
/* Masked form: passes __X, __Y, the predicate __P and the rounding argument
 * __R to __builtin_ia32_cmpsd_mask with __M as the mask. */
#define _mm_mask_cmp_round_sd_mask(__M, __X, __Y, __P, __R) __extension__ ({ \
  __builtin_ia32_cmpsd_mask ((__v2df)(__X), (__v2df)(__Y), __P, \
                             (__mmask8)(__M), __R); \
})
7742
/* Unmasked form: passes __X, __Y and the predicate __P to
 * __builtin_ia32_cmpsd_mask with an all-ones __mmask8 and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_cmp_sd_mask(__X, __Y, __P) __extension__ ({ \
  __builtin_ia32_cmpsd_mask ((__v2df)(__X), (__v2df)(__Y), (__P), \
                             (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); \
})
7749
/* Masked form: passes __X, __Y and the predicate __P to
 * __builtin_ia32_cmpsd_mask with __M as the mask and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_cmp_sd_mask(__M, __X, __Y, __P) __extension__ ({ \
  __builtin_ia32_cmpsd_mask ((__v2df)(__X), (__v2df)(__Y), (__P), \
                             (__mmask8)(__M), _MM_FROUND_CUR_DIRECTION); \
})
7756
7757static __inline__ __m512 __DEFAULT_FN_ATTRS
7758_mm512_movehdup_ps (__m512 __A)
7759{
7760 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7761 (__v16sf)
7762 _mm512_undefined_ps (),
7763 (__mmask16) -1);
7764}
7765
7766static __inline__ __m512 __DEFAULT_FN_ATTRS
7767_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7768{
7769 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7770 (__v16sf) __W,
7771 (__mmask16) __U);
7772}
7773
7774static __inline__ __m512 __DEFAULT_FN_ATTRS
7775_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
7776{
7777 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7778 (__v16sf)
7779 _mm512_setzero_ps (),
7780 (__mmask16) __U);
7781}
7782
7783static __inline__ __m512 __DEFAULT_FN_ATTRS
7784_mm512_moveldup_ps (__m512 __A)
7785{
7786 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7787 (__v16sf)
7788 _mm512_undefined_ps (),
7789 (__mmask16) -1);
7790}
7791
7792static __inline__ __m512 __DEFAULT_FN_ATTRS
7793_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7794{
7795 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7796 (__v16sf) __W,
7797 (__mmask16) __U);
7798}
7799
7800static __inline__ __m512 __DEFAULT_FN_ATTRS
7801_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
7802{
7803 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7804 (__v16sf)
7805 _mm512_setzero_ps (),
7806 (__mmask16) __U);
7807}
7808
/* Unmasked form: passes __A and the immediate __I to
 * __builtin_ia32_pshufd512_mask with an undefined pass-through vector and an
 * all-ones __mmask16.
 *
 * NOTE(review): the expression is not cast back to __m512i, so it has
 * whatever type the builtin returns -- confirm callers do not rely on a
 * strict __m512i result. */
#define _mm512_shuffle_epi32(__A, __I) __extension__ ({ \
  __builtin_ia32_pshufd512_mask ((__v16si)(__A), (__I), \
                                 (__v16si) _mm512_undefined_epi32 (), \
                                 (__mmask16) -1); \
})
7815
/* Merge-masked form: passes __A and the immediate __I to
 * __builtin_ia32_pshufd512_mask with __W as the pass-through vector and __U
 * as the mask. */
#define _mm512_mask_shuffle_epi32(__W, __U, __A, __I) __extension__ ({ \
  __builtin_ia32_pshufd512_mask ((__v16si)(__A), (__I), \
                                 (__v16si)(__W), (__mmask16)(__U)); \
})
7822
/* Zero-masked form: passes __A and the immediate __I to
 * __builtin_ia32_pshufd512_mask with a zero vector as the pass-through
 * operand and __U as the mask. */
#define _mm512_maskz_shuffle_epi32(__U, __A, __I) __extension__ ({ \
  __builtin_ia32_pshufd512_mask ((__v16si)(__A), (__I), \
                                 (__v16si) _mm512_setzero_si512 (), \
                                 (__mmask16)(__U)); \
})
7829
7830static __inline__ __m512d __DEFAULT_FN_ATTRS
7831_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
7832{
7833 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
7834 (__v8df) __W,
7835 (__mmask8) __U);
7836}
7837
7838static __inline__ __m512d __DEFAULT_FN_ATTRS
7839_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
7840{
7841 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
7842 (__v8df) _mm512_setzero_pd (),
7843 (__mmask8) __U);
7844}
7845
7846static __inline__ __m512i __DEFAULT_FN_ATTRS
7847_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
7848{
7849 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
7850 (__v8di) __W,
7851 (__mmask8) __U);
7852}
7853
7854static __inline__ __m512i __DEFAULT_FN_ATTRS
7855_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
7856{
7857 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
7858 (__v8di) _mm512_setzero_pd (),
7859 (__mmask8) __U);
7860}
7861
7862static __inline__ __m512d __DEFAULT_FN_ATTRS
7863_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
7864{
7865 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
7866 (__v8df) __W,
7867 (__mmask8) __U);
7868}
7869
7870static __inline__ __m512d __DEFAULT_FN_ATTRS
7871_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
7872{
7873 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
7874 (__v8df) _mm512_setzero_pd(),
7875 (__mmask8) __U);
7876}
7877
7878static __inline__ __m512i __DEFAULT_FN_ATTRS
7879_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
7880{
7881 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
7882 (__v8di) __W,
7883 (__mmask8) __U);
7884}
7885
7886static __inline__ __m512i __DEFAULT_FN_ATTRS
7887_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
7888{
7889 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
7890 (__v8di) _mm512_setzero_pd(),
7891 (__mmask8) __U);
7892}
7893
7894static __inline__ __m512 __DEFAULT_FN_ATTRS
7895_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
7896{
7897 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
7898 (__v16sf) __W,
7899 (__mmask16) __U);
7900}
7901
7902static __inline__ __m512 __DEFAULT_FN_ATTRS
7903_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
7904{
7905 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
7906 (__v16sf) _mm512_setzero_ps(),
7907 (__mmask16) __U);
7908}
7909
7910static __inline__ __m512i __DEFAULT_FN_ATTRS
7911_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
7912{
7913 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
7914 (__v16si) __W,
7915 (__mmask16) __U);
7916}
7917
7918static __inline__ __m512i __DEFAULT_FN_ATTRS
7919_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
7920{
7921 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
7922 (__v16si) _mm512_setzero_ps(),
7923 (__mmask16) __U);
7924}
7925
7926static __inline__ __m512 __DEFAULT_FN_ATTRS
7927_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
7928{
7929 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
7930 (__v16sf) __W,
7931 (__mmask16) __U);
7932}
7933
7934static __inline__ __m512 __DEFAULT_FN_ATTRS
7935_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
7936{
7937 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
7938 (__v16sf) _mm512_setzero_ps(),
7939 (__mmask16) __U);
7940}
7941
7942static __inline__ __m512i __DEFAULT_FN_ATTRS
7943_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
7944{
7945 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
7946 (__v16si) __W,
7947 (__mmask16) __U);
7948}
7949
7950static __inline__ __m512i __DEFAULT_FN_ATTRS
7951_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
7952{
7953 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
7954 (__v16si) _mm512_setzero_ps(),
7955 (__mmask16) __U);
7956}
7957
/* Unmasked form: passes __A (as __v8sf) and the rounding argument __R to
 * __builtin_ia32_cvtps2pd512_mask with an undefined pass-through vector and
 * an all-ones __mmask8. */
#define _mm512_cvt_roundps_pd(__A, __R) __extension__ ({ \
  __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__A), \
                                   (__v8df) _mm512_undefined_pd (), \
                                   (__mmask8) -1, (__R)); \
})
7964
/* Merge-masked form: passes __A (as __v8sf) and the rounding argument __R to
 * __builtin_ia32_cvtps2pd512_mask with __W as the pass-through vector and
 * __U as the mask. */
#define _mm512_mask_cvt_roundps_pd(__W, __U, __A, __R) __extension__ ({ \
  __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__A), (__v8df)(__W), \
                                   (__mmask8)(__U), (__R)); \
})
7970
/* Zero-masked form: passes __A (as __v8sf) and the rounding argument __R to
 * __builtin_ia32_cvtps2pd512_mask with a zero vector as the pass-through
 * operand and __U as the mask. */
#define _mm512_maskz_cvt_roundps_pd(__U, __A, __R) __extension__ ({ \
  __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__A), \
                                   (__v8df) _mm512_setzero_pd (), \
                                   (__mmask8)(__U), (__R)); \
})
7977
7978static __inline__ __m512d __DEFAULT_FN_ATTRS
7979_mm512_cvtps_pd (__m256 __A)
7980{
7981 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7982 (__v8df)
7983 _mm512_undefined_pd (),
7984 (__mmask8) -1,
7985 _MM_FROUND_CUR_DIRECTION);
7986}
7987
7988static __inline__ __m512d __DEFAULT_FN_ATTRS
7989_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
7990{
7991 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7992 (__v8df) __W,
7993 (__mmask8) __U,
7994 _MM_FROUND_CUR_DIRECTION);
7995}
7996
7997static __inline__ __m512d __DEFAULT_FN_ATTRS
7998_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
7999{
8000 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8001 (__v8df)
8002 _mm512_setzero_pd (),
8003 (__mmask8) __U,
8004 _MM_FROUND_CUR_DIRECTION);
8005}
8006
8007static __inline__ __m512d __DEFAULT_FN_ATTRS
8008_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
8009{
8010 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
8011 (__v8df) __W,
8012 (__mmask8) __U);
8013}
8014
8015static __inline__ __m512d __DEFAULT_FN_ATTRS
8016_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
8017{
8018 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
8019 (__v8df)
8020 _mm512_setzero_pd (),
8021 (__mmask8) __U);
8022}
8023
8024static __inline__ __m512 __DEFAULT_FN_ATTRS
8025_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
8026{
8027 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
8028 (__v16sf) __W,
8029 (__mmask16) __U);
8030}
8031
8032static __inline__ __m512 __DEFAULT_FN_ATTRS
8033_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
8034{
8035 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
8036 (__v16sf)
8037 _mm512_setzero_ps (),
8038 (__mmask16) __U);
8039}
8040
8041static __inline__ void __DEFAULT_FN_ATTRS
8042_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
8043{
8044 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8045 (__mmask8) __U);
8046}
8047
8048static __inline__ void __DEFAULT_FN_ATTRS
8049_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
8050{
8051 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8052 (__mmask8) __U);
8053}
8054
8055static __inline__ void __DEFAULT_FN_ATTRS
8056_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
8057{
8058 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8059 (__mmask16) __U);
8060}
8061
8062static __inline__ void __DEFAULT_FN_ATTRS
8063_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
8064{
8065 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
8066 (__mmask16) __U);
8067}
8068
/* Unmasked scalar double -> float conversion taking an explicit __R
 * argument.  Calls __builtin_ia32_cvtsd2ss_round_mask with an undefined
 * third operand and an all-ones mask; __R is the final argument. */
#define _mm_cvt_roundsd_ss(__A, __B, __R) \
  ((__m128) __builtin_ia32_cvtsd2ss_round_mask ((__v4sf) (__A), \
                                                (__v2df) (__B), \
                                                (__v4sf) _mm_undefined_ps (), \
                                                (__mmask8) -1, \
                                                (__R)))
8076
/* Merge-masked scalar double -> float conversion taking an explicit __R
 * argument.
 *
 * __W  third operand of the builtin (the value merged under the mask).
 * __U  8-bit mask.
 * __A, __B  first and second operands, viewed as __v4sf and __v2df.
 * __R  forwarded unchanged as the builtin's final argument.
 *
 * Every macro argument is parenthesized before it is cast.  Without that,
 * a compound argument such as `m1 | m2` or `c ? x : y` would have the cast
 * applied to its first operand only.
 */
#define _mm_mask_cvt_roundsd_ss(__W, __U, __A, __B, __R) \
  ((__m128) __builtin_ia32_cvtsd2ss_round_mask ((__v4sf) (__A), \
                                                (__v2df) (__B), \
                                                (__v4sf) (__W), \
                                                (__mmask8) (__U), \
                                                (__R)))
8084
/* Zero-masked scalar double -> float conversion taking an explicit __R
 * argument.
 *
 * __U  8-bit mask.
 * __A, __B  first and second operands, viewed as __v4sf and __v2df.
 * __R  forwarded unchanged as the builtin's final argument.
 *
 * The third operand is the all-zero float vector.  __U is parenthesized
 * before the cast so that a compound mask expression (e.g. `m1 | m2`) is
 * converted as a whole rather than only its first operand.
 */
#define _mm_maskz_cvt_roundsd_ss(__U, __A, __B, __R) \
  ((__m128) __builtin_ia32_cvtsd2ss_round_mask ((__v4sf) (__A), \
                                                (__v2df) (__B), \
                                                (__v4sf) _mm_setzero_ps (), \
                                                (__mmask8) (__U), \
                                                (__R)))
8092
/* Calls __builtin_ia32_cvtsi2sd64 with __A (viewed as __v2df), __B and __R.
 * Expands to the same call as _mm_cvt_roundsi64_sd. */
#define _mm_cvt_roundi64_sd(__A, __B, __R) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) (__A), (__B), (__R)))
8096
/* Calls __builtin_ia32_cvtsi2sd64 with __A (viewed as __v2df), __B and __R.
 * Expands to the same call as _mm_cvt_roundi64_sd. */
#define _mm_cvt_roundsi64_sd(__A, __B, __R) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) (__A), (__B), (__R)))
8100
/* Calls __builtin_ia32_cvtsi2ss32 with __A (viewed as __v4sf), __B and __R.
 * Expands to the same call as _mm_cvt_roundi32_ss. */
#define _mm_cvt_roundsi32_ss(__A, __B, __R) \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) (__A), (__B), (__R)))
8104
/* Calls __builtin_ia32_cvtsi2ss32 with __A (viewed as __v4sf), __B and __R.
 * Expands to the same call as _mm_cvt_roundsi32_ss. */
#define _mm_cvt_roundi32_ss(__A, __B, __R) \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) (__A), (__B), (__R)))
8108
/* Calls __builtin_ia32_cvtsi2ss64 with __A (viewed as __v4sf), __B and __R.
 * Expands to the same call as _mm_cvt_roundi64_ss. */
#define _mm_cvt_roundsi64_ss(__A, __B, __R) \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) (__A), (__B), (__R)))
8112
/* Calls __builtin_ia32_cvtsi2ss64 with __A (viewed as __v4sf), __B and __R.
 * Expands to the same call as _mm_cvt_roundsi64_ss. */
#define _mm_cvt_roundi64_ss(__A, __B, __R) \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) (__A), (__B), (__R)))
8116
/* Unmasked scalar float -> double conversion taking an explicit __R
 * argument.  Calls __builtin_ia32_cvtss2sd_round_mask with an undefined
 * third operand and an all-ones mask; __R is the final argument. */
#define _mm_cvt_roundss_sd(__A, __B, __R) \
  ((__m128d) __builtin_ia32_cvtss2sd_round_mask ((__v2df) (__A), \
                                                 (__v4sf) (__B), \
                                                 (__v2df) _mm_undefined_pd (), \
                                                 (__mmask8) -1, \
                                                 (__R)))
8124
/* Merge-masked scalar float -> double conversion taking an explicit __R
 * argument.
 *
 * __W  third operand of the builtin (the value merged under the mask).
 * __U  8-bit mask.
 * __A, __B  first and second operands, viewed as __v2df and __v4sf.
 * __R  forwarded unchanged as the builtin's final argument.
 *
 * Every macro argument is parenthesized before it is cast.  Without that,
 * a compound argument such as `m1 | m2` or `c ? x : y` would have the cast
 * applied to its first operand only.
 */
#define _mm_mask_cvt_roundss_sd(__W, __U, __A, __B, __R) \
  ((__m128d) __builtin_ia32_cvtss2sd_round_mask ((__v2df) (__A), \
                                                 (__v4sf) (__B), \
                                                 (__v2df) (__W), \
                                                 (__mmask8) (__U), \
                                                 (__R)))
8132
/* Zero-masked scalar float -> double conversion taking an explicit __R
 * argument.
 *
 * __U  8-bit mask.
 * __A, __B  first and second operands, viewed as __v2df and __v4sf.
 * __R  forwarded unchanged as the builtin's final argument.
 *
 * The third operand is the all-zero double vector.  __U is parenthesized
 * before the cast so that a compound mask expression (e.g. `m1 | m2`) is
 * converted as a whole rather than only its first operand.
 */
#define _mm_maskz_cvt_roundss_sd(__U, __A, __B, __R) \
  ((__m128d) __builtin_ia32_cvtss2sd_round_mask ((__v2df) (__A), \
                                                 (__v4sf) (__B), \
                                                 (__v2df) _mm_setzero_pd (), \
                                                 (__mmask8) (__U), \
                                                 (__R)))
8140
8141static __inline__ __m128d __DEFAULT_FN_ATTRS
8142_mm_cvtu32_sd (__m128d __A, unsigned __B)
8143{
8144 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
8145}
8146
/* Calls __builtin_ia32_cvtusi2sd64 with __A (viewed as __v2df), __B and
 * the caller-supplied __R. */
#define _mm_cvt_roundu64_sd(__A, __B, __R) \
  ((__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) (__A), (__B), (__R)))
8150
8151static __inline__ __m128d __DEFAULT_FN_ATTRS
8152_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
8153{
8154 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
8155 _MM_FROUND_CUR_DIRECTION);
8156}
8157
/* Calls __builtin_ia32_cvtusi2ss32 with __A (viewed as __v4sf), __B and
 * the caller-supplied __R. */
#define _mm_cvt_roundu32_ss(__A, __B, __R) \
  ((__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) (__A), (__B), (__R)))
8161
8162static __inline__ __m128 __DEFAULT_FN_ATTRS
8163_mm_cvtu32_ss (__m128 __A, unsigned __B)
8164{
8165 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
8166 _MM_FROUND_CUR_DIRECTION);
8167}
8168
/* Calls __builtin_ia32_cvtusi2ss64 with __A (viewed as __v4sf), __B and
 * the caller-supplied __R. */
#define _mm_cvt_roundu64_ss(__A, __B, __R) \
  ((__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) (__A), (__B), (__R)))
8172
8173static __inline__ __m128 __DEFAULT_FN_ATTRS
8174_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
8175{
8176 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
8177 _MM_FROUND_CUR_DIRECTION);
8178}
Ben Murdoch097c5b22016-05-18 11:27:45 +01008179
8180#undef __DEFAULT_FN_ATTRS
8181
8182#endif // __AVX512FINTRIN_H