/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Internal element-typed 512-bit vectors; the byte size is written as
   lane-count * element-size (64 bytes for each on the x86 targets this
   header is built for). */
typedef double    __v8df  __attribute__((__vector_size__(8 * sizeof(double))));
typedef float     __v16sf __attribute__((__vector_size__(16 * sizeof(float))));
typedef long long __v8di  __attribute__((__vector_size__(8 * sizeof(long long))));
typedef int       __v16si __attribute__((__vector_size__(16 * sizeof(int))));

/* User-facing 512-bit vector types: float, double and integer flavours. */
typedef float     __m512  __attribute__((__vector_size__(16 * sizeof(float))));
typedef double    __m512d __attribute__((__vector_size__(8 * sizeof(double))));
typedef long long __m512i __attribute__((__vector_size__(8 * sizeof(long long))));

/* Lane masks: one bit per lane for 8-lane and 16-lane operations. */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
41
/* Rounding mode selectors passed as the rounding argument of the builtins
   used in this header.  The replacement tokens are kept exactly as written
   so that a repeated definition elsewhere with the same tokens stays legal. */
#define _MM_FROUND_TO_NEAREST_INT   0x00 /* to nearest */
#define _MM_FROUND_TO_NEG_INF       0x01 /* toward negative infinity */
#define _MM_FROUND_TO_POS_INF       0x02 /* toward positive infinity */
#define _MM_FROUND_TO_ZERO          0x03 /* toward zero */
#define _MM_FROUND_CUR_DIRECTION    0x04 /* whatever direction is current */
48
49static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000050_mm512_setzero_ps(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000051{
52 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
53 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
54}
55static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000056_mm512_setzero_pd(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000057{
58 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
59}
60static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000061_mm512_setzero_si512(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000062{
63 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
64}
65
66static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
67_mm512_sqrt_pd(__m512d a)
68{
69 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
70 (__v8df) _mm512_setzero_pd (),
71 (__mmask8) -1,
72 _MM_FROUND_CUR_DIRECTION);
73}
74
75static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
76_mm512_sqrt_ps(__m512 a)
77{
78 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
79 (__v16sf) _mm512_setzero_ps (),
80 (__mmask16) -1,
81 _MM_FROUND_CUR_DIRECTION);
82}
83
84// rsqrt14
85static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
86_mm512_rsqrt14_pd(__m512d __A)
87{
88 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
89 (__v8df)
90 _mm512_setzero_pd (),
91 (__mmask8) -1);}
92
93static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
94_mm512_rsqrt14_ps(__m512 __A)
95{
96 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
97 (__v16sf)
98 _mm512_setzero_ps (),
99 (__mmask16) -1);
100}
101
102static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
103_mm_rsqrt14_ss(__m128 __A, __m128 __B)
104{
105 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
106 (__v4sf) __B,
107 (__v4sf)
108 _mm_setzero_ps (),
109 (__mmask8) -1);
110}
111
112static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
113_mm_rsqrt14_sd(__m128d __A, __m128d __B)
114{
115 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
116 (__v2df) __B,
117 (__v2df)
118 _mm_setzero_pd (),
119 (__mmask8) -1);
120}
121
122// rcp14
123static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
124_mm512_rcp14_pd(__m512d __A)
125{
126 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
127 (__v8df)
128 _mm512_setzero_pd (),
129 (__mmask8) -1);
130}
131
132static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
133_mm512_rcp14_ps(__m512 __A)
134{
135 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
136 (__v16sf)
137 _mm512_setzero_ps (),
138 (__mmask16) -1);
139}
140static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000141_mm_rcp14_ss(__m128 __A, __m128 __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000142{
143 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
144 (__v4sf) __B,
145 (__v4sf)
146 _mm_setzero_ps (),
147 (__mmask8) -1);
148}
149
150static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000151_mm_rcp14_sd(__m128d __A, __m128d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000152{
153 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
154 (__v2df) __B,
155 (__v2df)
156 _mm_setzero_pd (),
157 (__mmask8) -1);
158}
159
160// min/max
161static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
162_mm512_max_pd(__m512d __A, __m512d __B)
163{
164 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
165 (__v8df) __B,
166 (__v8df)
167 _mm512_setzero_pd (),
168 (__mmask8) -1,
169 _MM_FROUND_CUR_DIRECTION);
170}
171
172static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
173_mm512_max_ps(__m512 __A, __m512 __B)
174{
175 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
176 (__v16sf) __B,
177 (__v16sf)
178 _mm512_setzero_ps (),
179 (__mmask16) -1,
180 _MM_FROUND_CUR_DIRECTION);
181}
182static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
183_mm512_min_pd(__m512d __A, __m512d __B)
184{
185 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
186 (__v8df) __B,
187 (__v8df)
188 _mm512_setzero_pd (),
189 (__mmask8) -1,
190 _MM_FROUND_CUR_DIRECTION);
191}
192
193static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
194_mm512_min_ps(__m512 __A, __m512 __B)
195{
196 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
197 (__v16sf) __B,
198 (__v16sf)
199 _mm512_setzero_ps (),
200 (__mmask16) -1,
201 _MM_FROUND_CUR_DIRECTION);
202}
203
204static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000205_mm512_cvtps_ph(__m512 __A, const int __I)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000206{
207 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
208 __I,
209 (__v16hi)
210 _mm256_setzero_si256 (),
211 -1);
212}
213
214static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000215_mm512_cvtph_ps(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000216{
217 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
218 (__v16sf)
219 _mm512_setzero_ps (),
220 (__mmask16) -1,
221 _MM_FROUND_CUR_DIRECTION);
222}
223
224static __inline __m512i __attribute__((__always_inline__, __nodebug__))
225_mm512_cvttps_epi32(__m512 a)
226{
227 return (__m512i)
228 __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
229 (__v16si) _mm512_setzero_si512 (),
230 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
231}
232
233static __inline __m256i __attribute__((__always_inline__, __nodebug__))
234_mm512_cvttpd_epi32(__m512d a)
235{
236 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
237 (__v8si)_mm256_setzero_si256(),
238 (__mmask8) -1,
239 _MM_FROUND_CUR_DIRECTION);
240}
241
242static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000243_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000244{
245 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
246 (__v8si)
247 _mm256_setzero_si256 (),
248 (__mmask8) -1,
249 __R);
250}
251static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000252_mm512_cvtt_roundps_epi32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000253{
254 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
255 (__v16si)
256 _mm512_setzero_si512 (),
257 (__mmask16) -1,
258 __R);
259}
260
261static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000262_mm512_cvt_roundps_epi32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000263{
264 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
265 (__v16si)
266 _mm512_setzero_si512 (),
267 (__mmask16) -1,
268 __R);
269}
270static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000271_mm512_cvt_roundpd_epi32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000272{
273 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
274 (__v8si)
275 _mm256_setzero_si256 (),
276 (__mmask8) -1,
277 __R);
278}
279static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000280_mm512_cvt_roundps_epu32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000281{
282 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
283 (__v16si)
284 _mm512_setzero_si512 (),
285 (__mmask16) -1,
286 __R);
287}
288static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000289_mm512_cvt_roundpd_epu32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000290{
291 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
292 (__v8si)
293 _mm256_setzero_si256 (),
294 (__mmask8) -1,
295 __R);
296}
297
298static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000299_mm512_roundscale_ps(__m512 __A, const int __imm)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000300{
301 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
302 (__v16sf) __A, -1,
303 _MM_FROUND_CUR_DIRECTION);
304}
305static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000306_mm512_roundscale_pd(__m512d __A, const int __imm)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000307{
308 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
309 (__v8df) __A, -1,
310 _MM_FROUND_CUR_DIRECTION);
311}
312
313static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000314_mm512_cmp_ps_mask(__m512 a, __m512 b, const int p)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000315{
316 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a,
317 (__v16sf) b, p, (__mmask16) -1,
318 _MM_FROUND_CUR_DIRECTION);
319}
320
321static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000322_mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000323{
324 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
325 (__v8df) __Y, __P,
326 (__mmask8) -1,
327 _MM_FROUND_CUR_DIRECTION);
328}
329
330static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000331_mm512_cvttps_epu32(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000332{
333 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
334 (__v16si)
335 _mm512_setzero_si512 (),
336 (__mmask16) -1,
337 _MM_FROUND_CUR_DIRECTION);
338}
339
340static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000341_mm512_cvt_roundepi32_ps(__m512i __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000342{
343 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
344 (__v16sf)
345 _mm512_setzero_ps (),
346 (__mmask16) -1,
347 __R);
348}
349
350static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000351_mm512_cvt_roundepu32_ps(__m512i __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000352{
353 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
354 (__v16sf)
355 _mm512_setzero_ps (),
356 (__mmask16) -1,
357 __R);
358}
359
360static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000361_mm512_cvtepi32_pd(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000362{
363 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
364 (__v8df)
365 _mm512_setzero_pd (),
366 (__mmask8) -1);
367}
368
369static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000370_mm512_cvtepu32_pd(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000371{
372 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
373 (__v8df)
374 _mm512_setzero_pd (),
375 (__mmask8) -1);
376}
377static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000378_mm512_cvt_roundpd_ps(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000379{
380 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
381 (__v8sf)
382 _mm256_setzero_ps (),
383 (__mmask8) -1,
384 __R);
385}
386
387static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000388_mm512_abs_epi64(__m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000389{
390 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
391 (__v8di)
392 _mm512_setzero_si512 (),
393 (__mmask8) -1);
394}
395
396static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000397_mm512_abs_epi32(__m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000398{
399 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
400 (__v16si)
401 _mm512_setzero_si512 (),
402 (__mmask16) -1);
403}
404
405static __inline __m512i
406__attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000407_mm512_max_epi32(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000408{
409 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
410 (__v16si) __B,
411 (__v16si)
412 _mm512_setzero_si512 (),
413 (__mmask16) -1);
414}
415
416static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000417_mm512_max_epu32(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000418{
419 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
420 (__v16si) __B,
421 (__v16si)
422 _mm512_setzero_si512 (),
423 (__mmask16) -1);
424}
425
426static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000427_mm512_max_epi64(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000428{
429 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
430 (__v8di) __B,
431 (__v8di)
432 _mm512_setzero_si512 (),
433 (__mmask8) -1);
434}
435
436static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000437_mm512_max_epu64(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000438{
439 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
440 (__v8di) __B,
441 (__v8di)
442 _mm512_setzero_si512 (),
443 (__mmask8) -1);
444}
445static __inline __m512i
446__attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000447_mm512_min_epi32(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000448{
449 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
450 (__v16si) __B,
451 (__v16si)
452 _mm512_setzero_si512 (),
453 (__mmask16) -1);
454}
455
456static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000457_mm512_min_epu32(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000458{
459 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
460 (__v16si) __B,
461 (__v16si)
462 _mm512_setzero_si512 (),
463 (__mmask16) -1);
464}
465
466static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000467_mm512_min_epi64(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000468{
469 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
470 (__v8di) __B,
471 (__v8di)
472 _mm512_setzero_si512 (),
473 (__mmask8) -1);
474}
475
476static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000477_mm512_min_epu64(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000478{
479 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
480 (__v8di) __B,
481 (__v8di)
482 _mm512_setzero_si512 (),
483 (__mmask8) -1);
484}
485
486static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000487_mm512_mul_epi32(__m512i __X, __m512i __Y)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000488{
489 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
490 (__v16si) __Y,
491 (__v8di)
492 _mm512_setzero_si512 (),
493 (__mmask8) -1);
494}
495
496static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000497_mm512_mul_epu32(__m512i __X, __m512i __Y)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000498{
499 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
500 (__v16si) __Y,
501 (__v8di)
502 _mm512_setzero_si512 (),
503 (__mmask8) -1);
504}
505
506static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000507_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000508{
509 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
510 (__v8di) __W,
511 (__mmask8) __U);
512}
513
514static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000515_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000516{
517 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
518 (__v16si) __W,
519 (__mmask16) __U);
520}
521
522static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000523_mm512_test_epi32_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000524{
525 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
526 (__v16si) __B,
527 (__mmask16) -1);
528}
529
530static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000531_mm512_test_epi64_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000532{
533 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
534 (__v8di) __B,
535 (__mmask8) -1);
536}
537
538static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000539_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000540{
541 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
542 (__v8df) __W,
543 (__mmask8) __U);
544}
545
546static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000547_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000548{
549 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
550 (__v16sf) __W,
551 (__mmask16) __U);
552}
553
554static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000555_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000556{
557 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
558 (__v16si)
559 _mm512_setzero_si512 (),
560 __M);
561}
562
563static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000564_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000565{
566#ifdef __x86_64__
567 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
568 (__v8di)
569 _mm512_setzero_si512 (),
570 __M);
571#else
572 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
573 (__v8di)
574 _mm512_setzero_si512 (),
575 __M);
576#endif
577}
578
579static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000580_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000581{
582 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
583 (__v16si)
584 _mm512_setzero_si512 (),
585 (__mmask16) __U);
586}
587
588static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000589_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000590{
591 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
592 (__v8di)
593 _mm512_setzero_si512 (),
594 (__mmask8) __U);
595}
596
597static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000598_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000599{
600 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
601 (__v16sf)
602 _mm512_setzero_ps (),
603 (__mmask16) __U);
604}
605
606static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000607_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000608{
609 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
610 (__v8df)
611 _mm512_setzero_pd (),
612 (__mmask8) __U);
613}
614
615static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000616_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000617{
618 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
619 (__mmask8) __U);
620}
621
622static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000623_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000624{
625 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
626 (__mmask16) __U);
627}
628
629static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000630_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000631{
632 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
633}
634
635static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000636_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000637{
638 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
639 (__mmask16) __U);
640}
641
642static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000643_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000644{
645 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
646 /* idx */ ,
647 (__v16si) __A,
648 (__v16si) __B,
649 (__mmask16) -1);
650}
651static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000652_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000653{
654 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
655 /* idx */ ,
656 (__v8di) __A,
657 (__v8di) __B,
658 (__mmask8) -1);
659}
660
661static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000662_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000663{
664 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
665 /* idx */ ,
666 (__v8df) __A,
667 (__v8df) __B,
668 (__mmask8) -1);
669}
670static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000671_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000672{
673 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
674 /* idx */ ,
675 (__v16sf) __A,
676 (__v16sf) __B,
677 (__mmask16) -1);
678}
679
680static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000681_mm512_floor_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000682{
683 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
684 _MM_FROUND_FLOOR,
685 (__v16sf) __A, -1,
686 _MM_FROUND_CUR_DIRECTION);
687}
688
689static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000690_mm512_floor_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000691{
692 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
693 _MM_FROUND_FLOOR,
694 (__v8df) __A, -1,
695 _MM_FROUND_CUR_DIRECTION);
696}
697
698static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000699_mm512_ceil_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000700{
701 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
702 _MM_FROUND_CEIL,
703 (__v16sf) __A, -1,
704 _MM_FROUND_CUR_DIRECTION);
705}
706
707static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000708_mm512_ceil_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000709{
710 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
711 _MM_FROUND_CEIL,
712 (__v8df) __A, -1,
713 _MM_FROUND_CUR_DIRECTION);
714}
715
716#endif // __AVX512FINTRIN_H