blob: a336f6d74634ad040d0b497509ee35e490af7785 [file] [log] [blame]
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Element-typed views of a 64-byte vector; used for casts inside this header. */
typedef double    __v8df  __attribute__((__vector_size__(64)));
typedef float     __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di  __attribute__((__vector_size__(64)));
typedef int       __v16si __attribute__((__vector_size__(64)));

/* 64-byte vector types that appear in the intrinsic signatures below. */
typedef float     __m512  __attribute__((__vector_size__(64)));
typedef double    __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Integer types that carry one mask bit per vector lane. */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
41
/* Rounding mode macros; passed as the trailing rounding argument of the
 * builtins used by the wrappers in this header. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
48
Adam Nemet0d5bb552014-07-28 17:14:40 +000049/* Create vectors with repeated elements */
50
51static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
52_mm512_setzero_si512(void)
53{
54 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
55}
56
57static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
58_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
59{
60 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
61 (__v16si)
62 _mm512_setzero_si512 (),
63 __M);
64}
65
66static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
67_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
68{
69#ifdef __x86_64__
70 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
71 (__v8di)
72 _mm512_setzero_si512 (),
73 __M);
74#else
75 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
76 (__v8di)
77 _mm512_setzero_si512 (),
78 __M);
79#endif
80}
81
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000082static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000083_mm512_setzero_ps(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000084{
85 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
86 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
87}
88static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000089_mm512_setzero_pd(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000090{
91 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
92}
Adam Nemet0d5bb552014-07-28 17:14:40 +000093
Adam Nemetf42e7a22014-07-30 16:51:22 +000094static __inline __m512 __attribute__((__always_inline__, __nodebug__))
95_mm512_set1_ps(float __w)
96{
97 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
98 __w, __w, __w, __w, __w, __w, __w, __w };
99}
100
101static __inline __m512d __attribute__((__always_inline__, __nodebug__))
102_mm512_set1_pd(double __w)
103{
104 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
105}
106
107static __inline __m512i __attribute__((__always_inline__, __nodebug__))
108_mm512_set1_epi32(int __s)
109{
110 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
111 __s, __s, __s, __s, __s, __s, __s, __s };
112}
113
114static __inline __m512i __attribute__((__always_inline__, __nodebug__))
115_mm512_set1_epi64(long long __d)
116{
117 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
118}
119
Adam Nemet0d5bb552014-07-28 17:14:40 +0000120/* Arithmetic */
121
Adam Nemeta3ebe622014-07-28 17:14:42 +0000122static __inline __m512d __attribute__((__always_inline__, __nodebug__))
123_mm512_add_pd(__m512d __a, __m512d __b)
124{
125 return __a + __b;
126}
127
128static __inline __m512 __attribute__((__always_inline__, __nodebug__))
129_mm512_add_ps(__m512 __a, __m512 __b)
130{
131 return __a + __b;
132}
133
134static __inline __m512d __attribute__((__always_inline__, __nodebug__))
135_mm512_mul_pd(__m512d __a, __m512d __b)
136{
137 return __a * __b;
138}
139
140static __inline __m512 __attribute__((__always_inline__, __nodebug__))
141_mm512_mul_ps(__m512 __a, __m512 __b)
142{
143 return __a * __b;
144}
145
146static __inline __m512d __attribute__((__always_inline__, __nodebug__))
147_mm512_sub_pd(__m512d __a, __m512d __b)
148{
149 return __a - __b;
150}
151
152static __inline __m512 __attribute__((__always_inline__, __nodebug__))
153_mm512_sub_ps(__m512 __a, __m512 __b)
154{
155 return __a - __b;
156}
157
Adam Nemet0d5bb552014-07-28 17:14:40 +0000158static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
159_mm512_max_pd(__m512d __A, __m512d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000160{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000161 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
162 (__v8df) __B,
163 (__v8df)
164 _mm512_setzero_pd (),
165 (__mmask8) -1,
166 _MM_FROUND_CUR_DIRECTION);
167}
168
169static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
170_mm512_max_ps(__m512 __A, __m512 __B)
171{
172 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
173 (__v16sf) __B,
174 (__v16sf)
175 _mm512_setzero_ps (),
176 (__mmask16) -1,
177 _MM_FROUND_CUR_DIRECTION);
178}
179
180static __inline __m512i
181__attribute__ ((__always_inline__, __nodebug__))
182_mm512_max_epi32(__m512i __A, __m512i __B)
183{
184 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
185 (__v16si) __B,
186 (__v16si)
187 _mm512_setzero_si512 (),
188 (__mmask16) -1);
189}
190
191static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
192_mm512_max_epu32(__m512i __A, __m512i __B)
193{
194 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
195 (__v16si) __B,
196 (__v16si)
197 _mm512_setzero_si512 (),
198 (__mmask16) -1);
199}
200
201static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
202_mm512_max_epi64(__m512i __A, __m512i __B)
203{
204 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
205 (__v8di) __B,
206 (__v8di)
207 _mm512_setzero_si512 (),
208 (__mmask8) -1);
209}
210
211static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
212_mm512_max_epu64(__m512i __A, __m512i __B)
213{
214 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
215 (__v8di) __B,
216 (__v8di)
217 _mm512_setzero_si512 (),
218 (__mmask8) -1);
219}
220
221static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
222_mm512_min_pd(__m512d __A, __m512d __B)
223{
224 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
225 (__v8df) __B,
226 (__v8df)
227 _mm512_setzero_pd (),
228 (__mmask8) -1,
229 _MM_FROUND_CUR_DIRECTION);
230}
231
232static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
233_mm512_min_ps(__m512 __A, __m512 __B)
234{
235 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
236 (__v16sf) __B,
237 (__v16sf)
238 _mm512_setzero_ps (),
239 (__mmask16) -1,
240 _MM_FROUND_CUR_DIRECTION);
241}
242
243static __inline __m512i
244__attribute__ ((__always_inline__, __nodebug__))
245_mm512_min_epi32(__m512i __A, __m512i __B)
246{
247 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
248 (__v16si) __B,
249 (__v16si)
250 _mm512_setzero_si512 (),
251 (__mmask16) -1);
252}
253
254static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
255_mm512_min_epu32(__m512i __A, __m512i __B)
256{
257 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
258 (__v16si) __B,
259 (__v16si)
260 _mm512_setzero_si512 (),
261 (__mmask16) -1);
262}
263
264static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
265_mm512_min_epi64(__m512i __A, __m512i __B)
266{
267 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
268 (__v8di) __B,
269 (__v8di)
270 _mm512_setzero_si512 (),
271 (__mmask8) -1);
272}
273
274static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
275_mm512_min_epu64(__m512i __A, __m512i __B)
276{
277 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
278 (__v8di) __B,
279 (__v8di)
280 _mm512_setzero_si512 (),
281 (__mmask8) -1);
282}
283
284static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
285_mm512_mul_epi32(__m512i __X, __m512i __Y)
286{
287 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
288 (__v16si) __Y,
289 (__v8di)
290 _mm512_setzero_si512 (),
291 (__mmask8) -1);
292}
293
294static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
295_mm512_mul_epu32(__m512i __X, __m512i __Y)
296{
297 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
298 (__v16si) __Y,
299 (__v8di)
300 _mm512_setzero_si512 (),
301 (__mmask8) -1);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000302}
303
304static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
305_mm512_sqrt_pd(__m512d a)
306{
307 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
308 (__v8df) _mm512_setzero_pd (),
309 (__mmask8) -1,
310 _MM_FROUND_CUR_DIRECTION);
311}
312
313static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
314_mm512_sqrt_ps(__m512 a)
315{
316 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
317 (__v16sf) _mm512_setzero_ps (),
318 (__mmask16) -1,
319 _MM_FROUND_CUR_DIRECTION);
320}
321
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000322static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
323_mm512_rsqrt14_pd(__m512d __A)
324{
325 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
326 (__v8df)
327 _mm512_setzero_pd (),
328 (__mmask8) -1);}
329
330static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
331_mm512_rsqrt14_ps(__m512 __A)
332{
333 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
334 (__v16sf)
335 _mm512_setzero_ps (),
336 (__mmask16) -1);
337}
338
339static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
340_mm_rsqrt14_ss(__m128 __A, __m128 __B)
341{
342 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
343 (__v4sf) __B,
344 (__v4sf)
345 _mm_setzero_ps (),
346 (__mmask8) -1);
347}
348
349static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
350_mm_rsqrt14_sd(__m128d __A, __m128d __B)
351{
352 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
353 (__v2df) __B,
354 (__v2df)
355 _mm_setzero_pd (),
356 (__mmask8) -1);
357}
358
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000359static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
360_mm512_rcp14_pd(__m512d __A)
361{
362 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
363 (__v8df)
364 _mm512_setzero_pd (),
365 (__mmask8) -1);
366}
367
368static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
369_mm512_rcp14_ps(__m512 __A)
370{
371 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
372 (__v16sf)
373 _mm512_setzero_ps (),
374 (__mmask16) -1);
375}
376static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000377_mm_rcp14_ss(__m128 __A, __m128 __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000378{
379 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
380 (__v4sf) __B,
381 (__v4sf)
382 _mm_setzero_ps (),
383 (__mmask8) -1);
384}
385
386static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000387_mm_rcp14_sd(__m128d __A, __m128d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000388{
389 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
390 (__v2df) __B,
391 (__v2df)
392 _mm_setzero_pd (),
393 (__mmask8) -1);
394}
395
Adam Nemet0d5bb552014-07-28 17:14:40 +0000396static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
397_mm512_floor_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000398{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000399 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
400 _MM_FROUND_FLOOR,
401 (__v16sf) __A, -1,
402 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000403}
404
Adam Nemet0d5bb552014-07-28 17:14:40 +0000405static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
406_mm512_floor_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000407{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000408 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
409 _MM_FROUND_FLOOR,
410 (__v8df) __A, -1,
411 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000412}
413
Adam Nemet0d5bb552014-07-28 17:14:40 +0000414static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
415_mm512_ceil_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000416{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000417 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
418 _MM_FROUND_CEIL,
419 (__v16sf) __A, -1,
420 _MM_FROUND_CUR_DIRECTION);
421}
422
423static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
424_mm512_ceil_pd(__m512d __A)
425{
426 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
427 _MM_FROUND_CEIL,
428 (__v8df) __A, -1,
429 _MM_FROUND_CUR_DIRECTION);
430}
431
432static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
433_mm512_abs_epi64(__m512i __A)
434{
435 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
436 (__v8di)
437 _mm512_setzero_si512 (),
438 (__mmask8) -1);
439}
440
441static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
442_mm512_abs_epi32(__m512i __A)
443{
444 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
445 (__v16si)
446 _mm512_setzero_si512 (),
447 (__mmask16) -1);
448}
449
450static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
451_mm512_roundscale_ps(__m512 __A, const int __imm)
452{
453 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
454 (__v16sf) __A, -1,
455 _MM_FROUND_CUR_DIRECTION);
456}
457static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
458_mm512_roundscale_pd(__m512d __A, const int __imm)
459{
460 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
461 (__v8df) __A, -1,
462 _MM_FROUND_CUR_DIRECTION);
463}
464
465/* Vector permutations */
466
467static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
468_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
469{
470 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
471 /* idx */ ,
472 (__v16si) __A,
473 (__v16si) __B,
474 (__mmask16) -1);
475}
476static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
477_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
478{
479 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
480 /* idx */ ,
481 (__v8di) __A,
482 (__v8di) __B,
483 (__mmask8) -1);
484}
485
486static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
487_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
488{
489 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
490 /* idx */ ,
491 (__v8df) __A,
492 (__v8df) __B,
493 (__mmask8) -1);
494}
495static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
496_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
497{
498 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
499 /* idx */ ,
500 (__v16sf) __A,
501 (__v16sf) __B,
502 (__mmask16) -1);
503}
504
505/* Vector Blend */
506
507static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
508_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
509{
510 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
511 (__v8df) __W,
512 (__mmask8) __U);
513}
514
515static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
516_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
517{
518 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
519 (__v16sf) __W,
520 (__mmask16) __U);
521}
522
523static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
524_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
525{
526 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
527 (__v8di) __W,
528 (__mmask8) __U);
529}
530
531static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
532_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
533{
534 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
535 (__v16si) __W,
536 (__mmask16) __U);
537}
538
539/* Compare */
540
541static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
542_mm512_cmp_ps_mask(__m512 a, __m512 b, const int p)
543{
544 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a,
545 (__v16sf) b, p, (__mmask16) -1,
546 _MM_FROUND_CUR_DIRECTION);
547}
548
549static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
550_mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P)
551{
552 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
553 (__v8df) __Y, __P,
554 (__mmask8) -1,
555 _MM_FROUND_CUR_DIRECTION);
556}
557
558/* Conversion */
559
560static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
561_mm512_cvttps_epu32(__m512 __A)
562{
563 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
564 (__v16si)
565 _mm512_setzero_si512 (),
566 (__mmask16) -1,
567 _MM_FROUND_CUR_DIRECTION);
568}
569
570static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
571_mm512_cvt_roundepi32_ps(__m512i __A, const int __R)
572{
573 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
574 (__v16sf)
575 _mm512_setzero_ps (),
576 (__mmask16) -1,
577 __R);
578}
579
580static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
581_mm512_cvt_roundepu32_ps(__m512i __A, const int __R)
582{
583 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
584 (__v16sf)
585 _mm512_setzero_ps (),
586 (__mmask16) -1,
587 __R);
588}
589
590static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
591_mm512_cvtepi32_pd(__m256i __A)
592{
593 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
594 (__v8df)
595 _mm512_setzero_pd (),
596 (__mmask8) -1);
597}
598
599static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
600_mm512_cvtepu32_pd(__m256i __A)
601{
602 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
603 (__v8df)
604 _mm512_setzero_pd (),
605 (__mmask8) -1);
606}
607static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
608_mm512_cvt_roundpd_ps(__m512d __A, const int __R)
609{
610 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
611 (__v8sf)
612 _mm256_setzero_ps (),
613 (__mmask8) -1,
614 __R);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000615}
616
617static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000618_mm512_cvtps_ph(__m512 __A, const int __I)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000619{
620 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
621 __I,
622 (__v16hi)
623 _mm256_setzero_si256 (),
624 -1);
625}
626
627static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000628_mm512_cvtph_ps(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000629{
630 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
631 (__v16sf)
632 _mm512_setzero_ps (),
633 (__mmask16) -1,
634 _MM_FROUND_CUR_DIRECTION);
635}
636
637static __inline __m512i __attribute__((__always_inline__, __nodebug__))
638_mm512_cvttps_epi32(__m512 a)
639{
640 return (__m512i)
641 __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
642 (__v16si) _mm512_setzero_si512 (),
643 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
644}
645
646static __inline __m256i __attribute__((__always_inline__, __nodebug__))
647_mm512_cvttpd_epi32(__m512d a)
648{
649 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
650 (__v8si)_mm256_setzero_si256(),
651 (__mmask8) -1,
652 _MM_FROUND_CUR_DIRECTION);
653}
654
655static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000656_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000657{
658 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
659 (__v8si)
660 _mm256_setzero_si256 (),
661 (__mmask8) -1,
662 __R);
663}
664static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000665_mm512_cvtt_roundps_epi32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000666{
667 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
668 (__v16si)
669 _mm512_setzero_si512 (),
670 (__mmask16) -1,
671 __R);
672}
673
674static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000675_mm512_cvt_roundps_epi32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000676{
677 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
678 (__v16si)
679 _mm512_setzero_si512 (),
680 (__mmask16) -1,
681 __R);
682}
683static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000684_mm512_cvt_roundpd_epi32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000685{
686 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
687 (__v8si)
688 _mm256_setzero_si256 (),
689 (__mmask8) -1,
690 __R);
691}
692static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000693_mm512_cvt_roundps_epu32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000694{
695 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
696 (__v16si)
697 _mm512_setzero_si512 (),
698 (__mmask16) -1,
699 __R);
700}
701static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000702_mm512_cvt_roundpd_epu32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000703{
704 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
705 (__v8si)
706 _mm256_setzero_si256 (),
707 (__mmask8) -1,
708 __R);
709}
710
Adam Nemet0d5bb552014-07-28 17:14:40 +0000711/* Bit Test */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000712
713static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000714_mm512_test_epi32_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000715{
716 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
717 (__v16si) __B,
718 (__mmask16) -1);
719}
720
721static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000722_mm512_test_epi64_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000723{
724 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
725 (__v8di) __B,
726 (__mmask8) -1);
727}
728
Adam Nemet0d5bb552014-07-28 17:14:40 +0000729/* SIMD load ops */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000730
731static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000732_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000733{
734 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
735 (__v16si)
736 _mm512_setzero_si512 (),
737 (__mmask16) __U);
738}
739
740static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000741_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000742{
743 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
744 (__v8di)
745 _mm512_setzero_si512 (),
746 (__mmask8) __U);
747}
748
749static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000750_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000751{
752 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
753 (__v16sf)
754 _mm512_setzero_ps (),
755 (__mmask16) __U);
756}
757
758static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000759_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000760{
761 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
762 (__v8df)
763 _mm512_setzero_pd (),
764 (__mmask8) __U);
765}
766
Adam Nemet0d5bb552014-07-28 17:14:40 +0000767/* SIMD store ops */
768
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000769static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000770_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000771{
772 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
773 (__mmask8) __U);
774}
775
776static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000777_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000778{
779 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
780 (__mmask16) __U);
781}
782
783static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000784_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000785{
786 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
787}
788
789static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemetfce1ad02014-07-28 17:14:45 +0000790_mm512_storeu_pd(void *__P, __m512d __A)
791{
792 __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
793}
794
795static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000796_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000797{
798 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
799 (__mmask16) __U);
800}
801
Adam Nemetfce1ad02014-07-28 17:14:45 +0000802static __inline void __attribute__ ((__always_inline__, __nodebug__))
803_mm512_storeu_ps(void *__P, __m512 __A)
804{
805 __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
806}
807
808static __inline void __attribute__ ((__always_inline__, __nodebug__))
809_mm512_store_ps(void *__P, __m512 __A)
810{
811 *(__m512*)__P = __A;
812}
813
814static __inline void __attribute__ ((__always_inline__, __nodebug__))
815_mm512_store_pd(void *__P, __m512d __A)
816{
817 *(__m512d*)__P = __A;
818}
819
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000820#endif // __AVX512FINTRIN_H