blob: c205662c164e26a3025804dc154983efa100b18d [file] [log] [blame]
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Element-typed 64-byte vectors, used when casting operands for builtins. */
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Public 64-byte vector types: float, double and integer flavours. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Mask types: an unsigned char and an unsigned short. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
41
/* Rounding mode macros.  Passed as the trailing rounding argument of the
   builtins used below. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04
48
Adam Nemet0d5bb552014-07-28 17:14:40 +000049/* Create vectors with repeated elements */
50
51static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
52_mm512_setzero_si512(void)
53{
54 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
55}
56
57static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
58_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
59{
60 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
61 (__v16si)
62 _mm512_setzero_si512 (),
63 __M);
64}
65
66static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
67_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
68{
69#ifdef __x86_64__
70 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
71 (__v8di)
72 _mm512_setzero_si512 (),
73 __M);
74#else
75 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
76 (__v8di)
77 _mm512_setzero_si512 (),
78 __M);
79#endif
80}
81
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000082static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000083_mm512_setzero_ps(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000084{
85 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
86 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
87}
88static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000089_mm512_setzero_pd(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000090{
91 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
92}
Adam Nemet0d5bb552014-07-28 17:14:40 +000093
Adam Nemetf42e7a22014-07-30 16:51:22 +000094static __inline __m512 __attribute__((__always_inline__, __nodebug__))
95_mm512_set1_ps(float __w)
96{
97 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
98 __w, __w, __w, __w, __w, __w, __w, __w };
99}
100
101static __inline __m512d __attribute__((__always_inline__, __nodebug__))
102_mm512_set1_pd(double __w)
103{
104 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
105}
106
107static __inline __m512i __attribute__((__always_inline__, __nodebug__))
108_mm512_set1_epi32(int __s)
109{
110 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
111 __s, __s, __s, __s, __s, __s, __s, __s };
112}
113
114static __inline __m512i __attribute__((__always_inline__, __nodebug__))
115_mm512_set1_epi64(long long __d)
116{
117 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
118}
119
Adam Nemetc871ff92014-07-30 16:51:24 +0000120/* Cast between vector types */
121
122static __inline __m512d __attribute__((__always_inline__, __nodebug__))
123_mm512_castpd256_pd512(__m256d __a)
124{
125 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
126}
127
128static __inline __m512 __attribute__((__always_inline__, __nodebug__))
129_mm512_castps256_ps512(__m256 __a)
130{
131 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
132 -1, -1, -1, -1, -1, -1, -1, -1);
133}
134
135static __inline __m128d __attribute__((__always_inline__, __nodebug__))
136_mm512_castpd512_pd128(__m512d __a)
137{
138 return __builtin_shufflevector(__a, __a, 0, 1);
139}
140
141static __inline __m128 __attribute__((__always_inline__, __nodebug__))
142_mm512_castps512_ps128(__m512 __a)
143{
144 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
145}
146
Adam Nemet0d5bb552014-07-28 17:14:40 +0000147/* Arithmetic */
148
Adam Nemeta3ebe622014-07-28 17:14:42 +0000149static __inline __m512d __attribute__((__always_inline__, __nodebug__))
150_mm512_add_pd(__m512d __a, __m512d __b)
151{
152 return __a + __b;
153}
154
155static __inline __m512 __attribute__((__always_inline__, __nodebug__))
156_mm512_add_ps(__m512 __a, __m512 __b)
157{
158 return __a + __b;
159}
160
161static __inline __m512d __attribute__((__always_inline__, __nodebug__))
162_mm512_mul_pd(__m512d __a, __m512d __b)
163{
164 return __a * __b;
165}
166
167static __inline __m512 __attribute__((__always_inline__, __nodebug__))
168_mm512_mul_ps(__m512 __a, __m512 __b)
169{
170 return __a * __b;
171}
172
173static __inline __m512d __attribute__((__always_inline__, __nodebug__))
174_mm512_sub_pd(__m512d __a, __m512d __b)
175{
176 return __a - __b;
177}
178
179static __inline __m512 __attribute__((__always_inline__, __nodebug__))
180_mm512_sub_ps(__m512 __a, __m512 __b)
181{
182 return __a - __b;
183}
184
Adam Nemet0d5bb552014-07-28 17:14:40 +0000185static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
186_mm512_max_pd(__m512d __A, __m512d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000187{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000188 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
189 (__v8df) __B,
190 (__v8df)
191 _mm512_setzero_pd (),
192 (__mmask8) -1,
193 _MM_FROUND_CUR_DIRECTION);
194}
195
196static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
197_mm512_max_ps(__m512 __A, __m512 __B)
198{
199 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
200 (__v16sf) __B,
201 (__v16sf)
202 _mm512_setzero_ps (),
203 (__mmask16) -1,
204 _MM_FROUND_CUR_DIRECTION);
205}
206
207static __inline __m512i
208__attribute__ ((__always_inline__, __nodebug__))
209_mm512_max_epi32(__m512i __A, __m512i __B)
210{
211 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
212 (__v16si) __B,
213 (__v16si)
214 _mm512_setzero_si512 (),
215 (__mmask16) -1);
216}
217
218static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
219_mm512_max_epu32(__m512i __A, __m512i __B)
220{
221 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
222 (__v16si) __B,
223 (__v16si)
224 _mm512_setzero_si512 (),
225 (__mmask16) -1);
226}
227
228static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
229_mm512_max_epi64(__m512i __A, __m512i __B)
230{
231 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
232 (__v8di) __B,
233 (__v8di)
234 _mm512_setzero_si512 (),
235 (__mmask8) -1);
236}
237
238static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
239_mm512_max_epu64(__m512i __A, __m512i __B)
240{
241 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
242 (__v8di) __B,
243 (__v8di)
244 _mm512_setzero_si512 (),
245 (__mmask8) -1);
246}
247
248static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
249_mm512_min_pd(__m512d __A, __m512d __B)
250{
251 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
252 (__v8df) __B,
253 (__v8df)
254 _mm512_setzero_pd (),
255 (__mmask8) -1,
256 _MM_FROUND_CUR_DIRECTION);
257}
258
259static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
260_mm512_min_ps(__m512 __A, __m512 __B)
261{
262 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
263 (__v16sf) __B,
264 (__v16sf)
265 _mm512_setzero_ps (),
266 (__mmask16) -1,
267 _MM_FROUND_CUR_DIRECTION);
268}
269
270static __inline __m512i
271__attribute__ ((__always_inline__, __nodebug__))
272_mm512_min_epi32(__m512i __A, __m512i __B)
273{
274 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
275 (__v16si) __B,
276 (__v16si)
277 _mm512_setzero_si512 (),
278 (__mmask16) -1);
279}
280
281static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
282_mm512_min_epu32(__m512i __A, __m512i __B)
283{
284 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
285 (__v16si) __B,
286 (__v16si)
287 _mm512_setzero_si512 (),
288 (__mmask16) -1);
289}
290
291static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
292_mm512_min_epi64(__m512i __A, __m512i __B)
293{
294 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
295 (__v8di) __B,
296 (__v8di)
297 _mm512_setzero_si512 (),
298 (__mmask8) -1);
299}
300
301static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
302_mm512_min_epu64(__m512i __A, __m512i __B)
303{
304 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
305 (__v8di) __B,
306 (__v8di)
307 _mm512_setzero_si512 (),
308 (__mmask8) -1);
309}
310
311static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
312_mm512_mul_epi32(__m512i __X, __m512i __Y)
313{
314 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
315 (__v16si) __Y,
316 (__v8di)
317 _mm512_setzero_si512 (),
318 (__mmask8) -1);
319}
320
321static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
322_mm512_mul_epu32(__m512i __X, __m512i __Y)
323{
324 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
325 (__v16si) __Y,
326 (__v8di)
327 _mm512_setzero_si512 (),
328 (__mmask8) -1);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000329}
330
331static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
332_mm512_sqrt_pd(__m512d a)
333{
334 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
335 (__v8df) _mm512_setzero_pd (),
336 (__mmask8) -1,
337 _MM_FROUND_CUR_DIRECTION);
338}
339
340static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
341_mm512_sqrt_ps(__m512 a)
342{
343 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
344 (__v16sf) _mm512_setzero_ps (),
345 (__mmask16) -1,
346 _MM_FROUND_CUR_DIRECTION);
347}
348
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000349static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
350_mm512_rsqrt14_pd(__m512d __A)
351{
352 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
353 (__v8df)
354 _mm512_setzero_pd (),
355 (__mmask8) -1);}
356
357static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
358_mm512_rsqrt14_ps(__m512 __A)
359{
360 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
361 (__v16sf)
362 _mm512_setzero_ps (),
363 (__mmask16) -1);
364}
365
366static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
367_mm_rsqrt14_ss(__m128 __A, __m128 __B)
368{
369 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
370 (__v4sf) __B,
371 (__v4sf)
372 _mm_setzero_ps (),
373 (__mmask8) -1);
374}
375
376static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
377_mm_rsqrt14_sd(__m128d __A, __m128d __B)
378{
379 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
380 (__v2df) __B,
381 (__v2df)
382 _mm_setzero_pd (),
383 (__mmask8) -1);
384}
385
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000386static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
387_mm512_rcp14_pd(__m512d __A)
388{
389 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
390 (__v8df)
391 _mm512_setzero_pd (),
392 (__mmask8) -1);
393}
394
395static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
396_mm512_rcp14_ps(__m512 __A)
397{
398 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
399 (__v16sf)
400 _mm512_setzero_ps (),
401 (__mmask16) -1);
402}
403static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000404_mm_rcp14_ss(__m128 __A, __m128 __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000405{
406 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
407 (__v4sf) __B,
408 (__v4sf)
409 _mm_setzero_ps (),
410 (__mmask8) -1);
411}
412
413static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000414_mm_rcp14_sd(__m128d __A, __m128d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000415{
416 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
417 (__v2df) __B,
418 (__v2df)
419 _mm_setzero_pd (),
420 (__mmask8) -1);
421}
422
Adam Nemet0d5bb552014-07-28 17:14:40 +0000423static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
424_mm512_floor_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000425{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000426 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
427 _MM_FROUND_FLOOR,
428 (__v16sf) __A, -1,
429 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000430}
431
Adam Nemet0d5bb552014-07-28 17:14:40 +0000432static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
433_mm512_floor_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000434{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000435 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
436 _MM_FROUND_FLOOR,
437 (__v8df) __A, -1,
438 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000439}
440
Adam Nemet0d5bb552014-07-28 17:14:40 +0000441static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
442_mm512_ceil_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000443{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000444 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
445 _MM_FROUND_CEIL,
446 (__v16sf) __A, -1,
447 _MM_FROUND_CUR_DIRECTION);
448}
449
450static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
451_mm512_ceil_pd(__m512d __A)
452{
453 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
454 _MM_FROUND_CEIL,
455 (__v8df) __A, -1,
456 _MM_FROUND_CUR_DIRECTION);
457}
458
459static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
460_mm512_abs_epi64(__m512i __A)
461{
462 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
463 (__v8di)
464 _mm512_setzero_si512 (),
465 (__mmask8) -1);
466}
467
468static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
469_mm512_abs_epi32(__m512i __A)
470{
471 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
472 (__v16si)
473 _mm512_setzero_si512 (),
474 (__mmask16) -1);
475}
476
477static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
478_mm512_roundscale_ps(__m512 __A, const int __imm)
479{
480 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
481 (__v16sf) __A, -1,
482 _MM_FROUND_CUR_DIRECTION);
483}
484static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
485_mm512_roundscale_pd(__m512d __A, const int __imm)
486{
487 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
488 (__v8df) __A, -1,
489 _MM_FROUND_CUR_DIRECTION);
490}
491
492/* Vector permutations */
493
494static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
495_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
496{
497 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
498 /* idx */ ,
499 (__v16si) __A,
500 (__v16si) __B,
501 (__mmask16) -1);
502}
503static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
504_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
505{
506 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
507 /* idx */ ,
508 (__v8di) __A,
509 (__v8di) __B,
510 (__mmask8) -1);
511}
512
513static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
514_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
515{
516 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
517 /* idx */ ,
518 (__v8df) __A,
519 (__v8df) __B,
520 (__mmask8) -1);
521}
522static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
523_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
524{
525 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
526 /* idx */ ,
527 (__v16sf) __A,
528 (__v16sf) __B,
529 (__mmask16) -1);
530}
531
532/* Vector Blend */
533
534static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
535_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
536{
537 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
538 (__v8df) __W,
539 (__mmask8) __U);
540}
541
542static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
543_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
544{
545 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
546 (__v16sf) __W,
547 (__mmask16) __U);
548}
549
550static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
551_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
552{
553 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
554 (__v8di) __W,
555 (__mmask8) __U);
556}
557
558static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
559_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
560{
561 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
562 (__v16si) __W,
563 (__mmask16) __U);
564}
565
566/* Compare */
567
568static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
569_mm512_cmp_ps_mask(__m512 a, __m512 b, const int p)
570{
571 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a,
572 (__v16sf) b, p, (__mmask16) -1,
573 _MM_FROUND_CUR_DIRECTION);
574}
575
576static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
577_mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P)
578{
579 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
580 (__v8df) __Y, __P,
581 (__mmask8) -1,
582 _MM_FROUND_CUR_DIRECTION);
583}
584
585/* Conversion */
586
587static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
588_mm512_cvttps_epu32(__m512 __A)
589{
590 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
591 (__v16si)
592 _mm512_setzero_si512 (),
593 (__mmask16) -1,
594 _MM_FROUND_CUR_DIRECTION);
595}
596
597static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
598_mm512_cvt_roundepi32_ps(__m512i __A, const int __R)
599{
600 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
601 (__v16sf)
602 _mm512_setzero_ps (),
603 (__mmask16) -1,
604 __R);
605}
606
607static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
608_mm512_cvt_roundepu32_ps(__m512i __A, const int __R)
609{
610 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
611 (__v16sf)
612 _mm512_setzero_ps (),
613 (__mmask16) -1,
614 __R);
615}
616
617static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
618_mm512_cvtepi32_pd(__m256i __A)
619{
620 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
621 (__v8df)
622 _mm512_setzero_pd (),
623 (__mmask8) -1);
624}
625
626static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
627_mm512_cvtepu32_pd(__m256i __A)
628{
629 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
630 (__v8df)
631 _mm512_setzero_pd (),
632 (__mmask8) -1);
633}
634static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
635_mm512_cvt_roundpd_ps(__m512d __A, const int __R)
636{
637 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
638 (__v8sf)
639 _mm256_setzero_ps (),
640 (__mmask8) -1,
641 __R);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000642}
643
644static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000645_mm512_cvtps_ph(__m512 __A, const int __I)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000646{
647 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
648 __I,
649 (__v16hi)
650 _mm256_setzero_si256 (),
651 -1);
652}
653
654static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000655_mm512_cvtph_ps(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000656{
657 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
658 (__v16sf)
659 _mm512_setzero_ps (),
660 (__mmask16) -1,
661 _MM_FROUND_CUR_DIRECTION);
662}
663
664static __inline __m512i __attribute__((__always_inline__, __nodebug__))
665_mm512_cvttps_epi32(__m512 a)
666{
667 return (__m512i)
668 __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
669 (__v16si) _mm512_setzero_si512 (),
670 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
671}
672
673static __inline __m256i __attribute__((__always_inline__, __nodebug__))
674_mm512_cvttpd_epi32(__m512d a)
675{
676 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
677 (__v8si)_mm256_setzero_si256(),
678 (__mmask8) -1,
679 _MM_FROUND_CUR_DIRECTION);
680}
681
682static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000683_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000684{
685 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
686 (__v8si)
687 _mm256_setzero_si256 (),
688 (__mmask8) -1,
689 __R);
690}
691static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000692_mm512_cvtt_roundps_epi32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000693{
694 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
695 (__v16si)
696 _mm512_setzero_si512 (),
697 (__mmask16) -1,
698 __R);
699}
700
701static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000702_mm512_cvt_roundps_epi32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000703{
704 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
705 (__v16si)
706 _mm512_setzero_si512 (),
707 (__mmask16) -1,
708 __R);
709}
710static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000711_mm512_cvt_roundpd_epi32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000712{
713 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
714 (__v8si)
715 _mm256_setzero_si256 (),
716 (__mmask8) -1,
717 __R);
718}
719static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000720_mm512_cvt_roundps_epu32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000721{
722 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
723 (__v16si)
724 _mm512_setzero_si512 (),
725 (__mmask16) -1,
726 __R);
727}
728static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000729_mm512_cvt_roundpd_epu32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000730{
731 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
732 (__v8si)
733 _mm256_setzero_si256 (),
734 (__mmask8) -1,
735 __R);
736}
737
Adam Nemet0d5bb552014-07-28 17:14:40 +0000738/* Bit Test */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000739
740static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000741_mm512_test_epi32_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000742{
743 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
744 (__v16si) __B,
745 (__mmask16) -1);
746}
747
748static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000749_mm512_test_epi64_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000750{
751 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
752 (__v8di) __B,
753 (__mmask8) -1);
754}
755
Adam Nemet0d5bb552014-07-28 17:14:40 +0000756/* SIMD load ops */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000757
758static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000759_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000760{
761 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
762 (__v16si)
763 _mm512_setzero_si512 (),
764 (__mmask16) __U);
765}
766
767static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000768_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000769{
770 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
771 (__v8di)
772 _mm512_setzero_si512 (),
773 (__mmask8) __U);
774}
775
776static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000777_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000778{
779 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
780 (__v16sf)
781 _mm512_setzero_ps (),
782 (__mmask16) __U);
783}
784
785static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000786_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000787{
788 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
789 (__v8df)
790 _mm512_setzero_pd (),
791 (__mmask8) __U);
792}
793
Adam Nemetda82bcc2014-07-31 04:00:39 +0000794static __inline __m512d __attribute__((__always_inline__, __nodebug__))
795_mm512_loadu_pd(double const *__p)
796{
797 struct __loadu_pd {
798 __m512d __v;
799 } __attribute__((packed, may_alias));
800 return ((struct __loadu_pd*)__p)->__v;
801}
802
803static __inline __m512 __attribute__((__always_inline__, __nodebug__))
804_mm512_loadu_ps(float const *__p)
805{
806 struct __loadu_ps {
807 __m512 __v;
808 } __attribute__((packed, may_alias));
809 return ((struct __loadu_ps*)__p)->__v;
810}
811
Adam Nemet0d5bb552014-07-28 17:14:40 +0000812/* SIMD store ops */
813
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000814static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000815_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000816{
817 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
818 (__mmask8) __U);
819}
820
821static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000822_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000823{
824 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
825 (__mmask16) __U);
826}
827
828static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000829_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000830{
831 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
832}
833
834static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemetfce1ad02014-07-28 17:14:45 +0000835_mm512_storeu_pd(void *__P, __m512d __A)
836{
837 __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
838}
839
840static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000841_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000842{
843 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
844 (__mmask16) __U);
845}
846
Adam Nemetfce1ad02014-07-28 17:14:45 +0000847static __inline void __attribute__ ((__always_inline__, __nodebug__))
848_mm512_storeu_ps(void *__P, __m512 __A)
849{
850 __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
851}
852
853static __inline void __attribute__ ((__always_inline__, __nodebug__))
854_mm512_store_ps(void *__P, __m512 __A)
855{
856 *(__m512*)__P = __A;
857}
858
859static __inline void __attribute__ ((__always_inline__, __nodebug__))
860_mm512_store_pd(void *__P, __m512d __A)
861{
862 *(__m512d*)__P = __A;
863}
864
Adam Nemet2db1d2f2014-07-30 16:51:27 +0000865/* Mask ops */
866
867static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
868_mm512_knot(__mmask16 __M)
869{
870 return __builtin_ia32_knothi(__M);
871}
872
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000873#endif // __AVX512FINTRIN_H