blob: cebb0d5a0563b9f9567605281b137f6877135fe4 [file] [log] [blame]
/*===---- avx512fintrin.h - AVX-512F intrinsics ----------------------------===
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Internal element-typed 64-byte vectors; used when casting to and from the
   compiler builtins below. */
typedef int       __v16si __attribute__((__vector_size__(64)));
typedef long long __v8di  __attribute__((__vector_size__(64)));
typedef float     __v16sf __attribute__((__vector_size__(64)));
typedef double    __v8df  __attribute__((__vector_size__(64)));

/* Public 64-byte vector types: float, double and integer flavours. */
typedef long long __m512i __attribute__((__vector_size__(64)));
typedef float     __m512  __attribute__((__vector_size__(64)));
typedef double    __m512d __attribute__((__vector_size__(64)));

/* Write-mask types; in this file __mmask8 accompanies 8-element vectors and
   __mmask16 accompanies 16-element vectors. */
typedef unsigned short __mmask16;
typedef unsigned char  __mmask8;
41
42/* Rounding mode macros. */
/* NOTE(review): smmintrin.h appears to define these same names; keep the
   replacement tokens spelled exactly like this so that a repeated definition
   remains a compatible (benign) redefinition. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF     0x01
#define _MM_FROUND_TO_POS_INF     0x02
#define _MM_FROUND_TO_ZERO        0x03
/* Passed by the wrappers in this file that take no explicit rounding
   argument. */
#define _MM_FROUND_CUR_DIRECTION  0x04
48
Adam Nemet0d5bb552014-07-28 17:14:40 +000049/* Create vectors with repeated elements */
50
51static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
52_mm512_setzero_si512(void)
53{
54 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
55}
56
57static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
58_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
59{
60 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
61 (__v16si)
62 _mm512_setzero_si512 (),
63 __M);
64}
65
66static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
67_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
68{
69#ifdef __x86_64__
70 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
71 (__v8di)
72 _mm512_setzero_si512 (),
73 __M);
74#else
75 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
76 (__v8di)
77 _mm512_setzero_si512 (),
78 __M);
79#endif
80}
81
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000082static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000083_mm512_setzero_ps(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000084{
85 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
86 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
87}
88static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000089_mm512_setzero_pd(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000090{
91 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
92}
Adam Nemet0d5bb552014-07-28 17:14:40 +000093
94/* Arithmetic */
95
Adam Nemeta3ebe622014-07-28 17:14:42 +000096static __inline __m512d __attribute__((__always_inline__, __nodebug__))
97_mm512_add_pd(__m512d __a, __m512d __b)
98{
99 return __a + __b;
100}
101
102static __inline __m512 __attribute__((__always_inline__, __nodebug__))
103_mm512_add_ps(__m512 __a, __m512 __b)
104{
105 return __a + __b;
106}
107
108static __inline __m512d __attribute__((__always_inline__, __nodebug__))
109_mm512_mul_pd(__m512d __a, __m512d __b)
110{
111 return __a * __b;
112}
113
114static __inline __m512 __attribute__((__always_inline__, __nodebug__))
115_mm512_mul_ps(__m512 __a, __m512 __b)
116{
117 return __a * __b;
118}
119
120static __inline __m512d __attribute__((__always_inline__, __nodebug__))
121_mm512_sub_pd(__m512d __a, __m512d __b)
122{
123 return __a - __b;
124}
125
126static __inline __m512 __attribute__((__always_inline__, __nodebug__))
127_mm512_sub_ps(__m512 __a, __m512 __b)
128{
129 return __a - __b;
130}
131
Adam Nemet0d5bb552014-07-28 17:14:40 +0000132static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
133_mm512_max_pd(__m512d __A, __m512d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000134{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000135 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
136 (__v8df) __B,
137 (__v8df)
138 _mm512_setzero_pd (),
139 (__mmask8) -1,
140 _MM_FROUND_CUR_DIRECTION);
141}
142
143static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
144_mm512_max_ps(__m512 __A, __m512 __B)
145{
146 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
147 (__v16sf) __B,
148 (__v16sf)
149 _mm512_setzero_ps (),
150 (__mmask16) -1,
151 _MM_FROUND_CUR_DIRECTION);
152}
153
154static __inline __m512i
155__attribute__ ((__always_inline__, __nodebug__))
156_mm512_max_epi32(__m512i __A, __m512i __B)
157{
158 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
159 (__v16si) __B,
160 (__v16si)
161 _mm512_setzero_si512 (),
162 (__mmask16) -1);
163}
164
165static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
166_mm512_max_epu32(__m512i __A, __m512i __B)
167{
168 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
169 (__v16si) __B,
170 (__v16si)
171 _mm512_setzero_si512 (),
172 (__mmask16) -1);
173}
174
175static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
176_mm512_max_epi64(__m512i __A, __m512i __B)
177{
178 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
179 (__v8di) __B,
180 (__v8di)
181 _mm512_setzero_si512 (),
182 (__mmask8) -1);
183}
184
185static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
186_mm512_max_epu64(__m512i __A, __m512i __B)
187{
188 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
189 (__v8di) __B,
190 (__v8di)
191 _mm512_setzero_si512 (),
192 (__mmask8) -1);
193}
194
195static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
196_mm512_min_pd(__m512d __A, __m512d __B)
197{
198 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
199 (__v8df) __B,
200 (__v8df)
201 _mm512_setzero_pd (),
202 (__mmask8) -1,
203 _MM_FROUND_CUR_DIRECTION);
204}
205
206static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
207_mm512_min_ps(__m512 __A, __m512 __B)
208{
209 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
210 (__v16sf) __B,
211 (__v16sf)
212 _mm512_setzero_ps (),
213 (__mmask16) -1,
214 _MM_FROUND_CUR_DIRECTION);
215}
216
217static __inline __m512i
218__attribute__ ((__always_inline__, __nodebug__))
219_mm512_min_epi32(__m512i __A, __m512i __B)
220{
221 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
222 (__v16si) __B,
223 (__v16si)
224 _mm512_setzero_si512 (),
225 (__mmask16) -1);
226}
227
228static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
229_mm512_min_epu32(__m512i __A, __m512i __B)
230{
231 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
232 (__v16si) __B,
233 (__v16si)
234 _mm512_setzero_si512 (),
235 (__mmask16) -1);
236}
237
238static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
239_mm512_min_epi64(__m512i __A, __m512i __B)
240{
241 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
242 (__v8di) __B,
243 (__v8di)
244 _mm512_setzero_si512 (),
245 (__mmask8) -1);
246}
247
248static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
249_mm512_min_epu64(__m512i __A, __m512i __B)
250{
251 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
252 (__v8di) __B,
253 (__v8di)
254 _mm512_setzero_si512 (),
255 (__mmask8) -1);
256}
257
258static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
259_mm512_mul_epi32(__m512i __X, __m512i __Y)
260{
261 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
262 (__v16si) __Y,
263 (__v8di)
264 _mm512_setzero_si512 (),
265 (__mmask8) -1);
266}
267
268static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
269_mm512_mul_epu32(__m512i __X, __m512i __Y)
270{
271 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
272 (__v16si) __Y,
273 (__v8di)
274 _mm512_setzero_si512 (),
275 (__mmask8) -1);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000276}
277
278static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
279_mm512_sqrt_pd(__m512d a)
280{
281 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
282 (__v8df) _mm512_setzero_pd (),
283 (__mmask8) -1,
284 _MM_FROUND_CUR_DIRECTION);
285}
286
287static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
288_mm512_sqrt_ps(__m512 a)
289{
290 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
291 (__v16sf) _mm512_setzero_ps (),
292 (__mmask16) -1,
293 _MM_FROUND_CUR_DIRECTION);
294}
295
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000296static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
297_mm512_rsqrt14_pd(__m512d __A)
298{
299 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
300 (__v8df)
301 _mm512_setzero_pd (),
302 (__mmask8) -1);}
303
304static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
305_mm512_rsqrt14_ps(__m512 __A)
306{
307 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
308 (__v16sf)
309 _mm512_setzero_ps (),
310 (__mmask16) -1);
311}
312
313static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
314_mm_rsqrt14_ss(__m128 __A, __m128 __B)
315{
316 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
317 (__v4sf) __B,
318 (__v4sf)
319 _mm_setzero_ps (),
320 (__mmask8) -1);
321}
322
323static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
324_mm_rsqrt14_sd(__m128d __A, __m128d __B)
325{
326 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
327 (__v2df) __B,
328 (__v2df)
329 _mm_setzero_pd (),
330 (__mmask8) -1);
331}
332
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000333static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
334_mm512_rcp14_pd(__m512d __A)
335{
336 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
337 (__v8df)
338 _mm512_setzero_pd (),
339 (__mmask8) -1);
340}
341
342static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
343_mm512_rcp14_ps(__m512 __A)
344{
345 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
346 (__v16sf)
347 _mm512_setzero_ps (),
348 (__mmask16) -1);
349}
350static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000351_mm_rcp14_ss(__m128 __A, __m128 __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000352{
353 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
354 (__v4sf) __B,
355 (__v4sf)
356 _mm_setzero_ps (),
357 (__mmask8) -1);
358}
359
360static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000361_mm_rcp14_sd(__m128d __A, __m128d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000362{
363 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
364 (__v2df) __B,
365 (__v2df)
366 _mm_setzero_pd (),
367 (__mmask8) -1);
368}
369
Adam Nemet0d5bb552014-07-28 17:14:40 +0000370static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
371_mm512_floor_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000372{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000373 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
374 _MM_FROUND_FLOOR,
375 (__v16sf) __A, -1,
376 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000377}
378
Adam Nemet0d5bb552014-07-28 17:14:40 +0000379static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
380_mm512_floor_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000381{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000382 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
383 _MM_FROUND_FLOOR,
384 (__v8df) __A, -1,
385 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000386}
387
Adam Nemet0d5bb552014-07-28 17:14:40 +0000388static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
389_mm512_ceil_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000390{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000391 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
392 _MM_FROUND_CEIL,
393 (__v16sf) __A, -1,
394 _MM_FROUND_CUR_DIRECTION);
395}
396
397static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
398_mm512_ceil_pd(__m512d __A)
399{
400 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
401 _MM_FROUND_CEIL,
402 (__v8df) __A, -1,
403 _MM_FROUND_CUR_DIRECTION);
404}
405
406static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
407_mm512_abs_epi64(__m512i __A)
408{
409 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
410 (__v8di)
411 _mm512_setzero_si512 (),
412 (__mmask8) -1);
413}
414
415static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
416_mm512_abs_epi32(__m512i __A)
417{
418 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
419 (__v16si)
420 _mm512_setzero_si512 (),
421 (__mmask16) -1);
422}
423
424static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
425_mm512_roundscale_ps(__m512 __A, const int __imm)
426{
427 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
428 (__v16sf) __A, -1,
429 _MM_FROUND_CUR_DIRECTION);
430}
431static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
432_mm512_roundscale_pd(__m512d __A, const int __imm)
433{
434 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
435 (__v8df) __A, -1,
436 _MM_FROUND_CUR_DIRECTION);
437}
438
439/* Vector permutations */
440
441static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
442_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
443{
444 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
445 /* idx */ ,
446 (__v16si) __A,
447 (__v16si) __B,
448 (__mmask16) -1);
449}
450static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
451_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
452{
453 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
454 /* idx */ ,
455 (__v8di) __A,
456 (__v8di) __B,
457 (__mmask8) -1);
458}
459
460static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
461_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
462{
463 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
464 /* idx */ ,
465 (__v8df) __A,
466 (__v8df) __B,
467 (__mmask8) -1);
468}
469static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
470_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
471{
472 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
473 /* idx */ ,
474 (__v16sf) __A,
475 (__v16sf) __B,
476 (__mmask16) -1);
477}
478
479/* Vector Blend */
480
481static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
482_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
483{
484 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
485 (__v8df) __W,
486 (__mmask8) __U);
487}
488
489static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
490_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
491{
492 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
493 (__v16sf) __W,
494 (__mmask16) __U);
495}
496
497static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
498_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
499{
500 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
501 (__v8di) __W,
502 (__mmask8) __U);
503}
504
505static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
506_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
507{
508 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
509 (__v16si) __W,
510 (__mmask16) __U);
511}
512
513/* Compare */
514
515static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
516_mm512_cmp_ps_mask(__m512 a, __m512 b, const int p)
517{
518 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a,
519 (__v16sf) b, p, (__mmask16) -1,
520 _MM_FROUND_CUR_DIRECTION);
521}
522
523static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
524_mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P)
525{
526 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
527 (__v8df) __Y, __P,
528 (__mmask8) -1,
529 _MM_FROUND_CUR_DIRECTION);
530}
531
532/* Conversion */
533
534static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
535_mm512_cvttps_epu32(__m512 __A)
536{
537 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
538 (__v16si)
539 _mm512_setzero_si512 (),
540 (__mmask16) -1,
541 _MM_FROUND_CUR_DIRECTION);
542}
543
544static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
545_mm512_cvt_roundepi32_ps(__m512i __A, const int __R)
546{
547 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
548 (__v16sf)
549 _mm512_setzero_ps (),
550 (__mmask16) -1,
551 __R);
552}
553
554static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
555_mm512_cvt_roundepu32_ps(__m512i __A, const int __R)
556{
557 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
558 (__v16sf)
559 _mm512_setzero_ps (),
560 (__mmask16) -1,
561 __R);
562}
563
564static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
565_mm512_cvtepi32_pd(__m256i __A)
566{
567 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
568 (__v8df)
569 _mm512_setzero_pd (),
570 (__mmask8) -1);
571}
572
573static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
574_mm512_cvtepu32_pd(__m256i __A)
575{
576 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
577 (__v8df)
578 _mm512_setzero_pd (),
579 (__mmask8) -1);
580}
581static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
582_mm512_cvt_roundpd_ps(__m512d __A, const int __R)
583{
584 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
585 (__v8sf)
586 _mm256_setzero_ps (),
587 (__mmask8) -1,
588 __R);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000589}
590
591static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000592_mm512_cvtps_ph(__m512 __A, const int __I)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000593{
594 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
595 __I,
596 (__v16hi)
597 _mm256_setzero_si256 (),
598 -1);
599}
600
601static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000602_mm512_cvtph_ps(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000603{
604 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
605 (__v16sf)
606 _mm512_setzero_ps (),
607 (__mmask16) -1,
608 _MM_FROUND_CUR_DIRECTION);
609}
610
611static __inline __m512i __attribute__((__always_inline__, __nodebug__))
612_mm512_cvttps_epi32(__m512 a)
613{
614 return (__m512i)
615 __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
616 (__v16si) _mm512_setzero_si512 (),
617 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
618}
619
620static __inline __m256i __attribute__((__always_inline__, __nodebug__))
621_mm512_cvttpd_epi32(__m512d a)
622{
623 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
624 (__v8si)_mm256_setzero_si256(),
625 (__mmask8) -1,
626 _MM_FROUND_CUR_DIRECTION);
627}
628
629static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000630_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000631{
632 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
633 (__v8si)
634 _mm256_setzero_si256 (),
635 (__mmask8) -1,
636 __R);
637}
638static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000639_mm512_cvtt_roundps_epi32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000640{
641 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
642 (__v16si)
643 _mm512_setzero_si512 (),
644 (__mmask16) -1,
645 __R);
646}
647
648static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000649_mm512_cvt_roundps_epi32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000650{
651 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
652 (__v16si)
653 _mm512_setzero_si512 (),
654 (__mmask16) -1,
655 __R);
656}
657static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000658_mm512_cvt_roundpd_epi32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000659{
660 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
661 (__v8si)
662 _mm256_setzero_si256 (),
663 (__mmask8) -1,
664 __R);
665}
666static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000667_mm512_cvt_roundps_epu32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000668{
669 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
670 (__v16si)
671 _mm512_setzero_si512 (),
672 (__mmask16) -1,
673 __R);
674}
675static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000676_mm512_cvt_roundpd_epu32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000677{
678 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
679 (__v8si)
680 _mm256_setzero_si256 (),
681 (__mmask8) -1,
682 __R);
683}
684
Adam Nemet0d5bb552014-07-28 17:14:40 +0000685/* Bit Test */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000686
687static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000688_mm512_test_epi32_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000689{
690 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
691 (__v16si) __B,
692 (__mmask16) -1);
693}
694
695static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000696_mm512_test_epi64_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000697{
698 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
699 (__v8di) __B,
700 (__mmask8) -1);
701}
702
Adam Nemet0d5bb552014-07-28 17:14:40 +0000703/* SIMD load ops */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000704
705static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000706_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000707{
708 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
709 (__v16si)
710 _mm512_setzero_si512 (),
711 (__mmask16) __U);
712}
713
714static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000715_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000716{
717 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
718 (__v8di)
719 _mm512_setzero_si512 (),
720 (__mmask8) __U);
721}
722
723static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000724_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000725{
726 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
727 (__v16sf)
728 _mm512_setzero_ps (),
729 (__mmask16) __U);
730}
731
732static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000733_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000734{
735 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
736 (__v8df)
737 _mm512_setzero_pd (),
738 (__mmask8) __U);
739}
740
Adam Nemet0d5bb552014-07-28 17:14:40 +0000741/* SIMD store ops */
742
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000743static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000744_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000745{
746 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
747 (__mmask8) __U);
748}
749
750static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000751_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000752{
753 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
754 (__mmask16) __U);
755}
756
757static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000758_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000759{
760 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
761}
762
763static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemetfce1ad02014-07-28 17:14:45 +0000764_mm512_storeu_pd(void *__P, __m512d __A)
765{
766 __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
767}
768
769static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000770_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000771{
772 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
773 (__mmask16) __U);
774}
775
Adam Nemetfce1ad02014-07-28 17:14:45 +0000776static __inline void __attribute__ ((__always_inline__, __nodebug__))
777_mm512_storeu_ps(void *__P, __m512 __A)
778{
779 __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
780}
781
782static __inline void __attribute__ ((__always_inline__, __nodebug__))
783_mm512_store_ps(void *__P, __m512 __A)
784{
785 *(__m512*)__P = __A;
786}
787
788static __inline void __attribute__ ((__always_inline__, __nodebug__))
789_mm512_store_pd(void *__P, __m512d __A)
790{
791 *(__m512d*)__P = __A;
792}
793
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000794#endif // __AVX512FINTRIN_H