blob: 9ea5da9e343de1f10e5dd4915956e4aec3a96d7c [file] [log] [blame]
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Internal element-typed views of a 64-byte vector, used for casts when
   calling the builtins in this header. */
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Public 64-byte vector types: float, double and integer flavours. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Mask types passed to the *_mask builtins. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
41
/* Rounding mode macros.  _MM_FROUND_CUR_DIRECTION is the value this header
   passes as the trailing rounding argument of builtins when the caller does
   not supply one. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04
48
/* Create vectors with repeated elements */
50
51static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
52_mm512_setzero_si512(void)
53{
54 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
55}
56
57static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
58_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
59{
60 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
61 (__v16si)
62 _mm512_setzero_si512 (),
63 __M);
64}
65
66static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
67_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
68{
69#ifdef __x86_64__
70 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
71 (__v8di)
72 _mm512_setzero_si512 (),
73 __M);
74#else
75 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
76 (__v8di)
77 _mm512_setzero_si512 (),
78 __M);
79#endif
80}
81
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000082static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000083_mm512_setzero_ps(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000084{
85 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
86 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
87}
88static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +000089_mm512_setzero_pd(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000090{
91 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
92}
Adam Nemet0d5bb552014-07-28 17:14:40 +000093
/* Arithmetic */
95
96static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
97_mm512_max_pd(__m512d __A, __m512d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000098{
Adam Nemet0d5bb552014-07-28 17:14:40 +000099 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
100 (__v8df) __B,
101 (__v8df)
102 _mm512_setzero_pd (),
103 (__mmask8) -1,
104 _MM_FROUND_CUR_DIRECTION);
105}
106
107static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
108_mm512_max_ps(__m512 __A, __m512 __B)
109{
110 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
111 (__v16sf) __B,
112 (__v16sf)
113 _mm512_setzero_ps (),
114 (__mmask16) -1,
115 _MM_FROUND_CUR_DIRECTION);
116}
117
118static __inline __m512i
119__attribute__ ((__always_inline__, __nodebug__))
120_mm512_max_epi32(__m512i __A, __m512i __B)
121{
122 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
123 (__v16si) __B,
124 (__v16si)
125 _mm512_setzero_si512 (),
126 (__mmask16) -1);
127}
128
129static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
130_mm512_max_epu32(__m512i __A, __m512i __B)
131{
132 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
133 (__v16si) __B,
134 (__v16si)
135 _mm512_setzero_si512 (),
136 (__mmask16) -1);
137}
138
139static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
140_mm512_max_epi64(__m512i __A, __m512i __B)
141{
142 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
143 (__v8di) __B,
144 (__v8di)
145 _mm512_setzero_si512 (),
146 (__mmask8) -1);
147}
148
149static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
150_mm512_max_epu64(__m512i __A, __m512i __B)
151{
152 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
153 (__v8di) __B,
154 (__v8di)
155 _mm512_setzero_si512 (),
156 (__mmask8) -1);
157}
158
159static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
160_mm512_min_pd(__m512d __A, __m512d __B)
161{
162 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
163 (__v8df) __B,
164 (__v8df)
165 _mm512_setzero_pd (),
166 (__mmask8) -1,
167 _MM_FROUND_CUR_DIRECTION);
168}
169
170static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
171_mm512_min_ps(__m512 __A, __m512 __B)
172{
173 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
174 (__v16sf) __B,
175 (__v16sf)
176 _mm512_setzero_ps (),
177 (__mmask16) -1,
178 _MM_FROUND_CUR_DIRECTION);
179}
180
181static __inline __m512i
182__attribute__ ((__always_inline__, __nodebug__))
183_mm512_min_epi32(__m512i __A, __m512i __B)
184{
185 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
186 (__v16si) __B,
187 (__v16si)
188 _mm512_setzero_si512 (),
189 (__mmask16) -1);
190}
191
192static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
193_mm512_min_epu32(__m512i __A, __m512i __B)
194{
195 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
196 (__v16si) __B,
197 (__v16si)
198 _mm512_setzero_si512 (),
199 (__mmask16) -1);
200}
201
202static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
203_mm512_min_epi64(__m512i __A, __m512i __B)
204{
205 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
206 (__v8di) __B,
207 (__v8di)
208 _mm512_setzero_si512 (),
209 (__mmask8) -1);
210}
211
212static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
213_mm512_min_epu64(__m512i __A, __m512i __B)
214{
215 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
216 (__v8di) __B,
217 (__v8di)
218 _mm512_setzero_si512 (),
219 (__mmask8) -1);
220}
221
222static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
223_mm512_mul_epi32(__m512i __X, __m512i __Y)
224{
225 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
226 (__v16si) __Y,
227 (__v8di)
228 _mm512_setzero_si512 (),
229 (__mmask8) -1);
230}
231
232static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
233_mm512_mul_epu32(__m512i __X, __m512i __Y)
234{
235 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
236 (__v16si) __Y,
237 (__v8di)
238 _mm512_setzero_si512 (),
239 (__mmask8) -1);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000240}
241
242static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
243_mm512_sqrt_pd(__m512d a)
244{
245 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
246 (__v8df) _mm512_setzero_pd (),
247 (__mmask8) -1,
248 _MM_FROUND_CUR_DIRECTION);
249}
250
251static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
252_mm512_sqrt_ps(__m512 a)
253{
254 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
255 (__v16sf) _mm512_setzero_ps (),
256 (__mmask16) -1,
257 _MM_FROUND_CUR_DIRECTION);
258}
259
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000260static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
261_mm512_rsqrt14_pd(__m512d __A)
262{
263 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
264 (__v8df)
265 _mm512_setzero_pd (),
266 (__mmask8) -1);}
267
268static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
269_mm512_rsqrt14_ps(__m512 __A)
270{
271 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
272 (__v16sf)
273 _mm512_setzero_ps (),
274 (__mmask16) -1);
275}
276
277static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
278_mm_rsqrt14_ss(__m128 __A, __m128 __B)
279{
280 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
281 (__v4sf) __B,
282 (__v4sf)
283 _mm_setzero_ps (),
284 (__mmask8) -1);
285}
286
287static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
288_mm_rsqrt14_sd(__m128d __A, __m128d __B)
289{
290 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
291 (__v2df) __B,
292 (__v2df)
293 _mm_setzero_pd (),
294 (__mmask8) -1);
295}
296
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000297static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
298_mm512_rcp14_pd(__m512d __A)
299{
300 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
301 (__v8df)
302 _mm512_setzero_pd (),
303 (__mmask8) -1);
304}
305
306static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
307_mm512_rcp14_ps(__m512 __A)
308{
309 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
310 (__v16sf)
311 _mm512_setzero_ps (),
312 (__mmask16) -1);
313}
314static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000315_mm_rcp14_ss(__m128 __A, __m128 __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000316{
317 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
318 (__v4sf) __B,
319 (__v4sf)
320 _mm_setzero_ps (),
321 (__mmask8) -1);
322}
323
324static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000325_mm_rcp14_sd(__m128d __A, __m128d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000326{
327 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
328 (__v2df) __B,
329 (__v2df)
330 _mm_setzero_pd (),
331 (__mmask8) -1);
332}
333
Adam Nemet0d5bb552014-07-28 17:14:40 +0000334static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
335_mm512_floor_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000336{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000337 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
338 _MM_FROUND_FLOOR,
339 (__v16sf) __A, -1,
340 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000341}
342
Adam Nemet0d5bb552014-07-28 17:14:40 +0000343static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
344_mm512_floor_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000345{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000346 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
347 _MM_FROUND_FLOOR,
348 (__v8df) __A, -1,
349 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000350}
351
Adam Nemet0d5bb552014-07-28 17:14:40 +0000352static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
353_mm512_ceil_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000354{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000355 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
356 _MM_FROUND_CEIL,
357 (__v16sf) __A, -1,
358 _MM_FROUND_CUR_DIRECTION);
359}
360
361static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
362_mm512_ceil_pd(__m512d __A)
363{
364 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
365 _MM_FROUND_CEIL,
366 (__v8df) __A, -1,
367 _MM_FROUND_CUR_DIRECTION);
368}
369
370static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
371_mm512_abs_epi64(__m512i __A)
372{
373 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
374 (__v8di)
375 _mm512_setzero_si512 (),
376 (__mmask8) -1);
377}
378
379static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
380_mm512_abs_epi32(__m512i __A)
381{
382 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
383 (__v16si)
384 _mm512_setzero_si512 (),
385 (__mmask16) -1);
386}
387
388static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
389_mm512_roundscale_ps(__m512 __A, const int __imm)
390{
391 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
392 (__v16sf) __A, -1,
393 _MM_FROUND_CUR_DIRECTION);
394}
395static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
396_mm512_roundscale_pd(__m512d __A, const int __imm)
397{
398 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
399 (__v8df) __A, -1,
400 _MM_FROUND_CUR_DIRECTION);
401}
402
/* Vector permutations */
404
405static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
406_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
407{
408 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
409 /* idx */ ,
410 (__v16si) __A,
411 (__v16si) __B,
412 (__mmask16) -1);
413}
414static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
415_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
416{
417 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
418 /* idx */ ,
419 (__v8di) __A,
420 (__v8di) __B,
421 (__mmask8) -1);
422}
423
424static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
425_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
426{
427 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
428 /* idx */ ,
429 (__v8df) __A,
430 (__v8df) __B,
431 (__mmask8) -1);
432}
433static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
434_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
435{
436 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
437 /* idx */ ,
438 (__v16sf) __A,
439 (__v16sf) __B,
440 (__mmask16) -1);
441}
442
/* Vector Blend */
444
445static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
446_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
447{
448 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
449 (__v8df) __W,
450 (__mmask8) __U);
451}
452
453static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
454_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
455{
456 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
457 (__v16sf) __W,
458 (__mmask16) __U);
459}
460
461static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
462_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
463{
464 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
465 (__v8di) __W,
466 (__mmask8) __U);
467}
468
469static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
470_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
471{
472 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
473 (__v16si) __W,
474 (__mmask16) __U);
475}
476
/* Compare */
478
479static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
480_mm512_cmp_ps_mask(__m512 a, __m512 b, const int p)
481{
482 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a,
483 (__v16sf) b, p, (__mmask16) -1,
484 _MM_FROUND_CUR_DIRECTION);
485}
486
487static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
488_mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P)
489{
490 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
491 (__v8df) __Y, __P,
492 (__mmask8) -1,
493 _MM_FROUND_CUR_DIRECTION);
494}
495
/* Conversion */
497
498static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
499_mm512_cvttps_epu32(__m512 __A)
500{
501 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
502 (__v16si)
503 _mm512_setzero_si512 (),
504 (__mmask16) -1,
505 _MM_FROUND_CUR_DIRECTION);
506}
507
508static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
509_mm512_cvt_roundepi32_ps(__m512i __A, const int __R)
510{
511 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
512 (__v16sf)
513 _mm512_setzero_ps (),
514 (__mmask16) -1,
515 __R);
516}
517
518static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
519_mm512_cvt_roundepu32_ps(__m512i __A, const int __R)
520{
521 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
522 (__v16sf)
523 _mm512_setzero_ps (),
524 (__mmask16) -1,
525 __R);
526}
527
528static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
529_mm512_cvtepi32_pd(__m256i __A)
530{
531 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
532 (__v8df)
533 _mm512_setzero_pd (),
534 (__mmask8) -1);
535}
536
537static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
538_mm512_cvtepu32_pd(__m256i __A)
539{
540 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
541 (__v8df)
542 _mm512_setzero_pd (),
543 (__mmask8) -1);
544}
545static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
546_mm512_cvt_roundpd_ps(__m512d __A, const int __R)
547{
548 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
549 (__v8sf)
550 _mm256_setzero_ps (),
551 (__mmask8) -1,
552 __R);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000553}
554
555static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000556_mm512_cvtps_ph(__m512 __A, const int __I)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000557{
558 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
559 __I,
560 (__v16hi)
561 _mm256_setzero_si256 (),
562 -1);
563}
564
565static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000566_mm512_cvtph_ps(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000567{
568 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
569 (__v16sf)
570 _mm512_setzero_ps (),
571 (__mmask16) -1,
572 _MM_FROUND_CUR_DIRECTION);
573}
574
575static __inline __m512i __attribute__((__always_inline__, __nodebug__))
576_mm512_cvttps_epi32(__m512 a)
577{
578 return (__m512i)
579 __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
580 (__v16si) _mm512_setzero_si512 (),
581 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
582}
583
584static __inline __m256i __attribute__((__always_inline__, __nodebug__))
585_mm512_cvttpd_epi32(__m512d a)
586{
587 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
588 (__v8si)_mm256_setzero_si256(),
589 (__mmask8) -1,
590 _MM_FROUND_CUR_DIRECTION);
591}
592
593static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000594_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000595{
596 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
597 (__v8si)
598 _mm256_setzero_si256 (),
599 (__mmask8) -1,
600 __R);
601}
602static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000603_mm512_cvtt_roundps_epi32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000604{
605 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
606 (__v16si)
607 _mm512_setzero_si512 (),
608 (__mmask16) -1,
609 __R);
610}
611
612static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000613_mm512_cvt_roundps_epi32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000614{
615 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
616 (__v16si)
617 _mm512_setzero_si512 (),
618 (__mmask16) -1,
619 __R);
620}
621static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000622_mm512_cvt_roundpd_epi32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000623{
624 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
625 (__v8si)
626 _mm256_setzero_si256 (),
627 (__mmask8) -1,
628 __R);
629}
630static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000631_mm512_cvt_roundps_epu32(__m512 __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000632{
633 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
634 (__v16si)
635 _mm512_setzero_si512 (),
636 (__mmask16) -1,
637 __R);
638}
639static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000640_mm512_cvt_roundpd_epu32(__m512d __A, const int __R)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000641{
642 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
643 (__v8si)
644 _mm256_setzero_si256 (),
645 (__mmask8) -1,
646 __R);
647}
648
/* Bit Test */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000650
651static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000652_mm512_test_epi32_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000653{
654 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
655 (__v16si) __B,
656 (__mmask16) -1);
657}
658
659static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000660_mm512_test_epi64_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000661{
662 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
663 (__v8di) __B,
664 (__mmask8) -1);
665}
666
/* SIMD load ops */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000668
669static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000670_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000671{
672 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
673 (__v16si)
674 _mm512_setzero_si512 (),
675 (__mmask16) __U);
676}
677
678static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000679_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000680{
681 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
682 (__v8di)
683 _mm512_setzero_si512 (),
684 (__mmask8) __U);
685}
686
687static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000688_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000689{
690 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
691 (__v16sf)
692 _mm512_setzero_ps (),
693 (__mmask16) __U);
694}
695
696static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000697_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000698{
699 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
700 (__v8df)
701 _mm512_setzero_pd (),
702 (__mmask8) __U);
703}
704
/* SIMD store ops */
706
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000707static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000708_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000709{
710 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
711 (__mmask8) __U);
712}
713
714static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000715_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000716{
717 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
718 (__mmask16) __U);
719}
720
721static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000722_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000723{
724 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
725}
726
727static __inline void __attribute__ ((__always_inline__, __nodebug__))
Adam Nemet9a3ea602014-07-28 17:14:38 +0000728_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000729{
730 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
731 (__mmask16) __U);
732}
733
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000734#endif // __AVX512FINTRIN_H