blob: b6188831e77c075b17c05e71d2837e6449802842 [file] [log] [blame]
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Element-typed 64-byte vectors, used inside this header for casts. */
typedef double    __v8df  __attribute__((__vector_size__(64)));
typedef float     __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di  __attribute__((__vector_size__(64)));
typedef int       __v16si __attribute__((__vector_size__(64)));

/* Public 64-byte vector types: float, double and integer flavours. */
typedef float     __m512  __attribute__((__vector_size__(64)));
typedef double    __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Mask types passed as the mask operand of the masked builtins. */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
41
/* Rounding mode macros.  _MM_FROUND_CUR_DIRECTION is the value the
 * non-"_round" wrappers in this file pass as the final builtin argument. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
48
/* Four-letter permutation selectors.  Each letter A..D stands for the digit
 * 0..3; the four letters read left to right form a base-4 number, most
 * significant digit first, giving the values 0x00 (AAAA) .. 0xFF (DDDD).
 * One row below lists the four values sharing the same first three letters. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
138
/* Mantissa normalization interval selectors (values 0..3, in this order). */
typedef enum
{
  _MM_MANT_NORM_1_2     = 0, /* interval [1, 2) */
  _MM_MANT_NORM_p5_2    = 1, /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1    = 2, /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3  /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
146
/* Mantissa sign-control selectors (values 0..2, in this order). */
typedef enum
{
  _MM_MANT_SIGN_src  = 0, /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1, /* sign = 0 */
  _MM_MANT_SIGN_nan  = 2  /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
153
/* Default attributes for the functions in this file: always inlined, no debug
 * info, and compiled for the "avx512f" target feature. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
Eric Christopher4d1851682015-06-17 07:09:20 +0000156
Adam Nemet0d5bb552014-07-28 17:14:40 +0000157/* Create vectors with repeated elements */
158
Michael Kupersteine45af542015-06-30 13:36:19 +0000159static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000160_mm512_setzero_si512(void)
161{
162 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
163}
164
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000165static __inline__ __m512d __DEFAULT_FN_ATTRS
166_mm512_undefined_pd()
167{
168 return (__m512d)__builtin_ia32_undef512();
169}
170
171static __inline__ __m512 __DEFAULT_FN_ATTRS
172_mm512_undefined()
173{
174 return (__m512)__builtin_ia32_undef512();
175}
176
177static __inline__ __m512 __DEFAULT_FN_ATTRS
178_mm512_undefined_ps()
179{
180 return (__m512)__builtin_ia32_undef512();
181}
182
183static __inline__ __m512i __DEFAULT_FN_ATTRS
184_mm512_undefined_epi32()
185{
186 return (__m512i)__builtin_ia32_undef512();
187}
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000188static __inline__ __m512i __DEFAULT_FN_ATTRS
189_mm512_broadcastd_epi32 (__m128i __A)
190{
191 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
192 (__v16si)
193 _mm512_undefined_epi32 (),
194 (__mmask16) -1);
195}
196
197static __inline__ __m512i __DEFAULT_FN_ATTRS
198_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
199{
200 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
201 (__v16si) __O, __M);
202}
203
204static __inline__ __m512i __DEFAULT_FN_ATTRS
205_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
206{
207 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
208 (__v16si)
209 _mm512_setzero_si512 (),
210 __M);
211}
212
213static __inline__ __m512i __DEFAULT_FN_ATTRS
214_mm512_broadcastq_epi64 (__m128i __A)
215{
216 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
217 (__v8di)
218 _mm512_undefined_pd (),
219 (__mmask8) -1);
220}
221
222static __inline__ __m512i __DEFAULT_FN_ATTRS
223_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
224{
225 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
226 (__v8di) __O, __M);
227}
228
229static __inline__ __m512i __DEFAULT_FN_ATTRS
230_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
231{
232 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
233 (__v8di)
234 _mm512_setzero_si512 (),
235 __M);
236}
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000237
Michael Kupersteine45af542015-06-30 13:36:19 +0000238static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000239_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
240{
241 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
242 (__v16si)
243 _mm512_setzero_si512 (),
244 __M);
245}
246
Michael Kupersteine45af542015-06-30 13:36:19 +0000247static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000248_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
249{
250#ifdef __x86_64__
251 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
252 (__v8di)
253 _mm512_setzero_si512 (),
254 __M);
255#else
256 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
257 (__v8di)
258 _mm512_setzero_si512 (),
259 __M);
260#endif
261}
262
Michael Kupersteine45af542015-06-30 13:36:19 +0000263static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +0000264_mm512_setzero_ps(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000265{
266 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
267 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
268}
Michael Kupersteine45af542015-06-30 13:36:19 +0000269static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +0000270_mm512_setzero_pd(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000271{
272 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
273}
Adam Nemet0d5bb552014-07-28 17:14:40 +0000274
Michael Kupersteine45af542015-06-30 13:36:19 +0000275static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000276_mm512_set1_ps(float __w)
277{
278 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
279 __w, __w, __w, __w, __w, __w, __w, __w };
280}
281
Michael Kupersteine45af542015-06-30 13:36:19 +0000282static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000283_mm512_set1_pd(double __w)
284{
285 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
286}
287
Michael Kupersteine45af542015-06-30 13:36:19 +0000288static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000289_mm512_set1_epi32(int __s)
290{
291 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
292 __s, __s, __s, __s, __s, __s, __s, __s };
293}
294
Michael Kupersteine45af542015-06-30 13:36:19 +0000295static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000296_mm512_set1_epi64(long long __d)
297{
298 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
299}
300
Michael Kupersteine45af542015-06-30 13:36:19 +0000301static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet4abc07c2014-08-13 00:29:01 +0000302_mm512_broadcastss_ps(__m128 __X)
303{
304 float __f = __X[0];
305 return (__v16sf){ __f, __f, __f, __f,
306 __f, __f, __f, __f,
307 __f, __f, __f, __f,
308 __f, __f, __f, __f };
309}
310
Michael Kupersteine45af542015-06-30 13:36:19 +0000311static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet4abc07c2014-08-13 00:29:01 +0000312_mm512_broadcastsd_pd(__m128d __X)
313{
314 double __d = __X[0];
315 return (__v8df){ __d, __d, __d, __d,
316 __d, __d, __d, __d };
317}
318
Adam Nemetc871ff92014-07-30 16:51:24 +0000319/* Cast between vector types */
320
Michael Kupersteine45af542015-06-30 13:36:19 +0000321static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000322_mm512_castpd256_pd512(__m256d __a)
323{
324 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
325}
326
Michael Kupersteine45af542015-06-30 13:36:19 +0000327static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000328_mm512_castps256_ps512(__m256 __a)
329{
330 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
331 -1, -1, -1, -1, -1, -1, -1, -1);
332}
333
Michael Kupersteine45af542015-06-30 13:36:19 +0000334static __inline __m128d __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000335_mm512_castpd512_pd128(__m512d __a)
336{
337 return __builtin_shufflevector(__a, __a, 0, 1);
338}
339
Michael Kupersteine45af542015-06-30 13:36:19 +0000340static __inline __m128 __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000341_mm512_castps512_ps128(__m512 __a)
342{
343 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
344}
345
Michael Zuckermanc6677032016-05-03 14:26:52 +0000346
347static __inline__ __m512d __DEFAULT_FN_ATTRS
348_mm512_castpd128_pd512 (__m128d __A)
349{
350 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
351}
352
353static __inline__ __m512 __DEFAULT_FN_ATTRS
354_mm512_castps128_ps512 (__m128 __A)
355{
356 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
357}
358
359static __inline__ __m512i __DEFAULT_FN_ATTRS
360_mm512_castsi128_si512 (__m128i __A)
361{
362 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
363}
364
365static __inline__ __m512i __DEFAULT_FN_ATTRS
366_mm512_castsi256_si512 (__m256i __A)
367{
368 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
369}
370
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000371/* Bitwise operators */
Michael Kupersteine45af542015-06-30 13:36:19 +0000372static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000373_mm512_and_epi32(__m512i __a, __m512i __b)
374{
375 return __a & __b;
376}
377
Michael Kupersteine45af542015-06-30 13:36:19 +0000378static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000379_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
380{
381 return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
382 (__v16si) __b,
383 (__v16si) __src,
384 (__mmask16) __k);
385}
Michael Kupersteine45af542015-06-30 13:36:19 +0000386static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000387_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
388{
389 return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
390 (__v16si) __b,
391 (__v16si)
392 _mm512_setzero_si512 (),
393 (__mmask16) __k);
394}
395
Michael Kupersteine45af542015-06-30 13:36:19 +0000396static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000397_mm512_and_epi64(__m512i __a, __m512i __b)
398{
399 return __a & __b;
400}
401
Michael Kupersteine45af542015-06-30 13:36:19 +0000402static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000403_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
404{
405 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
406 (__v8di) __b,
407 (__v8di) __src,
408 (__mmask8) __k);
409}
Michael Kupersteine45af542015-06-30 13:36:19 +0000410static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000411_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
412{
413 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
414 (__v8di) __b,
415 (__v8di)
416 _mm512_setzero_si512 (),
417 (__mmask8) __k);
418}
419
Michael Kupersteine45af542015-06-30 13:36:19 +0000420static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000421_mm512_andnot_epi32 (__m512i __A, __m512i __B)
422{
423 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
424 (__v16si) __B,
425 (__v16si)
426 _mm512_setzero_si512 (),
427 (__mmask16) -1);
428}
429
Michael Kupersteine45af542015-06-30 13:36:19 +0000430static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000431_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
432{
433 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
434 (__v16si) __B,
435 (__v16si) __W,
436 (__mmask16) __U);
437}
438
Michael Kupersteine45af542015-06-30 13:36:19 +0000439static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000440_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
441{
442 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
443 (__v16si) __B,
444 (__v16si)
445 _mm512_setzero_si512 (),
446 (__mmask16) __U);
447}
448
Michael Kupersteine45af542015-06-30 13:36:19 +0000449static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000450_mm512_andnot_epi64 (__m512i __A, __m512i __B)
451{
452 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
453 (__v8di) __B,
454 (__v8di)
455 _mm512_setzero_si512 (),
456 (__mmask8) -1);
457}
458
Michael Kupersteine45af542015-06-30 13:36:19 +0000459static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000460_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
461{
462 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
463 (__v8di) __B,
464 (__v8di) __W, __U);
465}
466
Michael Kupersteine45af542015-06-30 13:36:19 +0000467static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000468_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
469{
470 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
471 (__v8di) __B,
472 (__v8di)
473 _mm512_setzero_pd (),
474 __U);
475}
Michael Kupersteine45af542015-06-30 13:36:19 +0000476static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000477_mm512_or_epi32(__m512i __a, __m512i __b)
478{
479 return __a | __b;
480}
481
Michael Kupersteine45af542015-06-30 13:36:19 +0000482static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000483_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
484{
485 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
486 (__v16si) __b,
487 (__v16si) __src,
488 (__mmask16) __k);
489}
Michael Kupersteine45af542015-06-30 13:36:19 +0000490static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000491_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
492{
493 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
494 (__v16si) __b,
495 (__v16si)
496 _mm512_setzero_si512 (),
497 (__mmask16) __k);
498}
499
Michael Kupersteine45af542015-06-30 13:36:19 +0000500static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000501_mm512_or_epi64(__m512i __a, __m512i __b)
502{
503 return __a | __b;
504}
505
Michael Kupersteine45af542015-06-30 13:36:19 +0000506static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000507_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
508{
509 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
510 (__v8di) __b,
511 (__v8di) __src,
512 (__mmask8) __k);
513}
Michael Kupersteine45af542015-06-30 13:36:19 +0000514static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000515_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
516{
517 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
518 (__v8di) __b,
519 (__v8di)
520 _mm512_setzero_si512 (),
521 (__mmask8) __k);
522}
523
Michael Kupersteine45af542015-06-30 13:36:19 +0000524static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000525_mm512_xor_epi32(__m512i __a, __m512i __b)
526{
527 return __a ^ __b;
528}
529
Michael Kupersteine45af542015-06-30 13:36:19 +0000530static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000531_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
532{
533 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
534 (__v16si) __b,
535 (__v16si) __src,
536 (__mmask16) __k);
537}
Michael Kupersteine45af542015-06-30 13:36:19 +0000538static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000539_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
540{
541 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
542 (__v16si) __b,
543 (__v16si)
544 _mm512_setzero_si512 (),
545 (__mmask16) __k);
546}
547
Michael Kupersteine45af542015-06-30 13:36:19 +0000548static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000549_mm512_xor_epi64(__m512i __a, __m512i __b)
550{
551 return __a ^ __b;
552}
553
Michael Kupersteine45af542015-06-30 13:36:19 +0000554static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000555_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
556{
557 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
558 (__v8di) __b,
559 (__v8di) __src,
560 (__mmask8) __k);
561}
Michael Kupersteine45af542015-06-30 13:36:19 +0000562static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000563_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
564{
565 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
566 (__v8di) __b,
567 (__v8di)
568 _mm512_setzero_si512 (),
569 (__mmask8) __k);
570}
571
Michael Kupersteine45af542015-06-30 13:36:19 +0000572static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000573_mm512_and_si512(__m512i __a, __m512i __b)
574{
575 return __a & __b;
576}
577
Michael Kupersteine45af542015-06-30 13:36:19 +0000578static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000579_mm512_or_si512(__m512i __a, __m512i __b)
580{
581 return __a | __b;
582}
583
Michael Kupersteine45af542015-06-30 13:36:19 +0000584static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000585_mm512_xor_si512(__m512i __a, __m512i __b)
586{
587 return __a ^ __b;
588}
Adam Nemet0d5bb552014-07-28 17:14:40 +0000589/* Arithmetic */
590
Michael Kupersteine45af542015-06-30 13:36:19 +0000591static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000592_mm512_add_pd(__m512d __a, __m512d __b)
593{
594 return __a + __b;
595}
596
Michael Kupersteine45af542015-06-30 13:36:19 +0000597static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000598_mm512_add_ps(__m512 __a, __m512 __b)
599{
600 return __a + __b;
601}
602
Michael Kupersteine45af542015-06-30 13:36:19 +0000603static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000604_mm512_mul_pd(__m512d __a, __m512d __b)
605{
606 return __a * __b;
607}
608
Michael Kupersteine45af542015-06-30 13:36:19 +0000609static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000610_mm512_mul_ps(__m512 __a, __m512 __b)
611{
612 return __a * __b;
613}
614
Michael Kupersteine45af542015-06-30 13:36:19 +0000615static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000616_mm512_sub_pd(__m512d __a, __m512d __b)
617{
618 return __a - __b;
619}
620
Michael Kupersteine45af542015-06-30 13:36:19 +0000621static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000622_mm512_sub_ps(__m512 __a, __m512 __b)
623{
624 return __a - __b;
625}
626
Michael Kupersteine45af542015-06-30 13:36:19 +0000627static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000628_mm512_add_epi64 (__m512i __A, __m512i __B)
629{
630 return (__m512i) ((__v8di) __A + (__v8di) __B);
631}
632
Michael Kupersteine45af542015-06-30 13:36:19 +0000633static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000634_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
635{
636 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
637 (__v8di) __B,
638 (__v8di) __W,
639 (__mmask8) __U);
640}
641
Michael Kupersteine45af542015-06-30 13:36:19 +0000642static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000643_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
644{
645 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
646 (__v8di) __B,
647 (__v8di)
648 _mm512_setzero_si512 (),
649 (__mmask8) __U);
650}
651
Michael Kupersteine45af542015-06-30 13:36:19 +0000652static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000653_mm512_sub_epi64 (__m512i __A, __m512i __B)
654{
655 return (__m512i) ((__v8di) __A - (__v8di) __B);
656}
657
Michael Kupersteine45af542015-06-30 13:36:19 +0000658static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000659_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
660{
661 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
662 (__v8di) __B,
663 (__v8di) __W,
664 (__mmask8) __U);
665}
666
Michael Kupersteine45af542015-06-30 13:36:19 +0000667static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000668_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
669{
670 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
671 (__v8di) __B,
672 (__v8di)
673 _mm512_setzero_si512 (),
674 (__mmask8) __U);
675}
676
Michael Kupersteine45af542015-06-30 13:36:19 +0000677static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000678_mm512_add_epi32 (__m512i __A, __m512i __B)
679{
680 return (__m512i) ((__v16si) __A + (__v16si) __B);
681}
682
Michael Kupersteine45af542015-06-30 13:36:19 +0000683static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000684_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
685{
686 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
687 (__v16si) __B,
688 (__v16si) __W,
689 (__mmask16) __U);
690}
691
Michael Kupersteine45af542015-06-30 13:36:19 +0000692static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000693_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
694{
695 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
696 (__v16si) __B,
697 (__v16si)
698 _mm512_setzero_si512 (),
699 (__mmask16) __U);
700}
701
Michael Kupersteine45af542015-06-30 13:36:19 +0000702static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000703_mm512_sub_epi32 (__m512i __A, __m512i __B)
704{
705 return (__m512i) ((__v16si) __A - (__v16si) __B);
706}
707
Michael Kupersteine45af542015-06-30 13:36:19 +0000708static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000709_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
710{
711 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
712 (__v16si) __B,
713 (__v16si) __W,
714 (__mmask16) __U);
715}
716
Michael Kupersteine45af542015-06-30 13:36:19 +0000717static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000718_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
719{
720 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
721 (__v16si) __B,
722 (__v16si)
723 _mm512_setzero_si512 (),
724 (__mmask16) __U);
725}
726
Michael Kupersteine45af542015-06-30 13:36:19 +0000727static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000728_mm512_max_pd(__m512d __A, __m512d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000729{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000730 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
731 (__v8df) __B,
732 (__v8df)
733 _mm512_setzero_pd (),
734 (__mmask8) -1,
735 _MM_FROUND_CUR_DIRECTION);
736}
737
Michael Kupersteine45af542015-06-30 13:36:19 +0000738static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000739_mm512_max_ps(__m512 __A, __m512 __B)
740{
741 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
742 (__v16sf) __B,
743 (__v16sf)
744 _mm512_setzero_ps (),
745 (__mmask16) -1,
746 _MM_FROUND_CUR_DIRECTION);
747}
748
Asaf Badouhf6a58b62015-07-23 12:13:32 +0000749static __inline__ __m128 __DEFAULT_FN_ATTRS
750_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +0000751 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +0000752 (__v4sf) __B,
753 (__v4sf) __W,
754 (__mmask8) __U,
755 _MM_FROUND_CUR_DIRECTION);
756}
757
758static __inline__ __m128 __DEFAULT_FN_ATTRS
759_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +0000760 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +0000761 (__v4sf) __B,
762 (__v4sf) _mm_setzero_ps (),
763 (__mmask8) __U,
764 _MM_FROUND_CUR_DIRECTION);
765}
766
/* Explicit-rounding forms of the maxss_round builtin wrappers; __R is passed
 * through as the builtin's last operand.  Every macro argument is
 * parenthesized so that an expression argument is cast as a whole. */
#define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

#define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
778
779static __inline__ __m128d __DEFAULT_FN_ATTRS
780_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +0000781 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +0000782 (__v2df) __B,
783 (__v2df) __W,
784 (__mmask8) __U,
785 _MM_FROUND_CUR_DIRECTION);
786}
787
788static __inline__ __m128d __DEFAULT_FN_ATTRS
789_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +0000790 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +0000791 (__v2df) __B,
792 (__v2df) _mm_setzero_pd (),
793 (__mmask8) __U,
794 _MM_FROUND_CUR_DIRECTION);
795}
796
/* Explicit-rounding forms of the maxsd_round builtin wrappers; __R is passed
 * through as the builtin's last operand.  Every macro argument is
 * parenthesized so that an expression argument is cast as a whole. */
#define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
808
Adam Nemet0d5bb552014-07-28 17:14:40 +0000809static __inline __m512i
Michael Kupersteine45af542015-06-30 13:36:19 +0000810__DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000811_mm512_max_epi32(__m512i __A, __m512i __B)
812{
813 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
814 (__v16si) __B,
815 (__v16si)
816 _mm512_setzero_si512 (),
817 (__mmask16) -1);
818}
819
Michael Kupersteine45af542015-06-30 13:36:19 +0000820static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000821_mm512_max_epu32(__m512i __A, __m512i __B)
822{
823 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
824 (__v16si) __B,
825 (__v16si)
826 _mm512_setzero_si512 (),
827 (__mmask16) -1);
828}
829
Michael Kupersteine45af542015-06-30 13:36:19 +0000830static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000831_mm512_max_epi64(__m512i __A, __m512i __B)
832{
833 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
834 (__v8di) __B,
835 (__v8di)
836 _mm512_setzero_si512 (),
837 (__mmask8) -1);
838}
839
Michael Kupersteine45af542015-06-30 13:36:19 +0000840static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000841_mm512_max_epu64(__m512i __A, __m512i __B)
842{
843 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
844 (__v8di) __B,
845 (__v8di)
846 _mm512_setzero_si512 (),
847 (__mmask8) -1);
848}
849
Michael Kupersteine45af542015-06-30 13:36:19 +0000850static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000851_mm512_min_pd(__m512d __A, __m512d __B)
852{
853 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
854 (__v8df) __B,
855 (__v8df)
856 _mm512_setzero_pd (),
857 (__mmask8) -1,
858 _MM_FROUND_CUR_DIRECTION);
859}
860
Michael Kupersteine45af542015-06-30 13:36:19 +0000861static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000862_mm512_min_ps(__m512 __A, __m512 __B)
863{
864 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
865 (__v16sf) __B,
866 (__v16sf)
867 _mm512_setzero_ps (),
868 (__mmask16) -1,
869 _MM_FROUND_CUR_DIRECTION);
870}
871
Asaf Badouhf6a58b62015-07-23 12:13:32 +0000872static __inline__ __m128 __DEFAULT_FN_ATTRS
873_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +0000874 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +0000875 (__v4sf) __B,
876 (__v4sf) __W,
877 (__mmask8) __U,
878 _MM_FROUND_CUR_DIRECTION);
879}
880
881static __inline__ __m128 __DEFAULT_FN_ATTRS
882_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +0000883 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +0000884 (__v4sf) __B,
885 (__v4sf) _mm_setzero_ps (),
886 (__mmask8) __U,
887 _MM_FROUND_CUR_DIRECTION);
888}
889
/* Explicit-rounding forms of the minss_round builtin wrappers; __R is passed
 * through as the builtin's last operand.  Every macro argument is
 * parenthesized so that an expression argument is cast as a whole. */
#define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

#define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
901
902static __inline__ __m128d __DEFAULT_FN_ATTRS
903_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +0000904 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +0000905 (__v2df) __B,
906 (__v2df) __W,
907 (__mmask8) __U,
908 _MM_FROUND_CUR_DIRECTION);
909}
910
911static __inline__ __m128d __DEFAULT_FN_ATTRS
912_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +0000913 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +0000914 (__v2df) __B,
915 (__v2df) _mm_setzero_pd (),
916 (__mmask8) __U,
917 _MM_FROUND_CUR_DIRECTION);
918}
919
/* Explicit-rounding forms of the minsd_round builtin wrappers; __R is passed
 * through as the builtin's last operand.  Every macro argument is
 * parenthesized so that an expression argument is cast as a whole. */
#define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
931
Adam Nemet0d5bb552014-07-28 17:14:40 +0000932static __inline __m512i
Michael Kupersteine45af542015-06-30 13:36:19 +0000933__DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000934_mm512_min_epi32(__m512i __A, __m512i __B)
935{
936 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
937 (__v16si) __B,
938 (__v16si)
939 _mm512_setzero_si512 (),
940 (__mmask16) -1);
941}
942
Michael Kupersteine45af542015-06-30 13:36:19 +0000943static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000944_mm512_min_epu32(__m512i __A, __m512i __B)
945{
946 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
947 (__v16si) __B,
948 (__v16si)
949 _mm512_setzero_si512 (),
950 (__mmask16) -1);
951}
952
Michael Kupersteine45af542015-06-30 13:36:19 +0000953static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000954_mm512_min_epi64(__m512i __A, __m512i __B)
955{
956 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
957 (__v8di) __B,
958 (__v8di)
959 _mm512_setzero_si512 (),
960 (__mmask8) -1);
961}
962
Michael Kupersteine45af542015-06-30 13:36:19 +0000963static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000964_mm512_min_epu64(__m512i __A, __m512i __B)
965{
966 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
967 (__v8di) __B,
968 (__v8di)
969 _mm512_setzero_si512 (),
970 (__mmask8) -1);
971}
972
Michael Kupersteine45af542015-06-30 13:36:19 +0000973static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000974_mm512_mul_epi32(__m512i __X, __m512i __Y)
975{
976 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
977 (__v16si) __Y,
978 (__v8di)
979 _mm512_setzero_si512 (),
980 (__mmask8) -1);
981}
982
Michael Kupersteine45af542015-06-30 13:36:19 +0000983static __inline __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000984_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
985{
986 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
987 (__v16si) __Y,
988 (__v8di) __W, __M);
989}
990
Michael Kupersteine45af542015-06-30 13:36:19 +0000991static __inline __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000992_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
993{
994 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
995 (__v16si) __Y,
996 (__v8di)
997 _mm512_setzero_si512 (),
998 __M);
999}
1000
Michael Kupersteine45af542015-06-30 13:36:19 +00001001static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001002_mm512_mul_epu32(__m512i __X, __m512i __Y)
1003{
1004 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1005 (__v16si) __Y,
1006 (__v8di)
1007 _mm512_setzero_si512 (),
1008 (__mmask8) -1);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001009}
1010
Michael Kupersteine45af542015-06-30 13:36:19 +00001011static __inline __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001012_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1013{
1014 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1015 (__v16si) __Y,
1016 (__v8di) __W, __M);
1017}
1018
Michael Kupersteine45af542015-06-30 13:36:19 +00001019static __inline __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001020_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
1021{
1022 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1023 (__v16si) __Y,
1024 (__v8di)
1025 _mm512_setzero_si512 (),
1026 __M);
1027}
1028
Michael Kupersteine45af542015-06-30 13:36:19 +00001029static __inline __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001030_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1031{
1032 return (__m512i) ((__v16si) __A * (__v16si) __B);
1033}
1034
Michael Kupersteine45af542015-06-30 13:36:19 +00001035static __inline __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001036_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1037{
1038 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
1039 (__v16si) __B,
1040 (__v16si)
1041 _mm512_setzero_si512 (),
1042 __M);
1043}
1044
Michael Kupersteine45af542015-06-30 13:36:19 +00001045static __inline __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001046_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1047{
1048 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
1049 (__v16si) __B,
1050 (__v16si) __W, __M);
1051}
1052
Michael Kupersteine45af542015-06-30 13:36:19 +00001053static __inline__ __m512d __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00001054_mm512_sqrt_pd(__m512d __a)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001055{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00001056 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001057 (__v8df) _mm512_setzero_pd (),
1058 (__mmask8) -1,
1059 _MM_FROUND_CUR_DIRECTION);
1060}
1061
Michael Kupersteine45af542015-06-30 13:36:19 +00001062static __inline__ __m512 __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00001063_mm512_sqrt_ps(__m512 __a)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001064{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00001065 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001066 (__v16sf) _mm512_setzero_ps (),
1067 (__mmask16) -1,
1068 _MM_FROUND_CUR_DIRECTION);
1069}
1070
Michael Kupersteine45af542015-06-30 13:36:19 +00001071static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001072_mm512_rsqrt14_pd(__m512d __A)
1073{
1074 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1075 (__v8df)
1076 _mm512_setzero_pd (),
1077 (__mmask8) -1);}
1078
Michael Kupersteine45af542015-06-30 13:36:19 +00001079static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001080_mm512_rsqrt14_ps(__m512 __A)
1081{
1082 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1083 (__v16sf)
1084 _mm512_setzero_ps (),
1085 (__mmask16) -1);
1086}
1087
Michael Kupersteine45af542015-06-30 13:36:19 +00001088static __inline__ __m128 __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001089_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1090{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001091 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001092 (__v4sf) __B,
1093 (__v4sf)
1094 _mm_setzero_ps (),
1095 (__mmask8) -1);
1096}
1097
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001098static __inline__ __m128 __DEFAULT_FN_ATTRS
1099_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1100{
1101 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1102 (__v4sf) __B,
1103 (__v4sf) __W,
1104 (__mmask8) __U);
1105}
1106
1107static __inline__ __m128 __DEFAULT_FN_ATTRS
1108_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1109{
1110 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1111 (__v4sf) __B,
1112 (__v4sf) _mm_setzero_ps (),
1113 (__mmask8) __U);
1114}
1115
Michael Kupersteine45af542015-06-30 13:36:19 +00001116static __inline__ __m128d __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001117_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1118{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001119 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001120 (__v2df) __B,
1121 (__v2df)
1122 _mm_setzero_pd (),
1123 (__mmask8) -1);
1124}
1125
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001126static __inline__ __m128d __DEFAULT_FN_ATTRS
1127_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1128{
1129 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1130 (__v2df) __B,
1131 (__v2df) __W,
1132 (__mmask8) __U);
1133}
1134
1135static __inline__ __m128d __DEFAULT_FN_ATTRS
1136_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1137{
1138 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1139 (__v2df) __B,
1140 (__v2df) _mm_setzero_pd (),
1141 (__mmask8) __U);
1142}
1143
Michael Kupersteine45af542015-06-30 13:36:19 +00001144static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001145_mm512_rcp14_pd(__m512d __A)
1146{
1147 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1148 (__v8df)
1149 _mm512_setzero_pd (),
1150 (__mmask8) -1);
1151}
1152
Michael Kupersteine45af542015-06-30 13:36:19 +00001153static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001154_mm512_rcp14_ps(__m512 __A)
1155{
1156 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1157 (__v16sf)
1158 _mm512_setzero_ps (),
1159 (__mmask16) -1);
1160}
Michael Kupersteine45af542015-06-30 13:36:19 +00001161static __inline__ __m128 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00001162_mm_rcp14_ss(__m128 __A, __m128 __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001163{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001164 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001165 (__v4sf) __B,
1166 (__v4sf)
1167 _mm_setzero_ps (),
1168 (__mmask8) -1);
1169}
1170
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001171static __inline__ __m128 __DEFAULT_FN_ATTRS
1172_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1173{
1174 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1175 (__v4sf) __B,
1176 (__v4sf) __W,
1177 (__mmask8) __U);
1178}
1179
1180static __inline__ __m128 __DEFAULT_FN_ATTRS
1181_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1182{
1183 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1184 (__v4sf) __B,
1185 (__v4sf) _mm_setzero_ps (),
1186 (__mmask8) __U);
1187}
1188
Michael Kupersteine45af542015-06-30 13:36:19 +00001189static __inline__ __m128d __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00001190_mm_rcp14_sd(__m128d __A, __m128d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001191{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001192 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001193 (__v2df) __B,
1194 (__v2df)
1195 _mm_setzero_pd (),
1196 (__mmask8) -1);
1197}
1198
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001199static __inline__ __m128d __DEFAULT_FN_ATTRS
1200_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1201{
1202 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1203 (__v2df) __B,
1204 (__v2df) __W,
1205 (__mmask8) __U);
1206}
1207
1208static __inline__ __m128d __DEFAULT_FN_ATTRS
1209_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1210{
1211 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1212 (__v2df) __B,
1213 (__v2df) _mm_setzero_pd (),
1214 (__mmask8) __U);
1215}
1216
Michael Kupersteine45af542015-06-30 13:36:19 +00001217static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001218_mm512_floor_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001219{
Adam Nemet0d5bb552014-07-28 17:14:40 +00001220 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1221 _MM_FROUND_FLOOR,
1222 (__v16sf) __A, -1,
1223 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001224}
1225
Michael Kupersteine45af542015-06-30 13:36:19 +00001226static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001227_mm512_floor_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001228{
Adam Nemet0d5bb552014-07-28 17:14:40 +00001229 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1230 _MM_FROUND_FLOOR,
1231 (__v8df) __A, -1,
1232 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001233}
1234
Michael Kupersteine45af542015-06-30 13:36:19 +00001235static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001236_mm512_ceil_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001237{
Adam Nemet0d5bb552014-07-28 17:14:40 +00001238 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1239 _MM_FROUND_CEIL,
1240 (__v16sf) __A, -1,
1241 _MM_FROUND_CUR_DIRECTION);
1242}
1243
Michael Kupersteine45af542015-06-30 13:36:19 +00001244static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001245_mm512_ceil_pd(__m512d __A)
1246{
1247 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1248 _MM_FROUND_CEIL,
1249 (__v8df) __A, -1,
1250 _MM_FROUND_CUR_DIRECTION);
1251}
1252
Michael Kupersteine45af542015-06-30 13:36:19 +00001253static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001254_mm512_abs_epi64(__m512i __A)
1255{
1256 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1257 (__v8di)
1258 _mm512_setzero_si512 (),
1259 (__mmask8) -1);
1260}
1261
Michael Kupersteine45af542015-06-30 13:36:19 +00001262static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001263_mm512_abs_epi32(__m512i __A)
1264{
1265 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1266 (__v16si)
1267 _mm512_setzero_si512 (),
1268 (__mmask16) -1);
1269}
1270
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001271static __inline__ __m128 __DEFAULT_FN_ATTRS
1272_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001273 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001274 (__v4sf) __B,
1275 (__v4sf) __W,
1276 (__mmask8) __U,
1277 _MM_FROUND_CUR_DIRECTION);
1278}
1279
1280static __inline__ __m128 __DEFAULT_FN_ATTRS
1281_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001282 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001283 (__v4sf) __B,
1284 (__v4sf) _mm_setzero_ps (),
1285 (__mmask8) __U,
1286 _MM_FROUND_CUR_DIRECTION);
1287}
1288
/* Explicit-rounding forms of the addss_round builtin wrappers; __R is passed
 * through as the builtin's last operand.  Every macro argument is
 * parenthesized so that an expression argument is cast as a whole. */
#define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_addss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

#define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_addss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_addss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
1300
1301static __inline__ __m128d __DEFAULT_FN_ATTRS
1302_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001303 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001304 (__v2df) __B,
1305 (__v2df) __W,
1306 (__mmask8) __U,
1307 _MM_FROUND_CUR_DIRECTION);
1308}
1309
1310static __inline__ __m128d __DEFAULT_FN_ATTRS
1311_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001312 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001313 (__v2df) __B,
1314 (__v2df) _mm_setzero_pd (),
1315 (__mmask8) __U,
1316 _MM_FROUND_CUR_DIRECTION);
1317}
/* Explicit-rounding forms of the addsd_round builtin wrappers; __R is passed
 * through as the builtin's last operand.  Every macro argument is
 * parenthesized so that an expression argument is cast as a whole. */
#define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
1329
Asaf Badouhffeb6242015-07-21 15:27:28 +00001330static __inline__ __m512d __DEFAULT_FN_ATTRS
1331_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1332 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1333 (__v8df) __B,
1334 (__v8df) __W,
1335 (__mmask8) __U,
1336 _MM_FROUND_CUR_DIRECTION);
1337}
1338
1339static __inline__ __m512d __DEFAULT_FN_ATTRS
1340_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1341 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1342 (__v8df) __B,
1343 (__v8df) _mm512_setzero_pd (),
1344 (__mmask8) __U,
1345 _MM_FROUND_CUR_DIRECTION);
1346}
1347
1348static __inline__ __m512 __DEFAULT_FN_ATTRS
1349_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1350 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
1351 (__v16sf) __B,
1352 (__v16sf) __W,
1353 (__mmask16) __U,
1354 _MM_FROUND_CUR_DIRECTION);
1355}
1356
1357static __inline__ __m512 __DEFAULT_FN_ATTRS
1358_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1359 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
1360 (__v16sf) __B,
1361 (__v16sf) _mm512_setzero_ps (),
1362 (__mmask16) __U,
1363 _MM_FROUND_CUR_DIRECTION);
1364}
1365
/* Explicit-rounding forms of the addpd512 builtin wrappers; __R is passed
 * through as the builtin's last operand.  Every macro argument is
 * parenthesized so that an expression argument is cast as a whole. */
#define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) _mm512_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) (__W), (__mmask8) (__U), (__R)); })

#define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) _mm512_setzero_pd(), (__mmask8) (__U), (__R)); })
1377
/* Explicit-rounding forms of the addps512 builtin wrappers; __R is passed
 * through as the builtin's last operand.  Every macro argument is
 * parenthesized so that an expression argument is cast as a whole. */
#define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, (__R)); })

#define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) (__W), (__mmask16) (__U), (__R)); })

#define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) _mm512_setzero_ps(), (__mmask16) (__U), (__R)); })
1389
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001390static __inline__ __m128 __DEFAULT_FN_ATTRS
1391_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001392 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001393 (__v4sf) __B,
1394 (__v4sf) __W,
1395 (__mmask8) __U,
1396 _MM_FROUND_CUR_DIRECTION);
1397}
1398
1399static __inline__ __m128 __DEFAULT_FN_ATTRS
1400_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001401 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001402 (__v4sf) __B,
1403 (__v4sf) _mm_setzero_ps (),
1404 (__mmask8) __U,
1405 _MM_FROUND_CUR_DIRECTION);
1406}
/* Explicit-rounding forms of the subss_round builtin wrappers; __R is passed
 * through as the builtin's last operand.  Every macro argument is
 * parenthesized so that an expression argument is cast as a whole. */
#define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_subss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

#define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_subss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_subss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
1418
1419static __inline__ __m128d __DEFAULT_FN_ATTRS
1420_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001421 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001422 (__v2df) __B,
1423 (__v2df) __W,
1424 (__mmask8) __U,
1425 _MM_FROUND_CUR_DIRECTION);
1426}
1427
1428static __inline__ __m128d __DEFAULT_FN_ATTRS
1429_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001430 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001431 (__v2df) __B,
1432 (__v2df) _mm_setzero_pd (),
1433 (__mmask8) __U,
1434 _MM_FROUND_CUR_DIRECTION);
1435}
1436
/* Explicit-rounding forms of the subsd_round builtin wrappers; __R is passed
 * through as the builtin's last operand.  Every macro argument is
 * parenthesized so that an expression argument is cast as a whole. */
#define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
1448
Asaf Badouhffeb6242015-07-21 15:27:28 +00001449static __inline__ __m512d __DEFAULT_FN_ATTRS
1450_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1451 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
1452 (__v8df) __B,
1453 (__v8df) __W,
1454 (__mmask8) __U,
1455 _MM_FROUND_CUR_DIRECTION);
1456}
1457
1458static __inline__ __m512d __DEFAULT_FN_ATTRS
1459_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1460 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
1461 (__v8df) __B,
1462 (__v8df)
1463 _mm512_setzero_pd (),
1464 (__mmask8) __U,
1465 _MM_FROUND_CUR_DIRECTION);
1466}
1467
1468static __inline__ __m512 __DEFAULT_FN_ATTRS
1469_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1470 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
1471 (__v16sf) __B,
1472 (__v16sf) __W,
1473 (__mmask16) __U,
1474 _MM_FROUND_CUR_DIRECTION);
1475}
1476
1477static __inline__ __m512 __DEFAULT_FN_ATTRS
1478_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1479 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
1480 (__v16sf) __B,
1481 (__v16sf)
1482 _mm512_setzero_ps (),
1483 (__mmask16) __U,
1484 _MM_FROUND_CUR_DIRECTION);
1485}
1486
/* 512-bit packed-double subtract with an explicit rounding operand __R.
 * All three forms forward to __builtin_ia32_subpd512_mask: the plain form
 * passes an all-ones mask, the maskz form a zero vector as third operand.
 * Macro arguments are parenthesized so expression arguments cast as a whole. */
#define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) _mm512_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) (__W), (__mmask8) (__U), (__R)); })

#define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) _mm512_setzero_pd(), (__mmask8) (__U), (__R)); })

/* 512-bit packed-float subtract with an explicit rounding operand __R.
 * All three forms forward to __builtin_ia32_subps512_mask.  The expansions
 * deliberately do not end in ';' so each macro can be used as an operand of a
 * larger expression, and every argument is parenthesized before casting. */
#define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_subps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, (__R)); })

#define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_subps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) (__W), (__mmask16) (__U), (__R)); })

#define _mm512_maskz_sub_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_subps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) _mm512_setzero_ps (), (__mmask16) (__U), (__R)); })

Asaf Badouhf6a58b62015-07-23 12:13:32 +00001511static __inline__ __m128 __DEFAULT_FN_ATTRS
1512_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001513 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001514 (__v4sf) __B,
1515 (__v4sf) __W,
1516 (__mmask8) __U,
1517 _MM_FROUND_CUR_DIRECTION);
1518}
1519
1520static __inline__ __m128 __DEFAULT_FN_ATTRS
1521_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001522 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001523 (__v4sf) __B,
1524 (__v4sf) _mm_setzero_ps (),
1525 (__mmask8) __U,
1526 _MM_FROUND_CUR_DIRECTION);
1527}
/* Scalar single-precision multiply with an explicit rounding operand __R.
 * All three forms forward to __builtin_ia32_mulss_round_mask: the plain form
 * passes an all-ones mask, the maskz form a zero vector as third operand.
 * Macro arguments are parenthesized so expression arguments cast as a whole. */
#define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

#define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })

1540static __inline__ __m128d __DEFAULT_FN_ATTRS
1541_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001542 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001543 (__v2df) __B,
1544 (__v2df) __W,
1545 (__mmask8) __U,
1546 _MM_FROUND_CUR_DIRECTION);
1547}
1548
1549static __inline__ __m128d __DEFAULT_FN_ATTRS
1550_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001551 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001552 (__v2df) __B,
1553 (__v2df) _mm_setzero_pd (),
1554 (__mmask8) __U,
1555 _MM_FROUND_CUR_DIRECTION);
1556}
1557
/* Scalar double-precision multiply with an explicit rounding operand __R.
 * All three forms forward to __builtin_ia32_mulsd_round_mask: the plain form
 * passes an all-ones mask, the maskz form a zero vector as third operand.
 * Macro arguments are parenthesized so expression arguments cast as a whole. */
#define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })

Asaf Badouhffeb6242015-07-21 15:27:28 +00001570static __inline__ __m512d __DEFAULT_FN_ATTRS
1571_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1572 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
1573 (__v8df) __B,
1574 (__v8df) __W,
1575 (__mmask8) __U,
1576 _MM_FROUND_CUR_DIRECTION);
1577}
1578
1579static __inline__ __m512d __DEFAULT_FN_ATTRS
1580_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1581 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
1582 (__v8df) __B,
1583 (__v8df)
1584 _mm512_setzero_pd (),
1585 (__mmask8) __U,
1586 _MM_FROUND_CUR_DIRECTION);
1587}
1588
1589static __inline__ __m512 __DEFAULT_FN_ATTRS
1590_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1591 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
1592 (__v16sf) __B,
1593 (__v16sf) __W,
1594 (__mmask16) __U,
1595 _MM_FROUND_CUR_DIRECTION);
1596}
1597
1598static __inline__ __m512 __DEFAULT_FN_ATTRS
1599_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1600 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
1601 (__v16sf) __B,
1602 (__v16sf)
1603 _mm512_setzero_ps (),
1604 (__mmask16) __U,
1605 _MM_FROUND_CUR_DIRECTION);
1606}
1607
/* 512-bit packed-double multiply with an explicit rounding operand __R.
 * All three forms forward to __builtin_ia32_mulpd512_mask: the plain form
 * passes an all-ones mask, the maskz form a zero vector as third operand.
 * Macro arguments are parenthesized so expression arguments cast as a whole. */
#define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) _mm512_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) (__W), (__mmask8) (__U), (__R)); })

#define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) _mm512_setzero_pd(), (__mmask8) (__U), (__R)); })

/* 512-bit packed-float multiply with an explicit rounding operand __R.
 * All three forms forward to __builtin_ia32_mulps512_mask.  The expansions
 * deliberately do not end in ';' so each macro can be used as an operand of a
 * larger expression, and every argument is parenthesized before casting. */
#define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_mulps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, (__R)); })

#define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_mulps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) (__W), (__mmask16) (__U), (__R)); })

#define _mm512_maskz_mul_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_mulps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) _mm512_setzero_ps (), (__mmask16) (__U), (__R)); })

Asaf Badouhf6a58b62015-07-23 12:13:32 +00001632static __inline__ __m128 __DEFAULT_FN_ATTRS
1633_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001634 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001635 (__v4sf) __B,
1636 (__v4sf) __W,
1637 (__mmask8) __U,
1638 _MM_FROUND_CUR_DIRECTION);
1639}
1640
1641static __inline__ __m128 __DEFAULT_FN_ATTRS
1642_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001643 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001644 (__v4sf) __B,
1645 (__v4sf) _mm_setzero_ps (),
1646 (__mmask8) __U,
1647 _MM_FROUND_CUR_DIRECTION);
1648}
1649
/* Scalar single-precision divide with an explicit rounding operand __R.
 * All three forms forward to __builtin_ia32_divss_round_mask: the plain form
 * passes an all-ones mask, the maskz form a zero vector as third operand.
 * Macro arguments are parenthesized so expression arguments cast as a whole. */
#define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_divss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })

#define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_divss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_divss_round_mask ((__v4sf) (__A), (__v4sf) (__B), \
                (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })

1662static __inline__ __m128d __DEFAULT_FN_ATTRS
1663_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001664 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001665 (__v2df) __B,
1666 (__v2df) __W,
1667 (__mmask8) __U,
1668 _MM_FROUND_CUR_DIRECTION);
1669}
1670
1671static __inline__ __m128d __DEFAULT_FN_ATTRS
1672_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001673 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001674 (__v2df) __B,
1675 (__v2df) _mm_setzero_pd (),
1676 (__mmask8) __U,
1677 _MM_FROUND_CUR_DIRECTION);
1678}
1679
/* Scalar double-precision divide with an explicit rounding operand __R.
 * All three forms forward to __builtin_ia32_divsd_round_mask: the plain form
 * passes an all-ones mask, the maskz form a zero vector as third operand.
 * Macro arguments are parenthesized so expression arguments cast as a whole. */
#define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) (__W), (__mmask8) (__U), (__R)); })

#define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) (__A), (__v2df) (__B), \
                (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })

Asaf Badouhffeb6242015-07-21 15:27:28 +00001692static __inline__ __m512d __DEFAULT_FN_ATTRS
1693_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1694 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
1695 (__v8df) __B,
1696 (__v8df) __W,
1697 (__mmask8) __U,
1698 _MM_FROUND_CUR_DIRECTION);
1699}
1700
1701static __inline__ __m512d __DEFAULT_FN_ATTRS
1702_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1703 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
1704 (__v8df) __B,
1705 (__v8df)
1706 _mm512_setzero_pd (),
1707 (__mmask8) __U,
1708 _MM_FROUND_CUR_DIRECTION);
1709}
1710
1711static __inline__ __m512 __DEFAULT_FN_ATTRS
1712_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1713 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
1714 (__v16sf) __B,
1715 (__v16sf) __W,
1716 (__mmask16) __U,
1717 _MM_FROUND_CUR_DIRECTION);
1718}
1719
1720static __inline__ __m512 __DEFAULT_FN_ATTRS
1721_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1722 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
1723 (__v16sf) __B,
1724 (__v16sf)
1725 _mm512_setzero_ps (),
1726 (__mmask16) __U,
1727 _MM_FROUND_CUR_DIRECTION);
1728}
1729
/* 512-bit packed-double divide with an explicit rounding operand __R.
 * All three forms forward to __builtin_ia32_divpd512_mask: the plain form
 * passes an all-ones mask, the maskz form a zero vector as third operand.
 * Macro arguments are parenthesized so expression arguments cast as a whole. */
#define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_divpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) _mm512_setzero_pd(), (__mmask8) -1, (__R)); })

#define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_divpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) (__W), (__mmask8) (__U), (__R)); })

#define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_divpd512_mask ((__v8df) (__A), (__v8df) (__B), \
                (__v8df) _mm512_setzero_pd(), (__mmask8) (__U), (__R)); })

/* 512-bit packed-float divide with an explicit rounding operand __R.
 * All three forms forward to __builtin_ia32_divps512_mask.  The expansions
 * deliberately do not end in ';' so each macro can be used as an operand of a
 * larger expression, and every argument is parenthesized before casting. */
#define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_divps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, (__R)); })

#define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_divps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) (__W), (__mmask16) (__U), (__R)); })

#define _mm512_maskz_div_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_divps512_mask ((__v16sf) (__A), (__v16sf) (__B), \
                (__v16sf) _mm512_setzero_ps (), (__mmask16) (__U), (__R)); })

/* Calls __builtin_ia32_rndscaleps_mask on A with immediate B, a mask of -1
 * and _MM_FROUND_CUR_DIRECTION.  A also serves as the builtin's third
 * operand, so it is bound to a local once; expanding it twice would evaluate
 * a side-effecting argument twice. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  __v16sf __rndscale_src = (__v16sf)(A); \
  (__m512)__builtin_ia32_rndscaleps_mask(__rndscale_src, (B), __rndscale_src, \
                                         -1, _MM_FROUND_CUR_DIRECTION); })

/* Calls __builtin_ia32_rndscalepd_mask on A with immediate B, a mask of -1
 * and _MM_FROUND_CUR_DIRECTION.  A also serves as the builtin's third
 * operand, so it is bound to a local once; expanding it twice would evaluate
 * a side-effecting argument twice. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  __v8df __rndscale_src = (__v8df)(A); \
  (__m512d)__builtin_ia32_rndscalepd_mask(__rndscale_src, (B), __rndscale_src, \
                                          -1, _MM_FROUND_CUR_DIRECTION); })

/* Packed-double fmadd with explicit rounding operand R.  The plain and mask
 * forms use __builtin_ia32_vfmaddpd512_mask (plain passes an all-ones mask);
 * mask3 and maskz use the _mask3 and _maskz builtins respectively. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
                                             (__v8df) (B), (__v8df) (C), \
                                             (__mmask8) -1, (R)); })

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
                                             (__v8df) (B), (__v8df) (C), \
                                             (__mmask8) (U), (R)); })

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) (A), \
                                              (__v8df) (B), (__v8df) (C), \
                                              (__mmask8) (U), (R)); })

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
                                              (__v8df) (B), (__v8df) (C), \
                                              (__mmask8) (U), (R)); })

/* Packed-double fmsub with explicit rounding operand R: the same vfmaddpd512
 * builtins as the fmadd macros, with operand C negated before the call. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
                                             (__v8df) (B), -(__v8df) (C), \
                                             (__mmask8) -1, (R)); })

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
                                             (__v8df) (B), -(__v8df) (C), \
                                             (__mmask8) (U), (R)); })

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
                                              (__v8df) (B), -(__v8df) (C), \
                                              (__mmask8) (U), (R)); })

/* Packed-double fnmadd with explicit rounding operand R: the vfmaddpd512
 * builtins with operand A negated before the call. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
                                             (__v8df) (B), (__v8df) (C), \
                                             (__mmask8) -1, (R)); })

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) (A), \
                                              (__v8df) (B), (__v8df) (C), \
                                              (__mmask8) (U), (R)); })

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
                                              (__v8df) (B), (__v8df) (C), \
                                              (__mmask8) (U), (R)); })

/* Packed-double fnmsub with explicit rounding operand R: the vfmaddpd512
 * builtins with both A and C negated before the call. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
                                             (__v8df) (B), -(__v8df) (C), \
                                             (__mmask8) -1, (R)); })

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
                                              (__v8df) (B), -(__v8df) (C), \
                                              (__mmask8) (U), (R)); })

Michael Kupersteine45af542015-06-30 13:36:19 +00001834static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00001835_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
1836{
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00001837 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1838 (__v8df) __B,
1839 (__v8df) __C,
1840 (__mmask8) -1,
1841 _MM_FROUND_CUR_DIRECTION);
1842}
1843
Michael Kupersteine45af542015-06-30 13:36:19 +00001844static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00001845_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1846{
1847 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1848 (__v8df) __B,
1849 (__v8df) __C,
1850 (__mmask8) __U,
1851 _MM_FROUND_CUR_DIRECTION);
1852}
1853
Michael Kupersteine45af542015-06-30 13:36:19 +00001854static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00001855_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1856{
1857 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
1858 (__v8df) __B,
1859 (__v8df) __C,
1860 (__mmask8) __U,
1861 _MM_FROUND_CUR_DIRECTION);
1862}
1863
Michael Kupersteine45af542015-06-30 13:36:19 +00001864static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00001865_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1866{
1867 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
1868 (__v8df) __B,
1869 (__v8df) __C,
1870 (__mmask8) __U,
1871 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00001872}
1873
Michael Kupersteine45af542015-06-30 13:36:19 +00001874static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00001875_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
1876{
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00001877 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1878 (__v8df) __B,
1879 -(__v8df) __C,
1880 (__mmask8) -1,
1881 _MM_FROUND_CUR_DIRECTION);
1882}
1883
Michael Kupersteine45af542015-06-30 13:36:19 +00001884static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00001885_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1886{
1887 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1888 (__v8df) __B,
1889 -(__v8df) __C,
1890 (__mmask8) __U,
1891 _MM_FROUND_CUR_DIRECTION);
1892}
1893
Michael Kupersteine45af542015-06-30 13:36:19 +00001894static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00001895_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1896{
1897 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
1898 (__v8df) __B,
1899 -(__v8df) __C,
1900 (__mmask8) __U,
1901 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00001902}
1903
Michael Kupersteine45af542015-06-30 13:36:19 +00001904static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00001905_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
1906{
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00001907 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1908 (__v8df) __B,
1909 (__v8df) __C,
1910 (__mmask8) -1,
1911 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00001912}
1913
Michael Kupersteine45af542015-06-30 13:36:19 +00001914static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00001915_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1916{
1917 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
1918 (__v8df) __B,
1919 (__v8df) __C,
1920 (__mmask8) __U,
1921 _MM_FROUND_CUR_DIRECTION);
1922}
1923
Michael Kupersteine45af542015-06-30 13:36:19 +00001924static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00001925_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1926{
1927 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1928 (__v8df) __B,
1929 (__v8df) __C,
1930 (__mmask8) __U,
1931 _MM_FROUND_CUR_DIRECTION);
1932}
1933
Michael Kupersteine45af542015-06-30 13:36:19 +00001934static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00001935_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
1936{
1937 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1938 (__v8df) __B,
1939 -(__v8df) __C,
1940 (__mmask8) -1,
1941 _MM_FROUND_CUR_DIRECTION);
1942}
1943
Michael Kupersteine45af542015-06-30 13:36:19 +00001944static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00001945_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1946{
1947 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1948 (__v8df) __B,
1949 -(__v8df) __C,
1950 (__mmask8) __U,
1951 _MM_FROUND_CUR_DIRECTION);
1952}
1953
/* Packed-float fmadd with explicit rounding operand R.  The plain and mask
 * forms use __builtin_ia32_vfmaddps512_mask (plain passes an all-ones mask);
 * mask3 and maskz use the _mask3 and _maskz builtins respectively. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
                                            (__v16sf) (B), (__v16sf) (C), \
                                            (__mmask16) -1, (R)); })

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
                                            (__v16sf) (B), (__v16sf) (C), \
                                            (__mmask16) (U), (R)); })

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) (A), \
                                             (__v16sf) (B), (__v16sf) (C), \
                                             (__mmask16) (U), (R)); })

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
                                             (__v16sf) (B), (__v16sf) (C), \
                                             (__mmask16) (U), (R)); })

/* Packed-float fmsub with explicit rounding operand R: the same vfmaddps512
 * builtins as the fmadd macros, with operand C negated before the call. */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
                                            (__v16sf) (B), -(__v16sf) (C), \
                                            (__mmask16) -1, (R)); })

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
                                            (__v16sf) (B), -(__v16sf) (C), \
                                            (__mmask16) (U), (R)); })

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
                                             (__v16sf) (B), -(__v16sf) (C), \
                                             (__mmask16) (U), (R)); })

/* Packed-float fnmadd with explicit rounding operand R: the vfmaddps512
 * builtins with operand A negated before the call. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
                                            (__v16sf) (B), (__v16sf) (C), \
                                            (__mmask16) -1, (R)); })

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) (A), \
                                             (__v16sf) (B), (__v16sf) (C), \
                                             (__mmask16) (U), (R)); })

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
                                             (__v16sf) (B), (__v16sf) (C), \
                                             (__mmask16) (U), (R)); })

/* Packed-float fnmsub with explicit rounding operand R: the vfmaddps512
 * builtins with both A and C negated before the call. */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
                                            (__v16sf) (B), -(__v16sf) (C), \
                                            (__mmask16) -1, (R)); })

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
                                             (__v16sf) (B), -(__v16sf) (C), \
                                             (__mmask16) (U), (R)); })

Michael Kupersteine45af542015-06-30 13:36:19 +00002026static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002027_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2028{
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002029 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2030 (__v16sf) __B,
2031 (__v16sf) __C,
2032 (__mmask16) -1,
2033 _MM_FROUND_CUR_DIRECTION);
2034}
2035
Michael Kupersteine45af542015-06-30 13:36:19 +00002036static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002037_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2038{
2039 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2040 (__v16sf) __B,
2041 (__v16sf) __C,
2042 (__mmask16) __U,
2043 _MM_FROUND_CUR_DIRECTION);
2044}
2045
Michael Kupersteine45af542015-06-30 13:36:19 +00002046static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002047_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2048{
2049 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2050 (__v16sf) __B,
2051 (__v16sf) __C,
2052 (__mmask16) __U,
2053 _MM_FROUND_CUR_DIRECTION);
2054}
2055
Michael Kupersteine45af542015-06-30 13:36:19 +00002056static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002057_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2058{
2059 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2060 (__v16sf) __B,
2061 (__v16sf) __C,
2062 (__mmask16) __U,
2063 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002064}
2065
Michael Kupersteine45af542015-06-30 13:36:19 +00002066static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002067_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2068{
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002069 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2070 (__v16sf) __B,
2071 -(__v16sf) __C,
2072 (__mmask16) -1,
2073 _MM_FROUND_CUR_DIRECTION);
2074}
2075
Michael Kupersteine45af542015-06-30 13:36:19 +00002076static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002077_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2078{
2079 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2080 (__v16sf) __B,
2081 -(__v16sf) __C,
2082 (__mmask16) __U,
2083 _MM_FROUND_CUR_DIRECTION);
2084}
2085
Michael Kupersteine45af542015-06-30 13:36:19 +00002086static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002087_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2088{
2089 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2090 (__v16sf) __B,
2091 -(__v16sf) __C,
2092 (__mmask16) __U,
2093 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002094}
2095
Michael Kupersteine45af542015-06-30 13:36:19 +00002096static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002097_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2098{
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002099 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2100 (__v16sf) __B,
2101 (__v16sf) __C,
2102 (__mmask16) -1,
2103 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002104}
2105
Michael Kupersteine45af542015-06-30 13:36:19 +00002106static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002107_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2108{
2109 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2110 (__v16sf) __B,
2111 (__v16sf) __C,
2112 (__mmask16) __U,
2113 _MM_FROUND_CUR_DIRECTION);
2114}
2115
Michael Kupersteine45af542015-06-30 13:36:19 +00002116static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002117_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2118{
2119 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2120 (__v16sf) __B,
2121 (__v16sf) __C,
2122 (__mmask16) __U,
2123 _MM_FROUND_CUR_DIRECTION);
2124}
2125
Michael Kupersteine45af542015-06-30 13:36:19 +00002126static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002127_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2128{
2129 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2130 (__v16sf) __B,
2131 -(__v16sf) __C,
2132 (__mmask16) -1,
2133 _MM_FROUND_CUR_DIRECTION);
2134}
2135
Michael Kupersteine45af542015-06-30 13:36:19 +00002136static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002137_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2138{
2139 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2140 (__v16sf) __B,
2141 -(__v16sf) __C,
2142 (__mmask16) __U,
2143 _MM_FROUND_CUR_DIRECTION);
2144}
2145
/* Packed-double fmaddsub with explicit rounding operand R.  The plain and
 * mask forms use __builtin_ia32_vfmaddsubpd512_mask (plain passes an all-ones
 * mask); mask3 and maskz use the _mask3 and _maskz builtins respectively. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
                                                (__v8df) (B), (__v8df) (C), \
                                                (__mmask8) -1, (R)); })

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
                                                (__v8df) (B), (__v8df) (C), \
                                                (__mmask8) (U), (R)); })

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) (A), \
                                                 (__v8df) (B), (__v8df) (C), \
                                                 (__mmask8) (U), (R)); })

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
                                                 (__v8df) (B), (__v8df) (C), \
                                                 (__mmask8) (U), (R)); })

/* Packed-double fmsubadd with explicit rounding operand R: the vfmaddsubpd512
 * builtins with operand C negated before the call. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
                                                (__v8df) (B), -(__v8df) (C), \
                                                (__mmask8) -1, (R)); })

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
                                                (__v8df) (B), -(__v8df) (C), \
                                                (__mmask8) (U), (R)); })

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
                                                 (__v8df) (B), -(__v8df) (C), \
                                                 (__mmask8) (U), (R)); })

Michael Kupersteine45af542015-06-30 13:36:19 +00002188static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002189_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2190{
2191 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2192 (__v8df) __B,
2193 (__v8df) __C,
2194 (__mmask8) -1,
2195 _MM_FROUND_CUR_DIRECTION);
2196}
2197
Michael Kupersteine45af542015-06-30 13:36:19 +00002198static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002199_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2200{
2201 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2202 (__v8df) __B,
2203 (__v8df) __C,
2204 (__mmask8) __U,
2205 _MM_FROUND_CUR_DIRECTION);
2206}
2207
Michael Kupersteine45af542015-06-30 13:36:19 +00002208static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002209_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2210{
2211 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2212 (__v8df) __B,
2213 (__v8df) __C,
2214 (__mmask8) __U,
2215 _MM_FROUND_CUR_DIRECTION);
2216}
2217
Michael Kupersteine45af542015-06-30 13:36:19 +00002218static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002219_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2220{
2221 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2222 (__v8df) __B,
2223 (__v8df) __C,
2224 (__mmask8) __U,
2225 _MM_FROUND_CUR_DIRECTION);
2226}
2227
Michael Kupersteine45af542015-06-30 13:36:19 +00002228static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002229_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2230{
2231 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2232 (__v8df) __B,
2233 -(__v8df) __C,
2234 (__mmask8) -1,
2235 _MM_FROUND_CUR_DIRECTION);
2236}
2237
Michael Kupersteine45af542015-06-30 13:36:19 +00002238static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002239_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2240{
2241 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2242 (__v8df) __B,
2243 -(__v8df) __C,
2244 (__mmask8) __U,
2245 _MM_FROUND_CUR_DIRECTION);
2246}
2247
Michael Kupersteine45af542015-06-30 13:36:19 +00002248static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002249_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2250{
2251 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2252 (__v8df) __B,
2253 -(__v8df) __C,
2254 (__mmask8) __U,
2255 _MM_FROUND_CUR_DIRECTION);
2256}
2257
/* vfmaddsubps512_mask builtin on A, B, C with an all-ones mask and caller
   supplied rounding R. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(A), (__v16sf)(B), \
                                             (__v16sf)(C), (__mmask16)-1, \
                                             (R)); })
2262
2263
/* vfmaddsubps512_mask builtin on A, B, C under mask U with rounding R. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(A), (__v16sf)(B), \
                                             (__v16sf)(C), (__mmask16)(U), \
                                             (R)); })
2268
2269
/* vfmaddsubps512_mask3 builtin on A, B, C under mask U with rounding R. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(A), (__v16sf)(B), \
                                              (__v16sf)(C), (__mmask16)(U), \
                                              (R)); })
2274
2275
/* vfmaddsubps512_maskz builtin on A, B, C under mask U with rounding R. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(A), (__v16sf)(B), \
                                              (__v16sf)(C), (__mmask16)(U), \
                                              (R)); })
2280
2281
/* fmsubadd via the vfmaddsubps512_mask builtin with C negated; all-ones mask,
   rounding R. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(A), (__v16sf)(B), \
                                             -(__v16sf)(C), (__mmask16)-1, \
                                             (R)); })
2286
2287
/* fmsubadd via the vfmaddsubps512_mask builtin with C negated; mask U,
   rounding R. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(A), (__v16sf)(B), \
                                             -(__v16sf)(C), (__mmask16)(U), \
                                             (R)); })
2292
2293
/* fmsubadd via the vfmaddsubps512_maskz builtin with C negated; mask U,
   rounding R. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(A), (__v16sf)(B), \
                                              -(__v16sf)(C), (__mmask16)(U), \
                                              (R)); })
2298
2299
Michael Kupersteine45af542015-06-30 13:36:19 +00002300static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002301_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2302{
2303 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2304 (__v16sf) __B,
2305 (__v16sf) __C,
2306 (__mmask16) -1,
2307 _MM_FROUND_CUR_DIRECTION);
2308}
2309
Michael Kupersteine45af542015-06-30 13:36:19 +00002310static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002311_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2312{
2313 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2314 (__v16sf) __B,
2315 (__v16sf) __C,
2316 (__mmask16) __U,
2317 _MM_FROUND_CUR_DIRECTION);
2318}
2319
Michael Kupersteine45af542015-06-30 13:36:19 +00002320static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002321_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2322{
2323 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2324 (__v16sf) __B,
2325 (__v16sf) __C,
2326 (__mmask16) __U,
2327 _MM_FROUND_CUR_DIRECTION);
2328}
2329
Michael Kupersteine45af542015-06-30 13:36:19 +00002330static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002331_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2332{
2333 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2334 (__v16sf) __B,
2335 (__v16sf) __C,
2336 (__mmask16) __U,
2337 _MM_FROUND_CUR_DIRECTION);
2338}
2339
Michael Kupersteine45af542015-06-30 13:36:19 +00002340static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002341_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
2342{
2343 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2344 (__v16sf) __B,
2345 -(__v16sf) __C,
2346 (__mmask16) -1,
2347 _MM_FROUND_CUR_DIRECTION);
2348}
2349
Michael Kupersteine45af542015-06-30 13:36:19 +00002350static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002351_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2352{
2353 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2354 (__v16sf) __B,
2355 -(__v16sf) __C,
2356 (__mmask16) __U,
2357 _MM_FROUND_CUR_DIRECTION);
2358}
2359
Michael Kupersteine45af542015-06-30 13:36:19 +00002360static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002361_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2362{
2363 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2364 (__v16sf) __B,
2365 -(__v16sf) __C,
2366 (__mmask16) __U,
2367 _MM_FROUND_CUR_DIRECTION);
2368}
2369
/* vfmsubpd512_mask3 builtin on A, B, C under mask U with rounding R. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), \
                                            (R)); })
2374
2375
Michael Kupersteine45af542015-06-30 13:36:19 +00002376static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002377_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2378{
2379 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2380 (__v8df) __B,
2381 (__v8df) __C,
2382 (__mmask8) __U,
2383 _MM_FROUND_CUR_DIRECTION);
2384}
2385
/* vfmsubps512_mask3 builtin on A, B, C under mask U with rounding R. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), \
                                           (R)); })
2390
2391
Michael Kupersteine45af542015-06-30 13:36:19 +00002392static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002393_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2394{
2395 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2396 (__v16sf) __B,
2397 (__v16sf) __C,
2398 (__mmask16) __U,
2399 _MM_FROUND_CUR_DIRECTION);
2400}
2401
/* vfmsubaddpd512_mask3 builtin on A, B, C under mask U with rounding R. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(A), (__v8df)(B), \
                                               (__v8df)(C), (__mmask8)(U), \
                                               (R)); })
2406
2407
Michael Kupersteine45af542015-06-30 13:36:19 +00002408static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002409_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2410{
2411 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2412 (__v8df) __B,
2413 (__v8df) __C,
2414 (__mmask8) __U,
2415 _MM_FROUND_CUR_DIRECTION);
2416}
2417
/* vfmsubaddps512_mask3 builtin on A, B, C under mask U with rounding R. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(A), (__v16sf)(B), \
                                              (__v16sf)(C), (__mmask16)(U), \
                                              (R)); })
2422
2423
Michael Kupersteine45af542015-06-30 13:36:19 +00002424static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002425_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2426{
2427 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2428 (__v16sf) __B,
2429 (__v16sf) __C,
2430 (__mmask16) __U,
2431 _MM_FROUND_CUR_DIRECTION);
2432}
2433
/* vfnmaddpd512_mask builtin on A, B, C under mask U with rounding R. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), \
                                            (R)); })
2438
2439
Michael Kupersteine45af542015-06-30 13:36:19 +00002440static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002441_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2442{
2443 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
2444 (__v8df) __B,
2445 (__v8df) __C,
2446 (__mmask8) __U,
2447 _MM_FROUND_CUR_DIRECTION);
2448}
2449
/* vfnmaddps512_mask builtin on A, B, C under mask U with rounding R. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), \
                                           (R)); })
2454
2455
Michael Kupersteine45af542015-06-30 13:36:19 +00002456static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002457_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2458{
2459 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
2460 (__v16sf) __B,
2461 (__v16sf) __C,
2462 (__mmask16) __U,
2463 _MM_FROUND_CUR_DIRECTION);
2464}
2465
/* vfnmsubpd512_mask builtin on A, B, C under mask U with rounding R. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), \
                                            (R)); })
2470
2471
/* vfnmsubpd512_mask3 builtin on A, B, C under mask U with rounding R. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(A), (__v8df)(B), \
                                             (__v8df)(C), (__mmask8)(U), \
                                             (R)); })
2476
2477
Michael Kupersteine45af542015-06-30 13:36:19 +00002478static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002479_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2480{
2481 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
2482 (__v8df) __B,
2483 (__v8df) __C,
2484 (__mmask8) __U,
2485 _MM_FROUND_CUR_DIRECTION);
2486}
2487
Michael Kupersteine45af542015-06-30 13:36:19 +00002488static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002489_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2490{
2491 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
2492 (__v8df) __B,
2493 (__v8df) __C,
2494 (__mmask8) __U,
2495 _MM_FROUND_CUR_DIRECTION);
2496}
2497
/* vfnmsubps512_mask builtin on A, B, C under mask U with rounding R. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), \
                                           (R)); })
2502
2503
/* vfnmsubps512_mask3 builtin on A, B, C under mask U with rounding R. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(A), (__v16sf)(B), \
                                            (__v16sf)(C), (__mmask16)(U), \
                                            (R)); })
2508
2509
Michael Kupersteine45af542015-06-30 13:36:19 +00002510static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002511_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2512{
2513 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
2514 (__v16sf) __B,
2515 (__v16sf) __C,
2516 (__mmask16) __U,
2517 _MM_FROUND_CUR_DIRECTION);
2518}
2519
Michael Kupersteine45af542015-06-30 13:36:19 +00002520static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002521_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2522{
2523 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
2524 (__v16sf) __B,
2525 (__v16sf) __C,
2526 (__mmask16) __U,
2527 _MM_FROUND_CUR_DIRECTION);
2528}
2529
2530
2531
Adam Nemet0d5bb552014-07-28 17:14:40 +00002532/* Vector permutations */
2533
Michael Kupersteine45af542015-06-30 13:36:19 +00002534static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00002535_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
2536{
2537 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
2538 /* idx */ ,
2539 (__v16si) __A,
2540 (__v16si) __B,
2541 (__mmask16) -1);
2542}
Michael Kupersteine45af542015-06-30 13:36:19 +00002543static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00002544_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
2545{
2546 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
2547 /* idx */ ,
2548 (__v8di) __A,
2549 (__v8di) __B,
2550 (__mmask8) -1);
2551}
2552
Michael Kupersteine45af542015-06-30 13:36:19 +00002553static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00002554_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
2555{
2556 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
2557 /* idx */ ,
2558 (__v8df) __A,
2559 (__v8df) __B,
2560 (__mmask8) -1);
2561}
Michael Kupersteine45af542015-06-30 13:36:19 +00002562static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00002563_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
2564{
2565 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
2566 /* idx */ ,
2567 (__v16sf) __A,
2568 (__v16sf) __B,
2569 (__mmask16) -1);
2570}
2571
/* alignq512_mask builtin on A, B with immediate I; the fourth operand is a
   zero vector and the mask is all-ones. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), (I), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1); })
Adam Nemet5bf7baa2014-08-05 17:28:23 +00002577
/* Masked form of _mm512_alignr_epi64: forwards A, B and the immediate imm to
   the alignq512_mask builtin, with W in the fourth operand slot (where the
   unmasked form passes a zero vector) and U as the mask.
   Every argument is parenthesized and routed through __m512i before the
   element-type cast, so an expression argument such as (a + b) or (m1 & m2)
   is cast as a whole rather than only its first operand. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), (imm), \
                                         (__v8di)(__m512i)(W), \
                                         (__mmask8)(U)); })
2584
/* maskz form of _mm512_alignr_epi64: forwards A, B and the immediate imm to
   the alignq512_mask builtin with a zero vector in the fourth operand slot and
   U as the mask.
   Arguments are parenthesized so that expression arguments are cast as a
   whole rather than only their first operand. */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), (imm), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)(U)); })
2591
/* alignd512_mask builtin on A, B with immediate I; the fourth operand is a
   zero vector and the mask is all-ones. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), (I), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1); })
2598
/* Masked form of _mm512_alignr_epi32: forwards A, B and the immediate imm to
   the alignd512_mask builtin, with W in the fourth operand slot (where the
   unmasked form passes a zero vector) and U as the mask.
   Every argument is parenthesized and routed through __m512i before the
   element-type cast, so an expression argument is cast as a whole rather
   than only its first operand. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), (imm), \
                                         (__v16si)(__m512i)(W), \
                                         (__mmask16)(U)); })
Adam Nemet5bf7baa2014-08-05 17:28:23 +00002605
/* maskz form of _mm512_alignr_epi32: forwards A, B and the immediate imm to
   the alignd512_mask builtin with a zero vector in the fourth operand slot and
   U as the mask.
   Arguments are parenthesized so that expression arguments are cast as a
   whole rather than only their first operand. */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), (imm), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)(U)); })
Adam Nemetf893ede2015-01-19 20:12:05 +00002612/* Vector Extract */
2613
/* extractf64x4_mask builtin on A with immediate I, an all-ones mask and a zero
   vector as the third operand.  The zero is built with _mm256_setzero_pd() so
   it already has a double element type, mirroring _mm512_extractf32x4_ps,
   instead of bit-casting an integer zero vector. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (I), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1); })
2620
/* extractf32x4_mask builtin on A with immediate I, a zero vector as the third
   operand and an all-ones mask. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (I), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1); })
2627
Adam Nemet0d5bb552014-07-28 17:14:40 +00002628/* Vector Blend */
2629
Michael Kupersteine45af542015-06-30 13:36:19 +00002630static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00002631_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
2632{
2633 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
2634 (__v8df) __W,
2635 (__mmask8) __U);
2636}
2637
Michael Kupersteine45af542015-06-30 13:36:19 +00002638static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00002639_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
2640{
2641 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
2642 (__v16sf) __W,
2643 (__mmask16) __U);
2644}
2645
Michael Kupersteine45af542015-06-30 13:36:19 +00002646static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00002647_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
2648{
2649 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
2650 (__v8di) __W,
2651 (__mmask8) __U);
2652}
2653
Michael Kupersteine45af542015-06-30 13:36:19 +00002654static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00002655_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
2656{
2657 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
2658 (__v16si) __W,
2659 (__mmask16) __U);
2660}
2661
2662/* Compare */
2663
/* cmpps512_mask builtin comparing A and B with predicate P, an all-ones input
   mask and rounding argument R; yields a __mmask16. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (P), \
                                          (__mmask16)-1, (R)); })
Adam Nemet0d5bb552014-07-28 17:14:40 +00002668
/* cmpps512_mask builtin comparing A and B with predicate P, input mask U and
   rounding argument R; yields a __mmask16. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (P), \
                                          (__mmask16)(U), (R)); })
2673
/* _mm512_cmp_round_ps_mask with the rounding argument fixed to
   _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_ps_mask(A, B, P)                                            \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
2676
/* _mm512_mask_cmp_round_ps_mask with the rounding argument fixed to
   _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_cmp_ps_mask(U, A, B, P)                                    \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
2679
/* cmppd512_mask builtin comparing A and B with predicate P, an all-ones input
   mask and rounding argument R; yields a __mmask8. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (P), \
                                         (__mmask8)-1, (R)); })
2684
/* cmppd512_mask builtin comparing A and B with predicate P, input mask U and
   rounding argument R; yields a __mmask8. */
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (P), \
                                         (__mmask8)(U), (R)); })
2689
/* _mm512_cmp_round_pd_mask with the rounding argument fixed to
   _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_pd_mask(A, B, P)                                            \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
2692
/* _mm512_mask_cmp_round_pd_mask with the rounding argument fixed to
   _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_cmp_pd_mask(U, A, B, P)                                    \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
Adam Nemet0d5bb552014-07-28 17:14:40 +00002695
2696/* Conversion */
2697
Michael Kupersteine45af542015-06-30 13:36:19 +00002698static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00002699_mm512_cvttps_epu32(__m512 __A)
2700{
2701 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
2702 (__v16si)
2703 _mm512_setzero_si512 (),
2704 (__mmask16) -1,
2705 _MM_FROUND_CUR_DIRECTION);
2706}
2707
Michael Zuckermanf1544752016-05-09 10:32:51 +00002708static __inline__ __m512i __DEFAULT_FN_ATTRS
2709_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
2710{
2711 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
2712 (__v16si) __W,
2713 (__mmask16) __U,
2714 _MM_FROUND_CUR_DIRECTION);
2715}
2716
2717static __inline__ __m512i __DEFAULT_FN_ATTRS
2718_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
2719{
2720 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
2721 (__v16si) _mm512_setzero_si512 (),
2722 (__mmask16) __U,
2723 _MM_FROUND_CUR_DIRECTION);
2724}
2725
/* cvtdq2ps512_mask builtin on A with a zero vector as second operand, an
   all-ones mask and rounding R. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (R)); })
Adam Nemet0d5bb552014-07-28 17:14:40 +00002730
/* cvtudq2ps512_mask builtin on A with a zero vector as second operand, an
   all-ones mask and rounding R. */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (R)); })
Adam Nemet0d5bb552014-07-28 17:14:40 +00002735
Michael Zuckermanf1544752016-05-09 10:32:51 +00002736static __inline__ __m512 __DEFAULT_FN_ATTRS
2737_mm512_cvtepu32_ps (__m512i __A)
2738{
2739 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
2740 (__v16sf) _mm512_undefined_ps (),
2741 (__mmask16) -1,
2742 _MM_FROUND_CUR_DIRECTION);
2743}
2744
2745static __inline__ __m512 __DEFAULT_FN_ATTRS
2746_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
2747{
2748 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
2749 (__v16sf) __W,
2750 (__mmask16) __U,
2751 _MM_FROUND_CUR_DIRECTION);
2752}
2753
2754static __inline__ __m512 __DEFAULT_FN_ATTRS
2755_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
2756{
2757 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
2758 (__v16sf) _mm512_setzero_ps (),
2759 (__mmask16) __U,
2760 _MM_FROUND_CUR_DIRECTION);
2761}
2762
Michael Kupersteine45af542015-06-30 13:36:19 +00002763static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00002764_mm512_cvtepi32_pd(__m256i __A)
2765{
2766 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
Michael Zuckermanf1544752016-05-09 10:32:51 +00002767 (__v8df)
Adam Nemet0d5bb552014-07-28 17:14:40 +00002768 _mm512_setzero_pd (),
2769 (__mmask8) -1);
2770}
2771
Michael Zuckermanf1544752016-05-09 10:32:51 +00002772static __inline__ __m512d __DEFAULT_FN_ATTRS
2773_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
2774{
2775 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
2776 (__v8df) __W,
2777 (__mmask8) __U);
2778}
2779
2780static __inline__ __m512d __DEFAULT_FN_ATTRS
2781_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
2782{
2783 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
2784 (__v8df) _mm512_setzero_pd (),
2785 (__mmask8) __U);
2786}
2787
2788static __inline__ __m512 __DEFAULT_FN_ATTRS
2789_mm512_cvtepi32_ps (__m512i __A)
2790{
2791 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
2792 (__v16sf) _mm512_undefined_ps (),
2793 (__mmask16) -1,
2794 _MM_FROUND_CUR_DIRECTION);
2795}
2796
2797static __inline__ __m512 __DEFAULT_FN_ATTRS
2798_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
2799{
2800 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
2801 (__v16sf) __W,
2802 (__mmask16) __U,
2803 _MM_FROUND_CUR_DIRECTION);
2804}
2805
2806static __inline__ __m512 __DEFAULT_FN_ATTRS
2807_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
2808{
2809 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
2810 (__v16sf) _mm512_setzero_ps (),
2811 (__mmask16) __U,
2812 _MM_FROUND_CUR_DIRECTION);
2813}
2814
Michael Kupersteine45af542015-06-30 13:36:19 +00002815static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00002816_mm512_cvtepu32_pd(__m256i __A)
2817{
2818 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
2819 (__v8df)
2820 _mm512_setzero_pd (),
2821 (__mmask8) -1);
2822}
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002823
Michael Zuckermanf1544752016-05-09 10:32:51 +00002824static __inline__ __m512d __DEFAULT_FN_ATTRS
2825_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
2826{
2827 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
2828 (__v8df) __W,
2829 (__mmask8) __U);
2830}
2831
2832static __inline__ __m512d __DEFAULT_FN_ATTRS
2833_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
2834{
2835 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
2836 (__v8df) _mm512_setzero_pd (),
2837 (__mmask8) __U);
2838}
2839
/* cvtpd2ps512_mask builtin on A with a 256-bit zero vector as second operand,
   an all-ones mask and rounding R. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (R)); })
2844
Michael Zuckermanf1544752016-05-09 10:32:51 +00002845static __inline__ __m256 __DEFAULT_FN_ATTRS
2846_mm512_cvtpd_ps (__m512d __A)
2847{
2848 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
2849 (__v8sf) _mm256_undefined_ps (),
2850 (__mmask8) -1,
2851 _MM_FROUND_CUR_DIRECTION);
2852}
2853
2854static __inline__ __m256 __DEFAULT_FN_ATTRS
2855_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
2856{
2857 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
2858 (__v8sf) __W,
2859 (__mmask8) __U,
2860 _MM_FROUND_CUR_DIRECTION);
2861}
2862
2863static __inline__ __m256 __DEFAULT_FN_ATTRS
2864_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
2865{
2866 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
2867 (__v8sf) _mm256_setzero_ps (),
2868 (__mmask8) __U,
2869 _MM_FROUND_CUR_DIRECTION);
2870}
2871
/* vcvtps2ph512_mask builtin on A with immediate I, a zero vector as third
   operand and an all-ones mask.  The mask is spelled (__mmask16)-1, like the
   mask arguments of the masked variants, instead of a bare int -1. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002876
/* Masked form of _mm512_cvtps_ph.  Positional arguments are unchanged:
     1st (W): 256-bit vector passed as the builtin's third operand (__v16hi)
     2nd (U): __mmask16 mask
     3rd (A): 512-bit float source
     4th (I): immediate
   The parameters are named W/U to match how they are used (the previous
   names had the two swapped), and every argument is parenthesized so that
   expression arguments are cast as a whole. */
#define _mm512_mask_cvtps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \
                                            (__v16hi)(W), \
                                            (__mmask16)(U)); })
2883
/* maskz form of _mm512_cvtps_ph: vcvtps2ph512_mask builtin on A with
   immediate I, a zero vector as third operand and mask U.  The mask parameter
   is named U (it was misleadingly called __W) and every argument is
   parenthesized so that expression arguments are cast as a whole. */
#define _mm512_maskz_cvtps_ph(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })
2891
2892
Michael Kupersteine45af542015-06-30 13:36:19 +00002893static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00002894_mm512_cvtph_ps(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002895{
2896 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
Michael Zuckermanf1544752016-05-09 10:32:51 +00002897 (__v16sf)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002898 _mm512_setzero_ps (),
2899 (__mmask16) -1,
2900 _MM_FROUND_CUR_DIRECTION);
2901}
2902
Michael Zuckermanf1544752016-05-09 10:32:51 +00002903static __inline__ __m512 __DEFAULT_FN_ATTRS
2904_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002905{
Michael Zuckermanf1544752016-05-09 10:32:51 +00002906 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
2907 (__v16sf) __W,
2908 (__mmask16) __U,
2909 _MM_FROUND_CUR_DIRECTION);
2910}
2911
2912static __inline__ __m512 __DEFAULT_FN_ATTRS
2913_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
2914{
2915 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
2916 (__v16sf) _mm512_setzero_ps (),
2917 (__mmask16) __U,
2918 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002919}
2920
Michael Kupersteine45af542015-06-30 13:36:19 +00002921static __inline __m256i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00002922_mm512_cvttpd_epi32(__m512d __a)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002923{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00002924 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002925 (__v8si)_mm256_setzero_si256(),
2926 (__mmask8) -1,
2927 _MM_FROUND_CUR_DIRECTION);
2928}
2929
Michael Zuckermanf1544752016-05-09 10:32:51 +00002930static __inline__ __m256i __DEFAULT_FN_ATTRS
2931_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
2932{
2933 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
2934 (__v8si) __W,
2935 (__mmask8) __U,
2936 _MM_FROUND_CUR_DIRECTION);
2937}
2938
2939static __inline__ __m256i __DEFAULT_FN_ATTRS
2940_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
2941{
2942 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
2943 (__v8si) _mm256_setzero_si256 (),
2944 (__mmask8) __U,
2945 _MM_FROUND_CUR_DIRECTION);
2946}
2947
/* cvttpd2dq512_mask builtin on A with a zero vector as second operand, an
   all-ones mask and rounding argument R. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (R)); })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002952
/* cvttps2dq512_mask builtin on A with a zero vector as second operand, an
   all-ones mask and rounding argument R. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (R)); })
Michael Zuckermanf1544752016-05-09 10:32:51 +00002957
2958static __inline __m512i __DEFAULT_FN_ATTRS
2959_mm512_cvttps_epi32(__m512 __a)
2960{
2961 return (__m512i)
2962 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
2963 (__v16si) _mm512_setzero_si512 (),
2964 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
2965}
2966
2967static __inline__ __m512i __DEFAULT_FN_ATTRS
2968_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
2969{
2970 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
2971 (__v16si) __W,
2972 (__mmask16) __U,
2973 _MM_FROUND_CUR_DIRECTION);
2974}
2975
2976static __inline__ __m512i __DEFAULT_FN_ATTRS
2977_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
2978{
2979 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
2980 (__v16si) _mm512_setzero_si512 (),
2981 (__mmask16) __U,
2982 _MM_FROUND_CUR_DIRECTION);
2983}
Craig Topper72c7d512015-02-01 07:35:35 +00002984
/* cvtps2dq512_mask builtin on A with a zero vector as second operand, an
   all-ones mask and rounding R. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1, (R)); })
2989
Michael Zuckermanf1544752016-05-09 10:32:51 +00002990static __inline__ __m512i __DEFAULT_FN_ATTRS
2991_mm512_cvtps_epi32 (__m512 __A)
2992{
2993 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
2994 (__v16si) _mm512_undefined_epi32 (),
2995 (__mmask16) -1,
2996 _MM_FROUND_CUR_DIRECTION);
2997}
2998
2999static __inline__ __m512i __DEFAULT_FN_ATTRS
3000_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3001{
3002 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3003 (__v16si) __W,
3004 (__mmask16) __U,
3005 _MM_FROUND_CUR_DIRECTION);
3006}
3007
3008static __inline__ __m512i __DEFAULT_FN_ATTRS
3009_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
3010{
3011 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3012 (__v16si)
3013 _mm512_setzero_si512 (),
3014 (__mmask16) __U,
3015 _MM_FROUND_CUR_DIRECTION);
3016}
3017
/* cvtpd2dq512_mask builtin on A with a zero vector as second operand, an
   all-ones mask and rounding R. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1, (R)); })
3022
Michael Zuckermanf1544752016-05-09 10:32:51 +00003023static __inline__ __m256i __DEFAULT_FN_ATTRS
3024_mm512_cvtpd_epi32 (__m512d __A)
3025{
3026 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3027 (__v8si)
3028 _mm256_undefined_si256 (),
3029 (__mmask8) -1,
3030 _MM_FROUND_CUR_DIRECTION);
3031}
3032
3033static __inline__ __m256i __DEFAULT_FN_ATTRS
3034_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3035{
3036 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3037 (__v8si) __W,
3038 (__mmask8) __U,
3039 _MM_FROUND_CUR_DIRECTION);
3040}
3041
3042static __inline__ __m256i __DEFAULT_FN_ATTRS
3043_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
3044{
3045 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3046 (__v8si)
3047 _mm256_setzero_si256 (),
3048 (__mmask8) __U,
3049 _MM_FROUND_CUR_DIRECTION);
3050}
3051
/* cvtps2udq512_mask builtin on A with a zero vector as second operand, an
   all-ones mask and rounding R. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (R)); })
3056
Michael Zuckermanf1544752016-05-09 10:32:51 +00003057static __inline__ __m512i __DEFAULT_FN_ATTRS
3058_mm512_cvtps_epu32 ( __m512 __A)
3059{
3060 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
3061 (__v16si)\
3062 _mm512_undefined_epi32 (),\
3063 (__mmask16) -1,\
3064 _MM_FROUND_CUR_DIRECTION);\
3065}
3066
3067static __inline__ __m512i __DEFAULT_FN_ATTRS
3068_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3069{
3070 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3071 (__v16si) __W,
3072 (__mmask16) __U,
3073 _MM_FROUND_CUR_DIRECTION);
3074}
3075
/* Double -> unsigned 32-bit conversion with explicit rounding control R.
 * All eight lanes are enabled; a zero vector is the pass-through operand. */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1, (R)); })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003080
Michael Zuckermanf1544752016-05-09 10:32:51 +00003081static __inline__ __m256i __DEFAULT_FN_ATTRS
3082_mm512_cvtpd_epu32 (__m512d __A)
3083{
3084 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3085 (__v8si)
3086 _mm256_undefined_si256 (),
3087 (__mmask8) -1,
3088 _MM_FROUND_CUR_DIRECTION);
3089}
3090
3091static __inline__ __m256i __DEFAULT_FN_ATTRS
3092_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
3093{
3094 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3095 (__v8si) __W,
3096 (__mmask8) __U,
3097 _MM_FROUND_CUR_DIRECTION);
3098}
3099
3100static __inline__ __m256i __DEFAULT_FN_ATTRS
3101_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
3102{
3103 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3104 (__v8si)
3105 _mm256_setzero_si256 (),
3106 (__mmask8) __U,
3107 _MM_FROUND_CUR_DIRECTION);
3108}
Adam Nemet63a951e2015-01-14 01:31:17 +00003109/* Unpack and Interleave */
Michael Kupersteine45af542015-06-30 13:36:19 +00003110static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00003111_mm512_unpackhi_pd(__m512d __a, __m512d __b)
3112{
3113 return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
3114}
3115
Michael Kupersteine45af542015-06-30 13:36:19 +00003116static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00003117_mm512_unpacklo_pd(__m512d __a, __m512d __b)
3118{
3119 return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
3120}
3121
Michael Kupersteine45af542015-06-30 13:36:19 +00003122static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00003123_mm512_unpackhi_ps(__m512 __a, __m512 __b)
3124{
3125 return __builtin_shufflevector(__a, __b,
3126 2, 18, 3, 19,
3127 2+4, 18+4, 3+4, 19+4,
3128 2+8, 18+8, 3+8, 19+8,
3129 2+12, 18+12, 3+12, 19+12);
3130}
3131
Michael Kupersteine45af542015-06-30 13:36:19 +00003132static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00003133_mm512_unpacklo_ps(__m512 __a, __m512 __b)
3134{
3135 return __builtin_shufflevector(__a, __b,
3136 0, 16, 1, 17,
3137 0+4, 16+4, 1+4, 17+4,
3138 0+8, 16+8, 1+8, 17+8,
3139 0+12, 16+12, 1+12, 17+12);
3140}
3141
Adam Nemet0d5bb552014-07-28 17:14:40 +00003142/* Bit Test */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003143
Michael Kupersteine45af542015-06-30 13:36:19 +00003144static __inline __mmask16 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00003145_mm512_test_epi32_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003146{
3147 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
3148 (__v16si) __B,
3149 (__mmask16) -1);
3150}
3151
Michael Kupersteine45af542015-06-30 13:36:19 +00003152static __inline __mmask8 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00003153_mm512_test_epi64_mask(__m512i __A, __m512i __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003154{
3155 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
3156 (__v8di) __B,
3157 (__mmask8) -1);
3158}
3159
Adam Nemet0d5bb552014-07-28 17:14:40 +00003160/* SIMD load ops */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003161
Michael Kupersteine45af542015-06-30 13:36:19 +00003162static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00003163_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003164{
3165 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
3166 (__v16si)
3167 _mm512_setzero_si512 (),
3168 (__mmask16) __U);
3169}
3170
Michael Kupersteine45af542015-06-30 13:36:19 +00003171static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00003172_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003173{
3174 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
3175 (__v8di)
3176 _mm512_setzero_si512 (),
3177 (__mmask8) __U);
3178}
3179
Michael Kupersteine45af542015-06-30 13:36:19 +00003180static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00003181_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003182{
3183 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
3184 (__v16sf)
3185 _mm512_setzero_ps (),
3186 (__mmask16) __U);
3187}
3188
Michael Kupersteine45af542015-06-30 13:36:19 +00003189static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00003190_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003191{
3192 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
3193 (__v8df)
3194 _mm512_setzero_pd (),
3195 (__mmask8) __U);
3196}
3197
Michael Kupersteine45af542015-06-30 13:36:19 +00003198static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00003199_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
3200{
3201 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
3202 (__v16sf)
3203 _mm512_setzero_ps (),
3204 (__mmask16) __U);
3205}
3206
Michael Kupersteine45af542015-06-30 13:36:19 +00003207static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00003208_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
3209{
3210 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
3211 (__v8df)
3212 _mm512_setzero_pd (),
3213 (__mmask8) __U);
3214}
3215
Michael Kupersteine45af542015-06-30 13:36:19 +00003216static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemetda82bcc2014-07-31 04:00:39 +00003217_mm512_loadu_pd(double const *__p)
3218{
3219 struct __loadu_pd {
3220 __m512d __v;
David Majnemer1cf22e62015-02-04 00:26:10 +00003221 } __attribute__((__packed__, __may_alias__));
Adam Nemetda82bcc2014-07-31 04:00:39 +00003222 return ((struct __loadu_pd*)__p)->__v;
3223}
3224
Michael Kupersteine45af542015-06-30 13:36:19 +00003225static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemetda82bcc2014-07-31 04:00:39 +00003226_mm512_loadu_ps(float const *__p)
3227{
3228 struct __loadu_ps {
3229 __m512 __v;
David Majnemer1cf22e62015-02-04 00:26:10 +00003230 } __attribute__((__packed__, __may_alias__));
Adam Nemetda82bcc2014-07-31 04:00:39 +00003231 return ((struct __loadu_ps*)__p)->__v;
3232}
3233
Michael Kupersteine45af542015-06-30 13:36:19 +00003234static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemete7087472016-01-19 02:02:25 +00003235_mm512_load_ps(float const *__p)
Adam Nemetc0cff242015-01-16 18:51:50 +00003236{
3237 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
3238 (__v16sf)
3239 _mm512_setzero_ps (),
3240 (__mmask16) -1);
3241}
3242
Michael Kupersteine45af542015-06-30 13:36:19 +00003243static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemete7087472016-01-19 02:02:25 +00003244_mm512_load_pd(double const *__p)
Adam Nemetc0cff242015-01-16 18:51:50 +00003245{
3246 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
3247 (__v8df)
3248 _mm512_setzero_pd (),
3249 (__mmask8) -1);
3250}
3251
Adam Nemet0d5bb552014-07-28 17:14:40 +00003252/* SIMD store ops */
3253
Michael Kupersteine45af542015-06-30 13:36:19 +00003254static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00003255_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003256{
3257 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
3258 (__mmask8) __U);
3259}
3260
Michael Kupersteine45af542015-06-30 13:36:19 +00003261static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00003262_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003263{
3264 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
3265 (__mmask16) __U);
3266}
3267
Michael Kupersteine45af542015-06-30 13:36:19 +00003268static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00003269_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003270{
3271 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
3272}
3273
Michael Kupersteine45af542015-06-30 13:36:19 +00003274static __inline void __DEFAULT_FN_ATTRS
Adam Nemetfce1ad02014-07-28 17:14:45 +00003275_mm512_storeu_pd(void *__P, __m512d __A)
3276{
3277 __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
3278}
3279
Michael Kupersteine45af542015-06-30 13:36:19 +00003280static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00003281_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003282{
3283 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
3284 (__mmask16) __U);
3285}
3286
Michael Kupersteine45af542015-06-30 13:36:19 +00003287static __inline void __DEFAULT_FN_ATTRS
Adam Nemetfce1ad02014-07-28 17:14:45 +00003288_mm512_storeu_ps(void *__P, __m512 __A)
3289{
3290 __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
3291}
3292
Michael Kupersteine45af542015-06-30 13:36:19 +00003293static __inline void __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00003294_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
Adam Nemetfce1ad02014-07-28 17:14:45 +00003295{
Adam Nemetc0cff242015-01-16 18:51:50 +00003296 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
Adam Nemetfce1ad02014-07-28 17:14:45 +00003297}
3298
Michael Kupersteine45af542015-06-30 13:36:19 +00003299static __inline void __DEFAULT_FN_ATTRS
Adam Nemetfce1ad02014-07-28 17:14:45 +00003300_mm512_store_pd(void *__P, __m512d __A)
3301{
3302 *(__m512d*)__P = __A;
3303}
3304
Michael Kupersteine45af542015-06-30 13:36:19 +00003305static __inline void __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00003306_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
3307{
3308 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
3309 (__mmask16) __U);
3310}
3311
Michael Kupersteine45af542015-06-30 13:36:19 +00003312static __inline void __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00003313_mm512_store_ps(void *__P, __m512 __A)
3314{
3315 *(__m512*)__P = __A;
3316}
3317
Adam Nemet2db1d2f2014-07-30 16:51:27 +00003318/* Mask ops */
3319
Michael Kupersteine45af542015-06-30 13:36:19 +00003320static __inline __mmask16 __DEFAULT_FN_ATTRS
Adam Nemet2db1d2f2014-07-30 16:51:27 +00003321_mm512_knot(__mmask16 __M)
3322{
3323 return __builtin_ia32_knothi(__M);
3324}
3325
Robert Khasanovb9f3a912014-10-08 17:18:13 +00003326/* Integer compare */
3327
Michael Kupersteine45af542015-06-30 13:36:19 +00003328static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Robert Khasanovb9f3a912014-10-08 17:18:13 +00003329_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
3330 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
3331 (__mmask16)-1);
3332}
3333
Michael Kupersteine45af542015-06-30 13:36:19 +00003334static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Robert Khasanovb9f3a912014-10-08 17:18:13 +00003335_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3336 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
3337 __u);
3338}
3339
Michael Kupersteine45af542015-06-30 13:36:19 +00003340static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003341_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
3342 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
3343 (__mmask16)-1);
3344}
3345
Michael Kupersteine45af542015-06-30 13:36:19 +00003346static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003347_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3348 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
3349 __u);
3350}
3351
Michael Kupersteine45af542015-06-30 13:36:19 +00003352static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Robert Khasanovb9f3a912014-10-08 17:18:13 +00003353_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3354 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
3355 __u);
3356}
3357
Michael Kupersteine45af542015-06-30 13:36:19 +00003358static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Robert Khasanovb9f3a912014-10-08 17:18:13 +00003359_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
3360 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
3361 (__mmask8)-1);
3362}
3363
Michael Kupersteine45af542015-06-30 13:36:19 +00003364static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003365_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
3366 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
3367 (__mmask8)-1);
3368}
3369
Michael Kupersteine45af542015-06-30 13:36:19 +00003370static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003371_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3372 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
3373 __u);
3374}
3375
Michael Kupersteine45af542015-06-30 13:36:19 +00003376static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003377_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
3378 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
3379 (__mmask16)-1);
3380}
3381
Michael Kupersteine45af542015-06-30 13:36:19 +00003382static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003383_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3384 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
3385 __u);
3386}
3387
Michael Kupersteine45af542015-06-30 13:36:19 +00003388static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003389_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
3390 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
3391 (__mmask16)-1);
3392}
3393
Michael Kupersteine45af542015-06-30 13:36:19 +00003394static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003395_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3396 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
3397 __u);
3398}
3399
Michael Kupersteine45af542015-06-30 13:36:19 +00003400static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003401_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
3402 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
3403 (__mmask8)-1);
3404}
3405
Michael Kupersteine45af542015-06-30 13:36:19 +00003406static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003407_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3408 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
3409 __u);
3410}
3411
Michael Kupersteine45af542015-06-30 13:36:19 +00003412static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003413_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
3414 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
3415 (__mmask8)-1);
3416}
3417
Michael Kupersteine45af542015-06-30 13:36:19 +00003418static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003419_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3420 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
3421 __u);
3422}
3423
Michael Kupersteine45af542015-06-30 13:36:19 +00003424static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003425_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
3426 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
3427 (__mmask16)-1);
3428}
3429
Michael Kupersteine45af542015-06-30 13:36:19 +00003430static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003431_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3432 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
3433 __u);
3434}
3435
Michael Kupersteine45af542015-06-30 13:36:19 +00003436static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003437_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
3438 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
3439 (__mmask16)-1);
3440}
3441
Michael Kupersteine45af542015-06-30 13:36:19 +00003442static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003443_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3444 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
3445 __u);
3446}
3447
Michael Kupersteine45af542015-06-30 13:36:19 +00003448static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003449_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3450 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
3451 __u);
3452}
3453
Michael Kupersteine45af542015-06-30 13:36:19 +00003454static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003455_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
3456 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
3457 (__mmask8)-1);
3458}
3459
Michael Kupersteine45af542015-06-30 13:36:19 +00003460static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003461_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
3462 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
3463 (__mmask8)-1);
3464}
3465
Michael Kupersteine45af542015-06-30 13:36:19 +00003466static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003467_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3468 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
3469 __u);
3470}
3471
Michael Kupersteine45af542015-06-30 13:36:19 +00003472static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003473_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
3474 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
3475 (__mmask16)-1);
3476}
3477
Michael Kupersteine45af542015-06-30 13:36:19 +00003478static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003479_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3480 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
3481 __u);
3482}
3483
Michael Kupersteine45af542015-06-30 13:36:19 +00003484static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003485_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
3486 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
3487 (__mmask16)-1);
3488}
3489
Michael Kupersteine45af542015-06-30 13:36:19 +00003490static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003491_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3492 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
3493 __u);
3494}
3495
Michael Kupersteine45af542015-06-30 13:36:19 +00003496static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003497_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
3498 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
3499 (__mmask8)-1);
3500}
3501
Michael Kupersteine45af542015-06-30 13:36:19 +00003502static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003503_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3504 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
3505 __u);
3506}
3507
Michael Kupersteine45af542015-06-30 13:36:19 +00003508static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003509_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
3510 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
3511 (__mmask8)-1);
3512}
3513
Michael Kupersteine45af542015-06-30 13:36:19 +00003514static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003515_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3516 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
3517 __u);
3518}
3519
Michael Kupersteine45af542015-06-30 13:36:19 +00003520static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003521_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
3522 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
3523 (__mmask16)-1);
3524}
3525
Michael Kupersteine45af542015-06-30 13:36:19 +00003526static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003527_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3528 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
3529 __u);
3530}
3531
Michael Kupersteine45af542015-06-30 13:36:19 +00003532static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003533_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
3534 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
3535 (__mmask16)-1);
3536}
3537
Michael Kupersteine45af542015-06-30 13:36:19 +00003538static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003539_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3540 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
3541 __u);
3542}
3543
Michael Kupersteine45af542015-06-30 13:36:19 +00003544static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003545_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
3546 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
3547 (__mmask8)-1);
3548}
3549
Michael Kupersteine45af542015-06-30 13:36:19 +00003550static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003551_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3552 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
3553 __u);
3554}
3555
Michael Kupersteine45af542015-06-30 13:36:19 +00003556static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003557_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
3558 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
3559 (__mmask8)-1);
3560}
3561
Michael Kupersteine45af542015-06-30 13:36:19 +00003562static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003563_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3564 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
3565 __u);
3566}
3567
Michael Kupersteine45af542015-06-30 13:36:19 +00003568static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003569_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
3570 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3571 (__mmask16)-1);
3572}
3573
Michael Kupersteine45af542015-06-30 13:36:19 +00003574static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003575_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3576 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3577 __u);
3578}
3579
Michael Kupersteine45af542015-06-30 13:36:19 +00003580static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003581_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
3582 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3583 (__mmask16)-1);
3584}
3585
Michael Kupersteine45af542015-06-30 13:36:19 +00003586static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003587_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3588 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3589 __u);
3590}
3591
Michael Kupersteine45af542015-06-30 13:36:19 +00003592static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003593_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
3594 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3595 (__mmask8)-1);
3596}
3597
Michael Kupersteine45af542015-06-30 13:36:19 +00003598static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003599_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3600 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3601 __u);
3602}
3603
Michael Kupersteine45af542015-06-30 13:36:19 +00003604static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003605_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
3606 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3607 (__mmask8)-1);
3608}
3609
Michael Kupersteine45af542015-06-30 13:36:19 +00003610static __inline__ __mmask8 __DEFAULT_FN_ATTRS
Craig Topper4cac1c22015-01-25 23:30:07 +00003611_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3612 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3613 __u);
3614}
3615
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00003616static __inline__ __m512i __DEFAULT_FN_ATTRS
3617_mm512_cvtepi8_epi32 (__m128i __A)
3618{
3619 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
3620 (__v16si)
3621 _mm512_setzero_si512 (),
3622 (__mmask16) -1);
3623}
3624
3625static __inline__ __m512i __DEFAULT_FN_ATTRS
3626_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
3627{
3628 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
3629 (__v16si) __W,
3630 (__mmask16) __U);
3631}
3632
3633static __inline__ __m512i __DEFAULT_FN_ATTRS
3634_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
3635{
3636 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
3637 (__v16si)
3638 _mm512_setzero_si512 (),
3639 (__mmask16) __U);
3640}
3641
3642static __inline__ __m512i __DEFAULT_FN_ATTRS
3643_mm512_cvtepi8_epi64 (__m128i __A)
3644{
3645 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
3646 (__v8di)
3647 _mm512_setzero_si512 (),
3648 (__mmask8) -1);
3649}
3650
3651static __inline__ __m512i __DEFAULT_FN_ATTRS
3652_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
3653{
3654 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
3655 (__v8di) __W,
3656 (__mmask8) __U);
3657}
3658
3659static __inline__ __m512i __DEFAULT_FN_ATTRS
3660_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
3661{
3662 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
3663 (__v8di)
3664 _mm512_setzero_si512 (),
3665 (__mmask8) __U);
3666}
3667
3668static __inline__ __m512i __DEFAULT_FN_ATTRS
3669_mm512_cvtepi32_epi64 (__m256i __X)
3670{
3671 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
3672 (__v8di)
3673 _mm512_setzero_si512 (),
3674 (__mmask8) -1);
3675}
3676
3677static __inline__ __m512i __DEFAULT_FN_ATTRS
3678_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
3679{
3680 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
3681 (__v8di) __W,
3682 (__mmask8) __U);
3683}
3684
3685static __inline__ __m512i __DEFAULT_FN_ATTRS
3686_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
3687{
3688 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
3689 (__v8di)
3690 _mm512_setzero_si512 (),
3691 (__mmask8) __U);
3692}
3693
3694static __inline__ __m512i __DEFAULT_FN_ATTRS
3695_mm512_cvtepi16_epi32 (__m256i __A)
3696{
3697 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
3698 (__v16si)
3699 _mm512_setzero_si512 (),
3700 (__mmask16) -1);
3701}
3702
3703static __inline__ __m512i __DEFAULT_FN_ATTRS
3704_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
3705{
3706 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
3707 (__v16si) __W,
3708 (__mmask16) __U);
3709}
3710
3711static __inline__ __m512i __DEFAULT_FN_ATTRS
3712_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
3713{
3714 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
3715 (__v16si)
3716 _mm512_setzero_si512 (),
3717 (__mmask16) __U);
3718}
3719
3720static __inline__ __m512i __DEFAULT_FN_ATTRS
3721_mm512_cvtepi16_epi64 (__m128i __A)
3722{
3723 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
3724 (__v8di)
3725 _mm512_setzero_si512 (),
3726 (__mmask8) -1);
3727}
3728
3729static __inline__ __m512i __DEFAULT_FN_ATTRS
3730_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
3731{
3732 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
3733 (__v8di) __W,
3734 (__mmask8) __U);
3735}
3736
3737static __inline__ __m512i __DEFAULT_FN_ATTRS
3738_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
3739{
3740 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
3741 (__v8di)
3742 _mm512_setzero_si512 (),
3743 (__mmask8) __U);
3744}
3745
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00003746static __inline__ __m512i __DEFAULT_FN_ATTRS
3747_mm512_cvtepu8_epi32 (__m128i __A)
3748{
3749 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
3750 (__v16si)
3751 _mm512_setzero_si512 (),
3752 (__mmask16) -1);
3753}
3754
3755static __inline__ __m512i __DEFAULT_FN_ATTRS
3756_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
3757{
3758 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
3759 (__v16si) __W,
3760 (__mmask16) __U);
3761}
3762
3763static __inline__ __m512i __DEFAULT_FN_ATTRS
3764_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
3765{
3766 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
3767 (__v16si)
3768 _mm512_setzero_si512 (),
3769 (__mmask16) __U);
3770}
3771
3772static __inline__ __m512i __DEFAULT_FN_ATTRS
3773_mm512_cvtepu8_epi64 (__m128i __A)
3774{
3775 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
3776 (__v8di)
3777 _mm512_setzero_si512 (),
3778 (__mmask8) -1);
3779}
3780
3781static __inline__ __m512i __DEFAULT_FN_ATTRS
3782_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
3783{
3784 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
3785 (__v8di) __W,
3786 (__mmask8) __U);
3787}
3788
3789static __inline__ __m512i __DEFAULT_FN_ATTRS
3790_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3791{
3792 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
3793 (__v8di)
3794 _mm512_setzero_si512 (),
3795 (__mmask8) __U);
3796}
3797
3798static __inline__ __m512i __DEFAULT_FN_ATTRS
3799_mm512_cvtepu32_epi64 (__m256i __X)
3800{
3801 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
3802 (__v8di)
3803 _mm512_setzero_si512 (),
3804 (__mmask8) -1);
3805}
3806
3807static __inline__ __m512i __DEFAULT_FN_ATTRS
3808_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
3809{
3810 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
3811 (__v8di) __W,
3812 (__mmask8) __U);
3813}
3814
3815static __inline__ __m512i __DEFAULT_FN_ATTRS
3816_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
3817{
3818 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
3819 (__v8di)
3820 _mm512_setzero_si512 (),
3821 (__mmask8) __U);
3822}
3823
3824static __inline__ __m512i __DEFAULT_FN_ATTRS
3825_mm512_cvtepu16_epi32 (__m256i __A)
3826{
3827 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
3828 (__v16si)
3829 _mm512_setzero_si512 (),
3830 (__mmask16) -1);
3831}
3832
3833static __inline__ __m512i __DEFAULT_FN_ATTRS
3834_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
3835{
3836 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
3837 (__v16si) __W,
3838 (__mmask16) __U);
3839}
3840
3841static __inline__ __m512i __DEFAULT_FN_ATTRS
3842_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
3843{
3844 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
3845 (__v16si)
3846 _mm512_setzero_si512 (),
3847 (__mmask16) __U);
3848}
3849
3850static __inline__ __m512i __DEFAULT_FN_ATTRS
3851_mm512_cvtepu16_epi64 (__m128i __A)
3852{
3853 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
3854 (__v8di)
3855 _mm512_setzero_si512 (),
3856 (__mmask8) -1);
3857}
3858
3859static __inline__ __m512i __DEFAULT_FN_ATTRS
3860_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
3861{
3862 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
3863 (__v8di) __W,
3864 (__mmask8) __U);
3865}
3866
3867static __inline__ __m512i __DEFAULT_FN_ATTRS
3868_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3869{
3870 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
3871 (__v8di)
3872 _mm512_setzero_si512 (),
3873 (__mmask8) __U);
3874}
3875
Michael Zuckermane98cc742016-02-23 15:59:47 +00003876static __inline__ __m512i __DEFAULT_FN_ATTRS
3877_mm512_rorv_epi32 (__m512i __A, __m512i __B)
3878{
3879 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
3880 (__v16si) __B,
3881 (__v16si)
3882 _mm512_setzero_si512 (),
3883 (__mmask16) -1);
3884}
3885
3886static __inline__ __m512i __DEFAULT_FN_ATTRS
3887_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
3888{
3889 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
3890 (__v16si) __B,
3891 (__v16si) __W,
3892 (__mmask16) __U);
3893}
3894
3895static __inline__ __m512i __DEFAULT_FN_ATTRS
3896_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
3897{
3898 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
3899 (__v16si) __B,
3900 (__v16si)
3901 _mm512_setzero_si512 (),
3902 (__mmask16) __U);
3903}
3904
3905static __inline__ __m512i __DEFAULT_FN_ATTRS
3906_mm512_rorv_epi64 (__m512i __A, __m512i __B)
3907{
3908 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
3909 (__v8di) __B,
3910 (__v8di)
3911 _mm512_setzero_si512 (),
3912 (__mmask8) -1);
3913}
3914
3915static __inline__ __m512i __DEFAULT_FN_ATTRS
3916_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
3917{
3918 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
3919 (__v8di) __B,
3920 (__v8di) __W,
3921 (__mmask8) __U);
3922}
3923
3924static __inline__ __m512i __DEFAULT_FN_ATTRS
3925_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
3926{
3927 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
3928 (__v8di) __B,
3929 (__v8di)
3930 _mm512_setzero_si512 (),
3931 (__mmask8) __U);
3932}
3933
3934
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00003935
/* Unmasked integer compare macros.  A and B are converted to __m512i and
   then viewed with the element type of the builtin; P is forwarded as given
   and the final builtin argument is an all-ones mask.  The epu* forms call
   the ucmp builtins, the epi* forms the cmp builtins.  */
#define _mm512_cmp_epi32_mask(A, B, P) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), \
                                         (P), (__mmask16)-1); })

#define _mm512_cmp_epu32_mask(A, B, P) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), \
                                          (P), (__mmask16)-1); })

#define _mm512_cmp_epi64_mask(A, B, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(A), \
                                        (__v8di)(__m512i)(B), \
                                        (P), (__mmask8)-1); })

#define _mm512_cmp_epu64_mask(A, B, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), \
                                         (P), (__mmask8)-1); })
3955
/* Masked integer compare macros.  Identical to the unmasked forms except
   that the caller's mask M is passed as the final builtin argument.  */
#define _mm512_mask_cmp_epi32_mask(M, A, B, P) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), \
                                         (P), (__mmask16)(M)); })

#define _mm512_mask_cmp_epu32_mask(M, A, B, P) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), \
                                          (P), (__mmask16)(M)); })

#define _mm512_mask_cmp_epi64_mask(M, A, B, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(A), \
                                        (__v8di)(__m512i)(B), \
                                        (P), (__mmask8)(M)); })

#define _mm512_mask_cmp_epu64_mask(M, A, B, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), \
                                         (P), (__mmask8)(M)); })
Eric Christopher4d1851682015-06-17 07:09:20 +00003975
/* _mm512_rol_epi32 macros.  A is viewed as __v16si and N is forwarded
   untouched as the second argument of __builtin_ia32_prold512_mask; the
   result is cast to __m512i.  These are macros rather than functions so
   that N reaches the builtin exactly as the caller wrote it.  */
#define _mm512_rol_epi32(A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(A), (N), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_rol_epi32(W, U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(A), (N), \
                                        (__v16si)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_rol_epi32(U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(A), (N), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })
3992
/* _mm512_rol_epi64 macros.  A is viewed as __v8di and N is forwarded
   untouched as the second argument of __builtin_ia32_prolq512_mask; the
   result is cast to __m512i.  */
#define _mm512_rol_epi64(A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(A), (N), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_rol_epi64(W, U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(A), (N), \
                                        (__v8di)(W), \
                                        (__mmask8)(U)); })

#define _mm512_maskz_rol_epi64(U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(A), (N), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
Michael Zuckerman0231f162016-02-23 13:41:13 +00004009static __inline__ __m512i __DEFAULT_FN_ATTRS
4010_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4011{
4012 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4013 (__v16si) __B,
4014 (__v16si)
4015 _mm512_setzero_si512 (),
4016 (__mmask16) -1);
4017}
4018
4019static __inline__ __m512i __DEFAULT_FN_ATTRS
4020_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4021{
4022 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4023 (__v16si) __B,
4024 (__v16si) __W,
4025 (__mmask16) __U);
4026}
4027
4028static __inline__ __m512i __DEFAULT_FN_ATTRS
4029_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4030{
4031 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4032 (__v16si) __B,
4033 (__v16si)
4034 _mm512_setzero_si512 (),
4035 (__mmask16) __U);
4036}
4037
4038static __inline__ __m512i __DEFAULT_FN_ATTRS
4039_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4040{
4041 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4042 (__v8di) __B,
4043 (__v8di)
4044 _mm512_setzero_si512 (),
4045 (__mmask8) -1);
4046}
4047
4048static __inline__ __m512i __DEFAULT_FN_ATTRS
4049_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4050{
4051 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4052 (__v8di) __B,
4053 (__v8di) __W,
4054 (__mmask8) __U);
4055}
4056
4057static __inline__ __m512i __DEFAULT_FN_ATTRS
4058_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4059{
4060 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4061 (__v8di) __B,
4062 (__v8di)
4063 _mm512_setzero_si512 (),
4064 (__mmask8) __U);
4065}
4066
/* _mm512_ror_epi32 macros.  A is viewed as __v16si and N is forwarded
   untouched as the second argument of __builtin_ia32_prord512_mask.

   The builtin's result is cast to __m512i so that these macros have the
   same result type as the _mm512_rol_epi32 macros and the function-style
   intrinsics in this header.  __v16si (int elements) and __m512i
   (long long elements) are distinct vector types, so without the cast the
   expression type depends on what the builtin happens to return.  */
#define _mm512_ror_epi32(A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(A), (N), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_ror_epi32(W, U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(A), (N), \
                                        (__v16si)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_ror_epi32(U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(A), (N), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })
Michael Zuckerman0231f162016-02-23 13:41:13 +00004086
/* _mm512_ror_epi64 macros.  A is viewed as __v8di and N is forwarded
   untouched as the second argument of __builtin_ia32_prorq512_mask.  The
   result is cast to __m512i explicitly, as the _mm512_rol_epi64 macros do,
   so the macro's type does not depend on the builtin's declared return
   type.  */
#define _mm512_ror_epi64(A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(A), (N), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_ror_epi64(W, U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(A), (N), \
                                        (__v8di)(W), \
                                        (__mmask8)(U)); })

#define _mm512_maskz_ror_epi64(U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(A), (N), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
Michael Zuckerman38a27272016-02-22 09:05:41 +00004106
/* _mm512_slli_epi32 macros.  A is viewed as __v16si and N is forwarded
   untouched as the second argument of __builtin_ia32_pslldi512_mask.

   The result is cast to __m512i so the macros yield the same type as the
   function-style intrinsics; __v16si and __m512i are distinct vector types
   (int vs. long long elements).  */
#define _mm512_slli_epi32(A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(A), (N), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1); })

#define _mm512_mask_slli_epi32(W, U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(A), (N), \
                                         (__v16si)(W), \
                                         (__mmask16)(U)); })

#define _mm512_maskz_slli_epi32(U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(A), (N), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)(U)); })
4126
/* _mm512_slli_epi64 macros.  A is viewed as __v8di and N is forwarded
   untouched as the second argument of __builtin_ia32_psllqi512_mask.  The
   result is cast to __m512i explicitly so the macro's type does not depend
   on the builtin's declared return type.  */
#define _mm512_slli_epi64(A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(A), (N), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1); })

#define _mm512_mask_slli_epi64(W, U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(A), (N), \
                                         (__v8di)(W), \
                                         (__mmask8)(U)); })

#define _mm512_maskz_slli_epi64(U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(A), (N), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)(U)); })
4146
Michael Zuckerman38a27272016-02-22 09:05:41 +00004147
Michael Zuckermand176d742016-03-01 17:49:03 +00004148
/* _mm512_srli_epi32 macros.  A is viewed as __v16si and N is forwarded
   untouched as the second argument of __builtin_ia32_psrldi512_mask.

   The result is cast to __m512i so the macros yield the same type as the
   function-style intrinsics; __v16si and __m512i are distinct vector types
   (int vs. long long elements).  */
#define _mm512_srli_epi32(A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(A), (N), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1); })

#define _mm512_mask_srli_epi32(W, U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(A), (N), \
                                         (__v16si)(W), \
                                         (__mmask16)(U)); })

#define _mm512_maskz_srli_epi32(U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(A), (N), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)(U)); })
4168
/* _mm512_srli_epi64 macros.  A is viewed as __v8di and N is forwarded
   untouched as the second argument of __builtin_ia32_psrlqi512_mask.  The
   result is cast to __m512i explicitly so the macro's type does not depend
   on the builtin's declared return type.  */
#define _mm512_srli_epi64(A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(A), (N), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1); })

#define _mm512_mask_srli_epi64(W, U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(A), (N), \
                                         (__v8di)(W), \
                                         (__mmask8)(U)); })

#define _mm512_maskz_srli_epi64(U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(A), (N), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)(U)); })
4188
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00004189static __inline__ __m512i __DEFAULT_FN_ATTRS
4190_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4191{
4192 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4193 (__v16si) __W,
4194 (__mmask16) __U);
4195}
4196
4197static __inline__ __m512i __DEFAULT_FN_ATTRS
4198_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
4199{
4200 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4201 (__v16si)
4202 _mm512_setzero_si512 (),
4203 (__mmask16) __U);
4204}
4205
4206static __inline__ void __DEFAULT_FN_ATTRS
4207_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
4208{
4209 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
4210 (__mmask16) __U);
4211}
4212
4213static __inline__ __m512i __DEFAULT_FN_ATTRS
4214_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
4215{
4216 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
4217 (__v8di) __W,
4218 (__mmask8) __U);
4219}
4220
4221static __inline__ __m512i __DEFAULT_FN_ATTRS
4222_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
4223{
4224 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
4225 (__v8di)
4226 _mm512_setzero_si512 (),
4227 (__mmask8) __U);
4228}
4229
4230static __inline__ __m512i __DEFAULT_FN_ATTRS
4231_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4232{
4233 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4234 (__v8di) __W,
4235 (__mmask8) __U);
4236}
4237
4238static __inline__ __m512i __DEFAULT_FN_ATTRS
4239_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
4240{
4241 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4242 (__v8di)
4243 _mm512_setzero_si512 (),
4244 (__mmask8) __U);
4245}
4246
4247static __inline__ void __DEFAULT_FN_ATTRS
4248_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
4249{
4250 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
4251 (__mmask8) __U);
4252}
4253
4254
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00004255
4256static __inline__ __m512d __DEFAULT_FN_ATTRS
4257_mm512_movedup_pd (__m512d __A)
4258{
4259 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
4260 (__v8df)
4261 _mm512_undefined_pd (),
4262 (__mmask8) -1);
4263}
4264
4265static __inline__ __m512d __DEFAULT_FN_ATTRS
4266_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
4267{
4268 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
4269 (__v8df) __W,
4270 (__mmask8) __U);
4271}
4272
4273static __inline__ __m512d __DEFAULT_FN_ATTRS
4274_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
4275{
4276 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
4277 (__v8df)
4278 _mm512_setzero_pd (),
4279 (__mmask8) __U);
4280}
4281
/* Packed double-precision fixupimm macros.  A and B are viewed as __v8df,
   C as __v8di, and imm is forwarded as given.  The mask/unmasked forms call
   __builtin_ia32_fixupimmpd512_mask and the maskz forms call
   __builtin_ia32_fixupimmpd512_maskz.  The *_round_* forms pass the caller's
   R as the last builtin argument; the others pass
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_mask((__v8df)(A), (__v8df)(B), \
                                    (__v8di)(C), (imm), \
                                    (__mmask8)-1, (R)); })

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_mask((__v8df)(A), (__v8df)(B), \
                                    (__v8di)(C), (imm), \
                                    (__mmask8)(U), (R)); })

#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_mask((__v8df)(A), (__v8df)(B), \
                                    (__v8di)(C), (imm), \
                                    (__mmask8)-1, \
                                    _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_mask((__v8df)(A), (__v8df)(B), \
                                    (__v8di)(C), (imm), \
                                    (__mmask8)(U), \
                                    _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_maskz((__v8df)(A), (__v8df)(B), \
                                     (__v8di)(C), (imm), \
                                     (__mmask8)(U), (R)); })

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmpd512_maskz((__v8df)(A), (__v8df)(B), \
                                     (__v8di)(C), (imm), \
                                     (__mmask8)(U), \
                                     _MM_FROUND_CUR_DIRECTION); })
4332
/* Packed single-precision fixupimm macros.  A and B are viewed as __v16sf,
   C as __v16si, and imm is forwarded as given.  The mask/unmasked forms
   call __builtin_ia32_fixupimmps512_mask and the maskz forms call
   __builtin_ia32_fixupimmps512_maskz.  The *_round_* forms pass the caller's
   R as the last builtin argument; the others pass
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmps512_mask((__v16sf)(A), (__v16sf)(B), \
                                    (__v16si)(C), (imm), \
                                    (__mmask16)-1, (R)); })

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmps512_mask((__v16sf)(A), (__v16sf)(B), \
                                    (__v16si)(C), (imm), \
                                    (__mmask16)(U), (R)); })

#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmps512_mask((__v16sf)(A), (__v16sf)(B), \
                                    (__v16si)(C), (imm), \
                                    (__mmask16)-1, \
                                    _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmps512_mask((__v16sf)(A), (__v16sf)(B), \
                                    (__v16si)(C), (imm), \
                                    (__mmask16)(U), \
                                    _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmps512_maskz((__v16sf)(A), (__v16sf)(B), \
                                     (__v16si)(C), (imm), \
                                     (__mmask16)(U), (R)); })

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmps512_maskz((__v16sf)(A), (__v16sf)(B), \
                                     (__v16si)(C), (imm), \
                                     (__mmask16)(U), \
                                     _MM_FROUND_CUR_DIRECTION); })
4383
/* Scalar double-precision fixupimm macros.  A and B are viewed as __v2df,
   C as __v2di, and imm is forwarded as given.  The mask/unmasked forms call
   __builtin_ia32_fixupimmsd_mask and the maskz forms call
   __builtin_ia32_fixupimmsd_maskz.  The *_round_* forms pass the caller's R
   as the last builtin argument; the others pass
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask((__v2df)(A), (__v2df)(B), \
                                 (__v2di)(C), (imm), \
                                 (__mmask8)-1, (R)); })

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask((__v2df)(A), (__v2df)(B), \
                                 (__v2di)(C), (imm), \
                                 (__mmask8)(U), (R)); })

#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask((__v2df)(A), (__v2df)(B), \
                                 (__v2di)(C), (imm), \
                                 (__mmask8)-1, \
                                 _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask((__v2df)(A), (__v2df)(B), \
                                 (__v2di)(C), (imm), \
                                 (__mmask8)(U), \
                                 _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmsd_maskz((__v2df)(A), (__v2df)(B), \
                                  (__v2di)(C), (imm), \
                                  (__mmask8)(U), (R)); })

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmsd_maskz((__v2df)(A), (__v2df)(B), \
                                  (__v2di)(C), (imm), \
                                  (__mmask8)(U), \
                                  _MM_FROUND_CUR_DIRECTION); })
4430
/* Scalar single-precision fixupimm macros.  A and B are viewed as __v4sf,
   C as __v4si, and imm is forwarded as given.  The mask/unmasked forms call
   __builtin_ia32_fixupimmss_mask and the maskz forms call
   __builtin_ia32_fixupimmss_maskz.  The *_round_* forms pass the caller's R
   as the last builtin argument; the others pass
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmss_mask((__v4sf)(A), (__v4sf)(B), \
                                 (__v4si)(C), (imm), \
                                 (__mmask8)-1, (R)); })

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmss_mask((__v4sf)(A), (__v4sf)(B), \
                                 (__v4si)(C), (imm), \
                                 (__mmask8)(U), (R)); })

#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmss_mask((__v4sf)(A), (__v4sf)(B), \
                                 (__v4si)(C), (imm), \
                                 (__mmask8)-1, \
                                 _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmss_mask((__v4sf)(A), (__v4sf)(B), \
                                 (__v4si)(C), (imm), \
                                 (__mmask8)(U), \
                                 _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
  __builtin_ia32_fixupimmss_maskz((__v4sf)(A), (__v4sf)(B), \
                                  (__v4si)(C), (imm), \
                                  (__mmask8)(U), (R)); })

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
  __builtin_ia32_fixupimmss_maskz((__v4sf)(A), (__v4sf)(B), \
                                  (__v4si)(C), (imm), \
                                  (__mmask8)(U), \
                                  _MM_FROUND_CUR_DIRECTION); })
4475
4476#define _mm_getexp_round_sd( __A, __B ,__R) __extension__ ({ \
4477__builtin_ia32_getexpsd128_round_mask ((__v2df)(__A),\
4478 (__v2df)( __B), (__v2df) _mm_setzero_pd(), (__mmask8) -1,\
4479 ( __R));\
4480})
4481
4482
4483static __inline__ __m128d __DEFAULT_FN_ATTRS
4484_mm_getexp_sd (__m128d __A, __m128d __B)
4485{
4486 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
4487 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
4488}
4489
Michael Zuckermana1ceca22016-04-22 10:06:10 +00004490static __inline__ __m128d __DEFAULT_FN_ATTRS
4491_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
4492{
4493 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
4494 (__v2df) __B,
4495 (__v2df) __W,
4496 (__mmask8) __U,
4497 _MM_FROUND_CUR_DIRECTION);
4498}
4499
/* Masked scalar double-precision getexp with a caller-supplied last
   argument R.  A, B and W are viewed as __v2df and U as __mmask8.

   Every macro argument is parenthesised before it is cast: a cast binds
   tighter than binary operators, so "(__v2df) x + y" would convert only x
   if the caller passed the expression x + y.  */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({ \
  __builtin_ia32_getexpsd128_round_mask((__v2df)(A), (__v2df)(B), \
                                        (__v2df)(W), (__mmask8)(U), \
                                        (R)); })
4507
4508static __inline__ __m128d __DEFAULT_FN_ATTRS
4509_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
4510{
4511 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
4512 (__v2df) __B,
4513 (__v2df) _mm_setzero_pd (),
4514 (__mmask8) __U,
4515 _MM_FROUND_CUR_DIRECTION);
4516}
4517
/* Zero-masked scalar double-precision getexp with a caller-supplied last
   argument R.  A and B are viewed as __v2df, the third operand is a zeroed
   vector and U is the mask.

   Macro arguments are parenthesised before being cast so that compound
   argument expressions are converted as a whole.  */
#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({ \
  __builtin_ia32_getexpsd128_round_mask((__v2df)(A), (__v2df)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (R)); })
4525
Michael Zuckermandef78752016-03-28 12:23:09 +00004526#define _mm_getexp_round_ss( __A, __B, __R) __extension__ ({ \
4527__builtin_ia32_getexpss128_round_mask ((__v4sf)( __A),\
4528 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(), (__mmask8) -1,\
4529 ( __R));\
4530})
4531
4532static __inline__ __m128 __DEFAULT_FN_ATTRS
4533_mm_getexp_ss (__m128 __A, __m128 __B)
4534{
4535 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
4536 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
4537}
4538
Michael Zuckermana1ceca22016-04-22 10:06:10 +00004539static __inline__ __m128d __DEFAULT_FN_ATTRS
4540_mm_mask_getexp_ss (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
4541{
4542 return (__m128d) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
4543 (__v4sf) __B,
4544 (__v4sf) __W,
4545 (__mmask8) __U,
4546 _MM_FROUND_CUR_DIRECTION);
4547}
4548
/* Masked scalar single-precision getexp with a caller-supplied last
   argument R.  A, B and W are viewed as __v4sf and U as __mmask8.

   Every macro argument is parenthesised before it is cast: a cast binds
   tighter than binary operators, so an unparenthesised compound argument
   would only have its first operand converted.  */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({ \
  __builtin_ia32_getexpss128_round_mask((__v4sf)(A), (__v4sf)(B), \
                                        (__v4sf)(W), (__mmask8)(U), \
                                        (R)); })
4556
4557static __inline__ __m128d __DEFAULT_FN_ATTRS
4558_mm_maskz_getexp_ss (__mmask8 __U, __m128d __A, __m128d __B)
4559{
4560 return (__m128d) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
4561 (__v4sf) __B,
4562 (__v4sf) _mm_setzero_pd (),
4563 (__mmask8) __U,
4564 _MM_FROUND_CUR_DIRECTION);
4565}
4566
/* Zero-masked scalar single-precision getexp with a caller-supplied last
   argument R.  A and B are viewed as __v4sf, the third operand is a zeroed
   vector and U is the mask.

   Macro arguments are parenthesised before being cast so that compound
   argument expressions are converted as a whole.  */
#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({ \
  __builtin_ia32_getexpss128_round_mask((__v4sf)(A), (__v4sf)(B), \
                                        (__v4sf)_mm_setzero_ps(), \
                                        (__mmask8)(U), (R)); })
4574
/* Scalar double-precision getmant, unmasked.  A and B are viewed as __v2df;
   the builtin's third argument is built as ((D) << 2) | (C); the fourth is a
   zeroed vector and the mask is all ones.  */

/* Caller-supplied last argument R.  */
#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(A), (__v2df)(B), \
                                      ((D) << 2) | (C), \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)-1, (R)); })

/* _MM_FROUND_CUR_DIRECTION as the last argument.  */
#define _mm_getmant_sd(A, B, C, D) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(A), (__v2df)(B), \
                                      ((D) << 2) | (C), \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })
4588
/* Masked scalar double-precision getmant with _MM_FROUND_CUR_DIRECTION as
   the last builtin argument.  A, B and W are viewed as __v2df, U as
   __mmask8, and the third builtin argument is ((D) << 2) | (C).

   Every macro argument is parenthesised before it is cast: a cast binds
   tighter than binary operators, so an unparenthesised compound argument
   would only have its first operand converted.  */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(A), (__v2df)(B), \
                                      ((D) << 2) | (C), \
                                      (__v2df)(W), (__mmask8)(U), \
                                      _MM_FROUND_CUR_DIRECTION); })
4597
/* Masked scalar double-precision getmant with a caller-supplied last
   argument R.  A, B and W are viewed as __v2df, U as __mmask8, and the
   third builtin argument is ((D) << 2) | (C).

   The statement expression is introduced with __extension__ like every
   other macro in this header, and every macro argument is parenthesised
   before it is cast so that compound argument expressions are converted as
   a whole.  */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(A), (__v2df)(B), \
                                      ((D) << 2) | (C), \
                                      (__v2df)(W), (__mmask8)(U), \
                                      (R)); })
4606
/* Zero-masked scalar double-precision getmant with
   _MM_FROUND_CUR_DIRECTION as the last builtin argument.  A and B are
   viewed as __v2df, the fourth operand is a zeroed vector, U is the mask,
   and the third builtin argument is ((D) << 2) | (C).

   Macro arguments are parenthesised before being cast so that compound
   argument expressions are converted as a whole.  */
#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(A), (__v2df)(B), \
                                      ((D) << 2) | (C), \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)(U), \
                                      _MM_FROUND_CUR_DIRECTION); })
4615
/* Zero-masked scalar double-precision getmant with a caller-supplied last
   argument R.  A and B are viewed as __v2df, the fourth operand is a zeroed
   vector, U is the mask, and the third builtin argument is
   ((D) << 2) | (C).

   Macro arguments are parenthesised before being cast so that compound
   argument expressions are converted as a whole.  */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({ \
  __builtin_ia32_getmantsd_round_mask((__v2df)(A), (__v2df)(B), \
                                      ((D) << 2) | (C), \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)(U), (R)); })
4624
/* Scalar single-precision getmant, unmasked.  A and B are viewed as __v4sf;
   the builtin's third argument is built as ((D) << 2) | (C); the fourth is a
   zeroed vector and the mask is all ones.  */

/* Caller-supplied last argument R.  */
#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(A), (__v4sf)(B), \
                                      ((D) << 2) | (C), \
                                      (__v4sf)_mm_setzero_ps(), \
                                      (__mmask8)-1, (R)); })

/* _MM_FROUND_CUR_DIRECTION as the last argument.  */
#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(A), (__v4sf)(B), \
                                      ((D) << 2) | (C), \
                                      (__v4sf)_mm_setzero_ps(), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })
4638
/* Masked scalar single-precision getmant with _MM_FROUND_CUR_DIRECTION as
   the last builtin argument.  A, B and W are viewed as __v4sf, U as
   __mmask8, and the third builtin argument is ((D) << 2) | (C).

   Every macro argument is parenthesised before it is cast: a cast binds
   tighter than binary operators, so an unparenthesised compound argument
   would only have its first operand converted.  */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(A), (__v4sf)(B), \
                                      ((D) << 2) | (C), \
                                      (__v4sf)(W), (__mmask8)(U), \
                                      _MM_FROUND_CUR_DIRECTION); })
4647
/* Masked scalar single-precision getmant with a caller-supplied last
   argument R.  A, B and W are viewed as __v4sf, U as __mmask8, and the
   third builtin argument is ((D) << 2) | (C).

   The statement expression is introduced with __extension__ like every
   other macro in this header, and every macro argument is parenthesised
   before it is cast so that compound argument expressions are converted as
   a whole.  */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(A), (__v4sf)(B), \
                                      ((D) << 2) | (C), \
                                      (__v4sf)(W), (__mmask8)(U), \
                                      (R)); })
4656
/* Zero-masked scalar single-precision getmant with
   _MM_FROUND_CUR_DIRECTION as the last builtin argument.  A and B are
   viewed as __v4sf, U is the mask, and the third builtin argument is
   ((D) << 2) | (C).

   The zeroed fourth operand comes from _mm_setzero_ps (), the
   single-precision zero that the other *_ss getmant macros use, and macro
   arguments are parenthesised before being cast so that compound argument
   expressions are converted as a whole.  */
#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(A), (__v4sf)(B), \
                                      ((D) << 2) | (C), \
                                      (__v4sf)_mm_setzero_ps(), \
                                      (__mmask8)(U), \
                                      _MM_FROUND_CUR_DIRECTION); })
4665
/* Zero-masked scalar single-precision getmant with a caller-supplied last
   argument R.  A and B are viewed as __v4sf, the fourth operand is a zeroed
   vector, U is the mask, and the third builtin argument is
   ((D) << 2) | (C).

   Macro arguments are parenthesised before being cast so that compound
   argument expressions are converted as a whole.  */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({ \
  __builtin_ia32_getmantss_round_mask((__v4sf)(A), (__v4sf)(B), \
                                      ((D) << 2) | (C), \
                                      (__v4sf)_mm_setzero_ps(), \
                                      (__mmask8)(U), (R)); })
Michael Zuckermandef78752016-03-28 12:23:09 +00004674
4675static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4676_mm512_kmov (__mmask16 __A)
4677{
4678 return __A;
4679}
4680
/* Calls __builtin_ia32_vcomisd on A and B (both viewed as __v2df),
   forwarding P and R unchanged as the third and fourth arguments.  */
#define _mm_comi_round_sd(A, B, P, R) __extension__ ({ \
  __builtin_ia32_vcomisd((__v2df)(A), (__v2df)(B), (P), (R)); })
4684
/* Calls __builtin_ia32_vcomiss on A and B (both viewed as __v4sf),
   forwarding P and R unchanged as the third and fourth arguments.  */
#define _mm_comi_round_ss(A, B, P, R) __extension__ ({ \
  __builtin_ia32_vcomiss((__v4sf)(A), (__v4sf)(B), (P), (R)); })
4688
Michael Zuckermancdd54c82016-04-10 12:54:23 +00004689static __inline__ __m512d __DEFAULT_FN_ATTRS
4690_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4691{
4692 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
4693 (__v8df) __B,
4694 (__v8df) __W,
4695 (__mmask8) __U);
4696}
/* Calls __builtin_ia32_vcvtsd2si64 on A (viewed as __v2df), forwarding R
   unchanged as the second argument.  */
#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
  __builtin_ia32_vcvtsd2si64((__v2df)(A), (R)); })
Michael Zuckermand8d2f622016-04-11 07:15:34 +00004700static __inline__ __m512i __DEFAULT_FN_ATTRS
4701_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
4702 __mmask16 __U, __m512i __B)
4703{
4704 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
4705 (__v16si) __I
4706 /* idx */ ,
4707 (__v16si) __B,
4708 (__mmask16) __U);
4709}
Michael Zuckerman1af947a2016-04-11 12:32:31 +00004710static __inline__ __m512i __DEFAULT_FN_ATTRS
4711_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
4712{
4713 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
4714 (__v16si) __B,
4715 (__v16si)
4716 _mm512_setzero_si512 (),
4717 (__mmask16) -1);
4718}
Michael Zuckermancdd54c82016-04-10 12:54:23 +00004719
Michael Zuckerman81f468c2016-04-11 17:04:21 +00004720static __inline__ __m512i __DEFAULT_FN_ATTRS
4721_mm512_sll_epi32 (__m512i __A, __m128i __B)
4722{
4723 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
4724 (__v4si) __B,
4725 (__v16si)
4726 _mm512_setzero_si512 (),
4727 (__mmask16) -1);
4728}
4729
4730static __inline__ __m512i __DEFAULT_FN_ATTRS
4731_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
4732{
4733 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
4734 (__v4si) __B,
4735 (__v16si) __W,
4736 (__mmask16) __U);
4737}
4738
4739static __inline__ __m512i __DEFAULT_FN_ATTRS
4740_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
4741{
4742 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
4743 (__v4si) __B,
4744 (__v16si)
4745 _mm512_setzero_si512 (),
4746 (__mmask16) __U);
4747}
4748
4749static __inline__ __m512i __DEFAULT_FN_ATTRS
4750_mm512_sll_epi64 (__m512i __A, __m128i __B)
4751{
4752 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
4753 (__v2di) __B,
4754 (__v8di)
4755 _mm512_setzero_si512 (),
4756 (__mmask8) -1);
4757}
4758
4759static __inline__ __m512i __DEFAULT_FN_ATTRS
4760_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
4761{
4762 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
4763 (__v2di) __B,
4764 (__v8di) __W,
4765 (__mmask8) __U);
4766}
4767
4768static __inline__ __m512i __DEFAULT_FN_ATTRS
4769_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
4770{
4771 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
4772 (__v2di) __B,
4773 (__v8di)
4774 _mm512_setzero_si512 (),
4775 (__mmask8) __U);
4776}
4777
4778static __inline__ __m512i __DEFAULT_FN_ATTRS
4779_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
4780{
4781 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
4782 (__v16si) __Y,
4783 (__v16si)
4784 _mm512_setzero_si512 (),
4785 (__mmask16) -1);
4786}
4787
4788static __inline__ __m512i __DEFAULT_FN_ATTRS
4789_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
4790{
4791 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
4792 (__v16si) __Y,
4793 (__v16si) __W,
4794 (__mmask16) __U);
4795}
4796
4797static __inline__ __m512i __DEFAULT_FN_ATTRS
4798_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
4799{
4800 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
4801 (__v16si) __Y,
4802 (__v16si)
4803 _mm512_setzero_si512 (),
4804 (__mmask16) __U);
4805}
4806
4807static __inline__ __m512i __DEFAULT_FN_ATTRS
4808_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
4809{
4810 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
4811 (__v8di) __Y,
4812 (__v8di)
4813 _mm512_undefined_pd (),
4814 (__mmask8) -1);
4815}
4816
4817static __inline__ __m512i __DEFAULT_FN_ATTRS
4818_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
4819{
4820 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
4821 (__v8di) __Y,
4822 (__v8di) __W,
4823 (__mmask8) __U);
4824}
4825
4826static __inline__ __m512i __DEFAULT_FN_ATTRS
4827_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
4828{
4829 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
4830 (__v8di) __Y,
4831 (__v8di)
4832 _mm512_setzero_si512 (),
4833 (__mmask8) __U);
4834}
4835
4836static __inline__ __m512i __DEFAULT_FN_ATTRS
4837_mm512_sra_epi32 (__m512i __A, __m128i __B)
4838{
4839 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
4840 (__v4si) __B,
4841 (__v16si)
4842 _mm512_setzero_si512 (),
4843 (__mmask16) -1);
4844}
4845
4846static __inline__ __m512i __DEFAULT_FN_ATTRS
4847_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
4848{
4849 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
4850 (__v4si) __B,
4851 (__v16si) __W,
4852 (__mmask16) __U);
4853}
4854
4855static __inline__ __m512i __DEFAULT_FN_ATTRS
4856_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
4857{
4858 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
4859 (__v4si) __B,
4860 (__v16si)
4861 _mm512_setzero_si512 (),
4862 (__mmask16) __U);
4863}
4864
4865static __inline__ __m512i __DEFAULT_FN_ATTRS
4866_mm512_sra_epi64 (__m512i __A, __m128i __B)
4867{
4868 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
4869 (__v2di) __B,
4870 (__v8di)
4871 _mm512_setzero_si512 (),
4872 (__mmask8) -1);
4873}
4874
4875static __inline__ __m512i __DEFAULT_FN_ATTRS
4876_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
4877{
4878 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
4879 (__v2di) __B,
4880 (__v8di) __W,
4881 (__mmask8) __U);
4882}
4883
4884static __inline__ __m512i __DEFAULT_FN_ATTRS
4885_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
4886{
4887 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
4888 (__v2di) __B,
4889 (__v8di)
4890 _mm512_setzero_si512 (),
4891 (__mmask8) __U);
4892}
4893
4894static __inline__ __m512i __DEFAULT_FN_ATTRS
4895_mm512_srav_epi32 (__m512i __X, __m512i __Y)
4896{
4897 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
4898 (__v16si) __Y,
4899 (__v16si)
4900 _mm512_setzero_si512 (),
4901 (__mmask16) -1);
4902}
4903
4904static __inline__ __m512i __DEFAULT_FN_ATTRS
4905_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
4906{
4907 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
4908 (__v16si) __Y,
4909 (__v16si) __W,
4910 (__mmask16) __U);
4911}
4912
4913static __inline__ __m512i __DEFAULT_FN_ATTRS
4914_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
4915{
4916 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
4917 (__v16si) __Y,
4918 (__v16si)
4919 _mm512_setzero_si512 (),
4920 (__mmask16) __U);
4921}
4922
4923static __inline__ __m512i __DEFAULT_FN_ATTRS
4924_mm512_srav_epi64 (__m512i __X, __m512i __Y)
4925{
4926 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
4927 (__v8di) __Y,
4928 (__v8di)
4929 _mm512_setzero_si512 (),
4930 (__mmask8) -1);
4931}
4932
4933static __inline__ __m512i __DEFAULT_FN_ATTRS
4934_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
4935{
4936 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
4937 (__v8di) __Y,
4938 (__v8di) __W,
4939 (__mmask8) __U);
4940}
4941
4942static __inline__ __m512i __DEFAULT_FN_ATTRS
4943_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
4944{
4945 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
4946 (__v8di) __Y,
4947 (__v8di)
4948 _mm512_setzero_si512 (),
4949 (__mmask8) __U);
4950}
4951
4952static __inline__ __m512i __DEFAULT_FN_ATTRS
4953_mm512_srl_epi32 (__m512i __A, __m128i __B)
4954{
4955 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
4956 (__v4si) __B,
4957 (__v16si)
4958 _mm512_setzero_si512 (),
4959 (__mmask16) -1);
4960}
4961
4962static __inline__ __m512i __DEFAULT_FN_ATTRS
4963_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
4964{
4965 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
4966 (__v4si) __B,
4967 (__v16si) __W,
4968 (__mmask16) __U);
4969}
4970
4971static __inline__ __m512i __DEFAULT_FN_ATTRS
4972_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
4973{
4974 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
4975 (__v4si) __B,
4976 (__v16si)
4977 _mm512_setzero_si512 (),
4978 (__mmask16) __U);
4979}
4980
4981static __inline__ __m512i __DEFAULT_FN_ATTRS
4982_mm512_srl_epi64 (__m512i __A, __m128i __B)
4983{
4984 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
4985 (__v2di) __B,
4986 (__v8di)
4987 _mm512_setzero_si512 (),
4988 (__mmask8) -1);
4989}
4990
4991static __inline__ __m512i __DEFAULT_FN_ATTRS
4992_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
4993{
4994 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
4995 (__v2di) __B,
4996 (__v8di) __W,
4997 (__mmask8) __U);
4998}
4999
5000static __inline__ __m512i __DEFAULT_FN_ATTRS
5001_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
5002{
5003 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
5004 (__v2di) __B,
5005 (__v8di)
5006 _mm512_setzero_si512 (),
5007 (__mmask8) __U);
5008}
5009
5010static __inline__ __m512i __DEFAULT_FN_ATTRS
5011_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
5012{
5013 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
5014 (__v16si) __Y,
5015 (__v16si)
5016 _mm512_setzero_si512 (),
5017 (__mmask16) -1);
5018}
5019
5020static __inline__ __m512i __DEFAULT_FN_ATTRS
5021_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5022{
5023 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
5024 (__v16si) __Y,
5025 (__v16si) __W,
5026 (__mmask16) __U);
5027}
5028
5029static __inline__ __m512i __DEFAULT_FN_ATTRS
5030_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
5031{
5032 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
5033 (__v16si) __Y,
5034 (__v16si)
5035 _mm512_setzero_si512 (),
5036 (__mmask16) __U);
5037}
5038
5039static __inline__ __m512i __DEFAULT_FN_ATTRS
5040_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5041{
5042 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
5043 (__v8di) __Y,
5044 (__v8di)
5045 _mm512_setzero_si512 (),
5046 (__mmask8) -1);
5047}
5048
5049static __inline__ __m512i __DEFAULT_FN_ATTRS
5050_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5051{
5052 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
5053 (__v8di) __Y,
5054 (__v8di) __W,
5055 (__mmask8) __U);
5056}
5057
5058static __inline__ __m512i __DEFAULT_FN_ATTRS
5059_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
5060{
5061 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
5062 (__v8di) __Y,
5063 (__v8di)
5064 _mm512_setzero_si512 (),
5065 (__mmask8) __U);
5066}
5067
/*
 * Expands to __builtin_ia32_pternlogd512_mask on __A, __B and __C (each
 * viewed as __v16si) with the immediate imm and an all-ones 16-bit mask.
 *
 * The result is cast to __m512i, as the function-style wrappers in this file
 * do, so the expression has the intrinsic's public vector type rather than
 * whatever element type the builtin itself returns.
 */
#define _mm512_ternarylogic_epi32( __A, __B, __C, imm) __extension__ ({ \
  (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)( __A), \
                                              (__v16si)( __B), \
                                              (__v16si)( __C), \
                                              ( imm), (__mmask16) -1); \
})
5074
/*
 * Expands to __builtin_ia32_pternlogd512_mask on __A, __B and __C (each
 * viewed as __v16si) with the immediate imm and __U as the 16-bit mask.
 *
 * The result is cast to __m512i, as the function-style wrappers in this file
 * do, so the expression has the intrinsic's public vector type rather than
 * whatever element type the builtin itself returns.
 */
#define _mm512_mask_ternarylogic_epi32( __A, __U, __B, __C, imm) __extension__ ({ \
  (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)( __A), \
                                              (__v16si)( __B), \
                                              (__v16si)( __C), \
                                              ( imm), (__mmask16)( __U)); \
})
5081
/*
 * Expands to __builtin_ia32_pternlogd512_maskz on __A, __B and __C (each
 * viewed as __v16si) with the immediate imm and __U as the 16-bit mask.
 *
 * The result is cast to __m512i, as the function-style wrappers in this file
 * do, so the expression has the intrinsic's public vector type rather than
 * whatever element type the builtin itself returns.
 */
#define _mm512_maskz_ternarylogic_epi32( __U, __A, __B, __C, imm) __extension__ ({ \
  (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)( __A), \
                                               (__v16si)( __B), \
                                               (__v16si)( __C), \
                                               ( imm), (__mmask16)( __U)); \
})
5088
/*
 * Expands to __builtin_ia32_pternlogq512_mask on __A, __B and __C (each
 * viewed as __v8di) with the immediate imm and an all-ones 8-bit mask.
 *
 * The result is cast to __m512i for consistency with the function-style
 * wrappers in this file, which always cast to the declared return type.
 */
#define _mm512_ternarylogic_epi64( __A, __B, __C, imm) __extension__ ({ \
  (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)( __A), \
                                              (__v8di)( __B), \
                                              (__v8di)( __C), ( imm), \
                                              (__mmask8) -1); \
})
5095
/*
 * Expands to __builtin_ia32_pternlogq512_mask on __A, __B and __C (each
 * viewed as __v8di) with the immediate imm and __U as the 8-bit mask.
 *
 * The result is cast to __m512i for consistency with the function-style
 * wrappers in this file, which always cast to the declared return type.
 */
#define _mm512_mask_ternarylogic_epi64( __A, __U, __B, __C, imm) __extension__ ({ \
  (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)( __A), \
                                              (__v8di)( __B), \
                                              (__v8di)( __C), ( imm), \
                                              (__mmask8)( __U)); \
})
5102
/*
 * Expands to __builtin_ia32_pternlogq512_maskz on __A, __B and __C (each
 * viewed as __v8di) with the immediate imm and __U as the 8-bit mask.
 *
 * The result is cast to __m512i for consistency with the function-style
 * wrappers in this file, which always cast to the declared return type.
 */
#define _mm512_maskz_ternarylogic_epi64( __U, __A, __B, __C, imm) __extension__ ({ \
  (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)( __A), \
                                               (__v8di)( __B), \
                                               (__v8di)( __C), \
                                               ( imm), (__mmask8)( __U)); \
})
5109
Michael Zuckermancdd54c82016-04-10 12:54:23 +00005110static __inline__ __m512d __DEFAULT_FN_ATTRS
5111_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
5112{
5113 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
5114 (__v8df) __B,
5115 (__v8df)
5116 _mm512_setzero_pd (),
5117 (__mmask8) __U);
5118}
5119
5120static __inline__ __m512 __DEFAULT_FN_ATTRS
5121_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
5122{
5123 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
5124 (__v16sf) __B,
5125 (__v16sf) __W,
5126 (__mmask16) __U);
5127}
5128
5129static __inline__ __m512 __DEFAULT_FN_ATTRS
5130_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
5131{
5132 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
5133 (__v16sf) __B,
5134 (__v16sf)
5135 _mm512_setzero_ps (),
5136 (__mmask16) __U);
5137}
5138
5139static __inline__ __m512d __DEFAULT_FN_ATTRS
5140_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
5141{
5142 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
5143 (__v8df) __B,
5144 (__v8df) __W,
5145 (__mmask8) __U);
5146}
5147
5148static __inline__ __m512d __DEFAULT_FN_ATTRS
5149_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
5150{
5151 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
5152 (__v8df) __B,
5153 (__v8df)
5154 _mm512_setzero_pd (),
5155 (__mmask8) __U);
5156}
5157
5158static __inline__ __m512 __DEFAULT_FN_ATTRS
5159_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
5160{
5161 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
5162 (__v16sf) __B,
5163 (__v16sf) __W,
5164 (__mmask16) __U);
5165}
5166
5167static __inline__ __m512 __DEFAULT_FN_ATTRS
5168_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
5169{
5170 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
5171 (__v16sf) __B,
5172 (__v16sf)
5173 _mm512_setzero_ps (),
5174 (__mmask16) __U);
5175}
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005176
/* Expands to __builtin_ia32_vcvtsd2si64 applied to __A (viewed as __v2df),
 * with __R forwarded as the second argument. */
#define _mm_cvt_roundsd_i64( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtsd2si64 ((__v2df)( __A), ( __R)); \
})
5180
/* Expands to __builtin_ia32_vcvtsd2si32 applied to __A (viewed as __v2df),
 * with __R forwarded as the second argument. */
#define _mm_cvt_roundsd_si32( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtsd2si32 ((__v2df)( __A), ( __R)); \
})
5184
/* Expands to __builtin_ia32_vcvtsd2si32 applied to __A (viewed as __v2df),
 * with __R forwarded as the second argument. */
#define _mm_cvt_roundsd_i32( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtsd2si32 ((__v2df)( __A), ( __R)); \
})
5188
/* Expands to __builtin_ia32_vcvtsd2usi32 applied to __A (viewed as __v2df),
 * with __R forwarded as the second argument. */
#define _mm_cvt_roundsd_u32( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtsd2usi32 ((__v2df)( __A), ( __R)); \
})
5192
5193static __inline__ unsigned __DEFAULT_FN_ATTRS
5194_mm_cvtsd_u32 (__m128d __A)
5195{
5196 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5197 _MM_FROUND_CUR_DIRECTION);
5198}
5199
/* Expands to __builtin_ia32_vcvtsd2usi64 applied to __A (viewed as __v2df),
 * with __R forwarded as the second argument. */
#define _mm_cvt_roundsd_u64( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtsd2usi64 ((__v2df)( __A), ( __R)); \
})
5203
5204static __inline__ unsigned long long __DEFAULT_FN_ATTRS
5205_mm_cvtsd_u64 (__m128d __A)
5206{
5207 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5208 __A,
5209 _MM_FROUND_CUR_DIRECTION);
5210}
5211
/* Expands to __builtin_ia32_vcvtss2si32 applied to __A (viewed as __v4sf),
 * with __R forwarded as the second argument. */
#define _mm_cvt_roundss_si32( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtss2si32 ((__v4sf)( __A), ( __R)); \
})
5215
/* Expands to __builtin_ia32_vcvtss2si32 applied to __A (viewed as __v4sf),
 * with __R forwarded as the second argument. */
#define _mm_cvt_roundss_i32( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtss2si32 ((__v4sf)( __A), ( __R)); \
})
5219
/* Expands to __builtin_ia32_vcvtss2si64 applied to __A (viewed as __v4sf),
 * with __R forwarded as the second argument. */
#define _mm_cvt_roundss_si64( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtss2si64 ((__v4sf)( __A), ( __R)); \
})
5223
/* Expands to __builtin_ia32_vcvtss2si64 applied to __A (viewed as __v4sf),
 * with __R forwarded as the second argument. */
#define _mm_cvt_roundss_i64( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtss2si64 ((__v4sf)( __A), ( __R)); \
})
5227
/* Expands to __builtin_ia32_vcvtss2usi32 applied to __A (viewed as __v4sf),
 * with __R forwarded as the second argument. */
#define _mm_cvt_roundss_u32( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtss2usi32 ((__v4sf)( __A), ( __R)); \
})
5231
5232static __inline__ unsigned __DEFAULT_FN_ATTRS
5233_mm_cvtss_u32 (__m128 __A)
5234{
5235 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
5236 _MM_FROUND_CUR_DIRECTION);
5237}
5238
/* Expands to __builtin_ia32_vcvtss2usi64 applied to __A (viewed as __v4sf),
 * with __R forwarded as the second argument. */
#define _mm_cvt_roundss_u64( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtss2usi64 ((__v4sf)( __A), ( __R)); \
})
5242
5243static __inline__ unsigned long long __DEFAULT_FN_ATTRS
5244_mm_cvtss_u64 (__m128 __A)
5245{
5246 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
5247 __A,
5248 _MM_FROUND_CUR_DIRECTION);
5249}
5250
/* Expands to __builtin_ia32_vcvttsd2si32 applied to __A (viewed as __v2df),
 * with __R forwarded as the second argument. */
#define _mm_cvtt_roundsd_i32( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvttsd2si32 ((__v2df)( __A), ( __R)); \
})
5254
/* Expands to __builtin_ia32_vcvttsd2si32 applied to __A (viewed as __v2df),
 * with __R forwarded as the second argument. */
#define _mm_cvtt_roundsd_si32( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvttsd2si32 ((__v2df)( __A), ( __R)); \
})
5258
5259static __inline__ int __DEFAULT_FN_ATTRS
5260_mm_cvttsd_i32 (__m128d __A)
5261{
5262 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
5263 _MM_FROUND_CUR_DIRECTION);
5264}
5265
/* Expands to __builtin_ia32_vcvttsd2si64 applied to __A (viewed as __v2df),
 * with __R forwarded as the second argument. */
#define _mm_cvtt_roundsd_si64( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvttsd2si64 ((__v2df)( __A), ( __R)); \
})
5269
/* Expands to __builtin_ia32_vcvttsd2si64 applied to __A (viewed as __v2df),
 * with __R forwarded as the second argument. */
#define _mm_cvtt_roundsd_i64( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvttsd2si64 ((__v2df)( __A), ( __R)); \
})
5273
5274static __inline__ long long __DEFAULT_FN_ATTRS
5275_mm_cvttsd_i64 (__m128d __A)
5276{
5277 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
5278 _MM_FROUND_CUR_DIRECTION);
5279}
5280
/* Expands to __builtin_ia32_vcvttsd2usi32 applied to __A (viewed as __v2df),
 * with __R forwarded as the second argument. */
#define _mm_cvtt_roundsd_u32( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvttsd2usi32 ((__v2df)( __A), ( __R)); \
})
5284
5285static __inline__ unsigned __DEFAULT_FN_ATTRS
5286_mm_cvttsd_u32 (__m128d __A)
5287{
5288 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
5289 _MM_FROUND_CUR_DIRECTION);
5290}
5291
/* Expands to __builtin_ia32_vcvttsd2usi64 applied to __A (viewed as __v2df),
 * with __R forwarded as the second argument. */
#define _mm_cvtt_roundsd_u64( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvttsd2usi64 ((__v2df)( __A), ( __R)); \
})
5295
5296static __inline__ unsigned long long __DEFAULT_FN_ATTRS
5297_mm_cvttsd_u64 (__m128d __A)
5298{
5299 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
5300 __A,
5301 _MM_FROUND_CUR_DIRECTION);
5302}
5303
/* Expands to __builtin_ia32_vcvttss2si32 applied to __A (viewed as __v4sf),
 * with __R forwarded as the second argument. */
#define _mm_cvtt_roundss_i32( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvttss2si32 ((__v4sf)( __A), ( __R)); \
})
5307
/* Expands to __builtin_ia32_vcvttss2si32 applied to __A (viewed as __v4sf),
 * with __R forwarded as the second argument. */
#define _mm_cvtt_roundss_si32( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvttss2si32 ((__v4sf)( __A), ( __R)); \
})
5311
5312static __inline__ int __DEFAULT_FN_ATTRS
5313_mm_cvttss_i32 (__m128 __A)
5314{
5315 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
5316 _MM_FROUND_CUR_DIRECTION);
5317}
5318
/* Expands to __builtin_ia32_vcvttss2si64 applied to __A (viewed as __v4sf),
 * with __R forwarded as the second argument. */
#define _mm_cvtt_roundss_i64( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvttss2si64 ((__v4sf)( __A), ( __R)); \
})
5322
/* Expands to __builtin_ia32_vcvttss2si64 applied to __A (viewed as __v4sf),
 * with __R forwarded as the second argument. */
#define _mm_cvtt_roundss_si64( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvttss2si64 ((__v4sf)( __A), ( __R)); \
})
5326
5327static __inline__ long long __DEFAULT_FN_ATTRS
5328_mm_cvttss_i64 (__m128 __A)
5329{
5330 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
5331 _MM_FROUND_CUR_DIRECTION);
5332}
5333
/* Expands to __builtin_ia32_vcvttss2usi32 applied to __A (viewed as __v4sf),
 * with __R forwarded as the second argument. */
#define _mm_cvtt_roundss_u32( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvttss2usi32 ((__v4sf)( __A), ( __R)); \
})
5337
5338static __inline__ unsigned __DEFAULT_FN_ATTRS
5339_mm_cvttss_u32 (__m128 __A)
5340{
5341 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
5342 _MM_FROUND_CUR_DIRECTION);
5343}
5344
/* Expands to __builtin_ia32_vcvttss2usi64 applied to __A (viewed as __v4sf),
 * with __R forwarded as the second argument. */
#define _mm_cvtt_roundss_u64( __A, __R) __extension__ ({ \
  __builtin_ia32_vcvttss2usi64 ((__v4sf)( __A), ( __R)); \
})
5348
5349static __inline__ unsigned long long __DEFAULT_FN_ATTRS
5350_mm_cvttss_u64 (__m128 __A)
5351{
5352 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
5353 __A,
5354 _MM_FROUND_CUR_DIRECTION);
5355}
5356
Michael Zuckermand8d2f622016-04-11 07:15:34 +00005357static __inline__ __m512d __DEFAULT_FN_ATTRS
5358_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
5359 __m512d __B)
5360{
5361 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
5362 (__v8di) __I
5363 /* idx */ ,
5364 (__v8df) __B,
5365 (__mmask8) __U);
5366}
5367
5368static __inline__ __m512 __DEFAULT_FN_ATTRS
5369_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
5370 __m512 __B)
5371{
5372 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
5373 (__v16si) __I
5374 /* idx */ ,
5375 (__v16sf) __B,
5376 (__mmask16) __U);
5377}
5378
5379static __inline__ __m512i __DEFAULT_FN_ATTRS
5380_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5381 __mmask8 __U, __m512i __B)
5382{
5383 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5384 (__v8di) __I
5385 /* idx */ ,
5386 (__v8di) __B,
5387 (__mmask8) __U);
5388}
5389
/* Unmasked form: expands to __builtin_ia32_vpermilpd512_mask on __X and the
 * control value __C, with _mm512_undefined_pd() as the third operand and an
 * all-ones 8-bit mask. */
#define _mm512_permute_pd( __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd512_mask ((__v8df)( __X), ( __C), \
                                    (__v8df) _mm512_undefined_pd (), \
                                    (__mmask8) -1); \
})
5396
/* Expands to __builtin_ia32_vpermilpd512_mask on __X and the control value
 * __C, with __W as the third operand and __U as the 8-bit mask. */
#define _mm512_mask_permute_pd( __W, __U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd512_mask ((__v8df)( __X), ( __C), \
                                    (__v8df)( __W), \
                                    (__mmask8)( __U)); \
})
5402
/* Expands to __builtin_ia32_vpermilpd512_mask on __X and the control value
 * __C, with a zeroed third operand and __U as the 8-bit mask. */
#define _mm512_maskz_permute_pd( __U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd512_mask ((__v8df)( __X), ( __C), \
                                    (__v8df) _mm512_setzero_pd (), \
                                    (__mmask8)( __U)); \
})
5409
/* Unmasked form: expands to __builtin_ia32_vpermilps512_mask on __X and the
 * control value __C, with _mm512_undefined_ps() as the third operand and an
 * all-ones 16-bit mask. */
#define _mm512_permute_ps( __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps512_mask ((__v16sf)( __X), ( __C), \
                                    (__v16sf) _mm512_undefined_ps (), \
                                    (__mmask16) -1); \
})
5416
/* Expands to __builtin_ia32_vpermilps512_mask on __X and the control value
 * __C, with __W as the third operand and __U as the 16-bit mask. */
#define _mm512_mask_permute_ps( __W, __U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps512_mask ((__v16sf)( __X), ( __C), \
                                    (__v16sf)( __W), \
                                    (__mmask16)( __U)); \
})
5422
/* Expands to __builtin_ia32_vpermilps512_mask on __X and the control value
 * __C, with a zeroed third operand and __U as the 16-bit mask. */
#define _mm512_maskz_permute_ps( __U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps512_mask ((__v16sf)( __X), ( __C), \
                                    (__v16sf) _mm512_setzero_ps (), \
                                    (__mmask16)( __U)); \
})
5429
5430static __inline__ __m512d __DEFAULT_FN_ATTRS
5431_mm512_permutevar_pd (__m512d __A, __m512i __C)
5432{
5433 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5434 (__v8di) __C,
5435 (__v8df)
5436 _mm512_undefined_pd (),
5437 (__mmask8) -1);
5438}
5439
5440static __inline__ __m512d __DEFAULT_FN_ATTRS
5441_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5442{
5443 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5444 (__v8di) __C,
5445 (__v8df) __W,
5446 (__mmask8) __U);
5447}
5448
5449static __inline__ __m512d __DEFAULT_FN_ATTRS
5450_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5451{
5452 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5453 (__v8di) __C,
5454 (__v8df)
5455 _mm512_setzero_pd (),
5456 (__mmask8) __U);
5457}
5458
5459static __inline__ __m512 __DEFAULT_FN_ATTRS
5460_mm512_permutevar_ps (__m512 __A, __m512i __C)
5461{
5462 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5463 (__v16si) __C,
5464 (__v16sf)
5465 _mm512_undefined_ps (),
5466 (__mmask16) -1);
5467}
5468
5469static __inline__ __m512 __DEFAULT_FN_ATTRS
5470_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5471{
5472 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5473 (__v16si) __C,
5474 (__v16sf) __W,
5475 (__mmask16) __U);
5476}
5477
5478static __inline__ __m512 __DEFAULT_FN_ATTRS
5479_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5480{
5481 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5482 (__v16si) __C,
5483 (__v16sf)
5484 _mm512_setzero_ps (),
5485 (__mmask16) __U);
5486}
5487
5488static __inline__ __m512i __DEFAULT_FN_ATTRS
5489_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5490 __m512i __I, __m512i __B)
5491{
5492 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5493 /* idx */ ,
5494 (__v16si) __A,
5495 (__v16si) __B,
5496 (__mmask16) __U);
5497}
5498
5499static __inline__ __m512d __DEFAULT_FN_ATTRS
5500_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
5501 __m512d __B)
5502{
5503 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
5504 /* idx */ ,
5505 (__v8df) __A,
5506 (__v8df) __B,
5507 (__mmask8) __U);
5508}
5509
5510static __inline__ __m512 __DEFAULT_FN_ATTRS
5511_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
5512 __m512 __B)
5513{
5514 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
5515 /* idx */ ,
5516 (__v16sf) __A,
5517 (__v16sf) __B,
5518 (__mmask16) __U);
5519}
5520
5521static __inline__ __m512i __DEFAULT_FN_ATTRS
5522_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5523 __m512i __I, __m512i __B)
5524{
5525 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5526 /* idx */ ,
5527 (__v8di) __A,
5528 (__v8di) __B,
5529 (__mmask8) __U);
5530}
5531
Michael Zuckerman07525092016-04-11 10:22:07 +00005532static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5533_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
5534{
5535 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
5536 (__v16si) __B,
5537 (__mmask16) -1);
5538}
5539
5540static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5541_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
5542{
5543 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
5544 (__v16si) __B, __U);
5545}
5546
5547static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5548_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
5549{
5550 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
5551 (__v8di) __B,
5552 (__mmask8) -1);
5553}
5554
5555static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5556_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
5557{
5558 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
5559 (__v8di) __B, __U);
5560}
5561
/*
 * Unmasked form: expands to __builtin_ia32_cvttpd2udq512_mask on __A (viewed
 * as __v8df) with _mm256_undefined_si256() as the second operand, an
 * all-ones 8-bit mask, and __R forwarded as the last argument.
 *
 * The result is cast to __m256i, exactly as the function form
 * _mm512_cvttpd_epu32 does, so the macro and function variants yield the
 * same type.
 */
#define _mm512_cvtt_roundpd_epu32( __A, __R) __extension__ ({ \
  (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df)( __A), \
                                               (__v8si) _mm256_undefined_si256 (), \
                                               (__mmask8) -1, ( __R)); \
})
5568
/*
 * Expands to __builtin_ia32_cvttpd2udq512_mask on __A (viewed as __v8df)
 * with __W as the second operand, __U as the 8-bit mask, and __R forwarded
 * as the last argument.
 *
 * The result is cast to __m256i, exactly as the function form
 * _mm512_mask_cvttpd_epu32 does, so the macro and function variants yield
 * the same type.
 */
#define _mm512_mask_cvtt_roundpd_epu32( __W, __U, __A, __R) __extension__ ({ \
  (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df)( __A), \
                                               (__v8si)( __W), \
                                               (__mmask8)( __U), ( __R)); \
})
5574
/*
 * Expands to __builtin_ia32_cvttpd2udq512_mask on __A (viewed as __v8df)
 * with a zeroed second operand, __U as the 8-bit mask, and __R forwarded as
 * the last argument.
 *
 * The result is cast to __m256i, exactly as the function form
 * _mm512_maskz_cvttpd_epu32 does, so the macro and function variants yield
 * the same type.
 */
#define _mm512_maskz_cvtt_roundpd_epu32( __U, __A, __R) __extension__ ({ \
  (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df)( __A), \
                                               (__v8si) _mm256_setzero_si256 (), \
                                               (__mmask8)( __U), ( __R)); \
})
5581
5582static __inline__ __m256i __DEFAULT_FN_ATTRS
5583_mm512_cvttpd_epu32 (__m512d __A)
5584{
5585 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5586 (__v8si)
5587 _mm256_undefined_si256 (),
5588 (__mmask8) -1,
5589 _MM_FROUND_CUR_DIRECTION);
5590}
5591
5592static __inline__ __m256i __DEFAULT_FN_ATTRS
5593_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
5594{
5595 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5596 (__v8si) __W,
5597 (__mmask8) __U,
5598 _MM_FROUND_CUR_DIRECTION);
5599}
5600
5601static __inline__ __m256i __DEFAULT_FN_ATTRS
5602_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
5603{
5604 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5605 (__v8si)
5606 _mm256_setzero_si256 (),
5607 (__mmask8) __U,
5608 _MM_FROUND_CUR_DIRECTION);
5609}
Michael Zuckerman07525092016-04-11 10:22:07 +00005610
Michael Zuckerman1af947a2016-04-11 12:32:31 +00005611static __inline__ __m512i __DEFAULT_FN_ATTRS
5612_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
5613 __m512i __B)
5614{
5615 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
5616 (__v16si) __B,
5617 (__v16si) __W,
5618 (__mmask16) __U);
5619}
5620
5621static __inline__ __m512i __DEFAULT_FN_ATTRS
5622_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5623{
5624 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
5625 (__v16si) __B,
5626 (__v16si)
5627 _mm512_setzero_si512 (),
5628 (__mmask16) __U);
5629}
5630
5631static __inline__ __m512i __DEFAULT_FN_ATTRS
5632_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
5633{
5634 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
5635 (__v8di) __B,
5636 (__v8di)
5637 _mm512_setzero_si512 (),
5638 (__mmask8) -1);
5639}
5640
5641static __inline__ __m512i __DEFAULT_FN_ATTRS
5642_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5643{
5644 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
5645 (__v8di) __B,
5646 (__v8di) __W,
5647 (__mmask8) __U);
5648}
5649
5650static __inline__ __m512i __DEFAULT_FN_ATTRS
5651_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5652{
5653 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
5654 (__v8di) __B,
5655 (__v8di)
5656 _mm512_setzero_si512 (),
5657 (__mmask8) __U);
5658}
5659
5660static __inline__ __m512i __DEFAULT_FN_ATTRS
5661_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
5662{
5663 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
5664 (__v16si) __B,
5665 (__v16si)
5666 _mm512_setzero_si512 (),
5667 (__mmask16) -1);
5668}
5669
5670static __inline__ __m512i __DEFAULT_FN_ATTRS
5671_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
5672 __m512i __B)
5673{
5674 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
5675 (__v16si) __B,
5676 (__v16si) __W,
5677 (__mmask16) __U);
5678}
5679
5680static __inline__ __m512i __DEFAULT_FN_ATTRS
5681_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5682{
5683 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
5684 (__v16si) __B,
5685 (__v16si)
5686 _mm512_setzero_si512 (),
5687 (__mmask16) __U);
5688}
5689
5690static __inline__ __m512i __DEFAULT_FN_ATTRS
5691_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
5692{
5693 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
5694 (__v8di) __B,
5695 (__v8di)
5696 _mm512_setzero_si512 (),
5697 (__mmask8) -1);
5698}
5699
5700static __inline__ __m512i __DEFAULT_FN_ATTRS
5701_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5702{
5703 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
5704 (__v8di) __B,
5705 (__v8di) __W,
5706 (__mmask8) __U);
5707}
5708
5709static __inline__ __m512i __DEFAULT_FN_ATTRS
5710_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5711{
5712 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
5713 (__v8di) __B,
5714 (__v8di)
5715 _mm512_setzero_si512 (),
5716 (__mmask8) __U);
5717}
5718
/* Unmasked form: expands to __builtin_ia32_rndscalesd_round_mask on __A and
 * __B with a zeroed third operand, an all-ones 8-bit mask, then __imm and
 * __R as the trailing arguments. */
#define _mm_roundscale_round_sd( __A, __B, __imm, __R) __extension__ ({ \
  __builtin_ia32_rndscalesd_round_mask ((__v2df)( __A), \
                                        (__v2df)( __B), \
                                        (__v2df) _mm_setzero_pd(), \
                                        (__mmask8) -1, ( __imm), ( __R)); \
})
5724
/* Unmasked form: expands to __builtin_ia32_rndscalesd_round_mask on __A and
 * __B with a zeroed third operand, an all-ones 8-bit mask, __imm, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm_roundscale_sd( __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_rndscalesd_round_mask ((__v2df)( __A), \
                                        (__v2df)( __B), \
                                        (__v2df) _mm_setzero_pd(), \
                                        (__mmask8) -1, ( __imm), \
                                        _MM_FROUND_CUR_DIRECTION); \
})
5731
/* Expands to __builtin_ia32_rndscalesd_round_mask on __A and __B with __W as
 * the third operand, __U as the 8-bit mask, __imm, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm_mask_roundscale_sd( __W, __U, __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_rndscalesd_round_mask ((__v2df)( __A), \
                                        (__v2df)( __B), \
                                        (__v2df)( __W), \
                                        (__mmask8)( __U), \
                                        (__imm), \
                                        _MM_FROUND_CUR_DIRECTION); \
})
5740
/*
 * Expands to __builtin_ia32_rndscalesd_round_mask on __A and __B with __W as
 * the third operand, __U as the 8-bit mask, then __I and __R as the trailing
 * arguments.
 *
 * __I and __R are parenthesized in the expansion, matching how the sibling
 * roundscale macros treat their immediate and rounding arguments.
 */
#define _mm_mask_roundscale_round_sd( __W, __U, __A, __B, __I, __R) __extension__ ({ \
  __builtin_ia32_rndscalesd_round_mask ((__v2df)( __A), \
                                        (__v2df)( __B), \
                                        (__v2df)( __W), \
                                        (__mmask8)( __U), \
                                        ( __I), \
                                        ( __R)); \
})
5749
/*
 * Expands to __builtin_ia32_rndscalesd_round_mask on __A and __B with a
 * zeroed third operand, __U as the 8-bit mask, __I, and
 * _MM_FROUND_CUR_DIRECTION as the last argument.
 *
 * __I is parenthesized in the expansion, matching how the sibling roundscale
 * macros treat their immediate argument.
 */
#define _mm_maskz_roundscale_sd( __U, __A, __B, __I) __extension__ ({ \
  __builtin_ia32_rndscalesd_round_mask ((__v2df)( __A), \
                                        (__v2df)( __B), \
                                        (__v2df) _mm_setzero_pd (), \
                                        (__mmask8)( __U), \
                                        ( __I), \
                                        _MM_FROUND_CUR_DIRECTION); \
})
5758
/*
 * Expands to __builtin_ia32_rndscalesd_round_mask on __A and __B with a
 * zeroed third operand, __U as the 8-bit mask, then __I and __R as the
 * trailing arguments.
 *
 * __I and __R are parenthesized in the expansion, matching how the sibling
 * roundscale macros treat their immediate and rounding arguments.
 */
#define _mm_maskz_roundscale_round_sd( __U, __A, __B, __I, __R) __extension__ ({ \
  __builtin_ia32_rndscalesd_round_mask ((__v2df)( __A), \
                                        (__v2df)( __B), \
                                        (__v2df) _mm_setzero_pd (), \
                                        (__mmask8)( __U), \
                                        ( __I), \
                                        ( __R)); \
})
5767
/*
 * Unmasked form: expands to __builtin_ia32_rndscaless_round_mask on __A and
 * __B with a zeroed third operand, an all-ones 8-bit mask, then __imm and
 * __R as the trailing arguments.
 *
 * __imm and __R are parenthesized in the expansion, as the double-precision
 * counterpart _mm_roundscale_round_sd already does.
 */
#define _mm_roundscale_round_ss( __A, __B, __imm, __R) __extension__ ({ \
  __builtin_ia32_rndscaless_round_mask ((__v4sf)( __A), \
                                        (__v4sf)( __B), \
                                        (__v4sf) _mm_setzero_ps(), \
                                        (__mmask8) -1, ( __imm), ( __R)); \
})
5773
/* Unmasked form: expands to __builtin_ia32_rndscaless_round_mask on __A and
 * __B with a zeroed third operand, an all-ones 8-bit mask, __imm, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm_roundscale_ss( __A, __B, __imm) __extension__ ({ \
  __builtin_ia32_rndscaless_round_mask ((__v4sf)( __A), \
                                        (__v4sf)( __B), \
                                        (__v4sf) _mm_setzero_ps(), \
                                        (__mmask8) -1, ( __imm), \
                                        _MM_FROUND_CUR_DIRECTION); \
})
5780
/*
 * Expands to __builtin_ia32_rndscaless_round_mask on __A and __B with __W as
 * the third operand, __U as the 8-bit mask, __I, and
 * _MM_FROUND_CUR_DIRECTION as the last argument.
 *
 * __I is parenthesized in the expansion, matching how the sibling roundscale
 * macros treat their immediate argument.
 */
#define _mm_mask_roundscale_ss( __W, __U, __A, __B, __I) __extension__ ({ \
  __builtin_ia32_rndscaless_round_mask ((__v4sf)( __A), \
                                        (__v4sf)( __B), \
                                        (__v4sf)( __W), \
                                        (__mmask8)( __U), \
                                        ( __I), \
                                        _MM_FROUND_CUR_DIRECTION); \
})
5789
/*
 * Expands to __builtin_ia32_rndscaless_round_mask on __A and __B with __W as
 * the third operand, __U as the 8-bit mask, then __I and __R as the trailing
 * arguments.
 *
 * __I and __R are parenthesized in the expansion, matching how the sibling
 * roundscale macros treat their immediate and rounding arguments.
 */
#define _mm_mask_roundscale_round_ss( __W, __U, __A, __B, __I, __R) __extension__ ({ \
  __builtin_ia32_rndscaless_round_mask ((__v4sf)( __A), \
                                        (__v4sf)( __B), \
                                        (__v4sf)( __W), \
                                        (__mmask8)( __U), \
                                        ( __I), \
                                        ( __R)); \
})
5798
/*
 * Expands to __builtin_ia32_rndscaless_round_mask on __A and __B with a
 * zeroed third operand, __U as the 8-bit mask, __I, and
 * _MM_FROUND_CUR_DIRECTION as the last argument.
 *
 * __I is parenthesized in the expansion, matching how the sibling roundscale
 * macros treat their immediate argument.
 */
#define _mm_maskz_roundscale_ss( __U, __A, __B, __I) __extension__ ({ \
  __builtin_ia32_rndscaless_round_mask ((__v4sf)( __A), \
                                        (__v4sf)( __B), \
                                        (__v4sf) _mm_setzero_ps (), \
                                        (__mmask8)( __U), \
                                        ( __I), \
                                        _MM_FROUND_CUR_DIRECTION); \
})
5807
/* Expands to __builtin_ia32_rndscaless_round_mask on __A and __B with
 * _mm_setzero_ps() as the third operand, mask __U, immediate __I and
 * caller-supplied rounding argument __R. */
#define _mm_maskz_roundscale_round_ss(__U, __A, __B, __I, __R) \
  __extension__ ({ \
    __builtin_ia32_rndscaless_round_mask ((__v4sf)(__A), (__v4sf)(__B), \
                                          (__v4sf) _mm_setzero_ps (), \
                                          (__mmask8)(__U), (__I), (__R)); \
  })
5816
/* Expands to __builtin_ia32_scalefpd512_mask on __A and __B with
 * _mm512_undefined_pd() as the third operand, an all-ones __mmask8 and
 * rounding argument __R. */
#define _mm512_scalef_round_pd(__A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_scalefpd512_mask ((__v8df)(__A), \
                                     (__v8df)(__B), \
                                     (__v8df) _mm512_undefined_pd (), \
                                     (__mmask8) -1, \
                                     (__R)); \
  })
5824
/* Expands to __builtin_ia32_scalefpd512_mask, forwarding __A, __B, __W
 * (third operand), __U (cast to __mmask8) and rounding argument __R. */
#define _mm512_mask_scalef_round_pd(__W, __U, __A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_scalefpd512_mask ((__v8df)(__A), \
                                     (__v8df)(__B), \
                                     (__v8df)(__W), \
                                     (__mmask8)(__U), \
                                     (__R)); \
  })
5831
/* Expands to __builtin_ia32_scalefpd512_mask on __A and __B with
 * _mm512_setzero_pd() as the third operand, mask __U and rounding
 * argument __R. */
#define _mm512_maskz_scalef_round_pd(__U, __A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_scalefpd512_mask ((__v8df)(__A), \
                                     (__v8df)(__B), \
                                     (__v8df) _mm512_setzero_pd (), \
                                     (__mmask8)(__U), \
                                     (__R)); \
  })
5839
5840static __inline__ __m512d __DEFAULT_FN_ATTRS
5841_mm512_scalef_pd (__m512d __A, __m512d __B)
5842{
5843 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
5844 (__v8df) __B,
5845 (__v8df)
5846 _mm512_undefined_pd (),
5847 (__mmask8) -1,
5848 _MM_FROUND_CUR_DIRECTION);
5849}
5850
5851static __inline__ __m512d __DEFAULT_FN_ATTRS
5852_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
5853{
5854 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
5855 (__v8df) __B,
5856 (__v8df) __W,
5857 (__mmask8) __U,
5858 _MM_FROUND_CUR_DIRECTION);
5859}
5860
5861static __inline__ __m512d __DEFAULT_FN_ATTRS
5862_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
5863{
5864 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
5865 (__v8df) __B,
5866 (__v8df)
5867 _mm512_setzero_pd (),
5868 (__mmask8) __U,
5869 _MM_FROUND_CUR_DIRECTION);
5870}
5871
/* Expands to __builtin_ia32_scalefps512_mask on __A and __B with
 * _mm512_undefined_ps() as the third operand, an all-ones __mmask16 and
 * rounding argument __R. */
#define _mm512_scalef_round_ps(__A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_scalefps512_mask ((__v16sf)(__A), \
                                     (__v16sf)(__B), \
                                     (__v16sf) _mm512_undefined_ps (), \
                                     (__mmask16) -1, \
                                     (__R)); \
  })
5879
/* Expands to __builtin_ia32_scalefps512_mask, forwarding __A, __B, __W
 * (third operand), __U (cast to __mmask16) and rounding argument __R. */
#define _mm512_mask_scalef_round_ps(__W, __U, __A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_scalefps512_mask ((__v16sf)(__A), \
                                     (__v16sf)(__B), \
                                     (__v16sf)(__W), \
                                     (__mmask16)(__U), \
                                     (__R)); \
  })
5886
/* Expands to __builtin_ia32_scalefps512_mask on __A and __B with
 * _mm512_setzero_ps() as the third operand, mask __U and rounding
 * argument __R. */
#define _mm512_maskz_scalef_round_ps(__U, __A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_scalefps512_mask ((__v16sf)(__A), \
                                     (__v16sf)(__B), \
                                     (__v16sf) _mm512_setzero_ps (), \
                                     (__mmask16)(__U), \
                                     (__R)); \
  })
5894
5895static __inline__ __m512 __DEFAULT_FN_ATTRS
5896_mm512_scalef_ps (__m512 __A, __m512 __B)
5897{
5898 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
5899 (__v16sf) __B,
5900 (__v16sf)
5901 _mm512_undefined_ps (),
5902 (__mmask16) -1,
5903 _MM_FROUND_CUR_DIRECTION);
5904}
5905
5906static __inline__ __m512 __DEFAULT_FN_ATTRS
5907_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
5908{
5909 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
5910 (__v16sf) __B,
5911 (__v16sf) __W,
5912 (__mmask16) __U,
5913 _MM_FROUND_CUR_DIRECTION);
5914}
5915
5916static __inline__ __m512 __DEFAULT_FN_ATTRS
5917_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
5918{
5919 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
5920 (__v16sf) __B,
5921 (__v16sf)
5922 _mm512_setzero_ps (),
5923 (__mmask16) __U,
5924 _MM_FROUND_CUR_DIRECTION);
5925}
5926
/* Expands to __builtin_ia32_scalefsd_round_mask on __A and __B with
 * _mm_setzero_pd() as the third operand, an all-ones __mmask8 and rounding
 * argument __R. */
#define _mm_scalef_round_sd(__A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_scalefsd_round_mask ((__v2df)(__A), \
                                        (__v2df)(__B), \
                                        (__v2df) _mm_setzero_pd (), \
                                        (__mmask8) -1, \
                                        (__R)); \
  })
5933
5934static __inline__ __m128d __DEFAULT_FN_ATTRS
5935_mm_scalef_sd (__m128d __A, __m128d __B)
5936{
5937 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
5938 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
5939 (__mmask8) -1,
5940 _MM_FROUND_CUR_DIRECTION);
5941}
5942
5943static __inline__ __m128d __DEFAULT_FN_ATTRS
5944_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5945{
5946 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
5947 (__v2df) __B,
5948 (__v2df) __W,
5949 (__mmask8) __U,
5950 _MM_FROUND_CUR_DIRECTION);
5951}
5952
/* Expands to __builtin_ia32_scalefsd_round_mask, forwarding __A, __B, __W
 * (third operand), __U (mask) and rounding argument __R.
 * Every macro argument is parenthesized before it is cast, so an expression
 * argument such as `m >> 8` is cast as a whole rather than only its first
 * operand. */
#define _mm_mask_scalef_round_sd(__W, __U, __A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_scalefsd_round_mask ((__v2df)(__A), \
                                        (__v2df)(__B), \
                                        (__v2df)(__W), \
                                        (__mmask8)(__U), \
                                        (__R)); \
  })
5959
5960static __inline__ __m128d __DEFAULT_FN_ATTRS
5961_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
5962{
5963 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
5964 (__v2df) __B,
5965 (__v2df) _mm_setzero_pd (),
5966 (__mmask8) __U,
5967 _MM_FROUND_CUR_DIRECTION);
5968}
5969
/* Expands to __builtin_ia32_scalefsd_round_mask on __A and __B with
 * _mm_setzero_pd() as the third operand, mask __U and rounding argument __R.
 * __U is parenthesized before the __mmask8 cast so that an expression
 * argument (e.g. `m >> 8`) is cast as a whole. */
#define _mm_maskz_scalef_round_sd(__U, __A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_scalefsd_round_mask ((__v2df)(__A), \
                                        (__v2df)(__B), \
                                        (__v2df) _mm_setzero_pd (), \
                                        (__mmask8)(__U), \
                                        (__R)); \
  })
5976
/* Expands to __builtin_ia32_scalefss_round_mask on __A and __B with
 * _mm_setzero_ps() as the third operand, an all-ones __mmask8 and rounding
 * argument __R. */
#define _mm_scalef_round_ss(__A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_scalefss_round_mask ((__v4sf)(__A), \
                                        (__v4sf)(__B), \
                                        (__v4sf) _mm_setzero_ps (), \
                                        (__mmask8) -1, \
                                        (__R)); \
  })
5983
5984static __inline__ __m128 __DEFAULT_FN_ATTRS
5985_mm_scalef_ss (__m128 __A, __m128 __B)
5986{
5987 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
5988 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
5989 (__mmask8) -1,
5990 _MM_FROUND_CUR_DIRECTION);
5991}
5992
5993static __inline__ __m128 __DEFAULT_FN_ATTRS
5994_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5995{
5996 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
5997 (__v4sf) __B,
5998 (__v4sf) __W,
5999 (__mmask8) __U,
6000 _MM_FROUND_CUR_DIRECTION);
6001}
6002
/* Expands to __builtin_ia32_scalefss_round_mask, forwarding __A, __B, __W
 * (third operand), __U (mask) and rounding argument __R.
 * Every macro argument is parenthesized before it is cast, so an expression
 * argument such as `m >> 8` is cast as a whole rather than only its first
 * operand. */
#define _mm_mask_scalef_round_ss(__W, __U, __A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_scalefss_round_mask ((__v4sf)(__A), \
                                        (__v4sf)(__B), \
                                        (__v4sf)(__W), \
                                        (__mmask8)(__U), \
                                        (__R)); \
  })
6009
6010static __inline__ __m128 __DEFAULT_FN_ATTRS
6011_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6012{
6013 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6014 (__v4sf) __B,
6015 (__v4sf) _mm_setzero_ps (),
6016 (__mmask8) __U,
6017 _MM_FROUND_CUR_DIRECTION);
6018}
6019
/* Expands to __builtin_ia32_scalefss_round_mask on __A and __B with
 * _mm_setzero_ps() as the third operand, mask __U and rounding argument __R.
 * The caller's __R is forwarded (it was previously dropped in favour of
 * _MM_FROUND_CUR_DIRECTION), matching _mm_maskz_scalef_round_sd.
 * __U is parenthesized before the __mmask8 cast so that an expression
 * argument is cast as a whole. */
#define _mm_maskz_scalef_round_ss(__U, __A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_scalefss_round_mask ((__v4sf)(__A), \
                                        (__v4sf)(__B), \
                                        (__v4sf) _mm_setzero_ps (), \
                                        (__mmask8)(__U), \
                                        (__R)); \
  })
6026
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006027static __inline__ __m512i __DEFAULT_FN_ATTRS
6028_mm512_srai_epi32 (__m512i __A, unsigned int __B)
6029{
6030 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
6031 (__v16si)
6032 _mm512_setzero_si512 (),
6033 (__mmask16) -1);
6034}
6035
/* Expands to __builtin_ia32_psradi512_mask, forwarding __A, count __B, __W
 * (third operand) and __U (mask).
 * The result is cast to __m512i so the macro yields the same type as the
 * function form _mm512_srai_epi32 instead of the builtin's __v16si. */
#define _mm512_mask_srai_epi32(__W, __U, __A, __B) \
  __extension__ ({ \
    (__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__A), \
                                             (__B), \
                                             (__v16si)(__W), \
                                             (__mmask16)(__U)); \
  })
6041
/* Expands to __builtin_ia32_psradi512_mask on __A and count __B with
 * _mm512_setzero_si512() as the third operand and mask __U.
 * The result is cast to __m512i so the macro yields the same type as the
 * function form _mm512_srai_epi32 instead of the builtin's __v16si. */
#define _mm512_maskz_srai_epi32(__U, __A, __B) \
  __extension__ ({ \
    (__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__A), \
                                             (__B), \
                                             (__v16si) _mm512_setzero_si512 (), \
                                             (__mmask16)(__U)); \
  })
6048
/* Expands to __builtin_ia32_psraqi512_mask on __A and count __B with
 * _mm512_setzero_si512() as the third operand and an all-ones __mmask8. */
#define _mm512_srai_epi64(__A, __B) \
  __extension__ ({ \
    __builtin_ia32_psraqi512_mask ((__v8di)(__A), \
                                   (__B), \
                                   (__v8di) _mm512_setzero_si512 (), \
                                   (__mmask8) -1); \
  })
6055
/* Expands to __builtin_ia32_psraqi512_mask, forwarding __A, count __B, __W
 * (third operand) and __U (cast to __mmask8). */
#define _mm512_mask_srai_epi64(__W, __U, __A, __B) \
  __extension__ ({ \
    __builtin_ia32_psraqi512_mask ((__v8di)(__A), \
                                   (__B), \
                                   (__v8di)(__W), \
                                   (__mmask8)(__U)); \
  })
6061
/* Expands to __builtin_ia32_psraqi512_mask on __A and count __B with
 * _mm512_setzero_si512() as the third operand and mask __U. */
#define _mm512_maskz_srai_epi64(__U, __A, __B) \
  __extension__ ({ \
    __builtin_ia32_psraqi512_mask ((__v8di)(__A), \
                                   (__B), \
                                   (__v8di) _mm512_setzero_si512 (), \
                                   (__mmask8)(__U)); \
  })
6068
/* Expands to __builtin_ia32_shuf_f32x4_mask on __A, __B and immediate
 * __imm with _mm512_undefined_ps() as the fourth operand and an all-ones
 * __mmask16. */
#define _mm512_shuffle_f32x4(__A, __B, __imm) \
  __extension__ ({ \
    __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__A), \
                                    (__v16sf)(__B), \
                                    (__imm), \
                                    (__v16sf) _mm512_undefined_ps (), \
                                    (__mmask16) -1); \
  })
6076
/* Expands to __builtin_ia32_shuf_f32x4_mask, forwarding __A, __B, __imm,
 * __W (fourth operand) and __U (cast to __mmask16). */
#define _mm512_mask_shuffle_f32x4(__W, __U, __A, __B, __imm) \
  __extension__ ({ \
    __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__A), \
                                    (__v16sf)(__B), \
                                    (__imm), \
                                    (__v16sf)(__W), \
                                    (__mmask16)(__U)); \
  })
6083
/* Expands to __builtin_ia32_shuf_f32x4_mask on __A, __B and __imm with
 * _mm512_setzero_ps() as the fourth operand and mask __U. */
#define _mm512_maskz_shuffle_f32x4(__U, __A, __B, __imm) \
  __extension__ ({ \
    __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__A), \
                                    (__v16sf)(__B), \
                                    (__imm), \
                                    (__v16sf) _mm512_setzero_ps (), \
                                    (__mmask16)(__U)); \
  })
6091
/* Expands to __builtin_ia32_shuf_f64x2_mask on __A, __B and immediate
 * __imm with _mm512_undefined_pd() as the fourth operand and an all-ones
 * __mmask8. */
#define _mm512_shuffle_f64x2(__A, __B, __imm) \
  __extension__ ({ \
    __builtin_ia32_shuf_f64x2_mask ((__v8df)(__A), \
                                    (__v8df)(__B), \
                                    (__imm), \
                                    (__v8df) _mm512_undefined_pd (), \
                                    (__mmask8) -1); \
  })
6099
/* Expands to __builtin_ia32_shuf_f64x2_mask, forwarding __A, __B, __imm,
 * __W (fourth operand) and __U (cast to __mmask8). */
#define _mm512_mask_shuffle_f64x2(__W, __U, __A, __B, __imm) \
  __extension__ ({ \
    __builtin_ia32_shuf_f64x2_mask ((__v8df)(__A), \
                                    (__v8df)(__B), \
                                    (__imm), \
                                    (__v8df)(__W), \
                                    (__mmask8)(__U)); \
  })
6106
/* Expands to __builtin_ia32_shuf_f64x2_mask on __A, __B and __imm with
 * _mm512_setzero_pd() as the fourth operand and mask __U. */
#define _mm512_maskz_shuffle_f64x2(__U, __A, __B, __imm) \
  __extension__ ({ \
    __builtin_ia32_shuf_f64x2_mask ((__v8df)(__A), \
                                    (__v8df)(__B), \
                                    (__imm), \
                                    (__v8df) _mm512_setzero_pd (), \
                                    (__mmask8)(__U)); \
  })
6114
/* Expands to __builtin_ia32_shuf_i32x4_mask on __A, __B and immediate
 * __imm with _mm512_setzero_si512() as the fourth operand and an all-ones
 * __mmask16.
 * The result is cast to __m512i so the macro yields the header's generic
 * 512-bit integer type rather than the builtin's __v16si. */
#define _mm512_shuffle_i32x4(__A, __B, __imm) \
  __extension__ ({ \
    (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__A), \
                                              (__v16si)(__B), \
                                              (__imm), \
                                              (__v16si) _mm512_setzero_si512 (), \
                                              (__mmask16) -1); \
  })
6123
/* Expands to __builtin_ia32_shuf_i32x4_mask, forwarding __A, __B, __imm,
 * __W (fourth operand) and __U (cast to __mmask16).
 * The result is cast to __m512i so the macro yields the header's generic
 * 512-bit integer type rather than the builtin's __v16si. */
#define _mm512_mask_shuffle_i32x4(__W, __U, __A, __B, __imm) \
  __extension__ ({ \
    (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__A), \
                                              (__v16si)(__B), \
                                              (__imm), \
                                              (__v16si)(__W), \
                                              (__mmask16)(__U)); \
  })
6131
/* Expands to __builtin_ia32_shuf_i32x4_mask on __A, __B and __imm with
 * _mm512_setzero_si512() as the fourth operand and mask __U.
 * The result is cast to __m512i so the macro yields the header's generic
 * 512-bit integer type rather than the builtin's __v16si. */
#define _mm512_maskz_shuffle_i32x4(__U, __A, __B, __imm) \
  __extension__ ({ \
    (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__A), \
                                              (__v16si)(__B), \
                                              (__imm), \
                                              (__v16si) _mm512_setzero_si512 (), \
                                              (__mmask16)(__U)); \
  })
6140
/* Expands to __builtin_ia32_shuf_i64x2_mask on __A, __B and immediate
 * __imm with _mm512_setzero_si512() as the fourth operand and an all-ones
 * __mmask8. */
#define _mm512_shuffle_i64x2(__A, __B, __imm) \
  __extension__ ({ \
    __builtin_ia32_shuf_i64x2_mask ((__v8di)(__A), \
                                    (__v8di)(__B), \
                                    (__imm), \
                                    (__v8di) _mm512_setzero_si512 (), \
                                    (__mmask8) -1); \
  })
6148
/* Expands to __builtin_ia32_shuf_i64x2_mask, forwarding __A, __B, __imm,
 * __W (fourth operand) and __U (cast to __mmask8). */
#define _mm512_mask_shuffle_i64x2(__W, __U, __A, __B, __imm) \
  __extension__ ({ \
    __builtin_ia32_shuf_i64x2_mask ((__v8di)(__A), \
                                    (__v8di)(__B), \
                                    (__imm), \
                                    (__v8di)(__W), \
                                    (__mmask8)(__U)); \
  })
6155
/* Expands to __builtin_ia32_shuf_i64x2_mask on __A, __B and __imm with
 * _mm512_setzero_si512() as the fourth operand and mask __U. */
#define _mm512_maskz_shuffle_i64x2(__U, __A, __B, __imm) \
  __extension__ ({ \
    __builtin_ia32_shuf_i64x2_mask ((__v8di)(__A), \
                                    (__v8di)(__B), \
                                    (__imm), \
                                    (__v8di) _mm512_setzero_si512 (), \
                                    (__mmask8)(__U)); \
  })
6163
/* Expands to __builtin_ia32_shufpd512_mask on __M, __V and immediate __imm
 * with _mm512_undefined_pd() as the fourth operand and an all-ones
 * __mmask8. */
#define _mm512_shuffle_pd(__M, __V, __imm) \
  __extension__ ({ \
    __builtin_ia32_shufpd512_mask ((__v8df)(__M), \
                                   (__v8df)(__V), \
                                   (__imm), \
                                   (__v8df) _mm512_undefined_pd (), \
                                   (__mmask8) -1); \
  })
6171
/* Expands to __builtin_ia32_shufpd512_mask, forwarding __M, __V, __imm,
 * __W (fourth operand) and __U (cast to __mmask8). */
#define _mm512_mask_shuffle_pd(__W, __U, __M, __V, __imm) \
  __extension__ ({ \
    __builtin_ia32_shufpd512_mask ((__v8df)(__M), \
                                   (__v8df)(__V), \
                                   (__imm), \
                                   (__v8df)(__W), \
                                   (__mmask8)(__U)); \
  })
6178
/* Expands to __builtin_ia32_shufpd512_mask on __M, __V and __imm with
 * _mm512_setzero_pd() as the fourth operand and mask __U. */
#define _mm512_maskz_shuffle_pd(__U, __M, __V, __imm) \
  __extension__ ({ \
    __builtin_ia32_shufpd512_mask ((__v8df)(__M), \
                                   (__v8df)(__V), \
                                   (__imm), \
                                   (__v8df) _mm512_setzero_pd (), \
                                   (__mmask8)(__U)); \
  })
6186
/* Expands to __builtin_ia32_shufps512_mask on __M, __V and immediate __imm
 * with _mm512_undefined_ps() as the fourth operand and an all-ones
 * __mmask16. */
#define _mm512_shuffle_ps(__M, __V, __imm) \
  __extension__ ({ \
    __builtin_ia32_shufps512_mask ((__v16sf)(__M), \
                                   (__v16sf)(__V), \
                                   (__imm), \
                                   (__v16sf) _mm512_undefined_ps (), \
                                   (__mmask16) -1); \
  })
6194
/* Expands to __builtin_ia32_shufps512_mask, forwarding __M, __V, __imm,
 * __W (fourth operand) and __U (cast to __mmask16). */
#define _mm512_mask_shuffle_ps(__W, __U, __M, __V, __imm) \
  __extension__ ({ \
    __builtin_ia32_shufps512_mask ((__v16sf)(__M), \
                                   (__v16sf)(__V), \
                                   (__imm), \
                                   (__v16sf)(__W), \
                                   (__mmask16)(__U)); \
  })
6201
/* Expands to __builtin_ia32_shufps512_mask on __M, __V and __imm with
 * _mm512_setzero_ps() as the fourth operand and mask __U. */
#define _mm512_maskz_shuffle_ps(__U, __M, __V, __imm) \
  __extension__ ({ \
    __builtin_ia32_shufps512_mask ((__v16sf)(__M), \
                                   (__v16sf)(__V), \
                                   (__imm), \
                                   (__v16sf) _mm512_setzero_ps (), \
                                   (__mmask16)(__U)); \
  })
6209
/* Expands to __builtin_ia32_sqrtsd_round_mask with __B as the first operand
 * and __A as the second, _mm_setzero_pd() as the third, an all-ones
 * __mmask8 and rounding argument __R. */
#define _mm_sqrt_round_sd(__A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_sqrtsd_round_mask ((__v2df)(__B), \
                                      (__v2df)(__A), \
                                      (__v2df) _mm_setzero_pd (), \
                                      (__mmask8) -1, \
                                      (__R)); \
  })
6216
6217static __inline__ __m128d __DEFAULT_FN_ATTRS
6218_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6219{
6220 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __B,
6221 (__v2df) __A,
6222 (__v2df) __W,
6223 (__mmask8) __U,
6224 _MM_FROUND_CUR_DIRECTION);
6225}
6226
/* Expands to __builtin_ia32_sqrtsd_round_mask with __B as the first operand
 * and __A as the second, __W as the third, mask __U and rounding argument
 * __R.
 * Every macro argument is parenthesized before it is cast, so an expression
 * argument such as `m >> 8` is cast as a whole rather than only its first
 * operand. */
#define _mm_mask_sqrt_round_sd(__W, __U, __A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_sqrtsd_round_mask ((__v2df)(__B), \
                                      (__v2df)(__A), \
                                      (__v2df)(__W), \
                                      (__mmask8)(__U), \
                                      (__R)); \
  })
6233
6234static __inline__ __m128d __DEFAULT_FN_ATTRS
6235_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6236{
6237 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __B,
6238 (__v2df) __A,
6239 (__v2df) _mm_setzero_pd (),
6240 (__mmask8) __U,
6241 _MM_FROUND_CUR_DIRECTION);
6242}
6243
/* Expands to __builtin_ia32_sqrtsd_round_mask with __B as the first operand
 * and __A as the second, _mm_setzero_pd() as the third, mask __U and
 * rounding argument __R.
 * __U is parenthesized before the __mmask8 cast so that an expression
 * argument (e.g. `m >> 8`) is cast as a whole. */
#define _mm_maskz_sqrt_round_sd(__U, __A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_sqrtsd_round_mask ((__v2df)(__B), \
                                      (__v2df)(__A), \
                                      (__v2df) _mm_setzero_pd (), \
                                      (__mmask8)(__U), \
                                      (__R)); \
  })
6250
/* Expands to __builtin_ia32_sqrtss_round_mask with __B as the first operand
 * and __A as the second, _mm_setzero_ps() as the third, an all-ones
 * __mmask8 and rounding argument __R. */
#define _mm_sqrt_round_ss(__A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_sqrtss_round_mask ((__v4sf)(__B), \
                                      (__v4sf)(__A), \
                                      (__v4sf) _mm_setzero_ps (), \
                                      (__mmask8) -1, \
                                      (__R)); \
  })
6257
6258static __inline__ __m128 __DEFAULT_FN_ATTRS
6259_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6260{
6261 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __B,
6262 (__v4sf) __A,
6263 (__v4sf) __W,
6264 (__mmask8) __U,
6265 _MM_FROUND_CUR_DIRECTION);
6266}
6267
/* Expands to __builtin_ia32_sqrtss_round_mask with __B as the first operand
 * and __A as the second, __W as the third, mask __U and rounding argument
 * __R.
 * Every macro argument is parenthesized before it is cast, so an expression
 * argument such as `m >> 8` is cast as a whole rather than only its first
 * operand. */
#define _mm_mask_sqrt_round_ss(__W, __U, __A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_sqrtss_round_mask ((__v4sf)(__B), \
                                      (__v4sf)(__A), \
                                      (__v4sf)(__W), \
                                      (__mmask8)(__U), \
                                      (__R)); \
  })
6274
6275static __inline__ __m128 __DEFAULT_FN_ATTRS
6276_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6277{
6278 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6279 (__v4sf) __B,
6280 (__v4sf) _mm_setzero_ps (),
6281 (__mmask8) __U,
6282 _MM_FROUND_CUR_DIRECTION);
6283}
6284
/* Expands to __builtin_ia32_sqrtss_round_mask with __B as the first operand
 * and __A as the second, _mm_setzero_ps() as the third, mask __U and
 * rounding argument __R.
 * __U is parenthesized before the __mmask8 cast so that an expression
 * argument (e.g. `m >> 8`) is cast as a whole; __R is parenthesized for
 * consistency with the sibling macros. */
#define _mm_maskz_sqrt_round_ss(__U, __A, __B, __R) \
  __extension__ ({ \
    __builtin_ia32_sqrtss_round_mask ((__v4sf)(__B), \
                                      (__v4sf)(__A), \
                                      (__v4sf) _mm_setzero_ps (), \
                                      (__mmask8)(__U), \
                                      (__R)); \
  })
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006291
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006292static __inline__ __m512 __DEFAULT_FN_ATTRS
6293_mm512_broadcast_f32x4 (__m128 __A)
6294{
6295 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
6296 (__v16sf)
6297 _mm512_undefined_ps (),
6298 (__mmask16) -1);
6299}
6300
6301static __inline__ __m512 __DEFAULT_FN_ATTRS
6302_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
6303{
6304 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
6305 (__v16sf) __O,
6306 __M);
6307}
6308
6309static __inline__ __m512 __DEFAULT_FN_ATTRS
6310_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
6311{
6312 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
6313 (__v16sf)
6314 _mm512_setzero_ps (),
6315 __M);
6316}
6317
6318static __inline__ __m512d __DEFAULT_FN_ATTRS
6319_mm512_broadcast_f64x4 (__m256d __A)
6320{
6321 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
6322 (__v8df)
6323 _mm512_undefined_pd (),
6324 (__mmask8) -1);
6325}
6326
6327static __inline__ __m512d __DEFAULT_FN_ATTRS
6328_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
6329{
6330 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
6331 (__v8df) __O,
6332 __M);
6333}
6334
6335static __inline__ __m512d __DEFAULT_FN_ATTRS
6336_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
6337{
6338 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
6339 (__v8df)
6340 _mm512_setzero_pd (),
6341 __M);
6342}
6343
6344static __inline__ __m512i __DEFAULT_FN_ATTRS
6345_mm512_broadcast_i32x4 (__m128i __A)
6346{
6347 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
6348 (__v16si)
6349 _mm512_undefined_epi32 (),
6350 (__mmask16) -1);
6351}
6352
6353static __inline__ __m512i __DEFAULT_FN_ATTRS
6354_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
6355{
6356 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
6357 (__v16si) __O,
6358 __M);
6359}
6360
6361static __inline__ __m512i __DEFAULT_FN_ATTRS
6362_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
6363{
6364 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
6365 (__v16si)
6366 _mm512_setzero_si512 (),
6367 __M);
6368}
6369
6370static __inline__ __m512i __DEFAULT_FN_ATTRS
6371_mm512_broadcast_i64x4 (__m256i __A)
6372{
6373 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
6374 (__v8di)
6375 _mm512_undefined_epi32 (),
6376 (__mmask8) -1);
6377}
6378
6379static __inline__ __m512i __DEFAULT_FN_ATTRS
6380_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
6381{
6382 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
6383 (__v8di) __O,
6384 __M);
6385}
6386
6387static __inline__ __m512i __DEFAULT_FN_ATTRS
6388_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
6389{
6390 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
6391 (__v8di)
6392 _mm512_setzero_si512 (),
6393 __M);
6394}
6395
6396static __inline__ __m512d __DEFAULT_FN_ATTRS
6397_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
6398{
6399 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
6400 (__v8df) __O, __M);
6401}
6402
6403static __inline__ __m512d __DEFAULT_FN_ATTRS
6404_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6405{
6406 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
6407 (__v8df)
6408 _mm512_setzero_pd (),
6409 __M);
6410}
6411
6412static __inline__ __m512 __DEFAULT_FN_ATTRS
6413_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
6414{
6415 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
6416 (__v16sf) __O, __M);
6417}
6418
6419static __inline__ __m512 __DEFAULT_FN_ATTRS
6420_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
6421{
6422 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
6423 (__v16sf)
6424 _mm512_setzero_ps (),
6425 __M);
6426}
6427
Michael Zuckermane1680612016-04-13 15:02:04 +00006428static __inline__ __m128i __DEFAULT_FN_ATTRS
6429_mm512_cvtsepi32_epi8 (__m512i __A)
6430{
6431 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6432 (__v16qi) _mm_undefined_si128 (),
6433 (__mmask16) -1);
6434}
6435
6436static __inline__ __m128i __DEFAULT_FN_ATTRS
6437_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6438{
6439 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6440 (__v16qi) __O, __M);
6441}
6442
6443static __inline__ __m128i __DEFAULT_FN_ATTRS
6444_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
6445{
6446 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6447 (__v16qi) _mm_setzero_si128 (),
6448 __M);
6449}
6450
6451static __inline__ void __DEFAULT_FN_ATTRS
6452_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6453{
6454 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6455}
6456
6457static __inline__ __m256i __DEFAULT_FN_ATTRS
6458_mm512_cvtsepi32_epi16 (__m512i __A)
6459{
6460 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6461 (__v16hi) _mm256_undefined_si256 (),
6462 (__mmask16) -1);
6463}
6464
6465static __inline__ __m256i __DEFAULT_FN_ATTRS
6466_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6467{
6468 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6469 (__v16hi) __O, __M);
6470}
6471
6472static __inline__ __m256i __DEFAULT_FN_ATTRS
6473_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
6474{
6475 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6476 (__v16hi) _mm256_setzero_si256 (),
6477 __M);
6478}
6479
6480static __inline__ void __DEFAULT_FN_ATTRS
6481_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
6482{
6483 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6484}
6485
6486static __inline__ __m128i __DEFAULT_FN_ATTRS
6487_mm512_cvtsepi64_epi8 (__m512i __A)
6488{
6489 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6490 (__v16qi) _mm_undefined_si128 (),
6491 (__mmask8) -1);
6492}
6493
6494static __inline__ __m128i __DEFAULT_FN_ATTRS
6495_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6496{
6497 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6498 (__v16qi) __O, __M);
6499}
6500
6501static __inline__ __m128i __DEFAULT_FN_ATTRS
6502_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
6503{
6504 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6505 (__v16qi) _mm_setzero_si128 (),
6506 __M);
6507}
6508
6509static __inline__ void __DEFAULT_FN_ATTRS
6510_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
6511{
6512 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6513}
6514
6515static __inline__ __m256i __DEFAULT_FN_ATTRS
6516_mm512_cvtsepi64_epi32 (__m512i __A)
6517{
6518 __v8si __O;
6519 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6520 (__v8si) _mm256_undefined_si256 (),
6521 (__mmask8) -1);
6522}
6523
6524static __inline__ __m256i __DEFAULT_FN_ATTRS
6525_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6526{
6527 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6528 (__v8si) __O, __M);
6529}
6530
6531static __inline__ __m256i __DEFAULT_FN_ATTRS
6532_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
6533{
6534 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6535 (__v8si) _mm256_setzero_si256 (),
6536 __M);
6537}
6538
6539static __inline__ void __DEFAULT_FN_ATTRS
6540_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
6541{
6542 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6543}
6544
6545static __inline__ __m128i __DEFAULT_FN_ATTRS
6546_mm512_cvtsepi64_epi16 (__m512i __A)
6547{
6548 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6549 (__v8hi) _mm_undefined_si128 (),
6550 (__mmask8) -1);
6551}
6552
6553static __inline__ __m128i __DEFAULT_FN_ATTRS
6554_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6555{
6556 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6557 (__v8hi) __O, __M);
6558}
6559
6560static __inline__ __m128i __DEFAULT_FN_ATTRS
6561_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
6562{
6563 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6564 (__v8hi) _mm_setzero_si128 (),
6565 __M);
6566}
6567
6568static __inline__ void __DEFAULT_FN_ATTRS
6569_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
6570{
6571 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
6572}
6573
Michael Zuckermand8715312016-04-14 06:48:09 +00006574static __inline__ __m128i __DEFAULT_FN_ATTRS
6575_mm512_cvtusepi32_epi8 (__m512i __A)
6576{
6577 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6578 (__v16qi) _mm_undefined_si128 (),
6579 (__mmask16) -1);
6580}
6581
6582static __inline__ __m128i __DEFAULT_FN_ATTRS
6583_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6584{
6585 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6586 (__v16qi) __O,
6587 __M);
6588}
6589
6590static __inline__ __m128i __DEFAULT_FN_ATTRS
6591_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
6592{
6593 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6594 (__v16qi) _mm_setzero_si128 (),
6595 __M);
6596}
6597
6598static __inline__ void __DEFAULT_FN_ATTRS
6599_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6600{
6601 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6602}
6603
6604static __inline__ __m256i __DEFAULT_FN_ATTRS
6605_mm512_cvtusepi32_epi16 (__m512i __A)
6606{
6607 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6608 (__v16hi) _mm256_undefined_si256 (),
6609 (__mmask16) -1);
6610}
6611
6612static __inline__ __m256i __DEFAULT_FN_ATTRS
6613_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6614{
6615 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6616 (__v16hi) __O,
6617 __M);
6618}
6619
6620static __inline__ __m256i __DEFAULT_FN_ATTRS
6621_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
6622{
6623 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6624 (__v16hi) _mm256_setzero_si256 (),
6625 __M);
6626}
6627
6628static __inline__ void __DEFAULT_FN_ATTRS
6629_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
6630{
6631 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6632}
6633
6634static __inline__ __m128i __DEFAULT_FN_ATTRS
6635_mm512_cvtusepi64_epi8 (__m512i __A)
6636{
6637 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6638 (__v16qi) _mm_undefined_si128 (),
6639 (__mmask8) -1);
6640}
6641
6642static __inline__ __m128i __DEFAULT_FN_ATTRS
6643_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6644{
6645 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6646 (__v16qi) __O,
6647 __M);
6648}
6649
6650static __inline__ __m128i __DEFAULT_FN_ATTRS
6651_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
6652{
6653 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6654 (__v16qi) _mm_setzero_si128 (),
6655 __M);
6656}
6657
6658static __inline__ void __DEFAULT_FN_ATTRS
6659_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
6660{
6661 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6662}
6663
6664static __inline__ __m256i __DEFAULT_FN_ATTRS
6665_mm512_cvtusepi64_epi32 (__m512i __A)
6666{
6667 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6668 (__v8si) _mm256_undefined_si256 (),
6669 (__mmask8) -1);
6670}
6671
6672static __inline__ __m256i __DEFAULT_FN_ATTRS
6673_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6674{
6675 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6676 (__v8si) __O, __M);
6677}
6678
6679static __inline__ __m256i __DEFAULT_FN_ATTRS
6680_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
6681{
6682 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6683 (__v8si) _mm256_setzero_si256 (),
6684 __M);
6685}
6686
6687static __inline__ void __DEFAULT_FN_ATTRS
6688_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
6689{
6690 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
6691}
6692
6693static __inline__ __m128i __DEFAULT_FN_ATTRS
6694_mm512_cvtusepi64_epi16 (__m512i __A)
6695{
6696 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6697 (__v8hi) _mm_undefined_si128 (),
6698 (__mmask8) -1);
6699}
6700
6701static __inline__ __m128i __DEFAULT_FN_ATTRS
6702_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6703{
6704 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6705 (__v8hi) __O, __M);
6706}
6707
6708static __inline__ __m128i __DEFAULT_FN_ATTRS
6709_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
6710{
6711 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6712 (__v8hi) _mm_setzero_si128 (),
6713 __M);
6714}
6715
6716static __inline__ void __DEFAULT_FN_ATTRS
6717_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
6718{
6719 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
6720}
6721
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00006722static __inline__ __m128i __DEFAULT_FN_ATTRS
6723_mm512_cvtepi32_epi8 (__m512i __A)
6724{
6725 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6726 (__v16qi) _mm_undefined_si128 (),
6727 (__mmask16) -1);
6728}
6729
6730static __inline__ __m128i __DEFAULT_FN_ATTRS
6731_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6732{
6733 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6734 (__v16qi) __O, __M);
6735}
6736
6737static __inline__ __m128i __DEFAULT_FN_ATTRS
6738_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
6739{
6740 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6741 (__v16qi) _mm_setzero_si128 (),
6742 __M);
6743}
6744
6745static __inline__ void __DEFAULT_FN_ATTRS
6746_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6747{
6748 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6749}
6750
6751static __inline__ __m256i __DEFAULT_FN_ATTRS
6752_mm512_cvtepi32_epi16 (__m512i __A)
6753{
6754 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6755 (__v16hi) _mm256_undefined_si256 (),
6756 (__mmask16) -1);
6757}
6758
6759static __inline__ __m256i __DEFAULT_FN_ATTRS
6760_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6761{
6762 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6763 (__v16hi) __O, __M);
6764}
6765
6766static __inline__ __m256i __DEFAULT_FN_ATTRS
6767_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
6768{
6769 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6770 (__v16hi) _mm256_setzero_si256 (),
6771 __M);
6772}
6773
6774static __inline__ void __DEFAULT_FN_ATTRS
6775_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
6776{
6777 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
6778}
6779
6780static __inline__ __m128i __DEFAULT_FN_ATTRS
6781_mm512_cvtepi64_epi8 (__m512i __A)
6782{
6783 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6784 (__v16qi) _mm_undefined_si128 (),
6785 (__mmask8) -1);
6786}
6787
6788static __inline__ __m128i __DEFAULT_FN_ATTRS
6789_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6790{
6791 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6792 (__v16qi) __O, __M);
6793}
6794
6795static __inline__ __m128i __DEFAULT_FN_ATTRS
6796_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
6797{
6798 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6799 (__v16qi) _mm_setzero_si128 (),
6800 __M);
6801}
6802
6803static __inline__ void __DEFAULT_FN_ATTRS
6804_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
6805{
6806 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6807}
6808
6809static __inline__ __m256i __DEFAULT_FN_ATTRS
6810_mm512_cvtepi64_epi32 (__m512i __A)
6811{
6812 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6813 (__v8si) _mm256_undefined_si256 (),
6814 (__mmask8) -1);
6815}
6816
6817static __inline__ __m256i __DEFAULT_FN_ATTRS
6818_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6819{
6820 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6821 (__v8si) __O, __M);
6822}
6823
6824static __inline__ __m256i __DEFAULT_FN_ATTRS
6825_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
6826{
6827 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6828 (__v8si) _mm256_setzero_si256 (),
6829 __M);
6830}
6831
6832static __inline__ void __DEFAULT_FN_ATTRS
6833_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
6834{
6835 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6836}
6837
6838static __inline__ __m128i __DEFAULT_FN_ATTRS
6839_mm512_cvtepi64_epi16 (__m512i __A)
6840{
6841 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
6842 (__v8hi) _mm_undefined_si128 (),
6843 (__mmask8) -1);
6844}
6845
6846static __inline__ __m128i __DEFAULT_FN_ATTRS
6847_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6848{
6849 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
6850 (__v8hi) __O, __M);
6851}
6852
6853static __inline__ __m128i __DEFAULT_FN_ATTRS
6854_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
6855{
6856 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
6857 (__v8hi) _mm_setzero_si128 (),
6858 __M);
6859}
6860
6861static __inline__ void __DEFAULT_FN_ATTRS
6862_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
6863{
6864 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
6865}
6866
/* Invokes __builtin_ia32_extracti32x4_mask on the sixteen 32-bit lanes of
   __A with immediate __imm, an undefined __v4si second vector and an
   all-ones 8-bit mask.  The result is cast to __m128i so the macro yields
   the public vector type, as the inline-function intrinsics in this header
   do, rather than the builtin's own return type.  */
#define _mm512_extracti32x4_epi32(__A, __imm) __extension__ ({ \
  (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__A), \
                                              (__imm), \
                                              (__v4si) _mm_undefined_si128 (), \
                                              (__mmask8) -1); \
})
6873
/* Masked form: passes __W as the second vector and __U as the 8-bit mask of
   __builtin_ia32_extracti32x4_mask.  The result is cast to __m128i so the
   macro yields the public vector type rather than the builtin's own
   return type.  */
#define _mm512_mask_extracti32x4_epi32(__W, __U, __A, __imm) __extension__ ({ \
  (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__A), \
                                              (__imm), \
                                              (__v4si)(__W), \
                                              (__mmask8)(__U)); \
})
6880
/* Zero-source form: passes an all-zero 128-bit vector as the second vector
   and __U as the 8-bit mask of __builtin_ia32_extracti32x4_mask.  The
   result is cast to __m128i so the macro yields the public vector type
   rather than the builtin's own return type.  */
#define _mm512_maskz_extracti32x4_epi32(__U, __A, __imm) __extension__ ({ \
  (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__A), \
                                              (__imm), \
                                              (__v4si) _mm_setzero_si128 (), \
                                              (__mmask8)(__U)); \
})
6887
/* Unmasked form: calls __builtin_ia32_extracti64x4_mask on the eight 64-bit
   lanes of __A with immediate __imm, an undefined __v4di second vector and
   an all-ones 8-bit mask.  */
#define _mm512_extracti64x4_epi64(__A, __imm) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__A), (__imm), \
                                               (__v4di) _mm256_undefined_si256 (), \
                                               (__mmask8) -1))
6894
/* Masked form: __W is the second vector and __U the 8-bit mask passed to
   __builtin_ia32_extracti64x4_mask.  */
#define _mm512_mask_extracti64x4_epi64(__W, __U, __A, __imm) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__A), (__imm), \
                                               (__v4di)(__W), \
                                               (__mmask8)(__U)))
6901
/* Zero-source form: an all-zero 256-bit vector is the second vector and __U
   the 8-bit mask passed to __builtin_ia32_extracti64x4_mask.  */
#define _mm512_maskz_extracti64x4_epi64(__U, __A, __imm) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__A), (__imm), \
                                               (__v4di) _mm256_setzero_si256 (), \
                                               (__mmask8)(__U)))
6908
/* Unmasked form: calls __builtin_ia32_insertf64x4_mask with the 512-bit
   vector __A, the 256-bit vector __B, immediate __imm, an undefined __v8df
   vector and an all-ones 8-bit mask.  */
#define _mm512_insertf64x4(__A, __B, __imm) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__A), (__v4df)(__B), \
                                              (__imm), \
                                              (__v8df) _mm512_undefined_pd (), \
                                              (__mmask8) -1))
6916
/* Masked form: __W is the fourth operand and __U the 8-bit mask passed to
   __builtin_ia32_insertf64x4_mask.  */
#define _mm512_mask_insertf64x4(__W, __U, __A, __B, __imm) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__A), (__v4df)(__B), \
                                              (__imm), \
                                              (__v8df)(__W), \
                                              (__mmask8)(__U)))
6924
/* Zero-source form: an all-zero __v8df vector is the fourth operand and __U
   the 8-bit mask passed to __builtin_ia32_insertf64x4_mask.  */
#define _mm512_maskz_insertf64x4(__U, __A, __B, __imm) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__A), (__v4df)(__B), \
                                              (__imm), \
                                              (__v8df) _mm512_setzero_pd (), \
                                              (__mmask8)(__U)))
6932
/* Unmasked form: calls __builtin_ia32_inserti64x4_mask with the 512-bit
   vector __A, the 256-bit vector __B, immediate __imm and an all-ones
   8-bit mask.  The fourth operand is an undefined vector, matching
   _mm512_insertf64x4 and the other unmasked wrappers in this header, so no
   zero vector has to be materialized for it.
   NOTE(review): assumes the fourth operand is never observed when every
   mask bit is set, as the sibling wrappers already assume.  */
#define _mm512_inserti64x4(__A, __B, __imm) __extension__ ({ \
  (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__A), \
                                             (__v4di)(__B), \
                                             (__imm), \
                                             (__v8di) _mm512_undefined_epi32 (), \
                                             (__mmask8) -1); \
})
6940
/* Masked form: __W is the fourth operand and __U the 8-bit mask passed to
   __builtin_ia32_inserti64x4_mask.  */
#define _mm512_mask_inserti64x4(__W, __U, __A, __B, __imm) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__A), (__v4di)(__B), \
                                              (__imm), \
                                              (__v8di)(__W), \
                                              (__mmask8)(__U)))
6948
/* Zero-source form: an all-zero 512-bit vector is the fourth operand and
   __U the 8-bit mask passed to __builtin_ia32_inserti64x4_mask.  */
#define _mm512_maskz_inserti64x4(__U, __A, __B, __imm) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__A), (__v4di)(__B), \
                                              (__imm), \
                                              (__v8di) _mm512_setzero_si512 (), \
                                              (__mmask8)(__U)))
6956
/* Calls __builtin_ia32_getmantpd512_mask on __A with an undefined source
   vector, an all-ones 8-bit mask and rounding argument __R.  The two
   selectors are packed into one immediate as ((__C) << 2) | (__B); both are
   parenthesized so that an argument written as an expression (for example
   `a | b`) is shifted and or-ed as a whole.  */
#define _mm512_getmant_round_pd(__A, __B, __C, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__A), \
                                              ((__C) << 2) | (__B), \
                                              (__v8df) _mm512_undefined_pd (), \
                                              (__mmask8) -1, (__R)); \
})
6963
/* Masked form: __W is the source vector and __U the 8-bit mask passed to
   __builtin_ia32_getmantpd512_mask, with rounding argument __R.  The
   immediate is ((__C) << 2) | (__B); both selectors are parenthesized so
   that expression arguments are combined as a whole.  */
#define _mm512_mask_getmant_round_pd(__W, __U, __A, __B, __C, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__A), \
                                              ((__C) << 2) | (__B), \
                                              (__v8df)(__W), \
                                              (__mmask8)(__U), (__R)); \
})
6970
/* Zero-source form: an all-zero vector is the source and __U the 8-bit mask
   passed to __builtin_ia32_getmantpd512_mask, with rounding argument __R.
   The immediate is ((__C) << 2) | (__B); both selectors are parenthesized
   so that expression arguments are combined as a whole.  */
#define _mm512_maskz_getmant_round_pd(__U, __A, __B, __C, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__A), \
                                              ((__C) << 2) | (__B), \
                                              (__v8df) _mm512_setzero_pd (), \
                                              (__mmask8)(__U), (__R)); \
})
6977
/* Calls __builtin_ia32_getmantpd512_mask on __A with an all-ones 8-bit mask
   and _MM_FROUND_CUR_DIRECTION.  The source vector is undefined, matching
   _mm512_getmant_round_pd and _mm512_getmant_ps, instead of a zero vector.
   The immediate is ((__C) << 2) | (__B); both selectors are parenthesized
   so that expression arguments are combined as a whole.
   NOTE(review): assumes the source operand is never observed when every
   mask bit is set, as the sibling unmasked wrappers already assume.  */
#define _mm512_getmant_pd(__A, __B, __C) __extension__ ({ \
  (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__A), \
                                              ((__C) << 2) | (__B), \
                                              (__v8df) _mm512_undefined_pd (), \
                                              (__mmask8) -1, \
                                              _MM_FROUND_CUR_DIRECTION); \
})
6984
/* Masked form: __W is the source vector and __U the 8-bit mask passed to
   __builtin_ia32_getmantpd512_mask, using _MM_FROUND_CUR_DIRECTION.  The
   immediate is ((__C) << 2) | (__B); both selectors are parenthesized so
   that expression arguments are combined as a whole.  */
#define _mm512_mask_getmant_pd(__W, __U, __A, __B, __C) __extension__ ({ \
  (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__A), \
                                              ((__C) << 2) | (__B), \
                                              (__v8df)(__W), \
                                              (__mmask8)(__U), \
                                              _MM_FROUND_CUR_DIRECTION); \
})
6990
/* Zero-source form: an all-zero vector is the source and __U the 8-bit mask
   passed to __builtin_ia32_getmantpd512_mask, using
   _MM_FROUND_CUR_DIRECTION.  The immediate is ((__C) << 2) | (__B); both
   selectors are parenthesized so that expression arguments are combined as
   a whole.  */
#define _mm512_maskz_getmant_pd(__U, __A, __B, __C) __extension__ ({ \
  (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__A), \
                                              ((__C) << 2) | (__B), \
                                              (__v8df) _mm512_setzero_pd (), \
                                              (__mmask8)(__U), \
                                              _MM_FROUND_CUR_DIRECTION); \
})
6997
/* Calls __builtin_ia32_getmantps512_mask on __A with an undefined source
   vector, an all-ones 16-bit mask and rounding argument __R.  The immediate
   is ((__C) << 2) | (__B); both selectors are parenthesized so that
   expression arguments are combined as a whole.  */
#define _mm512_getmant_round_ps(__A, __B, __C, __R) __extension__ ({ \
  (__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__A), \
                                             ((__C) << 2) | (__B), \
                                             (__v16sf) _mm512_undefined_ps (), \
                                             (__mmask16) -1, (__R)); \
})
7004
/* Masked form: __W is the source vector and __U the 16-bit mask passed to
   __builtin_ia32_getmantps512_mask, with rounding argument __R.  The
   immediate is ((__C) << 2) | (__B); both selectors are parenthesized so
   that expression arguments are combined as a whole.  */
#define _mm512_mask_getmant_round_ps(__W, __U, __A, __B, __C, __R) __extension__ ({ \
  (__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__A), \
                                             ((__C) << 2) | (__B), \
                                             (__v16sf)(__W), \
                                             (__mmask16)(__U), (__R)); \
})
7011
/* Zero-source form: an all-zero vector is the source and __U the mask
   passed to __builtin_ia32_getmantps512_mask, with rounding argument __R.
   __U is cast to __mmask16 like in every other masked wrapper of this
   family.  The immediate is ((__C) << 2) | (__B); both selectors are
   parenthesized so that expression arguments are combined as a whole.  */
#define _mm512_maskz_getmant_round_ps(__U, __A, __B, __C, __R) __extension__ ({ \
  (__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__A), \
                                             ((__C) << 2) | (__B), \
                                             (__v16sf) _mm512_setzero_ps (), \
                                             (__mmask16)(__U), (__R)); \
})
7018
/* Calls __builtin_ia32_getmantps512_mask on __A with an undefined source
   vector, an all-ones 16-bit mask and _MM_FROUND_CUR_DIRECTION.  The
   immediate is ((__C) << 2) | (__B); both selectors are parenthesized so
   that expression arguments are combined as a whole.  */
#define _mm512_getmant_ps(__A, __B, __C) __extension__ ({ \
  (__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__A), \
                                             ((__C) << 2) | (__B), \
                                             (__v16sf) _mm512_undefined_ps (), \
                                             (__mmask16) -1, \
                                             _MM_FROUND_CUR_DIRECTION); \
})
7025
/* Masked form: __W is the source vector and __U the 16-bit mask passed to
   __builtin_ia32_getmantps512_mask, using _MM_FROUND_CUR_DIRECTION.  The
   immediate is ((__C) << 2) | (__B); both selectors are parenthesized so
   that expression arguments are combined as a whole.  */
#define _mm512_mask_getmant_ps(__W, __U, __A, __B, __C) __extension__ ({ \
  (__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__A), \
                                             ((__C) << 2) | (__B), \
                                             (__v16sf)(__W), \
                                             (__mmask16)(__U), \
                                             _MM_FROUND_CUR_DIRECTION); \
})
7032
/* Zero-source form: an all-zero vector is the source and __U the 16-bit
   mask passed to __builtin_ia32_getmantps512_mask, using
   _MM_FROUND_CUR_DIRECTION.  The immediate is ((__C) << 2) | (__B); both
   selectors are parenthesized so that expression arguments are combined as
   a whole.  */
#define _mm512_maskz_getmant_ps(__U, __A, __B, __C) __extension__ ({ \
  (__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__A), \
                                             ((__C) << 2) | (__B), \
                                             (__v16sf) _mm512_setzero_ps (), \
                                             (__mmask16)(__U), \
                                             _MM_FROUND_CUR_DIRECTION); \
})
7039
/* Unmasked form: calls __builtin_ia32_getexppd512_mask on __A with an
   undefined source vector, an all-ones 8-bit mask and rounding argument
   __R.  */
#define _mm512_getexp_round_pd(__A, __R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__A), \
                                              (__v8df) _mm512_undefined_pd (), \
                                              (__mmask8) -1, (__R)))
7045
/* Masked form: __W is the source vector and __U the 8-bit mask passed to
   __builtin_ia32_getexppd512_mask, with rounding argument __R.  */
#define _mm512_mask_getexp_round_pd(__W, __U, __A, __R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__A), \
                                              (__v8df)(__W), \
                                              (__mmask8)(__U), (__R)))
7051
/* Zero-source form: an all-zero vector is the source and __U the 8-bit mask
   passed to __builtin_ia32_getexppd512_mask, with rounding argument __R.  */
#define _mm512_maskz_getexp_round_pd(__U, __A, __R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__A), \
                                              (__v8df) _mm512_setzero_pd (), \
                                              (__mmask8)(__U), (__R)))
7057
7058static __inline__ __m512d __DEFAULT_FN_ATTRS
7059_mm512_getexp_pd (__m512d __A)
7060{
7061 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7062 (__v8df) _mm512_undefined_pd (),
7063 (__mmask8) -1,
7064 _MM_FROUND_CUR_DIRECTION);
7065}
7066
7067static __inline__ __m512d __DEFAULT_FN_ATTRS
7068_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7069{
7070 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7071 (__v8df) __W,
7072 (__mmask8) __U,
7073 _MM_FROUND_CUR_DIRECTION);
7074}
7075
7076static __inline__ __m512d __DEFAULT_FN_ATTRS
7077_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
7078{
7079 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7080 (__v8df) _mm512_setzero_pd (),
7081 (__mmask8) __U,
7082 _MM_FROUND_CUR_DIRECTION);
7083}
7084
/* Unmasked form: calls __builtin_ia32_getexpps512_mask on __A with an
   undefined source vector, an all-ones 16-bit mask and rounding argument
   __R.  */
#define _mm512_getexp_round_ps(__A, __R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__A), \
                                             (__v16sf) _mm512_undefined_ps (), \
                                             (__mmask16) -1, (__R)))
7090
/* Masked form: __W is the source vector and __U the 16-bit mask passed to
   __builtin_ia32_getexpps512_mask, with rounding argument __R.  */
#define _mm512_mask_getexp_round_ps(__W, __U, __A, __R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__A), \
                                             (__v16sf)(__W), \
                                             (__mmask16)(__U), (__R)))
7096
/* Zero-source form: an all-zero vector is the source and __U the 16-bit
   mask passed to __builtin_ia32_getexpps512_mask, with rounding argument
   __R.  */
#define _mm512_maskz_getexp_round_ps(__U, __A, __R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__A), \
                                             (__v16sf) _mm512_setzero_ps (), \
                                             (__mmask16)(__U), (__R)))
7102
7103static __inline__ __m512 __DEFAULT_FN_ATTRS
7104_mm512_getexp_ps (__m512 __A)
7105{
7106 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7107 (__v16sf) _mm512_undefined_ps (),
7108 (__mmask16) -1,
7109 _MM_FROUND_CUR_DIRECTION);
7110}
7111
7112static __inline__ __m512 __DEFAULT_FN_ATTRS
7113_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7114{
7115 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7116 (__v16sf) __W,
7117 (__mmask16) __U,
7118 _MM_FROUND_CUR_DIRECTION);
7119}
7120
7121static __inline__ __m512 __DEFAULT_FN_ATTRS
7122_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
7123{
7124 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7125 (__v16sf) _mm512_setzero_ps (),
7126 (__mmask16) __U,
7127 _MM_FROUND_CUR_DIRECTION);
7128}
7129
/* Calls __builtin_ia32_gatherdiv16sf with an undefined __v8sf source,
   address __addr, the eight 64-bit indices in __index, an all-ones 8-bit
   mask and scale __scale.  Macro arguments are parenthesized so that
   expression arguments are cast as a whole, and the result is cast to
   __m256.  */
#define _mm512_i64gather_ps(__index, __addr, __scale) __extension__ ({ \
  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) _mm256_undefined_ps (), \
                                         (__addr), (__v8di)(__index), \
                                         (__mmask8) -1, (__scale)); \
})
7134
/* Masked form: __v1_old is the source vector and __mask the 8-bit mask
   passed to __builtin_ia32_gatherdiv16sf.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole, and the
   result is cast to __m256.  */
#define _mm512_mask_i64gather_ps(__v1_old, __mask, __index, \
                                 __addr, __scale) __extension__ ({ \
  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__v1_old), \
                                         (__addr), (__v8di)(__index), \
                                         (__mmask8)(__mask), (__scale)); \
})
7140
/* Calls __builtin_ia32_gatherdiv16si with an undefined __v8si source,
   address __addr, the eight 64-bit indices in __index, an all-ones 8-bit
   mask and scale __scale.  The undefined source comes from the integer
   helper _mm256_undefined_si256 rather than the floating-point one, macro
   arguments are parenthesized, and the result is cast to __m256i.  */
#define _mm512_i64gather_epi32(__index, __addr, __scale) __extension__ ({ \
  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) _mm256_undefined_si256 (), \
                                          (__addr), (__v8di)(__index), \
                                          (__mmask8) -1, (__scale)); \
})
7145
/* Masked form: __v1_old is the source vector and __mask the 8-bit mask
   passed to __builtin_ia32_gatherdiv16si.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole, and the
   result is cast to __m256i.  */
#define _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__v1_old), \
                                          (__addr), (__v8di)(__index), \
                                          (__mmask8)(__mask), (__scale)); \
})
7150
/* Calls __builtin_ia32_gatherdiv8df with an undefined __v8df source,
   address __addr, the eight 64-bit indices in __index, an all-ones 8-bit
   mask and scale __scale.  Macro arguments are parenthesized so that
   expression arguments are cast as a whole, and the result is cast to
   __m512d.  */
#define _mm512_i64gather_pd(__index, __addr, __scale) __extension__ ({ \
  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) _mm512_undefined_pd (), \
                                         (__addr), (__v8di)(__index), \
                                         (__mmask8) -1, (__scale)); \
})
7155
/* Masked form: __v1_old is the source vector and __mask the 8-bit mask
   passed to __builtin_ia32_gatherdiv8df.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole, and the
   result is cast to __m512d.  */
#define _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__v1_old), \
                                         (__addr), (__v8di)(__index), \
                                         (__mmask8)(__mask), (__scale)); \
})
7160
/* Calls __builtin_ia32_gatherdiv8di with an undefined __v8di source,
   address __addr, the eight 64-bit indices in __index, an all-ones 8-bit
   mask and scale __scale.  The undefined source comes from the integer
   helper _mm512_undefined_epi32 rather than the floating-point one, macro
   arguments are parenthesized, and the result is cast to __m512i.  */
#define _mm512_i64gather_epi64(__index, __addr, __scale) __extension__ ({ \
  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) _mm512_undefined_epi32 (), \
                                         (__addr), (__v8di)(__index), \
                                         (__mmask8) -1, (__scale)); \
})
7165
/* Masked form: __v1_old is the source vector and __mask the 8-bit mask
   passed to __builtin_ia32_gatherdiv8di.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole, and the
   result is cast to __m512i.  */
#define _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__v1_old), \
                                         (__addr), (__v8di)(__index), \
                                         (__mmask8)(__mask), (__scale)); \
})
7170
/* Calls __builtin_ia32_gathersiv16sf with an undefined __v16sf source,
   address __addr, the sixteen 32-bit indices in __index and scale __scale.
   The all-ones mask is built as __mmask16: this gather covers sixteen
   lanes (see the matching _mm512_i32scatter_ps, which uses __mmask16), and
   the previous (__mmask8) -1 only set the low eight mask bits.  Macro
   arguments are parenthesized and the result is cast to __m512.  */
#define _mm512_i32gather_ps(__index, __addr, __scale) __extension__ ({ \
  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) _mm512_undefined_ps (), \
                                         (__addr), (__v16si)(__index), \
                                         (__mmask16) -1, (__scale)); \
})
7175
/* Masked form: __v1_old is the source vector and __mask the 16-bit mask
   passed to __builtin_ia32_gathersiv16sf.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole, and the
   result is cast to __m512.  */
#define _mm512_mask_i32gather_ps(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__v1_old), \
                                         (__addr), (__v16si)(__index), \
                                         (__mmask16)(__mask), (__scale)); \
})
7180
/* Calls __builtin_ia32_gathersiv16si with an undefined source, address
   __addr, the sixteen 32-bit indices in __index and scale __scale.
   Two fixes over the previous definition: the source is cast to the
   integer type __v16si (it was cast to the float type __v16sf), and the
   all-ones mask is built as __mmask16 because the gather covers sixteen
   lanes ((__mmask8) -1 only set the low eight mask bits).  Macro arguments
   are parenthesized and the result is cast to __m512i.  */
#define _mm512_i32gather_epi32(__index, __addr, __scale) __extension__ ({ \
  (__m512i) __builtin_ia32_gathersiv16si ((__v16si) _mm512_undefined_epi32 (), \
                                          (__addr), (__v16si)(__index), \
                                          (__mmask16) -1, (__scale)); \
})
7185
/* Masked form: __v1_old is the source vector and __mask the 16-bit mask
   passed to __builtin_ia32_gathersiv16si.  The source is cast to the
   integer type __v16si (it was cast to the float type __v16sf), macro
   arguments are parenthesized, and the result is cast to __m512i.  */
#define _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__v1_old), \
                                          (__addr), (__v16si)(__index), \
                                          (__mmask16)(__mask), (__scale)); \
})
7190
/* Calls __builtin_ia32_gathersiv8df with an undefined __v8df source,
   address __addr, the eight 32-bit indices in __index, an all-ones 8-bit
   mask and scale __scale.  Macro arguments are parenthesized so that
   expression arguments are cast as a whole, and the result is cast to
   __m512d.  */
#define _mm512_i32gather_pd(__index, __addr, __scale) __extension__ ({ \
  (__m512d) __builtin_ia32_gathersiv8df ((__v8df) _mm512_undefined_pd (), \
                                         (__addr), (__v8si)(__index), \
                                         (__mmask8) -1, (__scale)); \
})
7195
/* Masked form: __v1_old is the source vector and __mask the 8-bit mask
   passed to __builtin_ia32_gathersiv8df.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole, and the
   result is cast to __m512d.  */
#define _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__v1_old), \
                                         (__addr), (__v8si)(__index), \
                                         (__mmask8)(__mask), (__scale)); \
})
7200
/* Calls __builtin_ia32_gathersiv8di with an undefined __v8di source,
   address __addr, the eight 32-bit indices in __index, an all-ones 8-bit
   mask and scale __scale.  Macro arguments are parenthesized so that
   expression arguments are cast as a whole, and the result is cast to
   __m512i.  */
#define _mm512_i32gather_epi64(__index, __addr, __scale) __extension__ ({ \
  (__m512i) __builtin_ia32_gathersiv8di ((__v8di) _mm512_undefined_epi32 (), \
                                         (__addr), (__v8si)(__index), \
                                         (__mmask8) -1, (__scale)); \
})
7205
/* Masked form: __v1_old is the source vector and __mask the 8-bit mask
   passed to __builtin_ia32_gathersiv8di.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole, and the
   result is cast to __m512i.  */
#define _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, __scale) __extension__ ({ \
  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__v1_old), \
                                         (__addr), (__v8si)(__index), \
                                         (__mmask8)(__mask), (__scale)); \
})
7210
/* Calls __builtin_ia32_scatterdiv16sf with address __addr, an all-ones
   8-bit mask, the eight 64-bit indices in __index, the __v8sf data in __v1
   and scale __scale.  Macro arguments are parenthesized so that expression
   arguments are cast as a whole.  */
#define _mm512_i64scatter_ps(__addr, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16sf ((__addr), (__mmask8) -1, \
                                 (__v8di)(__index), (__v8sf)(__v1), \
                                 (__scale)); \
})
7215
/* Masked form of the 64-bit-index float scatter: __mask is passed as the
   8-bit mask of __builtin_ia32_scatterdiv16sf.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole.  */
#define _mm512_mask_i64scatter_ps(__addr, __mask, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16sf ((__addr), (__mmask8)(__mask), \
                                 (__v8di)(__index), (__v8sf)(__v1), \
                                 (__scale)); \
})
7220
/* Calls __builtin_ia32_scatterdiv16si with address __addr, an all-ones
   8-bit mask, the eight 64-bit indices in __index, the __v8si data in __v1
   and scale __scale.  Macro arguments are parenthesized so that expression
   arguments are cast as a whole.  */
#define _mm512_i64scatter_epi32(__addr, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16si ((__addr), (__mmask8) -1, \
                                 (__v8di)(__index), (__v8si)(__v1), \
                                 (__scale)); \
})
7225
/* Masked form of the 64-bit-index 32-bit-integer scatter: __mask is passed
   as the 8-bit mask of __builtin_ia32_scatterdiv16si.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole.  */
#define _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16si ((__addr), (__mmask8)(__mask), \
                                 (__v8di)(__index), (__v8si)(__v1), \
                                 (__scale)); \
})
7230
/* Calls __builtin_ia32_scatterdiv8df with address __addr, an all-ones
   8-bit mask, the eight 64-bit indices in __index, the __v8df data in __v1
   and scale __scale.  Macro arguments are parenthesized so that expression
   arguments are cast as a whole.  */
#define _mm512_i64scatter_pd(__addr, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8df ((__addr), (__mmask8) -1, \
                                (__v8di)(__index), (__v8df)(__v1), \
                                (__scale)); \
})
7235
/* Masked form of the 64-bit-index double scatter: __mask is passed as the
   8-bit mask of __builtin_ia32_scatterdiv8df.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole.  */
#define _mm512_mask_i64scatter_pd(__addr, __mask, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8df ((__addr), (__mmask8)(__mask), \
                                (__v8di)(__index), (__v8df)(__v1), \
                                (__scale)); \
})
7240
/* Calls __builtin_ia32_scatterdiv8di with address __addr, an all-ones
   8-bit mask, the eight 64-bit indices in __index, the __v8di data in __v1
   and scale __scale.  Macro arguments are parenthesized so that expression
   arguments are cast as a whole.  */
#define _mm512_i64scatter_epi64(__addr, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8di ((__addr), (__mmask8) -1, \
                                (__v8di)(__index), (__v8di)(__v1), \
                                (__scale)); \
})
7245
/* Masked form of the 64-bit-index 64-bit-integer scatter: __mask is passed
   as the 8-bit mask of __builtin_ia32_scatterdiv8di.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole.  */
#define _mm512_mask_i64scatter_epi64(__addr, __mask, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8di ((__addr), (__mmask8)(__mask), \
                                (__v8di)(__index), (__v8di)(__v1), \
                                (__scale)); \
})
7250
/* Calls __builtin_ia32_scattersiv16sf with address __addr, an all-ones
   16-bit mask, the sixteen 32-bit indices in __index, the __v16sf data in
   __v1 and scale __scale.  Macro arguments are parenthesized so that
   expression arguments are cast as a whole.  */
#define _mm512_i32scatter_ps(__addr, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scattersiv16sf ((__addr), (__mmask16) -1, \
                                 (__v16si)(__index), (__v16sf)(__v1), \
                                 (__scale)); \
})
7255
/* Masked form of the 32-bit-index float scatter: __mask is passed as the
   16-bit mask of __builtin_ia32_scattersiv16sf.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole.  */
#define _mm512_mask_i32scatter_ps(__addr, __mask, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scattersiv16sf ((__addr), (__mmask16)(__mask), \
                                 (__v16si)(__index), (__v16sf)(__v1), \
                                 (__scale)); \
})
7260
/* Calls __builtin_ia32_scattersiv16si with address __addr, an all-ones
   16-bit mask, the sixteen 32-bit indices in __index, the __v16si data in
   __v1 and scale __scale.  Macro arguments are parenthesized so that
   expression arguments are cast as a whole.  */
#define _mm512_i32scatter_epi32(__addr, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scattersiv16si ((__addr), (__mmask16) -1, \
                                 (__v16si)(__index), (__v16si)(__v1), \
                                 (__scale)); \
})
7265
/* Masked form of the 32-bit-index 32-bit-integer scatter: __mask is passed
   as the 16-bit mask of __builtin_ia32_scattersiv16si.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole.  */
#define _mm512_mask_i32scatter_epi32(__addr, __mask, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scattersiv16si ((__addr), (__mmask16)(__mask), \
                                 (__v16si)(__index), (__v16si)(__v1), \
                                 (__scale)); \
})
7270
/* Calls __builtin_ia32_scattersiv8df with address __addr, an all-ones
   8-bit mask, the eight 32-bit indices in __index, the __v8df data in __v1
   and scale __scale.  Macro arguments are parenthesized so that expression
   arguments are cast as a whole.  */
#define _mm512_i32scatter_pd(__addr, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scattersiv8df ((__addr), (__mmask8) -1, \
                                (__v8si)(__index), (__v8df)(__v1), \
                                (__scale)); \
})
7275
/* Masked form of the 32-bit-index double scatter: __mask is passed as the
   8-bit mask of __builtin_ia32_scattersiv8df.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole.  */
#define _mm512_mask_i32scatter_pd(__addr, __mask, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scattersiv8df ((__addr), (__mmask8)(__mask), \
                                (__v8si)(__index), (__v8df)(__v1), \
                                (__scale)); \
})
7280
/* Calls __builtin_ia32_scattersiv8di with address __addr, an all-ones
   8-bit mask, the eight 32-bit indices in __index, the __v8di data in __v1
   and scale __scale.  Macro arguments are parenthesized so that expression
   arguments are cast as a whole.  */
#define _mm512_i32scatter_epi64(__addr, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scattersiv8di ((__addr), (__mmask8) -1, \
                                (__v8si)(__index), (__v8di)(__v1), \
                                (__scale)); \
})
7285
/* Masked form of the 32-bit-index 64-bit-integer scatter: __mask is passed
   as the 8-bit mask of __builtin_ia32_scattersiv8di.  Macro arguments are
   parenthesized so that expression arguments are cast as a whole.  */
#define _mm512_mask_i32scatter_epi64(__addr, __mask, __index, __v1, __scale) __extension__ ({ \
  __builtin_ia32_scattersiv8di ((__addr), (__mmask8)(__mask), \
                                (__v8si)(__index), (__v8di)(__v1), \
                                (__scale)); \
})
7290
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007291static __inline__ __m128 __DEFAULT_FN_ATTRS
7292_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7293{
7294 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A,
7295 (__v4sf) __B,
7296 (__v4sf) __W,
7297 (__mmask8) __U,
7298 _MM_FROUND_CUR_DIRECTION);
7299}
7300
/* Explicit-rounding form of _mm_mask_fmadd_ss: passes __A, __B, __W, mask
   __U and rounding argument __R to __builtin_ia32_vfmaddss3_mask.  Macro
   arguments are parenthesized so that expression arguments are cast as a
   whole, and the result is cast to __m128.  */
#define _mm_mask_fmadd_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf)(__A), \
                                          (__v4sf)(__B), \
                                          (__v4sf)(__W), \
                                          (__mmask8)(__U), \
                                          (__R)); \
})
7308
7309static __inline__ __m128 __DEFAULT_FN_ATTRS
7310_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7311{
7312 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
7313 (__v4sf) __B,
7314 (__v4sf) __C,
7315 (__mmask8) __U,
7316 _MM_FROUND_CUR_DIRECTION);
7317}
7318
/* Explicit-rounding form of _mm_maskz_fmadd_ss: passes __A, __B, __C, mask
   __U and rounding argument __R to __builtin_ia32_vfmaddss3_maskz.
   The previous definition ignored __R and always passed
   _MM_FROUND_CUR_DIRECTION; the caller's rounding argument is now
   forwarded, as in the other *_round_ss macros.  Macro arguments are
   parenthesized and the result is cast to __m128.  */
#define _mm_maskz_fmadd_round_ss(__U, __A, __B, __C, __R) __extension__ ({ \
  (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf)(__A), \
                                           (__v4sf)(__B), \
                                           (__v4sf)(__C), \
                                           (__mmask8)(__U), \
                                           (__R)); \
})
7326
7327static __inline__ __m128 __DEFAULT_FN_ATTRS
7328_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7329{
7330 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
7331 (__v4sf) __X,
7332 (__v4sf) __Y,
7333 (__mmask8) __U,
7334 _MM_FROUND_CUR_DIRECTION);
7335}
7336
/* Explicit-rounding form of _mm_mask3_fmadd_ss: passes __W, __X, __Y, mask
   __U and rounding argument __R to __builtin_ia32_vfmaddss3_mask3.  Macro
   arguments are parenthesized so that expression arguments are cast as a
   whole, and the result is cast to __m128.  */
#define _mm_mask3_fmadd_round_ss(__W, __X, __Y, __U, __R) __extension__ ({ \
  (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf)(__W), \
                                           (__v4sf)(__X), \
                                           (__v4sf)(__Y), \
                                           (__mmask8)(__U), \
                                           (__R)); \
})
7344
7345static __inline__ __m128 __DEFAULT_FN_ATTRS
7346_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7347{
7348 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A,
7349 (__v4sf) -(__B),
7350 (__v4sf) __W,
7351 (__mmask8) __U,
7352 _MM_FROUND_CUR_DIRECTION);
7353}
7354
/* Explicit-rounding form of _mm_mask_fmsub_ss: passes __A, the negation of
   __B, __W, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddss3_mask.  Arguments are parenthesized, the
   negation is applied after the cast to __v4sf so it always acts on float
   lanes, and the result is cast to __m128.  */
#define _mm_mask_fmsub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf)(__A), \
                                          -(__v4sf)(__B), \
                                          (__v4sf)(__W), \
                                          (__mmask8)(__U), \
                                          (__R)); \
})
7362
7363static __inline__ __m128 __DEFAULT_FN_ATTRS
7364_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7365{
7366 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
7367 (__v4sf) __B,
7368 (__v4sf) -(__C),
7369 (__mmask8) __U,
7370 _MM_FROUND_CUR_DIRECTION);
7371}
7372
/* Explicit-rounding form of _mm_maskz_fmsub_ss: passes __A, __B, the
   negation of __C, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddss3_maskz.  Arguments are parenthesized, the
   negation is applied after the cast to __v4sf so it always acts on float
   lanes, and the result is cast to __m128.  */
#define _mm_maskz_fmsub_round_ss(__U, __A, __B, __C, __R) __extension__ ({ \
  (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf)(__A), \
                                           (__v4sf)(__B), \
                                           -(__v4sf)(__C), \
                                           (__mmask8)(__U), \
                                           (__R)); \
})
7380
7381static __inline__ __m128 __DEFAULT_FN_ATTRS
7382_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7383{
7384 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
7385 (__v4sf) __X,
7386 (__v4sf) -(__Y),
7387 (__mmask8) __U,
7388 _MM_FROUND_CUR_DIRECTION);
7389}
7390
/* Explicit-rounding form of _mm_mask3_fmsub_ss: passes __W, __X, the
   negation of __Y, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddss3_mask3.  Arguments are parenthesized, the
   negation is applied after the cast to __v4sf so it always acts on float
   lanes, and the result is cast to __m128.  */
#define _mm_mask3_fmsub_round_ss(__W, __X, __Y, __U, __R) __extension__ ({ \
  (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf)(__W), \
                                           (__v4sf)(__X), \
                                           -(__v4sf)(__Y), \
                                           (__mmask8)(__U), \
                                           (__R)); \
})
7398
7399static __inline__ __m128 __DEFAULT_FN_ATTRS
7400_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7401{
7402 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) -(__A),
7403 (__v4sf) __B,
7404 (__v4sf) __W,
7405 (__mmask8) __U,
7406 _MM_FROUND_CUR_DIRECTION);
7407}
7408
/* Explicit-rounding form of _mm_mask_fnmadd_ss: passes the negation of __A,
   __B, __W, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddss3_mask.  Arguments are parenthesized, the
   negation is applied after the cast to __v4sf so it always acts on float
   lanes, and the result is cast to __m128.  */
#define _mm_mask_fnmadd_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_vfmaddss3_mask (-(__v4sf)(__A), \
                                          (__v4sf)(__B), \
                                          (__v4sf)(__W), \
                                          (__mmask8)(__U), \
                                          (__R)); \
})
7416
7417static __inline__ __m128 __DEFAULT_FN_ATTRS
7418_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7419{
7420 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) -(__A),
7421 (__v4sf) __B,
7422 (__v4sf) __C,
7423 (__mmask8) __U,
7424 _MM_FROUND_CUR_DIRECTION);
7425}
7426
/* Explicit-rounding form of _mm_maskz_fnmadd_ss: passes the negation of
   __A, __B, __C, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddss3_maskz.  Arguments are parenthesized, the
   negation is applied after the cast to __v4sf so it always acts on float
   lanes, and the result is cast to __m128.  */
#define _mm_maskz_fnmadd_round_ss(__U, __A, __B, __C, __R) __extension__ ({ \
  (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf)(__A), \
                                           (__v4sf)(__B), \
                                           (__v4sf)(__C), \
                                           (__mmask8)(__U), \
                                           (__R)); \
})
7434
7435static __inline__ __m128 __DEFAULT_FN_ATTRS
7436_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7437{
7438 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) -(__W),
7439 (__v4sf) __X,
7440 (__v4sf) __Y,
7441 (__mmask8) __U,
7442 _MM_FROUND_CUR_DIRECTION);
7443}
7444
/* Explicit-rounding form of _mm_mask3_fnmadd_ss: passes the negation of
   __W, __X, __Y, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddss3_mask3.  Arguments are parenthesized, the
   negation is applied after the cast to __v4sf so it always acts on float
   lanes, and the result is cast to __m128.  */
#define _mm_mask3_fnmadd_round_ss(__W, __X, __Y, __U, __R) __extension__ ({ \
  (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf)(__W), \
                                           (__v4sf)(__X), \
                                           (__v4sf)(__Y), \
                                           (__mmask8)(__U), \
                                           (__R)); \
})
7452
7453static __inline__ __m128 __DEFAULT_FN_ATTRS
7454_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7455{
7456 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) -(__A),
7457 (__v4sf) -(__B),
7458 (__v4sf) __W,
7459 (__mmask8) __U,
7460 _MM_FROUND_CUR_DIRECTION);
7461}
7462
/* Explicit-rounding form of _mm_mask_fnmsub_ss: passes the negation of __A,
   the negation of __B, __W, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddss3_mask.  Arguments are parenthesized, negations
   are applied after the cast to __v4sf so they always act on float lanes,
   and the result is cast to __m128.  */
#define _mm_mask_fnmsub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_vfmaddss3_mask (-(__v4sf)(__A), \
                                          -(__v4sf)(__B), \
                                          (__v4sf)(__W), \
                                          (__mmask8)(__U), \
                                          (__R)); \
})
7470
7471static __inline__ __m128 __DEFAULT_FN_ATTRS
7472_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7473{
7474 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) -(__A),
7475 (__v4sf) __B,
7476 (__v4sf) -(__C),
7477 (__mmask8) __U,
7478 _MM_FROUND_CUR_DIRECTION);
7479}
7480
/* Explicit-rounding form of _mm_maskz_fnmsub_ss: passes the negation of
   __A, __B, the negation of __C, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddss3_maskz.
   The previous definition ignored __R and always passed
   _MM_FROUND_CUR_DIRECTION; the caller's rounding argument is now
   forwarded, as in the other *_round_ss macros.  Arguments are
   parenthesized, negations are applied after the cast to __v4sf, and the
   result is cast to __m128.  */
#define _mm_maskz_fnmsub_round_ss(__U, __A, __B, __C, __R) __extension__ ({ \
  (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf)(__A), \
                                           (__v4sf)(__B), \
                                           -(__v4sf)(__C), \
                                           (__mmask8)(__U), \
                                           (__R)); \
})
7488
7489static __inline__ __m128 __DEFAULT_FN_ATTRS
7490_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7491{
7492 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) -(__W),
7493 (__v4sf) __X,
7494 (__v4sf) -(__Y),
7495 (__mmask8) __U,
7496 _MM_FROUND_CUR_DIRECTION);
7497}
7498
/* Explicit-rounding form of _mm_mask3_fnmsub_ss: passes the negation of
   __W, __X, the negation of __Y, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddss3_mask3.  Arguments are parenthesized, negations
   are applied after the cast to __v4sf so they always act on float lanes,
   and the result is cast to __m128.  */
#define _mm_mask3_fnmsub_round_ss(__W, __X, __Y, __U, __R) __extension__ ({ \
  (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf)(__W), \
                                           (__v4sf)(__X), \
                                           -(__v4sf)(__Y), \
                                           (__mmask8)(__U), \
                                           (__R)); \
})
7506
7507static __inline__ __m128 __DEFAULT_FN_ATTRS
7508_mm_mask_fmadd_sd (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7509{
7510 return (__m128) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A,
7511 (__v2df) __B,
7512 (__v2df) __W,
7513 (__mmask8) __U,
7514 _MM_FROUND_CUR_DIRECTION);
7515}
7516
/* Explicit-rounding form of _mm_mask_fmadd_sd: passes __A, __B, __W, mask
   __U and rounding argument __R to __builtin_ia32_vfmaddsd3_mask.  Macro
   arguments are parenthesized so that expression arguments are cast as a
   whole, and the result is cast to __m128d.  */
#define _mm_mask_fmadd_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df)(__A), \
                                           (__v2df)(__B), \
                                           (__v2df)(__W), \
                                           (__mmask8)(__U), \
                                           (__R)); \
})
7524
7525static __inline__ __m128 __DEFAULT_FN_ATTRS
7526_mm_maskz_fmadd_sd (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7527{
7528 return (__m128) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
7529 (__v2df) __B,
7530 (__v2df) __C,
7531 (__mmask8) __U,
7532 _MM_FROUND_CUR_DIRECTION);
7533}
7534
/* Explicit-rounding form of _mm_maskz_fmadd_sd: passes __A, __B, __C, mask
   __U and rounding argument __R to __builtin_ia32_vfmaddsd3_maskz.
   The previous definition ignored __R and always passed
   _MM_FROUND_CUR_DIRECTION; the caller's rounding argument is now
   forwarded, as in the other *_round_sd macros.  Macro arguments are
   parenthesized and the result is cast to __m128d.  */
#define _mm_maskz_fmadd_round_sd(__U, __A, __B, __C, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df)(__A), \
                                            (__v2df)(__B), \
                                            (__v2df)(__C), \
                                            (__mmask8)(__U), \
                                            (__R)); \
})
7542
7543static __inline__ __m128 __DEFAULT_FN_ATTRS
7544_mm_mask3_fmadd_sd (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7545{
7546 return (__m128) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
7547 (__v2df) __X,
7548 (__v2df) __Y,
7549 (__mmask8) __U,
7550 _MM_FROUND_CUR_DIRECTION);
7551}
7552
/* Explicit-rounding form of _mm_mask3_fmadd_sd: passes __W, __X, __Y, mask
   __U and rounding argument __R to __builtin_ia32_vfmaddsd3_mask3.  Macro
   arguments are parenthesized so that expression arguments are cast as a
   whole, and the result is cast to __m128d.  */
#define _mm_mask3_fmadd_round_sd(__W, __X, __Y, __U, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df)(__W), \
                                            (__v2df)(__X), \
                                            (__v2df)(__Y), \
                                            (__mmask8)(__U), \
                                            (__R)); \
})
7560
7561static __inline__ __m128 __DEFAULT_FN_ATTRS
7562_mm_mask_fmsub_sd (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7563{
7564 return (__m128) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A,
7565 (__v2df) -(__B),
7566 (__v2df) __W,
7567 (__mmask8) __U,
7568 _MM_FROUND_CUR_DIRECTION);
7569}
7570
/* Explicit-rounding form of _mm_mask_fmsub_sd: passes __A, the negation of
   __B, __W, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddsd3_mask.  Arguments are parenthesized, the
   negation is applied after the cast to __v2df so it always acts on double
   lanes whatever vector type the caller passes, and the result is cast to
   __m128d.  */
#define _mm_mask_fmsub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df)(__A), \
                                           -(__v2df)(__B), \
                                           (__v2df)(__W), \
                                           (__mmask8)(__U), \
                                           (__R)); \
})
7578
7579static __inline__ __m128 __DEFAULT_FN_ATTRS
7580_mm_maskz_fmsub_sd (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7581{
7582 return (__m128) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
7583 (__v2df) __B,
7584 (__v2df) -(__C),
7585 (__mmask8) __U,
7586 _MM_FROUND_CUR_DIRECTION);
7587}
7588
/* Explicit-rounding form of _mm_maskz_fmsub_sd: passes __A, __B, the
   negation of __C, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddsd3_maskz.  Arguments are parenthesized, the
   negation is applied after the cast to __v2df so it always acts on double
   lanes, and the result is cast to __m128d.  */
#define _mm_maskz_fmsub_round_sd(__U, __A, __B, __C, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df)(__A), \
                                            (__v2df)(__B), \
                                            -(__v2df)(__C), \
                                            (__mmask8)(__U), \
                                            (__R)); \
})
7596
7597static __inline__ __m128 __DEFAULT_FN_ATTRS
7598_mm_mask3_fmsub_sd (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7599{
7600 return (__m128) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
7601 (__v2df) __X,
7602 (__v2df) -(__Y),
7603 (__mmask8) __U,
7604 _MM_FROUND_CUR_DIRECTION);
7605}
7606
/* Explicit-rounding form of _mm_mask3_fmsub_sd: passes __W, __X, the
   negation of __Y, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddsd3_mask3.  Arguments are parenthesized, the
   negation is applied after the cast to __v2df so it always acts on double
   lanes, and the result is cast to __m128d.  */
#define _mm_mask3_fmsub_round_sd(__W, __X, __Y, __U, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df)(__W), \
                                            (__v2df)(__X), \
                                            -(__v2df)(__Y), \
                                            (__mmask8)(__U), \
                                            (__R)); \
})
7613
7614static __inline__ __m128 __DEFAULT_FN_ATTRS
7615_mm_mask_fnmadd_sd (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7616{
7617 return (__m128) __builtin_ia32_vfmaddsd3_mask ( (__v2df) -(__A),
7618 (__v2df) __B,
7619 (__v2df) __W,
7620 (__mmask8) __U,
7621 _MM_FROUND_CUR_DIRECTION);
7622}
7623
/* Explicit-rounding form of _mm_mask_fnmadd_sd: passes the negation of __A,
   __B, __W, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddsd3_mask.  Arguments are parenthesized, the
   negation is applied after the cast to __v2df so it always acts on double
   lanes, and the result is cast to __m128d.  */
#define _mm_mask_fnmadd_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_vfmaddsd3_mask (-(__v2df)(__A), \
                                           (__v2df)(__B), \
                                           (__v2df)(__W), \
                                           (__mmask8)(__U), \
                                           (__R)); \
})
7631
7632static __inline__ __m128 __DEFAULT_FN_ATTRS
7633_mm_maskz_fnmadd_sd (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7634{
7635 return (__m128) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) -(__A),
7636 (__v2df) __B,
7637 (__v2df) __C,
7638 (__mmask8) __U,
7639 _MM_FROUND_CUR_DIRECTION);
7640}
7641
/* Explicit-rounding form of _mm_maskz_fnmadd_sd: passes the negation of
   __A, __B, __C, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddsd3_maskz.  Arguments are parenthesized, the
   negation is applied after the cast to __v2df so it always acts on double
   lanes, and the result is cast to __m128d.  */
#define _mm_maskz_fnmadd_round_sd(__U, __A, __B, __C, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_vfmaddsd3_maskz (-(__v2df)(__A), \
                                            (__v2df)(__B), \
                                            (__v2df)(__C), \
                                            (__mmask8)(__U), \
                                            (__R)); \
})
7649
7650static __inline__ __m128 __DEFAULT_FN_ATTRS
7651_mm_mask3_fnmadd_sd (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7652{
7653 return (__m128) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) -(__W),
7654 (__v2df) __X,
7655 (__v2df) __Y,
7656 (__mmask8) __U,
7657 _MM_FROUND_CUR_DIRECTION);
7658}
7659
/* Explicit-rounding form of _mm_mask3_fnmadd_sd: passes the negation of
   __W, __X, __Y, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddsd3_mask3.  Arguments are parenthesized, the
   negation is applied after the cast to __v2df so it always acts on double
   lanes, and the result is cast to __m128d.  */
#define _mm_mask3_fnmadd_round_sd(__W, __X, __Y, __U, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df)(__W), \
                                            (__v2df)(__X), \
                                            (__v2df)(__Y), \
                                            (__mmask8)(__U), \
                                            (__R)); \
})
7667
7668static __inline__ __m128 __DEFAULT_FN_ATTRS
7669_mm_mask_fnmsub_sd (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7670{
7671 return (__m128) __builtin_ia32_vfmaddsd3_mask ( (__v2df) -(__A),
7672 (__v2df) -(__B),
7673 (__v2df) __W,
7674 (__mmask8) __U,
7675 _MM_FROUND_CUR_DIRECTION);
7676}
7677
/* Explicit-rounding form of _mm_mask_fnmsub_sd: passes the negation of __A,
   the negation of __B, __W, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddsd3_mask.  Arguments are parenthesized, negations
   are applied after the cast to __v2df so they always act on double lanes,
   and the result is cast to __m128d.  */
#define _mm_mask_fnmsub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_vfmaddsd3_mask (-(__v2df)(__A), \
                                           -(__v2df)(__B), \
                                           (__v2df)(__W), \
                                           (__mmask8)(__U), \
                                           (__R)); \
})
7685
7686static __inline__ __m128 __DEFAULT_FN_ATTRS
7687_mm_maskz_fnmsub_sd (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7688{
7689 return (__m128) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) -(__A),
7690 (__v2df) __B,
7691 (__v2df) -(__C),
7692 (__mmask8) __U,
7693 _MM_FROUND_CUR_DIRECTION);
7694}
7695
/* Explicit-rounding form of _mm_maskz_fnmsub_sd: passes the negation of
   __A, __B, the negation of __C, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddsd3_maskz.
   The previous definition ignored __R and always passed
   _MM_FROUND_CUR_DIRECTION; the caller's rounding argument is now
   forwarded, as in the other *_round_sd macros.  Arguments are
   parenthesized, negations are applied after the cast to __v2df, and the
   result is cast to __m128d.  */
#define _mm_maskz_fnmsub_round_sd(__U, __A, __B, __C, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_vfmaddsd3_maskz (-(__v2df)(__A), \
                                            (__v2df)(__B), \
                                            -(__v2df)(__C), \
                                            (__mmask8)(__U), \
                                            (__R)); \
})
7703
7704static __inline__ __m128 __DEFAULT_FN_ATTRS
7705_mm_mask3_fnmsub_sd (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7706{
7707 return (__m128) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) -(__W),
7708 (__v2df) __X,
7709 (__v2df) -(__Y),
7710 (__mmask8) __U,
7711 _MM_FROUND_CUR_DIRECTION);
7712}
7713
/* Explicit-rounding form of _mm_mask3_fnmsub_sd: passes the negation of
   __W, __X, the negation of __Y, mask __U and rounding argument __R to
   __builtin_ia32_vfmaddsd3_mask3.  Arguments are parenthesized, negations
   are applied after the cast to __v2df so they always act on double lanes,
   and the result is cast to __m128d.  */
#define _mm_mask3_fnmsub_round_sd(__W, __X, __Y, __U, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df)(__W), \
                                            (__v2df)(__X), \
                                            -(__v2df)(__Y), \
                                            (__mmask8)(__U), \
                                            (__R)); \
})
7721
/* Unmasked form: calls __builtin_ia32_permdf512_mask on __X with immediate
   __M, an undefined __v8df vector and an all-ones 8-bit mask.  */
#define _mm512_permutex_pd(__X, __M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__X), (__M), \
                                            (__v8df) _mm512_undefined_pd (), \
                                            (__mmask8) -1))
7727
/* Masked form: __W is the third operand and __U the 8-bit mask passed to
   __builtin_ia32_permdf512_mask; __M is the immediate.  */
#define _mm512_mask_permutex_pd(__W, __U, __X, __M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__X), (__M), \
                                            (__v8df)(__W), \
                                            (__mmask8)(__U)))
7733
/* Zero-source form: an all-zero vector is the third operand and __U the
   8-bit mask passed to __builtin_ia32_permdf512_mask; __M is the
   immediate.  */
#define _mm512_maskz_permutex_pd(__U, __X, __M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__X), (__M), \
                                            (__v8df) _mm512_setzero_pd (), \
                                            (__mmask8)(__U)))
7739
/* Unmasked form: calls __builtin_ia32_permdi512_mask on __X with immediate
   __I, an undefined __v8di vector and an all-ones 8-bit mask.  */
#define _mm512_permutex_epi64(__X, __I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__X), (__I), \
                                            (__v8di) _mm512_undefined_epi32 (), \
                                            (__mmask8) (-1)))
7745
/* Masked form: __W is the third operand and __M the 8-bit mask passed to
   __builtin_ia32_permdi512_mask; __I is the immediate.  */
#define _mm512_mask_permutex_epi64(__W, __M, __X, __I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__X), (__I), \
                                            (__v8di)(__W), \
                                            (__mmask8)(__M)))
7751
/* Zero-source form: an all-zero vector is the third operand and __M the
   8-bit mask passed to __builtin_ia32_permdi512_mask; __I is the
   immediate.  */
#define _mm512_maskz_permutex_epi64(__M, __X, __I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__X), (__I), \
                                            (__v8di) _mm512_setzero_si512 (), \
                                            (__mmask8)(__M)))
7757
7758static __inline__ __m512d __DEFAULT_FN_ATTRS
7759_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
7760{
7761 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
7762 (__v8di) __X,
7763 (__v8df) _mm512_undefined_pd (),
7764 (__mmask8) -1);
7765}
7766
7767static __inline__ __m512d __DEFAULT_FN_ATTRS
7768_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
7769{
7770 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
7771 (__v8di) __X,
7772 (__v8df) __W,
7773 (__mmask8) __U);
7774}
7775
7776static __inline__ __m512d __DEFAULT_FN_ATTRS
7777_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
7778{
7779 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
7780 (__v8di) __X,
7781 (__v8df) _mm512_setzero_pd (),
7782 (__mmask8) __U);
7783}
7784
7785static __inline__ __m512i __DEFAULT_FN_ATTRS
7786_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
7787{
7788 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
7789 (__v8di) __X,
7790 (__v8di) _mm512_setzero_si512 (),
7791 __M);
7792}
7793
7794static __inline__ __m512i __DEFAULT_FN_ATTRS
7795_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
7796{
7797 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
7798 (__v8di) __X,
7799 (__v8di) _mm512_undefined_epi32 (),
7800 (__mmask8) -1);
7801}
7802
7803static __inline__ __m512i __DEFAULT_FN_ATTRS
7804_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
7805 __m512i __Y)
7806{
7807 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
7808 (__v8di) __X,
7809 (__v8di) __W,
7810 __M);
7811}
7812
7813static __inline__ __m512 __DEFAULT_FN_ATTRS
7814_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
7815{
7816 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7817 (__v16si) __X,
7818 (__v16sf) _mm512_undefined_ps (),
7819 (__mmask16) -1);
7820}
7821
7822static __inline__ __m512 __DEFAULT_FN_ATTRS
7823_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
7824{
7825 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7826 (__v16si) __X,
7827 (__v16sf) __W,
7828 (__mmask16) __U);
7829}
7830
7831static __inline__ __m512 __DEFAULT_FN_ATTRS
7832_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
7833{
7834 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7835 (__v16si) __X,
7836 (__v16sf) _mm512_setzero_ps (),
7837 (__mmask16) __U);
7838}
7839
7840static __inline__ __m512i __DEFAULT_FN_ATTRS
7841_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
7842{
7843 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
7844 (__v16si) __X,
7845 (__v16si) _mm512_setzero_si512 (),
7846 __M);
7847}
7848
7849static __inline__ __m512i __DEFAULT_FN_ATTRS
7850_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
7851{
7852 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
7853 (__v16si) __X,
7854 (__v16si) _mm512_undefined_epi32 (),
7855 (__mmask16) -1);
7856}
7857
7858static __inline__ __m512i __DEFAULT_FN_ATTRS
7859_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
7860 __m512i __Y)
7861{
7862 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
7863 (__v16si) __X,
7864 (__v16si) __W,
7865 __M);
7866}
7867
Michael Zuckermanfa508e82016-04-25 16:42:29 +00007868static __inline__ __mmask16 __DEFAULT_FN_ATTRS
7869_mm512_kand (__mmask16 __A, __mmask16 __B)
7870{
7871 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
7872}
Michael Zuckerman8938e832016-04-25 05:32:35 +00007873
Michael Zuckermanfa508e82016-04-25 16:42:29 +00007874static __inline__ __mmask16 __DEFAULT_FN_ATTRS
7875_mm512_kandn (__mmask16 __A, __mmask16 __B)
7876{
7877 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
7878}
7879
7880static __inline__ __mmask16 __DEFAULT_FN_ATTRS
7881_mm512_kor (__mmask16 __A, __mmask16 __B)
7882{
7883 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
7884}
7885
7886static __inline__ int __DEFAULT_FN_ATTRS
7887_mm512_kortestc (__mmask16 __A, __mmask16 __B)
7888{
7889 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
7890 (__mmask16) __B);
7891}
7892
7893static __inline__ int __DEFAULT_FN_ATTRS
7894_mm512_kortestz (__mmask16 __A, __mmask16 __B)
7895{
7896 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
7897 (__mmask16) __B);
7898}
7899
7900static __inline__ __mmask16 __DEFAULT_FN_ATTRS
7901_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
7902{
7903 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
7904}
7905
7906static __inline__ __mmask16 __DEFAULT_FN_ATTRS
7907_mm512_kxnor (__mmask16 __A, __mmask16 __B)
7908{
7909 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
7910}
7911
7912static __inline__ __mmask16 __DEFAULT_FN_ATTRS
7913_mm512_kxor (__mmask16 __A, __mmask16 __B)
7914{
7915 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
7916}
Michael Zuckerman8938e832016-04-25 05:32:35 +00007917
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00007918static __inline__ void __DEFAULT_FN_ATTRS
7919_mm512_stream_si512 (__m512i * __P, __m512i __A)
7920{
7921 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
7922}
7923
7924static __inline__ __m512i __DEFAULT_FN_ATTRS
7925_mm512_stream_load_si512 (void *__P)
7926{
7927 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
7928}
7929
7930static __inline__ void __DEFAULT_FN_ATTRS
7931_mm512_stream_pd (double *__P, __m512d __A)
7932{
7933 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
7934}
7935
7936static __inline__ void __DEFAULT_FN_ATTRS
7937_mm512_stream_ps (float *__P, __m512 __A)
7938{
7939 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
7940}
7941
Michael Zuckerman41f5a372016-04-29 08:52:02 +00007942static __inline__ __m512d __DEFAULT_FN_ATTRS
7943_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
7944{
7945 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
7946 (__v8df) __W,
7947 (__mmask8) __U);
7948}
7949
7950static __inline__ __m512d __DEFAULT_FN_ATTRS
7951_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
7952{
7953 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
7954 (__v8df)
7955 _mm512_setzero_pd (),
7956 (__mmask8) __U);
7957}
7958
7959static __inline__ __m512i __DEFAULT_FN_ATTRS
7960_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
7961{
7962 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
7963 (__v8di) __W,
7964 (__mmask8) __U);
7965}
7966
7967static __inline__ __m512i __DEFAULT_FN_ATTRS
7968_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
7969{
7970 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
7971 (__v8di)
7972 _mm512_setzero_si512 (),
7973 (__mmask8) __U);
7974}
7975
7976static __inline__ __m512 __DEFAULT_FN_ATTRS
7977_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
7978{
7979 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
7980 (__v16sf) __W,
7981 (__mmask16) __U);
7982}
7983
7984static __inline__ __m512 __DEFAULT_FN_ATTRS
7985_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
7986{
7987 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
7988 (__v16sf)
7989 _mm512_setzero_ps (),
7990 (__mmask16) __U);
7991}
7992
7993static __inline__ __m512i __DEFAULT_FN_ATTRS
7994_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
7995{
7996 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
7997 (__v16si) __W,
7998 (__mmask16) __U);
7999}
8000
8001static __inline__ __m512i __DEFAULT_FN_ATTRS
8002_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
8003{
8004 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8005 (__v16si)
8006 _mm512_setzero_si512 (),
8007 (__mmask16) __U);
8008}
8009
/* Expands to __builtin_ia32_cmpss_mask on __X and __Y with predicate __P,
   an all-ones 8-bit mask and rounding/exception control __R.  Every macro
   parameter is parenthesized, matching _mm_cmp_ss_mask. */
#define _mm_cmp_round_ss_mask(__X, __Y, __P, __R) __extension__ ({ \
  __builtin_ia32_cmpss_mask ((__v4sf)(__X), \
                             (__v4sf)(__Y), (__P), \
                             (__mmask8) -1, (__R)); \
})
8015
/* Expands to __builtin_ia32_cmpss_mask on __X and __Y with predicate __P,
   mask __M and rounding/exception control __R.  Every macro parameter is
   parenthesized, matching _mm_mask_cmp_ss_mask. */
#define _mm_mask_cmp_round_ss_mask(__M, __X, __Y, __P, __R) __extension__ ({ \
  __builtin_ia32_cmpss_mask ((__v4sf)(__X), \
                             (__v4sf)(__Y), (__P), \
                             (__mmask8)(__M), (__R)); \
})
8021
/* Expands to __builtin_ia32_cmpss_mask on __X and __Y with predicate __P,
   an all-ones 8-bit mask and _MM_FROUND_CUR_DIRECTION. */
#define _mm_cmp_ss_mask(__X, __Y, __P) __extension__ ({ \
  __builtin_ia32_cmpss_mask ((__v4sf)(__X), (__v4sf)(__Y), (__P), \
                             (__mmask8) -1, \
                             _MM_FROUND_CUR_DIRECTION); \
})
8028
/* Expands to __builtin_ia32_cmpss_mask on __X and __Y with predicate __P,
   mask __M and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_cmp_ss_mask(__M, __X, __Y, __P) __extension__ ({ \
  __builtin_ia32_cmpss_mask ((__v4sf)(__X), (__v4sf)(__Y), (__P), \
                             (__mmask8)(__M), \
                             _MM_FROUND_CUR_DIRECTION); \
})
8035
/* Expands to __builtin_ia32_cmpsd_mask on __X and __Y with predicate __P,
   an all-ones 8-bit mask and rounding/exception control __R.  Every macro
   parameter is parenthesized, matching _mm_cmp_sd_mask. */
#define _mm_cmp_round_sd_mask(__X, __Y, __P, __R) __extension__ ({ \
  __builtin_ia32_cmpsd_mask ((__v2df)(__X), \
                             (__v2df)(__Y), (__P), \
                             (__mmask8) -1, (__R)); \
})
8041
/* Expands to __builtin_ia32_cmpsd_mask on __X and __Y with predicate __P,
   mask __M and rounding/exception control __R.  Every macro parameter is
   parenthesized, matching _mm_mask_cmp_sd_mask. */
#define _mm_mask_cmp_round_sd_mask(__M, __X, __Y, __P, __R) __extension__ ({ \
  __builtin_ia32_cmpsd_mask ((__v2df)(__X), \
                             (__v2df)(__Y), (__P), \
                             (__mmask8)(__M), (__R)); \
})
8047
/* Expands to __builtin_ia32_cmpsd_mask on __X and __Y with predicate __P,
   an all-ones 8-bit mask and _MM_FROUND_CUR_DIRECTION. */
#define _mm_cmp_sd_mask(__X, __Y, __P) __extension__ ({ \
  __builtin_ia32_cmpsd_mask ((__v2df)(__X), (__v2df)(__Y), (__P), \
                             (__mmask8) -1, \
                             _MM_FROUND_CUR_DIRECTION); \
})
8054
/* Expands to __builtin_ia32_cmpsd_mask on __X and __Y with predicate __P,
   mask __M and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_cmp_sd_mask(__M, __X, __Y, __P) __extension__ ({ \
  __builtin_ia32_cmpsd_mask ((__v2df)(__X), (__v2df)(__Y), (__P), \
                             (__mmask8)(__M), \
                             _MM_FROUND_CUR_DIRECTION); \
})
8061
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008062static __inline__ __m512 __DEFAULT_FN_ATTRS
8063_mm512_movehdup_ps (__m512 __A)
8064{
8065 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
8066 (__v16sf)
8067 _mm512_undefined_ps (),
8068 (__mmask16) -1);
8069}
8070
8071static __inline__ __m512 __DEFAULT_FN_ATTRS
8072_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8073{
8074 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
8075 (__v16sf) __W,
8076 (__mmask16) __U);
8077}
8078
8079static __inline__ __m512 __DEFAULT_FN_ATTRS
8080_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
8081{
8082 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
8083 (__v16sf)
8084 _mm512_setzero_ps (),
8085 (__mmask16) __U);
8086}
8087
8088static __inline__ __m512 __DEFAULT_FN_ATTRS
8089_mm512_moveldup_ps (__m512 __A)
8090{
8091 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
8092 (__v16sf)
8093 _mm512_undefined_ps (),
8094 (__mmask16) -1);
8095}
8096
8097static __inline__ __m512 __DEFAULT_FN_ATTRS
8098_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8099{
8100 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
8101 (__v16sf) __W,
8102 (__mmask16) __U);
8103}
8104
8105static __inline__ __m512 __DEFAULT_FN_ATTRS
8106_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
8107{
8108 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
8109 (__v16sf)
8110 _mm512_setzero_ps (),
8111 (__mmask16) __U);
8112}
8113
/* Expands to __builtin_ia32_pshufd512_mask on __A with selector __I, an
   undefined pass-through vector and an all-ones 16-bit mask.  The operands
   are __v16si, so the result is cast to __m512i explicitly; the macro then
   does not rely on implicit (lax) vector conversions at the use site. */
#define _mm512_shuffle_epi32(__A, __I) __extension__ ({ \
  (__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__A), \
                                           (__I), \
                                           (__v16si) _mm512_undefined_epi32 (), \
                                           (__mmask16) -1); \
})
8120
/* Expands to __builtin_ia32_pshufd512_mask on __A with selector __I, __W as
   the pass-through vector and __U as the 16-bit mask.  The operands are
   __v16si, so the result is cast to __m512i explicitly; the macro then does
   not rely on implicit (lax) vector conversions at the use site. */
#define _mm512_mask_shuffle_epi32(__W, __U, __A, __I) __extension__ ({ \
  (__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__A), \
                                           (__I), \
                                           (__v16si)(__W), \
                                           (__mmask16)(__U)); \
})
8127
/* Expands to __builtin_ia32_pshufd512_mask on __A with selector __I, an
   all-zero pass-through vector and __U as the 16-bit mask.  The operands are
   __v16si, so the result is cast to __m512i explicitly; the macro then does
   not rely on implicit (lax) vector conversions at the use site. */
#define _mm512_maskz_shuffle_epi32(__U, __A, __I) __extension__ ({ \
  (__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__A), \
                                           (__I), \
                                           (__v16si) _mm512_setzero_si512 (), \
                                           (__mmask16)(__U)); \
})
8134
Michael Zuckerman6a0e0872016-05-02 08:36:41 +00008135static __inline__ __m512d __DEFAULT_FN_ATTRS
8136_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8137{
8138 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8139 (__v8df) __W,
8140 (__mmask8) __U);
8141}
8142
8143static __inline__ __m512d __DEFAULT_FN_ATTRS
8144_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
8145{
8146 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8147 (__v8df) _mm512_setzero_pd (),
8148 (__mmask8) __U);
8149}
8150
8151static __inline__ __m512i __DEFAULT_FN_ATTRS
8152_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8153{
8154 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8155 (__v8di) __W,
8156 (__mmask8) __U);
8157}
8158
8159static __inline__ __m512i __DEFAULT_FN_ATTRS
8160_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
8161{
8162 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8163 (__v8di) _mm512_setzero_pd (),
8164 (__mmask8) __U);
8165}
8166
8167static __inline__ __m512d __DEFAULT_FN_ATTRS
8168_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8169{
8170 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8171 (__v8df) __W,
8172 (__mmask8) __U);
8173}
8174
8175static __inline__ __m512d __DEFAULT_FN_ATTRS
8176_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
8177{
8178 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8179 (__v8df) _mm512_setzero_pd(),
8180 (__mmask8) __U);
8181}
8182
8183static __inline__ __m512i __DEFAULT_FN_ATTRS
8184_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8185{
8186 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8187 (__v8di) __W,
8188 (__mmask8) __U);
8189}
8190
8191static __inline__ __m512i __DEFAULT_FN_ATTRS
8192_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
8193{
8194 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8195 (__v8di) _mm512_setzero_pd(),
8196 (__mmask8) __U);
8197}
8198
8199static __inline__ __m512 __DEFAULT_FN_ATTRS
8200_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8201{
8202 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8203 (__v16sf) __W,
8204 (__mmask16) __U);
8205}
8206
8207static __inline__ __m512 __DEFAULT_FN_ATTRS
8208_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
8209{
8210 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8211 (__v16sf) _mm512_setzero_ps(),
8212 (__mmask16) __U);
8213}
8214
8215static __inline__ __m512i __DEFAULT_FN_ATTRS
8216_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8217{
8218 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8219 (__v16si) __W,
8220 (__mmask16) __U);
8221}
8222
8223static __inline__ __m512i __DEFAULT_FN_ATTRS
8224_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
8225{
8226 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8227 (__v16si) _mm512_setzero_ps(),
8228 (__mmask16) __U);
8229}
8230
8231static __inline__ __m512 __DEFAULT_FN_ATTRS
8232_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8233{
8234 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8235 (__v16sf) __W,
8236 (__mmask16) __U);
8237}
8238
8239static __inline__ __m512 __DEFAULT_FN_ATTRS
8240_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
8241{
8242 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8243 (__v16sf) _mm512_setzero_ps(),
8244 (__mmask16) __U);
8245}
8246
8247static __inline__ __m512i __DEFAULT_FN_ATTRS
8248_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8249{
8250 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8251 (__v16si) __W,
8252 (__mmask16) __U);
8253}
8254
8255static __inline__ __m512i __DEFAULT_FN_ATTRS
8256_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
8257{
8258 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8259 (__v16si) _mm512_setzero_ps(),
8260 (__mmask16) __U);
8261}
8262
/* Expands to __builtin_ia32_cvtps2pd512_mask on __A with an undefined
   pass-through vector, an all-ones 8-bit mask and rounding control __R. */
#define _mm512_cvt_roundps_pd(__A, __R) __extension__ ({ \
  __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__A), \
                                   (__v8df) _mm512_undefined_pd (), \
                                   (__mmask8) -1, (__R)); \
})
8269
/* Expands to __builtin_ia32_cvtps2pd512_mask on __A with __W as the
   pass-through vector, __U as the 8-bit mask and rounding control __R. */
#define _mm512_mask_cvt_roundps_pd(__W, __U, __A, __R) __extension__ ({ \
  __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__A), (__v8df)(__W), \
                                   (__mmask8)(__U), (__R)); \
})
8275
/* Expands to __builtin_ia32_cvtps2pd512_mask on __A with an all-zero
   pass-through vector, __U as the 8-bit mask and rounding control __R. */
#define _mm512_maskz_cvt_roundps_pd(__U, __A, __R) __extension__ ({ \
  __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__A), \
                                   (__v8df) _mm512_setzero_pd (), \
                                   (__mmask8)(__U), (__R)); \
})
8282
8283static __inline__ __m512d __DEFAULT_FN_ATTRS
8284_mm512_cvtps_pd (__m256 __A)
8285{
8286 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8287 (__v8df)
8288 _mm512_undefined_pd (),
8289 (__mmask8) -1,
8290 _MM_FROUND_CUR_DIRECTION);
8291}
8292
8293static __inline__ __m512d __DEFAULT_FN_ATTRS
8294_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
8295{
8296 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8297 (__v8df) __W,
8298 (__mmask8) __U,
8299 _MM_FROUND_CUR_DIRECTION);
8300}
8301
8302static __inline__ __m512d __DEFAULT_FN_ATTRS
8303_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
8304{
8305 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8306 (__v8df)
8307 _mm512_setzero_pd (),
8308 (__mmask8) __U,
8309 _MM_FROUND_CUR_DIRECTION);
8310}
8311
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00008312static __inline__ __m512d __DEFAULT_FN_ATTRS
8313_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
8314{
8315 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
8316 (__v8df) __W,
8317 (__mmask8) __U);
8318}
8319
8320static __inline__ __m512d __DEFAULT_FN_ATTRS
8321_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
8322{
8323 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
8324 (__v8df)
8325 _mm512_setzero_pd (),
8326 (__mmask8) __U);
8327}
8328
8329static __inline__ __m512 __DEFAULT_FN_ATTRS
8330_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
8331{
8332 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
8333 (__v16sf) __W,
8334 (__mmask16) __U);
8335}
8336
8337static __inline__ __m512 __DEFAULT_FN_ATTRS
8338_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
8339{
8340 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
8341 (__v16sf)
8342 _mm512_setzero_ps (),
8343 (__mmask16) __U);
8344}
8345
Michael Zuckerman708e7592016-05-03 10:42:46 +00008346static __inline__ void __DEFAULT_FN_ATTRS
8347_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
8348{
8349 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8350 (__mmask8) __U);
8351}
8352
8353static __inline__ void __DEFAULT_FN_ATTRS
8354_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
8355{
8356 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8357 (__mmask8) __U);
8358}
8359
8360static __inline__ void __DEFAULT_FN_ATTRS
8361_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
8362{
8363 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8364 (__mmask16) __U);
8365}
8366
8367static __inline__ void __DEFAULT_FN_ATTRS
8368_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
8369{
8370 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
8371 (__mmask16) __U);
8372}
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00008373
/* Expands to __builtin_ia32_cvtsd2ss_round_mask on __A and __B with an
   undefined pass-through vector, an all-ones 8-bit mask and rounding
   control __R. */
#define _mm_cvt_roundsd_ss(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A), (__v2df)(__B), \
                                      (__v4sf) _mm_undefined_ps (), \
                                      (__mmask8) -1, (__R)); \
})
8381
/* Expands to __builtin_ia32_cvtsd2ss_round_mask on __A and __B with __W as
   the pass-through vector, __U as the 8-bit mask and rounding control __R.
   __W and __U are parenthesized before casting so the cast applies to the
   whole argument expression (e.g. a mask written as `m >> 8`), not only to
   its first operand. */
#define _mm_mask_cvt_roundsd_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A), \
                                      (__v2df)(__B), \
                                      (__v4sf)(__W), \
                                      (__mmask8)(__U), \
                                      (__R)); \
})
8389
/* Expands to __builtin_ia32_cvtsd2ss_round_mask on __A and __B with an
   all-zero pass-through vector, __U as the 8-bit mask and rounding control
   __R.  __U is parenthesized before casting so the cast applies to the whole
   argument expression (e.g. a mask written as `m >> 8`), not only to its
   first operand. */
#define _mm_maskz_cvt_roundsd_ss(__U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A), \
                                      (__v2df)(__B), \
                                      (__v4sf) _mm_setzero_ps (), \
                                      (__mmask8)(__U), \
                                      (__R)); \
})
8397
/* Expands to __builtin_ia32_cvtsi2sd64 with vector __A, value __B and
   rounding control __R.  Same expansion as _mm_cvt_roundsi64_sd. */
#define _mm_cvt_roundi64_sd(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtsi2sd64 ((__v2df)(__A), (__B), (__R)); \
})
8401
/* Expands to __builtin_ia32_cvtsi2sd64 with vector __A, value __B and
   rounding control __R.  Same expansion as _mm_cvt_roundi64_sd. */
#define _mm_cvt_roundsi64_sd(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtsi2sd64 ((__v2df)(__A), (__B), (__R)); \
})
8405
/* Expands to __builtin_ia32_cvtsi2ss32 with vector __A, value __B and
   rounding control __R.  Same expansion as _mm_cvt_roundi32_ss. */
#define _mm_cvt_roundsi32_ss(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtsi2ss32 ((__v4sf)(__A), (__B), (__R)); \
})
8409
/* Expands to __builtin_ia32_cvtsi2ss32 with vector __A, value __B and
   rounding control __R.  Same expansion as _mm_cvt_roundsi32_ss. */
#define _mm_cvt_roundi32_ss(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtsi2ss32 ((__v4sf)(__A), (__B), (__R)); \
})
8413
/* Expands to __builtin_ia32_cvtsi2ss64 with vector __A, value __B and
   rounding control __R.  Same expansion as _mm_cvt_roundi64_ss. */
#define _mm_cvt_roundsi64_ss(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtsi2ss64 ((__v4sf)(__A), (__B), (__R)); \
})
8417
/* Expands to __builtin_ia32_cvtsi2ss64 with vector __A, value __B and
   rounding control __R.  Same expansion as _mm_cvt_roundsi64_ss. */
#define _mm_cvt_roundi64_ss(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtsi2ss64 ((__v4sf)(__A), (__B), (__R)); \
})
8421
/* Expands to __builtin_ia32_cvtss2sd_round_mask on __A and __B with an
   undefined pass-through vector, an all-ones 8-bit mask and rounding
   control __R. */
#define _mm_cvt_roundss_sd(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtss2sd_round_mask ((__v2df)(__A), (__v4sf)(__B), \
                                      (__v2df) _mm_undefined_pd (), \
                                      (__mmask8)-1, (__R)); \
})
8429
/* Expands to __builtin_ia32_cvtss2sd_round_mask on __A and __B with __W as
   the pass-through vector, __U as the 8-bit mask and rounding control __R.
   __W and __U are parenthesized before casting so the cast applies to the
   whole argument expression (e.g. a mask written as `m >> 8`), not only to
   its first operand. */
#define _mm_mask_cvt_roundss_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtss2sd_round_mask ((__v2df)(__A), \
                                      (__v4sf)(__B), \
                                      (__v2df)(__W), \
                                      (__mmask8)(__U), \
                                      (__R)); \
})
8437
/* Expands to __builtin_ia32_cvtss2sd_round_mask on __A and __B with an
   all-zero pass-through vector, __U as the 8-bit mask and rounding control
   __R.  __U is parenthesized before casting so the cast applies to the whole
   argument expression (e.g. a mask written as `m >> 8`), not only to its
   first operand. */
#define _mm_maskz_cvt_roundss_sd(__U, __A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtss2sd_round_mask ((__v2df)(__A), \
                                      (__v4sf)(__B), \
                                      (__v2df) _mm_setzero_pd(), \
                                      (__mmask8)(__U), \
                                      (__R)); \
})
8445
8446static __inline__ __m128d __DEFAULT_FN_ATTRS
8447_mm_cvtu32_sd (__m128d __A, unsigned __B)
8448{
8449 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
8450}
8451
/* Expands to __builtin_ia32_cvtusi2sd64 with vector __A, value __B and
   rounding control __R. */
#define _mm_cvt_roundu64_sd(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtusi2sd64 ((__v2df)(__A), (__B), (__R)); \
})
8455
8456static __inline__ __m128d __DEFAULT_FN_ATTRS
8457_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
8458{
8459 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
8460 _MM_FROUND_CUR_DIRECTION);
8461}
8462
/* Expands to __builtin_ia32_cvtusi2ss32 with vector __A, value __B and
   rounding control __R. */
#define _mm_cvt_roundu32_ss(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtusi2ss32 ((__v4sf)(__A), (__B), (__R)); \
})
8466
8467static __inline__ __m128 __DEFAULT_FN_ATTRS
8468_mm_cvtu32_ss (__m128 __A, unsigned __B)
8469{
8470 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
8471 _MM_FROUND_CUR_DIRECTION);
8472}
8473
/* Expands to __builtin_ia32_cvtusi2ss64 with vector __A, value __B and
   rounding control __R. */
#define _mm_cvt_roundu64_ss(__A, __B, __R) __extension__ ({ \
  __builtin_ia32_cvtusi2ss64 ((__v4sf)(__A), (__B), (__R)); \
})
8477
8478static __inline__ __m128 __DEFAULT_FN_ATTRS
8479_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
8480{
8481 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
8482 _MM_FROUND_CUR_DIRECTION);
8483}
8484
Michael Kupersteine45af542015-06-30 13:36:19 +00008485#undef __DEFAULT_FN_ATTRS
Eric Christopher4d1851682015-06-17 07:09:20 +00008486
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00008487#endif // __AVX512FINTRIN_H