/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Internal 64-byte (512-bit) vector types with signed / floating-point
 * elements.  The element type determines how many lanes fit in 64 bytes. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types: float, double and integer flavours. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Write-mask types, one bit per vector lane. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;

/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04

/* Constants for integer comparison predicates.
 * Enumerators are numbered 0..6 in declaration order; slot 3 is a
 * placeholder.  _MM_CMPINT_GE and _MM_CMPINT_GT are macro aliases for the
 * "not less than" / "not less than or equal" enumerators. */
typedef enum {
  _MM_CMPINT_EQ,      /* Equal */
  _MM_CMPINT_LT,      /* Less than */
  _MM_CMPINT_LE,      /* Less than or Equal */
  _MM_CMPINT_UNUSED,
  _MM_CMPINT_NE,      /* Not Equal */
  _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
  _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;

/* Four-element permutation selectors.
 * Each letter of the suffix encodes a 2-bit value (A = 0, B = 1, C = 2,
 * D = 3); the leftmost letter occupies the most significant bits, so
 * _MM_PERM_WXYZ == (W << 6) | (X << 4) | (Y << 2) | Z.
 * One row below lists the four constants sharing a three-letter prefix. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;

/* Mantissa normalization intervals; enumerators are numbered 0..3 in
 * declaration order. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Mantissa sign-control selectors; enumerators are numbered 0..2 in
 * declaration order. */
typedef enum
{
  _MM_MANT_SIGN_src,    /* sign = sign(SRC)              */
  _MM_MANT_SIGN_zero,   /* sign = 0                      */
  _MM_MANT_SIGN_nan     /* DEST = NaN if sign(SRC) = 1   */
} _MM_MANTISSA_SIGN_ENUM;

/* Define the default attributes for the functions in this file: always
 * inlined, no debug info, and compiled for the "avx512f" target feature. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

Adam Nemet0d5bb552014-07-28 17:14:40 +0000178/* Create vectors with repeated elements */
179
Michael Kupersteine45af542015-06-30 13:36:19 +0000180static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000181_mm512_setzero_si512(void)
182{
183 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
184}
185
Michael Zuckermanf36f6eb2016-06-05 15:12:52 +0000186#define _mm512_setzero_epi32 _mm512_setzero_si512
187
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000188static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +0000189_mm512_undefined_pd(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000190{
191 return (__m512d)__builtin_ia32_undef512();
192}
193
194static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +0000195_mm512_undefined(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000196{
197 return (__m512)__builtin_ia32_undef512();
198}
199
200static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +0000201_mm512_undefined_ps(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000202{
203 return (__m512)__builtin_ia32_undef512();
204}
205
206static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +0000207_mm512_undefined_epi32(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000208{
209 return (__m512i)__builtin_ia32_undef512();
210}
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000211
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000212static __inline__ __m512i __DEFAULT_FN_ATTRS
213_mm512_broadcastd_epi32 (__m128i __A)
214{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000215 return (__m512i)__builtin_shufflevector((__v4si) __A,
216 (__v4si)_mm_undefined_si128(),
217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000218}
219
220static __inline__ __m512i __DEFAULT_FN_ATTRS
221_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
222{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000223 return (__m512i)__builtin_ia32_selectd_512(__M,
224 (__v16si) _mm512_broadcastd_epi32(__A),
225 (__v16si) __O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000226}
227
228static __inline__ __m512i __DEFAULT_FN_ATTRS
229_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
230{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000231 return (__m512i)__builtin_ia32_selectd_512(__M,
232 (__v16si) _mm512_broadcastd_epi32(__A),
233 (__v16si) _mm512_setzero_si512());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000234}
235
236static __inline__ __m512i __DEFAULT_FN_ATTRS
237_mm512_broadcastq_epi64 (__m128i __A)
238{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000239 return (__m512i)__builtin_shufflevector((__v2di) __A,
240 (__v2di) _mm_undefined_si128(),
241 0, 0, 0, 0, 0, 0, 0, 0);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000242}
243
244static __inline__ __m512i __DEFAULT_FN_ATTRS
245_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
246{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000247 return (__m512i)__builtin_ia32_selectq_512(__M,
248 (__v8di) _mm512_broadcastq_epi64(__A),
249 (__v8di) __O);
250
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000251}
252
253static __inline__ __m512i __DEFAULT_FN_ATTRS
254_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
255{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000256 return (__m512i)__builtin_ia32_selectq_512(__M,
257 (__v8di) _mm512_broadcastq_epi64(__A),
258 (__v8di) _mm512_setzero_si512());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000259}
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000260
Adam Nemet0d5bb552014-07-28 17:14:40 +0000261
Michael Kupersteine45af542015-06-30 13:36:19 +0000262static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +0000263_mm512_setzero_ps(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000264{
265 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
266 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
267}
Michael Zuckermanf36f6eb2016-06-05 15:12:52 +0000268
269#define _mm512_setzero _mm512_setzero_ps
270
Michael Kupersteine45af542015-06-30 13:36:19 +0000271static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +0000272_mm512_setzero_pd(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000273{
274 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
275}
Adam Nemet0d5bb552014-07-28 17:14:40 +0000276
Michael Kupersteine45af542015-06-30 13:36:19 +0000277static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000278_mm512_set1_ps(float __w)
279{
280 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
281 __w, __w, __w, __w, __w, __w, __w, __w };
282}
283
Michael Kupersteine45af542015-06-30 13:36:19 +0000284static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000285_mm512_set1_pd(double __w)
286{
287 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
288}
289
Michael Kupersteine45af542015-06-30 13:36:19 +0000290static __inline __m512i __DEFAULT_FN_ATTRS
Michael Zuckerman6f08ceb2016-05-26 06:54:52 +0000291_mm512_set1_epi8(char __w)
292{
293 return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w,
297 __w, __w, __w, __w, __w, __w, __w, __w,
298 __w, __w, __w, __w, __w, __w, __w, __w,
299 __w, __w, __w, __w, __w, __w, __w, __w,
300 __w, __w, __w, __w, __w, __w, __w, __w };
301}
302
303static __inline __m512i __DEFAULT_FN_ATTRS
304_mm512_set1_epi16(short __w)
305{
306 return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
307 __w, __w, __w, __w, __w, __w, __w, __w,
308 __w, __w, __w, __w, __w, __w, __w, __w,
309 __w, __w, __w, __w, __w, __w, __w, __w };
310}
311
312static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000313_mm512_set1_epi32(int __s)
314{
315 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
316 __s, __s, __s, __s, __s, __s, __s, __s };
317}
318
Michael Kupersteine45af542015-06-30 13:36:19 +0000319static __inline __m512i __DEFAULT_FN_ATTRS
Jina Nahias3ad702a2017-09-19 11:00:27 +0000320_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
321{
322 return (__m512i)__builtin_ia32_selectd_512(__M,
323 (__v16si)_mm512_set1_epi32(__A),
324 (__v16si)_mm512_setzero_si512());
325}
326
327static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000328_mm512_set1_epi64(long long __d)
329{
330 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
331}
332
Jina Nahias3ad702a2017-09-19 11:00:27 +0000333static __inline __m512i __DEFAULT_FN_ATTRS
334_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
335{
336 return (__m512i)__builtin_ia32_selectq_512(__M,
337 (__v8di)_mm512_set1_epi64(__A),
338 (__v8di)_mm512_setzero_si512());
339}
Jina Nahias3ad702a2017-09-19 11:00:27 +0000340
Michael Kupersteine45af542015-06-30 13:36:19 +0000341static __inline__ __m512 __DEFAULT_FN_ATTRS
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000342_mm512_broadcastss_ps(__m128 __A)
Adam Nemet4abc07c2014-08-13 00:29:01 +0000343{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000344 return (__m512)__builtin_shufflevector((__v4sf) __A,
345 (__v4sf)_mm_undefined_ps(),
346 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
Adam Nemet4abc07c2014-08-13 00:29:01 +0000347}
348
Michael Zuckerman95721ac2016-06-05 15:43:30 +0000349static __inline __m512i __DEFAULT_FN_ATTRS
350_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
351{
352 return (__m512i)(__v16si)
353 { __D, __C, __B, __A, __D, __C, __B, __A,
354 __D, __C, __B, __A, __D, __C, __B, __A };
355}
356
357static __inline __m512i __DEFAULT_FN_ATTRS
358_mm512_set4_epi64 (long long __A, long long __B, long long __C,
359 long long __D)
360{
361 return (__m512i) (__v8di)
362 { __D, __C, __B, __A, __D, __C, __B, __A };
363}
364
365static __inline __m512d __DEFAULT_FN_ATTRS
366_mm512_set4_pd (double __A, double __B, double __C, double __D)
367{
368 return (__m512d)
369 { __D, __C, __B, __A, __D, __C, __B, __A };
370}
371
372static __inline __m512 __DEFAULT_FN_ATTRS
373_mm512_set4_ps (float __A, float __B, float __C, float __D)
374{
375 return (__m512)
376 { __D, __C, __B, __A, __D, __C, __B, __A,
377 __D, __C, __B, __A, __D, __C, __B, __A };
378}
379
/* "Reversed" forms of the _mm512_set4_* constructors: each forwards to the
 * corresponding _mm512_set4_* function with the four arguments in the
 * opposite order. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))

Michael Kupersteine45af542015-06-30 13:36:19 +0000392static __inline__ __m512d __DEFAULT_FN_ATTRS
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000393_mm512_broadcastsd_pd(__m128d __A)
Adam Nemet4abc07c2014-08-13 00:29:01 +0000394{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000395 return (__m512d)__builtin_shufflevector((__v2df) __A,
396 (__v2df) _mm_undefined_pd(),
397 0, 0, 0, 0, 0, 0, 0, 0);
Adam Nemet4abc07c2014-08-13 00:29:01 +0000398}
399
Adam Nemetc871ff92014-07-30 16:51:24 +0000400/* Cast between vector types */
401
Michael Kupersteine45af542015-06-30 13:36:19 +0000402static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000403_mm512_castpd256_pd512(__m256d __a)
404{
405 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
406}
407
Michael Kupersteine45af542015-06-30 13:36:19 +0000408static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000409_mm512_castps256_ps512(__m256 __a)
410{
411 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
412 -1, -1, -1, -1, -1, -1, -1, -1);
413}
414
Michael Kupersteine45af542015-06-30 13:36:19 +0000415static __inline __m128d __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000416_mm512_castpd512_pd128(__m512d __a)
417{
418 return __builtin_shufflevector(__a, __a, 0, 1);
419}
420
Michael Zuckermand5cc6cd2016-05-25 14:04:21 +0000421static __inline __m256d __DEFAULT_FN_ATTRS
422_mm512_castpd512_pd256 (__m512d __A)
423{
424 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
425}
426
Michael Kupersteine45af542015-06-30 13:36:19 +0000427static __inline __m128 __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000428_mm512_castps512_ps128(__m512 __a)
429{
430 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
431}
432
Michael Zuckermand5cc6cd2016-05-25 14:04:21 +0000433static __inline __m256 __DEFAULT_FN_ATTRS
434_mm512_castps512_ps256 (__m512 __A)
435{
436 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
437}
438
439static __inline __m512 __DEFAULT_FN_ATTRS
440_mm512_castpd_ps (__m512d __A)
441{
442 return (__m512) (__A);
443}
444
445static __inline __m512i __DEFAULT_FN_ATTRS
446_mm512_castpd_si512 (__m512d __A)
447{
448 return (__m512i) (__A);
449}
Michael Zuckermanc6677032016-05-03 14:26:52 +0000450
451static __inline__ __m512d __DEFAULT_FN_ATTRS
452_mm512_castpd128_pd512 (__m128d __A)
453{
454 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
455}
456
Michael Zuckermand5cc6cd2016-05-25 14:04:21 +0000457static __inline __m512d __DEFAULT_FN_ATTRS
458_mm512_castps_pd (__m512 __A)
459{
460 return (__m512d) (__A);
461}
462
463static __inline __m512i __DEFAULT_FN_ATTRS
464_mm512_castps_si512 (__m512 __A)
465{
466 return (__m512i) (__A);
467}
468
Michael Zuckermanc6677032016-05-03 14:26:52 +0000469static __inline__ __m512 __DEFAULT_FN_ATTRS
470_mm512_castps128_ps512 (__m128 __A)
471{
472 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
473}
474
475static __inline__ __m512i __DEFAULT_FN_ATTRS
476_mm512_castsi128_si512 (__m128i __A)
477{
478 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
479}
480
481static __inline__ __m512i __DEFAULT_FN_ATTRS
482_mm512_castsi256_si512 (__m256i __A)
483{
484 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
485}
486
Michael Zuckermand5cc6cd2016-05-25 14:04:21 +0000487static __inline __m512 __DEFAULT_FN_ATTRS
488_mm512_castsi512_ps (__m512i __A)
489{
490 return (__m512) (__A);
491}
492
493static __inline __m512d __DEFAULT_FN_ATTRS
494_mm512_castsi512_pd (__m512i __A)
495{
496 return (__m512d) (__A);
497}
498
499static __inline __m128i __DEFAULT_FN_ATTRS
500_mm512_castsi512_si128 (__m512i __A)
501{
502 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
503}
504
Michael Zuckerman22c47e62016-05-26 14:32:11 +0000505static __inline __m256i __DEFAULT_FN_ATTRS
506_mm512_castsi512_si256 (__m512i __A)
507{
508 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
509}
510
Ayman Musae60a41c2016-11-08 12:00:30 +0000511static __inline__ __mmask16 __DEFAULT_FN_ATTRS
512_mm512_int2mask(int __a)
513{
514 return (__mmask16)__a;
515}
516
517static __inline__ int __DEFAULT_FN_ATTRS
518_mm512_mask2int(__mmask16 __a)
519{
520 return (int)__a;
521}
522
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000523/// Constructs a 512-bit floating-point vector of [8 x double] from a
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000524/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
525/// contain the value of the source vector. The upper 384 bits are set
526/// to zero.
527///
528/// \headerfile <x86intrin.h>
529///
530/// This intrinsic has no corresponding instruction.
531///
532/// \param __a
533/// A 128-bit vector of [2 x double].
534/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
535/// contain the value of the parameter. The upper 384 bits are set to zero.
536static __inline __m512d __DEFAULT_FN_ATTRS
537_mm512_zextpd128_pd512(__m128d __a)
538{
539 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
540}
541
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000542/// Constructs a 512-bit floating-point vector of [8 x double] from a
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000543/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
544/// contain the value of the source vector. The upper 256 bits are set
545/// to zero.
546///
547/// \headerfile <x86intrin.h>
548///
549/// This intrinsic has no corresponding instruction.
550///
551/// \param __a
552/// A 256-bit vector of [4 x double].
553/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
554/// contain the value of the parameter. The upper 256 bits are set to zero.
555static __inline __m512d __DEFAULT_FN_ATTRS
556_mm512_zextpd256_pd512(__m256d __a)
557{
558 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
559}
560
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000561/// Constructs a 512-bit floating-point vector of [16 x float] from a
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000562/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
563/// the value of the source vector. The upper 384 bits are set to zero.
564///
565/// \headerfile <x86intrin.h>
566///
567/// This intrinsic has no corresponding instruction.
568///
569/// \param __a
570/// A 128-bit vector of [4 x float].
571/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
572/// contain the value of the parameter. The upper 384 bits are set to zero.
573static __inline __m512 __DEFAULT_FN_ATTRS
574_mm512_zextps128_ps512(__m128 __a)
575{
576 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
577}
578
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000579/// Constructs a 512-bit floating-point vector of [16 x float] from a
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000580/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
581/// the value of the source vector. The upper 256 bits are set to zero.
582///
583/// \headerfile <x86intrin.h>
584///
585/// This intrinsic has no corresponding instruction.
586///
587/// \param __a
588/// A 256-bit vector of [8 x float].
589/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
590/// contain the value of the parameter. The upper 256 bits are set to zero.
591static __inline __m512 __DEFAULT_FN_ATTRS
592_mm512_zextps256_ps512(__m256 __a)
593{
594 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
595}
596
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000597/// Constructs a 512-bit integer vector from a 128-bit integer vector.
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000598/// The lower 128 bits contain the value of the source vector. The upper
599/// 384 bits are set to zero.
600///
601/// \headerfile <x86intrin.h>
602///
603/// This intrinsic has no corresponding instruction.
604///
605/// \param __a
606/// A 128-bit integer vector.
607/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
608/// the parameter. The upper 384 bits are set to zero.
609static __inline __m512i __DEFAULT_FN_ATTRS
610_mm512_zextsi128_si512(__m128i __a)
611{
612 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
613}
614
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000615/// Constructs a 512-bit integer vector from a 256-bit integer vector.
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000616/// The lower 256 bits contain the value of the source vector. The upper
617/// 256 bits are set to zero.
618///
619/// \headerfile <x86intrin.h>
620///
621/// This intrinsic has no corresponding instruction.
622///
623/// \param __a
624/// A 256-bit integer vector.
625/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
626/// the parameter. The upper 256 bits are set to zero.
627static __inline __m512i __DEFAULT_FN_ATTRS
628_mm512_zextsi256_si512(__m256i __a)
629{
630 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
631}
632
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000633/* Bitwise operators */
Michael Kupersteine45af542015-06-30 13:36:19 +0000634static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000635_mm512_and_epi32(__m512i __a, __m512i __b)
636{
Craig Topper6a77b622016-06-04 05:43:41 +0000637 return (__m512i)((__v16su)__a & (__v16su)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000638}
639
Michael Kupersteine45af542015-06-30 13:36:19 +0000640static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000641_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
642{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000643 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
Asaf Badouh13633282016-07-05 12:24:14 +0000644 (__v16si) _mm512_and_epi32(__a, __b),
645 (__v16si) __src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000646}
Asaf Badouh13633282016-07-05 12:24:14 +0000647
Michael Kupersteine45af542015-06-30 13:36:19 +0000648static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000649_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
650{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000651 return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
Asaf Badouh13633282016-07-05 12:24:14 +0000652 __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000653}
654
Michael Kupersteine45af542015-06-30 13:36:19 +0000655static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000656_mm512_and_epi64(__m512i __a, __m512i __b)
657{
Craig Topper6a77b622016-06-04 05:43:41 +0000658 return (__m512i)((__v8du)__a & (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000659}
660
Michael Kupersteine45af542015-06-30 13:36:19 +0000661static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000662_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
663{
Asaf Badouh13633282016-07-05 12:24:14 +0000664 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
665 (__v8di) _mm512_and_epi64(__a, __b),
666 (__v8di) __src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000667}
Asaf Badouh13633282016-07-05 12:24:14 +0000668
Michael Kupersteine45af542015-06-30 13:36:19 +0000669static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000670_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
671{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000672 return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
Asaf Badouh13633282016-07-05 12:24:14 +0000673 __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000674}
675
Michael Kupersteine45af542015-06-30 13:36:19 +0000676static __inline__ __m512i __DEFAULT_FN_ATTRS
Michael Zuckerman2cacc352016-05-18 15:25:53 +0000677_mm512_andnot_si512 (__m512i __A, __m512i __B)
678{
Craig Topper6a77b622016-06-04 05:43:41 +0000679 return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
Michael Zuckerman2cacc352016-05-18 15:25:53 +0000680}
681
682static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000683_mm512_andnot_epi32 (__m512i __A, __m512i __B)
684{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000685 return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000686}
687
Michael Kupersteine45af542015-06-30 13:36:19 +0000688static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000689_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000690{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000691 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
692 (__v16si)_mm512_andnot_epi32(__A, __B),
693 (__v16si)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000694}
695
Michael Kupersteine45af542015-06-30 13:36:19 +0000696static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000697_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000698{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000699 return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
700 __U, __A, __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000701}
702
Michael Kupersteine45af542015-06-30 13:36:19 +0000703static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000704_mm512_andnot_epi64(__m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000705{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000706 return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000707}
708
Michael Kupersteine45af542015-06-30 13:36:19 +0000709static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000710_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000711{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000712 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
713 (__v8di)_mm512_andnot_epi64(__A, __B),
714 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000715}
716
Michael Kupersteine45af542015-06-30 13:36:19 +0000717static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000718_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000719{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000720 return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
721 __U, __A, __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000722}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000723
Michael Kupersteine45af542015-06-30 13:36:19 +0000724static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000725_mm512_or_epi32(__m512i __a, __m512i __b)
726{
Craig Topper6a77b622016-06-04 05:43:41 +0000727 return (__m512i)((__v16su)__a | (__v16su)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000728}
729
Michael Kupersteine45af542015-06-30 13:36:19 +0000730static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000731_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
732{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000733 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
734 (__v16si)_mm512_or_epi32(__a, __b),
735 (__v16si)__src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000736}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000737
Michael Kupersteine45af542015-06-30 13:36:19 +0000738static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000739_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
740{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000741 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000742}
743
Michael Kupersteine45af542015-06-30 13:36:19 +0000744static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000745_mm512_or_epi64(__m512i __a, __m512i __b)
746{
Craig Topper6a77b622016-06-04 05:43:41 +0000747 return (__m512i)((__v8du)__a | (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000748}
749
Michael Kupersteine45af542015-06-30 13:36:19 +0000750static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000751_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
752{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000753 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
754 (__v8di)_mm512_or_epi64(__a, __b),
755 (__v8di)__src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000756}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000757
Michael Kupersteine45af542015-06-30 13:36:19 +0000758static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000759_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
760{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000761 return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000762}
763
Michael Kupersteine45af542015-06-30 13:36:19 +0000764static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000765_mm512_xor_epi32(__m512i __a, __m512i __b)
766{
Craig Topper6a77b622016-06-04 05:43:41 +0000767 return (__m512i)((__v16su)__a ^ (__v16su)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000768}
769
Michael Kupersteine45af542015-06-30 13:36:19 +0000770static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000771_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
772{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000773 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
774 (__v16si)_mm512_xor_epi32(__a, __b),
775 (__v16si)__src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000776}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000777
Michael Kupersteine45af542015-06-30 13:36:19 +0000778static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000779_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
780{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000781 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000782}
783
Michael Kupersteine45af542015-06-30 13:36:19 +0000784static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000785_mm512_xor_epi64(__m512i __a, __m512i __b)
786{
Craig Topper6a77b622016-06-04 05:43:41 +0000787 return (__m512i)((__v8du)__a ^ (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000788}
789
Michael Kupersteine45af542015-06-30 13:36:19 +0000790static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000791_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
792{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000793 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
794 (__v8di)_mm512_xor_epi64(__a, __b),
795 (__v8di)__src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000796}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000797
Michael Kupersteine45af542015-06-30 13:36:19 +0000798static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000799_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
800{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000801 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000802}
803
Michael Kupersteine45af542015-06-30 13:36:19 +0000804static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000805_mm512_and_si512(__m512i __a, __m512i __b)
806{
Craig Topper6a77b622016-06-04 05:43:41 +0000807 return (__m512i)((__v8du)__a & (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000808}
809
Michael Kupersteine45af542015-06-30 13:36:19 +0000810static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000811_mm512_or_si512(__m512i __a, __m512i __b)
812{
Craig Topper6a77b622016-06-04 05:43:41 +0000813 return (__m512i)((__v8du)__a | (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000814}
815
Michael Kupersteine45af542015-06-30 13:36:19 +0000816static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000817_mm512_xor_si512(__m512i __a, __m512i __b)
818{
Craig Topper6a77b622016-06-04 05:43:41 +0000819 return (__m512i)((__v8du)__a ^ (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000820}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000821
/* Arithmetic */
823
Michael Kupersteine45af542015-06-30 13:36:19 +0000824static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000825_mm512_add_pd(__m512d __a, __m512d __b)
826{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000827 return (__m512d)((__v8df)__a + (__v8df)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000828}
829
Michael Kupersteine45af542015-06-30 13:36:19 +0000830static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000831_mm512_add_ps(__m512 __a, __m512 __b)
832{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000833 return (__m512)((__v16sf)__a + (__v16sf)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000834}
835
Michael Kupersteine45af542015-06-30 13:36:19 +0000836static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000837_mm512_mul_pd(__m512d __a, __m512d __b)
838{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000839 return (__m512d)((__v8df)__a * (__v8df)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000840}
841
Michael Kupersteine45af542015-06-30 13:36:19 +0000842static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000843_mm512_mul_ps(__m512 __a, __m512 __b)
844{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000845 return (__m512)((__v16sf)__a * (__v16sf)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000846}
847
Michael Kupersteine45af542015-06-30 13:36:19 +0000848static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000849_mm512_sub_pd(__m512d __a, __m512d __b)
850{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000851 return (__m512d)((__v8df)__a - (__v8df)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000852}
853
Michael Kupersteine45af542015-06-30 13:36:19 +0000854static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000855_mm512_sub_ps(__m512 __a, __m512 __b)
856{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000857 return (__m512)((__v16sf)__a - (__v16sf)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000858}
859
Michael Kupersteine45af542015-06-30 13:36:19 +0000860static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000861_mm512_add_epi64 (__m512i __A, __m512i __B)
862{
Craig Topper6a77b622016-06-04 05:43:41 +0000863 return (__m512i) ((__v8du) __A + (__v8du) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000864}
865
Michael Kupersteine45af542015-06-30 13:36:19 +0000866static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000867_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000868{
Craig Topper0e189762016-09-03 18:29:35 +0000869 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
870 (__v8di)_mm512_add_epi64(__A, __B),
871 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000872}
873
Michael Kupersteine45af542015-06-30 13:36:19 +0000874static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000875_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000876{
Craig Topper0e189762016-09-03 18:29:35 +0000877 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
878 (__v8di)_mm512_add_epi64(__A, __B),
879 (__v8di)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000880}
881
Michael Kupersteine45af542015-06-30 13:36:19 +0000882static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000883_mm512_sub_epi64 (__m512i __A, __m512i __B)
884{
Craig Topper6a77b622016-06-04 05:43:41 +0000885 return (__m512i) ((__v8du) __A - (__v8du) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000886}
887
Michael Kupersteine45af542015-06-30 13:36:19 +0000888static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000889_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000890{
Craig Topper0e189762016-09-03 18:29:35 +0000891 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
892 (__v8di)_mm512_sub_epi64(__A, __B),
893 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000894}
895
Michael Kupersteine45af542015-06-30 13:36:19 +0000896static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000897_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000898{
Craig Topper0e189762016-09-03 18:29:35 +0000899 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
900 (__v8di)_mm512_sub_epi64(__A, __B),
901 (__v8di)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000902}
903
Michael Kupersteine45af542015-06-30 13:36:19 +0000904static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000905_mm512_add_epi32 (__m512i __A, __m512i __B)
906{
Craig Topper6a77b622016-06-04 05:43:41 +0000907 return (__m512i) ((__v16su) __A + (__v16su) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000908}
909
Michael Kupersteine45af542015-06-30 13:36:19 +0000910static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000911_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000912{
Craig Topper0e189762016-09-03 18:29:35 +0000913 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
914 (__v16si)_mm512_add_epi32(__A, __B),
915 (__v16si)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000916}
917
Michael Kupersteine45af542015-06-30 13:36:19 +0000918static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000919_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
920{
Craig Topper0e189762016-09-03 18:29:35 +0000921 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
922 (__v16si)_mm512_add_epi32(__A, __B),
923 (__v16si)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000924}
925
Michael Kupersteine45af542015-06-30 13:36:19 +0000926static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000927_mm512_sub_epi32 (__m512i __A, __m512i __B)
928{
Craig Topper6a77b622016-06-04 05:43:41 +0000929 return (__m512i) ((__v16su) __A - (__v16su) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000930}
931
Michael Kupersteine45af542015-06-30 13:36:19 +0000932static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000933_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000934{
Craig Topper0e189762016-09-03 18:29:35 +0000935 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
936 (__v16si)_mm512_sub_epi32(__A, __B),
937 (__v16si)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000938}
939
Michael Kupersteine45af542015-06-30 13:36:19 +0000940static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000941_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000942{
Craig Topper0e189762016-09-03 18:29:35 +0000943 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
944 (__v16si)_mm512_sub_epi32(__A, __B),
945 (__v16si)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000946}
947
/* Expands to __builtin_ia32_maxpd512_mask(A, B, W, U, R): A, B and W are
 * cast to __v8df, U to __mmask8 and R to int. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +0000953
/* Expands to __builtin_ia32_maxpd512_mask with _mm512_setzero_pd() in the
 * third (source) slot, U as the mask and R as the int rounding argument. */
#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +0000959
/* Expands to __builtin_ia32_maxpd512_mask with an all-ones mask
 * ((__mmask8)-1) and _mm512_undefined_pd() in the third (source) slot; R is
 * passed as the int rounding argument. */
#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +0000965
Michael Kupersteine45af542015-06-30 13:36:19 +0000966static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000967_mm512_max_pd(__m512d __A, __m512d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000968{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000969 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
970 (__v8df) __B,
971 (__v8df)
972 _mm512_setzero_pd (),
973 (__mmask8) -1,
974 _MM_FROUND_CUR_DIRECTION);
975}
976
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +0000977static __inline__ __m512d __DEFAULT_FN_ATTRS
978_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
979{
980 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
981 (__v8df) __B,
982 (__v8df) __W,
983 (__mmask8) __U,
984 _MM_FROUND_CUR_DIRECTION);
985}
986
987static __inline__ __m512d __DEFAULT_FN_ATTRS
988_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
989{
990 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
991 (__v8df) __B,
992 (__v8df)
993 _mm512_setzero_pd (),
994 (__mmask8) __U,
995 _MM_FROUND_CUR_DIRECTION);
996}
997
/* Expands to __builtin_ia32_maxps512_mask(A, B, W, U, R): A, B and W are
 * cast to __v16sf, U to __mmask16 and R to int. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001003
/* Expands to __builtin_ia32_maxps512_mask with _mm512_setzero_ps() in the
 * third (source) slot, U as the mask and R as the int rounding argument. */
#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001009
/* Expands to __builtin_ia32_maxps512_mask with an all-ones mask
 * ((__mmask16)-1) and _mm512_undefined_ps() in the third (source) slot; R
 * is passed as the int rounding argument. */
#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001015
Michael Kupersteine45af542015-06-30 13:36:19 +00001016static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001017_mm512_max_ps(__m512 __A, __m512 __B)
1018{
1019 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1020 (__v16sf) __B,
1021 (__v16sf)
1022 _mm512_setzero_ps (),
1023 (__mmask16) -1,
1024 _MM_FROUND_CUR_DIRECTION);
1025}
1026
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001027static __inline__ __m512 __DEFAULT_FN_ATTRS
1028_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1029{
1030 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1031 (__v16sf) __B,
1032 (__v16sf) __W,
1033 (__mmask16) __U,
1034 _MM_FROUND_CUR_DIRECTION);
1035}
1036
1037static __inline__ __m512 __DEFAULT_FN_ATTRS
1038_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1039{
1040 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1041 (__v16sf) __B,
1042 (__v16sf)
1043 _mm512_setzero_ps (),
1044 (__mmask16) __U,
1045 _MM_FROUND_CUR_DIRECTION);
1046}
1047
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001048static __inline__ __m128 __DEFAULT_FN_ATTRS
1049_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001050 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001051 (__v4sf) __B,
1052 (__v4sf) __W,
1053 (__mmask8) __U,
1054 _MM_FROUND_CUR_DIRECTION);
1055}
1056
1057static __inline__ __m128 __DEFAULT_FN_ATTRS
1058_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001059 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001060 (__v4sf) __B,
1061 (__v4sf) _mm_setzero_ps (),
1062 (__mmask8) __U,
1063 _MM_FROUND_CUR_DIRECTION);
1064}
1065
/* Expands to __builtin_ia32_maxss_round_mask with an all-ones mask
 * ((__mmask8)-1) and _mm_setzero_ps() in the third (source) slot; R is
 * passed as the int rounding argument. */
#define _mm_max_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001071
/* Expands to __builtin_ia32_maxss_round_mask(A, B, W, U, R): A, B and W
 * are cast to __v4sf, U to __mmask8 and R to int. */
#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001077
/* Expands to __builtin_ia32_maxss_round_mask with _mm_setzero_ps() in the
 * third (source) slot, U as the mask and R as the int rounding argument. */
#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001083
1084static __inline__ __m128d __DEFAULT_FN_ATTRS
1085_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001086 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001087 (__v2df) __B,
1088 (__v2df) __W,
1089 (__mmask8) __U,
1090 _MM_FROUND_CUR_DIRECTION);
1091}
1092
1093static __inline__ __m128d __DEFAULT_FN_ATTRS
1094_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001095 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001096 (__v2df) __B,
1097 (__v2df) _mm_setzero_pd (),
1098 (__mmask8) __U,
1099 _MM_FROUND_CUR_DIRECTION);
1100}
1101
/* Expands to __builtin_ia32_maxsd_round_mask with an all-ones mask
 * ((__mmask8)-1) and _mm_setzero_pd() in the third (source) slot; R is
 * passed as the int rounding argument. */
#define _mm_max_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001107
/* Expands to __builtin_ia32_maxsd_round_mask(A, B, W, U, R): A, B and W
 * are cast to __v2df, U to __mmask8 and R to int. */
#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001113
/* Expands to __builtin_ia32_maxsd_round_mask with _mm_setzero_pd() in the
 * third (source) slot, U as the mask and R as the int rounding argument. */
#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001119
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001120static __inline __m512i
Michael Kupersteine45af542015-06-30 13:36:19 +00001121__DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001122_mm512_max_epi32(__m512i __A, __m512i __B)
1123{
Craig Topperf2043b02018-05-23 04:51:54 +00001124 return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001125}
1126
Michael Zuckermande860e52016-05-10 11:34:19 +00001127static __inline__ __m512i __DEFAULT_FN_ATTRS
1128_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1129{
Craig Topperf2043b02018-05-23 04:51:54 +00001130 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1131 (__v16si)_mm512_max_epi32(__A, __B),
1132 (__v16si)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001133}
1134
1135static __inline__ __m512i __DEFAULT_FN_ATTRS
1136_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1137{
Craig Topperf2043b02018-05-23 04:51:54 +00001138 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1139 (__v16si)_mm512_max_epi32(__A, __B),
1140 (__v16si)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001141}
1142
Michael Kupersteine45af542015-06-30 13:36:19 +00001143static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001144_mm512_max_epu32(__m512i __A, __m512i __B)
1145{
Craig Topperf2043b02018-05-23 04:51:54 +00001146 return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001147}
1148
Michael Zuckermande860e52016-05-10 11:34:19 +00001149static __inline__ __m512i __DEFAULT_FN_ATTRS
1150_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1151{
Craig Topperf2043b02018-05-23 04:51:54 +00001152 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1153 (__v16si)_mm512_max_epu32(__A, __B),
1154 (__v16si)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001155}
1156
1157static __inline__ __m512i __DEFAULT_FN_ATTRS
1158_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1159{
Craig Topperf2043b02018-05-23 04:51:54 +00001160 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1161 (__v16si)_mm512_max_epu32(__A, __B),
1162 (__v16si)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001163}
1164
Michael Kupersteine45af542015-06-30 13:36:19 +00001165static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001166_mm512_max_epi64(__m512i __A, __m512i __B)
1167{
Craig Topperf2043b02018-05-23 04:51:54 +00001168 return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001169}
1170
Michael Zuckermande860e52016-05-10 11:34:19 +00001171static __inline__ __m512i __DEFAULT_FN_ATTRS
1172_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1173{
Craig Topperf2043b02018-05-23 04:51:54 +00001174 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1175 (__v8di)_mm512_max_epi64(__A, __B),
1176 (__v8di)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001177}
1178
1179static __inline__ __m512i __DEFAULT_FN_ATTRS
1180_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1181{
Craig Topperf2043b02018-05-23 04:51:54 +00001182 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1183 (__v8di)_mm512_max_epi64(__A, __B),
1184 (__v8di)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001185}
1186
Michael Kupersteine45af542015-06-30 13:36:19 +00001187static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001188_mm512_max_epu64(__m512i __A, __m512i __B)
1189{
Craig Topperf2043b02018-05-23 04:51:54 +00001190 return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001191}
1192
Michael Zuckermande860e52016-05-10 11:34:19 +00001193static __inline__ __m512i __DEFAULT_FN_ATTRS
1194_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1195{
Craig Topperf2043b02018-05-23 04:51:54 +00001196 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1197 (__v8di)_mm512_max_epu64(__A, __B),
1198 (__v8di)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001199}
1200
1201static __inline__ __m512i __DEFAULT_FN_ATTRS
1202_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1203{
Craig Topperf2043b02018-05-23 04:51:54 +00001204 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1205 (__v8di)_mm512_max_epu64(__A, __B),
1206 (__v8di)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001207}
1208
/* Expands to __builtin_ia32_minpd512_mask(A, B, W, U, R): A, B and W are
 * cast to __v8df, U to __mmask8 and R to int. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001214
/* Expands to __builtin_ia32_minpd512_mask with _mm512_setzero_pd() in the
 * third (source) slot, U as the mask and R as the int rounding argument. */
#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001220
/* Expands to __builtin_ia32_minpd512_mask with an all-ones mask
 * ((__mmask8)-1) and _mm512_undefined_pd() in the third (source) slot; R is
 * passed as the int rounding argument. */
#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001226
Michael Kupersteine45af542015-06-30 13:36:19 +00001227static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001228_mm512_min_pd(__m512d __A, __m512d __B)
1229{
1230 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1231 (__v8df) __B,
1232 (__v8df)
1233 _mm512_setzero_pd (),
1234 (__mmask8) -1,
1235 _MM_FROUND_CUR_DIRECTION);
1236}
1237
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001238static __inline__ __m512d __DEFAULT_FN_ATTRS
1239_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1240{
1241 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1242 (__v8df) __B,
1243 (__v8df) __W,
1244 (__mmask8) __U,
1245 _MM_FROUND_CUR_DIRECTION);
1246}
1247
/* Expands to __builtin_ia32_minps512_mask(A, B, W, U, R): A, B and W are
 * cast to __v16sf, U to __mmask16 and R to int. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001253
/* Expands to __builtin_ia32_minps512_mask with _mm512_setzero_ps() in the
 * third (source) slot, U as the mask and R as the int rounding argument. */
#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001259
/* Expands to __builtin_ia32_minps512_mask with an all-ones mask
 * ((__mmask16)-1) and _mm512_undefined_ps() in the third (source) slot; R
 * is passed as the int rounding argument. */
#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001265
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001266static __inline__ __m512d __DEFAULT_FN_ATTRS
1267_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1268{
1269 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1270 (__v8df) __B,
1271 (__v8df)
1272 _mm512_setzero_pd (),
1273 (__mmask8) __U,
1274 _MM_FROUND_CUR_DIRECTION);
1275}
1276
Michael Kupersteine45af542015-06-30 13:36:19 +00001277static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001278_mm512_min_ps(__m512 __A, __m512 __B)
1279{
1280 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1281 (__v16sf) __B,
1282 (__v16sf)
1283 _mm512_setzero_ps (),
1284 (__mmask16) -1,
1285 _MM_FROUND_CUR_DIRECTION);
1286}
1287
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001288static __inline__ __m512 __DEFAULT_FN_ATTRS
1289_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1290{
1291 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1292 (__v16sf) __B,
1293 (__v16sf) __W,
1294 (__mmask16) __U,
1295 _MM_FROUND_CUR_DIRECTION);
1296}
1297
1298static __inline__ __m512 __DEFAULT_FN_ATTRS
1299_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1300{
1301 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1302 (__v16sf) __B,
1303 (__v16sf)
1304 _mm512_setzero_ps (),
1305 (__mmask16) __U,
1306 _MM_FROUND_CUR_DIRECTION);
1307}
1308
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001309static __inline__ __m128 __DEFAULT_FN_ATTRS
1310_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001311 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001312 (__v4sf) __B,
1313 (__v4sf) __W,
1314 (__mmask8) __U,
1315 _MM_FROUND_CUR_DIRECTION);
1316}
1317
1318static __inline__ __m128 __DEFAULT_FN_ATTRS
1319_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001320 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001321 (__v4sf) __B,
1322 (__v4sf) _mm_setzero_ps (),
1323 (__mmask8) __U,
1324 _MM_FROUND_CUR_DIRECTION);
1325}
1326
/* Scalar single-precision min with explicit rounding argument R.
 * The mask is all ones; a zero vector is the third operand. */
#define _mm_min_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, \
                                          (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001332
/* Scalar single-precision min with explicit rounding argument R.
 * W is the builtin's third operand and U its mask. */
#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), \
                                          (__mmask8)(U), \
                                          (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001338
/* Scalar single-precision min with explicit rounding argument R.
 * A zero vector is the builtin's third operand; U is its mask. */
#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), \
                                          (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001344
1345static __inline__ __m128d __DEFAULT_FN_ATTRS
1346_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001347 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001348 (__v2df) __B,
1349 (__v2df) __W,
1350 (__mmask8) __U,
1351 _MM_FROUND_CUR_DIRECTION);
1352}
1353
1354static __inline__ __m128d __DEFAULT_FN_ATTRS
1355_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001356 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001357 (__v2df) __B,
1358 (__v2df) _mm_setzero_pd (),
1359 (__mmask8) __U,
1360 _MM_FROUND_CUR_DIRECTION);
1361}
1362
/* Scalar double-precision min with explicit rounding argument R.
 * The mask is all ones; a zero vector is the third operand. */
#define _mm_min_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, \
                                           (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001368
/* Scalar double-precision min with explicit rounding argument R.
 * W is the builtin's third operand and U its mask. */
#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), \
                                           (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001374
/* Scalar double-precision min with explicit rounding argument R.
 * A zero vector is the builtin's third operand; U is its mask. */
#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), \
                                           (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001380
Adam Nemet0d5bb552014-07-28 17:14:40 +00001381static __inline __m512i
Michael Kupersteine45af542015-06-30 13:36:19 +00001382__DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001383_mm512_min_epi32(__m512i __A, __m512i __B)
1384{
Craig Topperf2043b02018-05-23 04:51:54 +00001385 return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001386}
1387
Michael Zuckermande860e52016-05-10 11:34:19 +00001388static __inline__ __m512i __DEFAULT_FN_ATTRS
1389_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1390{
Craig Topperf2043b02018-05-23 04:51:54 +00001391 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1392 (__v16si)_mm512_min_epi32(__A, __B),
1393 (__v16si)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001394}
1395
1396static __inline__ __m512i __DEFAULT_FN_ATTRS
1397_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1398{
Craig Topperf2043b02018-05-23 04:51:54 +00001399 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1400 (__v16si)_mm512_min_epi32(__A, __B),
1401 (__v16si)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001402}
1403
Michael Kupersteine45af542015-06-30 13:36:19 +00001404static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001405_mm512_min_epu32(__m512i __A, __m512i __B)
1406{
Craig Topperf2043b02018-05-23 04:51:54 +00001407 return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001408}
1409
Michael Zuckermande860e52016-05-10 11:34:19 +00001410static __inline__ __m512i __DEFAULT_FN_ATTRS
1411_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1412{
Craig Topperf2043b02018-05-23 04:51:54 +00001413 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1414 (__v16si)_mm512_min_epu32(__A, __B),
1415 (__v16si)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001416}
1417
1418static __inline__ __m512i __DEFAULT_FN_ATTRS
1419_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1420{
Craig Topperf2043b02018-05-23 04:51:54 +00001421 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1422 (__v16si)_mm512_min_epu32(__A, __B),
1423 (__v16si)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001424}
1425
Michael Kupersteine45af542015-06-30 13:36:19 +00001426static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001427_mm512_min_epi64(__m512i __A, __m512i __B)
1428{
Craig Topperf2043b02018-05-23 04:51:54 +00001429 return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001430}
1431
Michael Zuckermande860e52016-05-10 11:34:19 +00001432static __inline__ __m512i __DEFAULT_FN_ATTRS
1433_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1434{
Craig Topperf2043b02018-05-23 04:51:54 +00001435 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1436 (__v8di)_mm512_min_epi64(__A, __B),
1437 (__v8di)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001438}
1439
1440static __inline__ __m512i __DEFAULT_FN_ATTRS
1441_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1442{
Craig Topperf2043b02018-05-23 04:51:54 +00001443 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1444 (__v8di)_mm512_min_epi64(__A, __B),
1445 (__v8di)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001446}
1447
Michael Kupersteine45af542015-06-30 13:36:19 +00001448static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001449_mm512_min_epu64(__m512i __A, __m512i __B)
1450{
Craig Topperf2043b02018-05-23 04:51:54 +00001451 return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001452}
1453
Michael Zuckermande860e52016-05-10 11:34:19 +00001454static __inline__ __m512i __DEFAULT_FN_ATTRS
1455_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1456{
Craig Topperf2043b02018-05-23 04:51:54 +00001457 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1458 (__v8di)_mm512_min_epu64(__A, __B),
1459 (__v8di)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001460}
1461
1462static __inline__ __m512i __DEFAULT_FN_ATTRS
1463_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1464{
Craig Topperf2043b02018-05-23 04:51:54 +00001465 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1466 (__v8di)_mm512_min_epu64(__A, __B),
1467 (__v8di)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001468}
1469
Michael Kupersteine45af542015-06-30 13:36:19 +00001470static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001471_mm512_mul_epi32(__m512i __X, __m512i __Y)
1472{
Craig Topper70536f42016-12-27 04:04:57 +00001473 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001474}
1475
Michael Kupersteine45af542015-06-30 13:36:19 +00001476static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topper70536f42016-12-27 04:04:57 +00001477_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001478{
Craig Topper70536f42016-12-27 04:04:57 +00001479 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1480 (__v8di)_mm512_mul_epi32(__X, __Y),
1481 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001482}
1483
Michael Kupersteine45af542015-06-30 13:36:19 +00001484static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topper70536f42016-12-27 04:04:57 +00001485_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001486{
Craig Topper70536f42016-12-27 04:04:57 +00001487 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1488 (__v8di)_mm512_mul_epi32(__X, __Y),
1489 (__v8di)_mm512_setzero_si512 ());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001490}
1491
Michael Kupersteine45af542015-06-30 13:36:19 +00001492static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001493_mm512_mul_epu32(__m512i __X, __m512i __Y)
1494{
Craig Topper70536f42016-12-27 04:04:57 +00001495 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001496}
1497
Michael Kupersteine45af542015-06-30 13:36:19 +00001498static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topper70536f42016-12-27 04:04:57 +00001499_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001500{
Craig Topper70536f42016-12-27 04:04:57 +00001501 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1502 (__v8di)_mm512_mul_epu32(__X, __Y),
1503 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001504}
1505
Michael Kupersteine45af542015-06-30 13:36:19 +00001506static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topper70536f42016-12-27 04:04:57 +00001507_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001508{
Craig Topper70536f42016-12-27 04:04:57 +00001509 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1510 (__v8di)_mm512_mul_epu32(__X, __Y),
1511 (__v8di)_mm512_setzero_si512 ());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001512}
1513
Michael Kupersteine45af542015-06-30 13:36:19 +00001514static __inline __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001515_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1516{
Craig Topper6a77b622016-06-04 05:43:41 +00001517 return (__m512i) ((__v16su) __A * (__v16su) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001518}
1519
Michael Kupersteine45af542015-06-30 13:36:19 +00001520static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topperf43e4a12016-09-03 19:19:49 +00001521_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001522{
Craig Topperf43e4a12016-09-03 19:19:49 +00001523 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1524 (__v16si)_mm512_mullo_epi32(__A, __B),
1525 (__v16si)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001526}
1527
Michael Kupersteine45af542015-06-30 13:36:19 +00001528static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topperf43e4a12016-09-03 19:19:49 +00001529_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001530{
Craig Topperf43e4a12016-09-03 19:19:49 +00001531 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1532 (__v16si)_mm512_mullo_epi32(__A, __B),
1533 (__v16si)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001534}
1535
Craig Toppere95bde32018-04-26 05:38:39 +00001536static __inline__ __m512i __DEFAULT_FN_ATTRS
1537_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1538 return (__m512i) ((__v8du) __A * (__v8du) __B);
1539}
1540
1541static __inline__ __m512i __DEFAULT_FN_ATTRS
1542_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1543 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1544 (__v8di)_mm512_mullox_epi64(__A, __B),
1545 (__v8di)__W);
1546}
1547
/* Packed double-precision sqrt with explicit rounding argument R.
 * W is the builtin's second operand and U its mask. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(W), \
                                         (__mmask8)(U), \
                                         (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001552
/* Packed double-precision sqrt with explicit rounding argument R.
 * A zero vector is the builtin's second operand; U is its mask. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), \
                                         (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001557
/* Packed double-precision sqrt with explicit rounding argument R.
 * The mask is all ones; the second operand is an undefined vector. */
#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_undefined_pd(), \
                                         (__mmask8)-1, \
                                         (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001562
Michael Kupersteine45af542015-06-30 13:36:19 +00001563static __inline__ __m512d __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00001564_mm512_sqrt_pd(__m512d __a)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001565{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00001566 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001567 (__v8df) _mm512_setzero_pd (),
1568 (__mmask8) -1,
1569 _MM_FROUND_CUR_DIRECTION);
1570}
1571
Michael Zuckermancb856772016-05-16 11:42:01 +00001572static __inline__ __m512d __DEFAULT_FN_ATTRS
1573_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1574{
1575 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1576 (__v8df) __W,
1577 (__mmask8) __U,
1578 _MM_FROUND_CUR_DIRECTION);
1579}
1580
1581static __inline__ __m512d __DEFAULT_FN_ATTRS
1582_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1583{
1584 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1585 (__v8df)
1586 _mm512_setzero_pd (),
1587 (__mmask8) __U,
1588 _MM_FROUND_CUR_DIRECTION);
1589}
1590
/* Packed single-precision sqrt with explicit rounding argument R.
 * W is the builtin's second operand and U its mask. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(W), \
                                        (__mmask16)(U), \
                                        (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001595
/* Packed single-precision sqrt with explicit rounding argument R.
 * A zero vector is the builtin's second operand; U is its mask. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), \
                                        (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001600
/* Packed single-precision sqrt with explicit rounding argument R.
 * The mask is all ones; the second operand is an undefined vector. */
#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_undefined_ps(), \
                                        (__mmask16)-1, \
                                        (int)(R)); })
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001605
Michael Kupersteine45af542015-06-30 13:36:19 +00001606static __inline__ __m512 __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00001607_mm512_sqrt_ps(__m512 __a)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001608{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00001609 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001610 (__v16sf) _mm512_setzero_ps (),
1611 (__mmask16) -1,
1612 _MM_FROUND_CUR_DIRECTION);
1613}
1614
Michael Zuckermancb856772016-05-16 11:42:01 +00001615static __inline__ __m512 __DEFAULT_FN_ATTRS
Asaf Badouh880f0c22016-06-13 15:15:57 +00001616_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
Michael Zuckermancb856772016-05-16 11:42:01 +00001617{
1618 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1619 (__v16sf) __W,
1620 (__mmask16) __U,
1621 _MM_FROUND_CUR_DIRECTION);
1622}
1623
1624static __inline__ __m512 __DEFAULT_FN_ATTRS
Asaf Badouh880f0c22016-06-13 15:15:57 +00001625_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
Michael Zuckermancb856772016-05-16 11:42:01 +00001626{
1627 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1628 (__v16sf) _mm512_setzero_ps (),
1629 (__mmask16) __U,
1630 _MM_FROUND_CUR_DIRECTION);
1631}
1632
Michael Kupersteine45af542015-06-30 13:36:19 +00001633static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001634_mm512_rsqrt14_pd(__m512d __A)
1635{
1636 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1637 (__v8df)
1638 _mm512_setzero_pd (),
1639 (__mmask8) -1);}
1640
Michael Zuckermancb856772016-05-16 11:42:01 +00001641static __inline__ __m512d __DEFAULT_FN_ATTRS
1642_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1643{
1644 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1645 (__v8df) __W,
1646 (__mmask8) __U);
1647}
1648
1649static __inline__ __m512d __DEFAULT_FN_ATTRS
1650_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1651{
1652 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1653 (__v8df)
1654 _mm512_setzero_pd (),
1655 (__mmask8) __U);
1656}
1657
Michael Kupersteine45af542015-06-30 13:36:19 +00001658static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001659_mm512_rsqrt14_ps(__m512 __A)
1660{
1661 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1662 (__v16sf)
1663 _mm512_setzero_ps (),
1664 (__mmask16) -1);
1665}
1666
Michael Zuckermancb856772016-05-16 11:42:01 +00001667static __inline__ __m512 __DEFAULT_FN_ATTRS
1668_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1669{
1670 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1671 (__v16sf) __W,
1672 (__mmask16) __U);
1673}
1674
1675static __inline__ __m512 __DEFAULT_FN_ATTRS
1676_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1677{
1678 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1679 (__v16sf)
1680 _mm512_setzero_ps (),
1681 (__mmask16) __U);
1682}
1683
Michael Kupersteine45af542015-06-30 13:36:19 +00001684static __inline__ __m128 __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001685_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1686{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001687 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001688 (__v4sf) __B,
1689 (__v4sf)
1690 _mm_setzero_ps (),
1691 (__mmask8) -1);
1692}
1693
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001694static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001695_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001696{
1697 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1698 (__v4sf) __B,
1699 (__v4sf) __W,
1700 (__mmask8) __U);
1701}
1702
1703static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001704_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001705{
1706 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1707 (__v4sf) __B,
1708 (__v4sf) _mm_setzero_ps (),
1709 (__mmask8) __U);
1710}
1711
Michael Kupersteine45af542015-06-30 13:36:19 +00001712static __inline__ __m128d __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001713_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1714{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001715 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001716 (__v2df) __B,
1717 (__v2df)
1718 _mm_setzero_pd (),
1719 (__mmask8) -1);
1720}
1721
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001722static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001723_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001724{
1725 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1726 (__v2df) __B,
1727 (__v2df) __W,
1728 (__mmask8) __U);
1729}
1730
1731static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001732_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001733{
1734 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1735 (__v2df) __B,
1736 (__v2df) _mm_setzero_pd (),
1737 (__mmask8) __U);
1738}
1739
Michael Kupersteine45af542015-06-30 13:36:19 +00001740static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001741_mm512_rcp14_pd(__m512d __A)
1742{
1743 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1744 (__v8df)
1745 _mm512_setzero_pd (),
1746 (__mmask8) -1);
1747}
1748
Michael Zuckermancb856772016-05-16 11:42:01 +00001749static __inline__ __m512d __DEFAULT_FN_ATTRS
1750_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1751{
1752 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1753 (__v8df) __W,
1754 (__mmask8) __U);
1755}
1756
1757static __inline__ __m512d __DEFAULT_FN_ATTRS
1758_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1759{
1760 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1761 (__v8df)
1762 _mm512_setzero_pd (),
1763 (__mmask8) __U);
1764}
1765
Michael Kupersteine45af542015-06-30 13:36:19 +00001766static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001767_mm512_rcp14_ps(__m512 __A)
1768{
1769 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1770 (__v16sf)
1771 _mm512_setzero_ps (),
1772 (__mmask16) -1);
1773}
Michael Zuckermancb856772016-05-16 11:42:01 +00001774
1775static __inline__ __m512 __DEFAULT_FN_ATTRS
1776_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1777{
1778 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1779 (__v16sf) __W,
1780 (__mmask16) __U);
1781}
1782
1783static __inline__ __m512 __DEFAULT_FN_ATTRS
1784_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1785{
1786 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1787 (__v16sf)
1788 _mm512_setzero_ps (),
1789 (__mmask16) __U);
1790}
1791
Michael Kupersteine45af542015-06-30 13:36:19 +00001792static __inline__ __m128 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00001793_mm_rcp14_ss(__m128 __A, __m128 __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001794{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001795 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001796 (__v4sf) __B,
1797 (__v4sf)
1798 _mm_setzero_ps (),
1799 (__mmask8) -1);
1800}
1801
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001802static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001803_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001804{
1805 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1806 (__v4sf) __B,
1807 (__v4sf) __W,
1808 (__mmask8) __U);
1809}
1810
1811static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001812_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001813{
1814 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1815 (__v4sf) __B,
1816 (__v4sf) _mm_setzero_ps (),
1817 (__mmask8) __U);
1818}
1819
Michael Kupersteine45af542015-06-30 13:36:19 +00001820static __inline__ __m128d __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00001821_mm_rcp14_sd(__m128d __A, __m128d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001822{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001823 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001824 (__v2df) __B,
1825 (__v2df)
1826 _mm_setzero_pd (),
1827 (__mmask8) -1);
1828}
1829
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001830static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001831_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001832{
1833 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1834 (__v2df) __B,
1835 (__v2df) __W,
1836 (__mmask8) __U);
1837}
1838
1839static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001840_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001841{
1842 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1843 (__v2df) __B,
1844 (__v2df) _mm_setzero_pd (),
1845 (__mmask8) __U);
1846}
1847
Michael Kupersteine45af542015-06-30 13:36:19 +00001848static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001849_mm512_floor_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001850{
Adam Nemet0d5bb552014-07-28 17:14:40 +00001851 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1852 _MM_FROUND_FLOOR,
1853 (__v16sf) __A, -1,
1854 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001855}
1856
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00001857static __inline__ __m512 __DEFAULT_FN_ATTRS
1858_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1859{
1860 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1861 _MM_FROUND_FLOOR,
1862 (__v16sf) __W, __U,
1863 _MM_FROUND_CUR_DIRECTION);
1864}
1865
Michael Kupersteine45af542015-06-30 13:36:19 +00001866static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001867_mm512_floor_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001868{
Adam Nemet0d5bb552014-07-28 17:14:40 +00001869 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1870 _MM_FROUND_FLOOR,
1871 (__v8df) __A, -1,
1872 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001873}
1874
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00001875static __inline__ __m512d __DEFAULT_FN_ATTRS
1876_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1877{
1878 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1879 _MM_FROUND_FLOOR,
1880 (__v8df) __W, __U,
1881 _MM_FROUND_CUR_DIRECTION);
1882}
1883
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00001884static __inline__ __m512 __DEFAULT_FN_ATTRS
1885_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1886{
1887 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1888 _MM_FROUND_CEIL,
1889 (__v16sf) __W, __U,
1890 _MM_FROUND_CUR_DIRECTION);
1891}
1892
Michael Kupersteine45af542015-06-30 13:36:19 +00001893static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001894_mm512_ceil_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001895{
Adam Nemet0d5bb552014-07-28 17:14:40 +00001896 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1897 _MM_FROUND_CEIL,
1898 (__v16sf) __A, -1,
1899 _MM_FROUND_CUR_DIRECTION);
1900}
1901
Michael Kupersteine45af542015-06-30 13:36:19 +00001902static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001903_mm512_ceil_pd(__m512d __A)
1904{
1905 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1906 _MM_FROUND_CEIL,
1907 (__v8df) __A, -1,
1908 _MM_FROUND_CUR_DIRECTION);
1909}
1910
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00001911static __inline__ __m512d __DEFAULT_FN_ATTRS
1912_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1913{
1914 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1915 _MM_FROUND_CEIL,
1916 (__v8df) __W, __U,
1917 _MM_FROUND_CUR_DIRECTION);
1918}
1919
Michael Kupersteine45af542015-06-30 13:36:19 +00001920static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001921_mm512_abs_epi64(__m512i __A)
1922{
Craig Topperf2043b02018-05-23 04:51:54 +00001923 return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001924}
1925
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001926static __inline__ __m512i __DEFAULT_FN_ATTRS
1927_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1928{
Craig Topperf2043b02018-05-23 04:51:54 +00001929 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1930 (__v8di)_mm512_abs_epi64(__A),
1931 (__v8di)__W);
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001932}
1933
1934static __inline__ __m512i __DEFAULT_FN_ATTRS
1935_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1936{
Craig Topperf2043b02018-05-23 04:51:54 +00001937 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1938 (__v8di)_mm512_abs_epi64(__A),
1939 (__v8di)_mm512_setzero_si512());
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001940}
1941
Michael Kupersteine45af542015-06-30 13:36:19 +00001942static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001943_mm512_abs_epi32(__m512i __A)
1944{
Craig Topperf2043b02018-05-23 04:51:54 +00001945 return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001946}
1947
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001948static __inline__ __m512i __DEFAULT_FN_ATTRS
1949_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1950{
Craig Topper26df8c42018-05-24 17:32:49 +00001951 return (__m512i)__builtin_ia32_selectd_512(__U,
Craig Topperf2043b02018-05-23 04:51:54 +00001952 (__v16si)_mm512_abs_epi32(__A),
1953 (__v16si)__W);
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001954}
1955
1956static __inline__ __m512i __DEFAULT_FN_ATTRS
1957_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1958{
Craig Topper26df8c42018-05-24 17:32:49 +00001959 return (__m512i)__builtin_ia32_selectd_512(__U,
Craig Topperf2043b02018-05-23 04:51:54 +00001960 (__v16si)_mm512_abs_epi32(__A),
1961 (__v16si)_mm512_setzero_si512());
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001962}
1963
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001964static __inline__ __m128 __DEFAULT_FN_ATTRS
1965_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001966 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001967 (__v4sf) __B,
1968 (__v4sf) __W,
1969 (__mmask8) __U,
1970 _MM_FROUND_CUR_DIRECTION);
1971}
1972
1973static __inline__ __m128 __DEFAULT_FN_ATTRS
1974_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001975 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001976 (__v4sf) __B,
1977 (__v4sf) _mm_setzero_ps (),
1978 (__mmask8) __U,
1979 _MM_FROUND_CUR_DIRECTION);
1980}
1981
/* Scalar single-precision add with explicit rounding argument R.
 * The mask is all ones; a zero vector is the third operand. */
#define _mm_add_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, \
                                          (int)(R)); })
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001987
/* Masked add_ss with rounding argument R; W is the pass-through operand and
 * U the mask. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001993
/* Zero-masked add_ss with rounding argument R; the pass-through operand is
 * _mm_setzero_ps(). */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001999
2000static __inline__ __m128d __DEFAULT_FN_ATTRS
2001_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002002 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002003 (__v2df) __B,
2004 (__v2df) __W,
2005 (__mmask8) __U,
2006 _MM_FROUND_CUR_DIRECTION);
2007}
2008
2009static __inline__ __m128d __DEFAULT_FN_ATTRS
2010_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002011 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002012 (__v2df) __B,
2013 (__v2df) _mm_setzero_pd (),
2014 (__mmask8) __U,
2015 _MM_FROUND_CUR_DIRECTION);
2016}
/* add_sd with caller-supplied rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002022
/* Masked add_sd with rounding argument R; W is the pass-through operand and
 * U the mask. */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002028
/* Zero-masked add_sd with rounding argument R; the pass-through operand is
 * _mm_setzero_pd(). */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002034
Asaf Badouhffeb6242015-07-21 15:27:28 +00002035static __inline__ __m512d __DEFAULT_FN_ATTRS
2036_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002037 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2038 (__v8df)_mm512_add_pd(__A, __B),
2039 (__v8df)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002040}
2041
2042static __inline__ __m512d __DEFAULT_FN_ATTRS
2043_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002044 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2045 (__v8df)_mm512_add_pd(__A, __B),
2046 (__v8df)_mm512_setzero_pd());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002047}
2048
2049static __inline__ __m512 __DEFAULT_FN_ATTRS
2050_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002051 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2052 (__v16sf)_mm512_add_ps(__A, __B),
2053 (__v16sf)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002054}
2055
2056static __inline__ __m512 __DEFAULT_FN_ATTRS
2057_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002058 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2059 (__v16sf)_mm512_add_ps(__A, __B),
2060 (__v16sf)_mm512_setzero_ps());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002061}
2062
/* 512-bit add_pd with rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002068
/* Masked 512-bit add_pd with rounding argument R; W is the pass-through
 * operand and U the mask. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_addpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002074
/* Zero-masked 512-bit add_pd with rounding argument R; the pass-through
 * operand is _mm512_setzero_pd(). */
#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_addpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002080
/* 512-bit add_ps with rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002086
/* Masked 512-bit add_ps with rounding argument R; W is the pass-through
 * operand and U the mask. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_addps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), (__mmask16)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002092
/* Zero-masked 512-bit add_ps with rounding argument R; the pass-through
 * operand is _mm512_setzero_ps(). */
#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_addps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002098
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002099static __inline__ __m128 __DEFAULT_FN_ATTRS
2100_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002101 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002102 (__v4sf) __B,
2103 (__v4sf) __W,
2104 (__mmask8) __U,
2105 _MM_FROUND_CUR_DIRECTION);
2106}
2107
2108static __inline__ __m128 __DEFAULT_FN_ATTRS
2109_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002110 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002111 (__v4sf) __B,
2112 (__v4sf) _mm_setzero_ps (),
2113 (__mmask8) __U,
2114 _MM_FROUND_CUR_DIRECTION);
2115}
/* sub_ss with caller-supplied rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002121
/* Masked sub_ss with rounding argument R; W is the pass-through operand and
 * U the mask. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002127
/* Zero-masked sub_ss with rounding argument R; the pass-through operand is
 * _mm_setzero_ps(). */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002133
2134static __inline__ __m128d __DEFAULT_FN_ATTRS
2135_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002136 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002137 (__v2df) __B,
2138 (__v2df) __W,
2139 (__mmask8) __U,
2140 _MM_FROUND_CUR_DIRECTION);
2141}
2142
2143static __inline__ __m128d __DEFAULT_FN_ATTRS
2144_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002145 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002146 (__v2df) __B,
2147 (__v2df) _mm_setzero_pd (),
2148 (__mmask8) __U,
2149 _MM_FROUND_CUR_DIRECTION);
2150}
2151
/* sub_sd with caller-supplied rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002157
/* Masked sub_sd with rounding argument R; W is the pass-through operand and
 * U the mask. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002163
/* Zero-masked sub_sd with rounding argument R; the pass-through operand is
 * _mm_setzero_pd(). */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002169
Asaf Badouhffeb6242015-07-21 15:27:28 +00002170static __inline__ __m512d __DEFAULT_FN_ATTRS
2171_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002172 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2173 (__v8df)_mm512_sub_pd(__A, __B),
2174 (__v8df)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002175}
2176
2177static __inline__ __m512d __DEFAULT_FN_ATTRS
2178_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002179 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2180 (__v8df)_mm512_sub_pd(__A, __B),
2181 (__v8df)_mm512_setzero_pd());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002182}
2183
2184static __inline__ __m512 __DEFAULT_FN_ATTRS
2185_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002186 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2187 (__v16sf)_mm512_sub_ps(__A, __B),
2188 (__v16sf)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002189}
2190
2191static __inline__ __m512 __DEFAULT_FN_ATTRS
2192_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002193 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2194 (__v16sf)_mm512_sub_ps(__A, __B),
2195 (__v16sf)_mm512_setzero_ps());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002196}
2197
/* 512-bit sub_pd with rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002203
/* Masked 512-bit sub_pd with rounding argument R; W is the pass-through
 * operand and U the mask. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_subpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002209
/* Zero-masked 512-bit sub_pd with rounding argument R; the pass-through
 * operand is _mm512_setzero_pd(). */
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_subpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002215
/* 512-bit sub_ps with rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002221
/* Masked 512-bit sub_ps with explicit rounding argument.
 *   W: pass-through vector, U: __mmask16 lane mask, A, B: operands,
 *   R: cast to int and passed as the builtin's last argument.
 * Expands to a single expression with no trailing ';' (the previous
 * definition ended in "});", which broke use as a function argument or
 * inside a larger expression). */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_subps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), (__mmask16)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002227
/* Zero-masked 512-bit sub_ps with explicit rounding argument.
 *   U: __mmask16 lane mask, A, B: operands,
 *   R: cast to int and passed as the builtin's last argument.
 * The pass-through operand is _mm512_setzero_ps().
 * Expands to a single expression with no trailing ';' (the previous
 * definition ended in "});", which broke use as a function argument or
 * inside a larger expression). */
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_subps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002233
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002234static __inline__ __m128 __DEFAULT_FN_ATTRS
2235_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002236 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002237 (__v4sf) __B,
2238 (__v4sf) __W,
2239 (__mmask8) __U,
2240 _MM_FROUND_CUR_DIRECTION);
2241}
2242
2243static __inline__ __m128 __DEFAULT_FN_ATTRS
2244_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002245 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002246 (__v4sf) __B,
2247 (__v4sf) _mm_setzero_ps (),
2248 (__mmask8) __U,
2249 _MM_FROUND_CUR_DIRECTION);
2250}
/* mul_ss with caller-supplied rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002256
/* Masked mul_ss with rounding argument R; W is the pass-through operand and
 * U the mask. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002262
/* Zero-masked mul_ss with rounding argument R; the pass-through operand is
 * _mm_setzero_ps(). */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002268
2269static __inline__ __m128d __DEFAULT_FN_ATTRS
2270_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002271 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002272 (__v2df) __B,
2273 (__v2df) __W,
2274 (__mmask8) __U,
2275 _MM_FROUND_CUR_DIRECTION);
2276}
2277
2278static __inline__ __m128d __DEFAULT_FN_ATTRS
2279_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002280 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002281 (__v2df) __B,
2282 (__v2df) _mm_setzero_pd (),
2283 (__mmask8) __U,
2284 _MM_FROUND_CUR_DIRECTION);
2285}
2286
/* mul_sd with caller-supplied rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002292
/* Masked mul_sd with rounding argument R; W is the pass-through operand and
 * U the mask. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002298
/* Zero-masked mul_sd with rounding argument R; the pass-through operand is
 * _mm_setzero_pd(). */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002304
Asaf Badouhffeb6242015-07-21 15:27:28 +00002305static __inline__ __m512d __DEFAULT_FN_ATTRS
2306_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002307 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2308 (__v8df)_mm512_mul_pd(__A, __B),
2309 (__v8df)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002310}
2311
2312static __inline__ __m512d __DEFAULT_FN_ATTRS
2313_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002314 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2315 (__v8df)_mm512_mul_pd(__A, __B),
2316 (__v8df)_mm512_setzero_pd());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002317}
2318
2319static __inline__ __m512 __DEFAULT_FN_ATTRS
2320_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002321 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2322 (__v16sf)_mm512_mul_ps(__A, __B),
2323 (__v16sf)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002324}
2325
2326static __inline__ __m512 __DEFAULT_FN_ATTRS
2327_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002328 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2329 (__v16sf)_mm512_mul_ps(__A, __B),
2330 (__v16sf)_mm512_setzero_ps());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002331}
2332
/* 512-bit mul_pd with rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002338
/* Masked 512-bit mul_pd with rounding argument R; W is the pass-through
 * operand and U the mask. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002344
/* Zero-masked 512-bit mul_pd with rounding argument R; the pass-through
 * operand is _mm512_setzero_pd(). */
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002350
/* 512-bit mul_ps with rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002356
/* Masked 512-bit mul_ps with explicit rounding argument.
 *   W: pass-through vector, U: __mmask16 lane mask, A, B: operands,
 *   R: cast to int and passed as the builtin's last argument.
 * Expands to a single expression with no trailing ';' (the previous
 * definition ended in "});", which broke use as a function argument or
 * inside a larger expression). */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_mulps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), (__mmask16)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002362
/* Zero-masked 512-bit mul_ps with explicit rounding argument.
 *   U: __mmask16 lane mask, A, B: operands,
 *   R: cast to int and passed as the builtin's last argument.
 * The pass-through operand is _mm512_setzero_ps().
 * Expands to a single expression with no trailing ';' (the previous
 * definition ended in "});", which broke use as a function argument or
 * inside a larger expression). */
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_mulps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002368
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002369static __inline__ __m128 __DEFAULT_FN_ATTRS
2370_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002371 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002372 (__v4sf) __B,
2373 (__v4sf) __W,
2374 (__mmask8) __U,
2375 _MM_FROUND_CUR_DIRECTION);
2376}
2377
2378static __inline__ __m128 __DEFAULT_FN_ATTRS
2379_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002380 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002381 (__v4sf) __B,
2382 (__v4sf) _mm_setzero_ps (),
2383 (__mmask8) __U,
2384 _MM_FROUND_CUR_DIRECTION);
2385}
2386
/* div_ss with caller-supplied rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002392
/* Masked div_ss with rounding argument R; W is the pass-through operand and
 * U the mask. */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002398
/* Zero-masked div_ss with rounding argument R; the pass-through operand is
 * _mm_setzero_ps(). */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002404
2405static __inline__ __m128d __DEFAULT_FN_ATTRS
2406_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002407 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002408 (__v2df) __B,
2409 (__v2df) __W,
2410 (__mmask8) __U,
2411 _MM_FROUND_CUR_DIRECTION);
2412}
2413
2414static __inline__ __m128d __DEFAULT_FN_ATTRS
2415_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002416 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002417 (__v2df) __B,
2418 (__v2df) _mm_setzero_pd (),
2419 (__mmask8) __U,
2420 _MM_FROUND_CUR_DIRECTION);
2421}
2422
/* div_sd with caller-supplied rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002428
/* Masked div_sd with rounding argument R; W is the pass-through operand and
 * U the mask. */
#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002434
/* Zero-masked div_sd with rounding argument R; the pass-through operand is
 * _mm_setzero_pd(). */
#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002440
Michael Zuckerman223676d2016-06-14 12:38:58 +00002441static __inline __m512d __DEFAULT_FN_ATTRS
2442_mm512_div_pd(__m512d __a, __m512d __b)
2443{
2444 return (__m512d)((__v8df)__a/(__v8df)__b);
2445}
2446
Asaf Badouhffeb6242015-07-21 15:27:28 +00002447static __inline__ __m512d __DEFAULT_FN_ATTRS
2448_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002449 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2450 (__v8df)_mm512_div_pd(__A, __B),
2451 (__v8df)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002452}
2453
2454static __inline__ __m512d __DEFAULT_FN_ATTRS
2455_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002456 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2457 (__v8df)_mm512_div_pd(__A, __B),
2458 (__v8df)_mm512_setzero_pd());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002459}
2460
Michael Zuckerman223676d2016-06-14 12:38:58 +00002461static __inline __m512 __DEFAULT_FN_ATTRS
2462_mm512_div_ps(__m512 __a, __m512 __b)
2463{
2464 return (__m512)((__v16sf)__a/(__v16sf)__b);
2465}
2466
Asaf Badouhffeb6242015-07-21 15:27:28 +00002467static __inline__ __m512 __DEFAULT_FN_ATTRS
2468_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002469 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2470 (__v16sf)_mm512_div_ps(__A, __B),
2471 (__v16sf)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002472}
2473
2474static __inline__ __m512 __DEFAULT_FN_ATTRS
2475_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002476 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2477 (__v16sf)_mm512_div_ps(__A, __B),
2478 (__v16sf)_mm512_setzero_ps());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002479}
2480
/* 512-bit div_pd with rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002486
/* Masked 512-bit div_pd with rounding argument R; W is the pass-through
 * operand and U the mask. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002492
/* Zero-masked 512-bit div_pd with rounding argument R; the pass-through
 * operand is _mm512_setzero_pd(). */
#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002498
/* 512-bit div_ps with rounding argument R; mask is all ones and the
 * pass-through operand is zero. */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002504
/* Masked 512-bit div_ps with explicit rounding argument.
 *   W: pass-through vector, U: __mmask16 lane mask, A, B: operands,
 *   R: cast to int and passed as the builtin's last argument.
 * Expands to a single expression with no trailing ';' (the previous
 * definition ended in "});", which broke use as a function argument or
 * inside a larger expression). */
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_divps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), (__mmask16)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002510
/* Zero-masked 512-bit div_ps with explicit rounding argument.
 *   U: __mmask16 lane mask, A, B: operands,
 *   R: cast to int and passed as the builtin's last argument.
 * The pass-through operand is _mm512_setzero_ps().
 * Expands to a single expression with no trailing ';' (the previous
 * definition ended in "});", which broke use as a function argument or
 * inside a larger expression). */
#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_divps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002516
/* Unmasked 512-bit roundscale_ps using the current rounding direction.
 *   A: source vector, B: cast to int and passed as the builtin's second
 *   argument.
 * The mask is all ones, so the pass-through operand is never selected; it is
 * _mm512_undefined_ps() (as in _mm512_roundscale_round_ps) rather than a
 * second expansion of A, so A is evaluated exactly once. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(B), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))
Craig Topper72c7d512015-02-01 07:35:35 +00002521
/* Masked 512-bit roundscale_ps using the current rounding direction.
 * A is the pass-through vector, B the __mmask16 mask, C the source vector
 * and imm the builtin's second (int) argument. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), (int)(imm), \
      (__v16sf)(__m512)(A), (__mmask16)(B), \
      _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00002526
/* Zero-masked 512-bit roundscale_ps using the current rounding direction.
 * A is the __mmask16 mask, B the source vector and imm the builtin's second
 * (int) argument; the pass-through operand is _mm512_setzero_ps(). */
#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(A), \
      _MM_FROUND_CUR_DIRECTION))
2532
/* Masked 512-bit roundscale_ps with explicit rounding argument R.
 * A is the pass-through vector, B the __mmask16 mask, C the source vector
 * and imm the builtin's second (int) argument. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), (int)(imm), \
      (__v16sf)(__m512)(A), (__mmask16)(B), (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002537
/* Zero-masked 512-bit roundscale_ps with explicit rounding argument R.
 * A is the __mmask16 mask, B the source vector and imm the builtin's second
 * (int) argument; the pass-through operand is _mm512_setzero_ps(). */
#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(A), (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002542
/* Unmasked 512-bit roundscale_ps with explicit rounding argument R.
 * The mask is all ones and the pass-through operand is
 * _mm512_undefined_ps(). */
#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002547
/* Unmasked 512-bit roundscale_pd using the current rounding direction.
 *   A: source vector, B: cast to int and passed as the builtin's second
 *   argument.
 * The mask is all ones, so the pass-through operand is never selected; it is
 * _mm512_undefined_pd() (as in _mm512_roundscale_round_pd) rather than a
 * second expansion of A, so A is evaluated exactly once. */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), (int)(B), \
      (__v8df)_mm512_undefined_pd(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
Adam Nemet0d5bb552014-07-28 17:14:40 +00002552
/* Masked 512-bit roundscale_pd using the current rounding direction.
 * A is the pass-through vector, B the __mmask8 mask, C the source vector
 * and imm the builtin's second (int) argument. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), (int)(imm), \
      (__v8df)(__m512d)(A), (__mmask8)(B), \
      _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00002557
/* Zero-masked 512-bit roundscale_pd using the current rounding direction.
 * A is the __mmask8 mask, B the source vector and imm the builtin's second
 * (int) argument; the pass-through operand is _mm512_setzero_pd(). */
#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), (int)(imm), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(A), \
      _MM_FROUND_CUR_DIRECTION))
2563
/* Masked 512-bit roundscale_pd with explicit rounding argument R.
 * A is the pass-through vector, B the __mmask8 mask, C the source vector
 * and imm the builtin's second (int) argument. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), (int)(imm), \
      (__v8df)(__m512d)(A), (__mmask8)(B), (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002568
/* Zero-masked 512-bit roundscale_pd with explicit rounding argument R.
 * A is the __mmask8 mask, B the source vector and imm the builtin's second
 * (int) argument; the pass-through operand is _mm512_setzero_pd(). */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), (int)(imm), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(A), (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002573
/* Unmasked 512-bit roundscale_pd with explicit rounding argument R.
 * The mask is all ones and the pass-through operand is
 * _mm512_undefined_pd(). */
#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), (int)(imm), \
      (__v8df)_mm512_undefined_pd(), (__mmask8)-1, (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002578
/* Unmasked fmadd_pd with rounding argument R: passes A, B, C to the
 * vfmaddpd512 "_mask" builtin with an all-ones mask. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), (__mmask8)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002584
2585
/* Masked fmadd_pd with rounding argument R: passes A, B, C and the mask U
 * to the vfmaddpd512 "_mask" builtin. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002591
2592
/* fmadd_pd with rounding argument R via the vfmaddpd512 "_mask3" builtin;
 * note the mask U follows C in this macro's parameter list. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002598
2599
/* fmadd_pd with rounding argument R via the vfmaddpd512 "_maskz" builtin;
 * the mask U is the first macro parameter. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002605
2606
/* Fused multiply-subtract (A * B) - C on 8 packed doubles with rounding
 * mode R.  Built on the fmadd builtin by negating C; the all-ones mask
 * enables every lane. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002612
2613
/* Masked fused multiply-subtract (A * B) - C on 8 packed doubles with
 * rounding mode R.  Built on the fmadd _mask builtin by negating C; U is the
 * write mask (per the Intel Intrinsics Guide, unselected lanes come from A). */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002619
2620
/* Zero-masked fused multiply-subtract (A * B) - C on 8 packed doubles with
 * rounding mode R.  Built on the fmadd _maskz builtin by negating C; U is the
 * write mask (per the Intel Intrinsics Guide, unselected lanes are zeroed). */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002626
2627
/* Fused negated multiply-add -(A * B) + C on 8 packed doubles with rounding
 * mode R.  Built on the fmadd builtin by negating A; the all-ones mask
 * enables every lane. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002633
2634
/* Masked fused negated multiply-add -(A * B) + C on 8 packed doubles with
 * rounding mode R.  Built on the fmadd _mask3 builtin by negating A; U is the
 * write mask (per the Intel Intrinsics Guide, unselected lanes come from C). */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002640
2641
/* Zero-masked fused negated multiply-add -(A * B) + C on 8 packed doubles
 * with rounding mode R.  Built on the fmadd _maskz builtin by negating A; U
 * is the write mask (per the Intel Intrinsics Guide, unselected lanes are
 * zeroed). */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002647
2648
/* Fused negated multiply-subtract -(A * B) - C on 8 packed doubles with
 * rounding mode R.  Built on the fmadd builtin by negating both A and C; the
 * all-ones mask enables every lane. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002654
2655
/* Zero-masked fused negated multiply-subtract -(A * B) - C on 8 packed
 * doubles with rounding mode R.  Built on the fmadd _maskz builtin by
 * negating both A and C; U is the write mask (per the Intel Intrinsics Guide,
 * unselected lanes are zeroed). */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002661
2662
Michael Kupersteine45af542015-06-30 13:36:19 +00002663static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002664_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2665{
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002666 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2667 (__v8df) __B,
2668 (__v8df) __C,
2669 (__mmask8) -1,
2670 _MM_FROUND_CUR_DIRECTION);
2671}
2672
Michael Kupersteine45af542015-06-30 13:36:19 +00002673static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002674_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2675{
2676 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2677 (__v8df) __B,
2678 (__v8df) __C,
2679 (__mmask8) __U,
2680 _MM_FROUND_CUR_DIRECTION);
2681}
2682
Michael Kupersteine45af542015-06-30 13:36:19 +00002683static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002684_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2685{
2686 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2687 (__v8df) __B,
2688 (__v8df) __C,
2689 (__mmask8) __U,
2690 _MM_FROUND_CUR_DIRECTION);
2691}
2692
Michael Kupersteine45af542015-06-30 13:36:19 +00002693static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002694_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2695{
2696 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2697 (__v8df) __B,
2698 (__v8df) __C,
2699 (__mmask8) __U,
2700 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002701}
2702
Michael Kupersteine45af542015-06-30 13:36:19 +00002703static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002704_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2705{
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002706 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2707 (__v8df) __B,
2708 -(__v8df) __C,
2709 (__mmask8) -1,
2710 _MM_FROUND_CUR_DIRECTION);
2711}
2712
Michael Kupersteine45af542015-06-30 13:36:19 +00002713static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002714_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2715{
2716 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2717 (__v8df) __B,
2718 -(__v8df) __C,
2719 (__mmask8) __U,
2720 _MM_FROUND_CUR_DIRECTION);
2721}
2722
Michael Kupersteine45af542015-06-30 13:36:19 +00002723static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002724_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2725{
2726 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2727 (__v8df) __B,
2728 -(__v8df) __C,
2729 (__mmask8) __U,
2730 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002731}
2732
Michael Kupersteine45af542015-06-30 13:36:19 +00002733static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002734_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2735{
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002736 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2737 (__v8df) __B,
2738 (__v8df) __C,
2739 (__mmask8) -1,
2740 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002741}
2742
Michael Kupersteine45af542015-06-30 13:36:19 +00002743static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002744_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2745{
2746 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2747 (__v8df) __B,
2748 (__v8df) __C,
2749 (__mmask8) __U,
2750 _MM_FROUND_CUR_DIRECTION);
2751}
2752
Michael Kupersteine45af542015-06-30 13:36:19 +00002753static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002754_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2755{
2756 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2757 (__v8df) __B,
2758 (__v8df) __C,
2759 (__mmask8) __U,
2760 _MM_FROUND_CUR_DIRECTION);
2761}
2762
Michael Kupersteine45af542015-06-30 13:36:19 +00002763static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002764_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2765{
2766 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2767 (__v8df) __B,
2768 -(__v8df) __C,
2769 (__mmask8) -1,
2770 _MM_FROUND_CUR_DIRECTION);
2771}
2772
Michael Kupersteine45af542015-06-30 13:36:19 +00002773static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002774_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2775{
2776 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2777 (__v8df) __B,
2778 -(__v8df) __C,
2779 (__mmask8) __U,
2780 _MM_FROUND_CUR_DIRECTION);
2781}
2782
/* Fused multiply-add (A * B) + C on 16 packed floats with rounding mode R.
 * The all-ones mask enables every lane. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002788
2789
/* Masked fused multiply-add (A * B) + C on 16 packed floats with rounding
 * mode R.  U is the write mask; per the Intel Intrinsics Guide, lanes whose
 * mask bit is clear take their value from A. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002795
2796
/* Masked fused multiply-add (A * B) + C on 16 packed floats with rounding
 * mode R, using the _mask3 builtin.  U is the write mask; per the Intel
 * Intrinsics Guide, lanes whose mask bit is clear take their value from C. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002802
2803
/* Zero-masked fused multiply-add (A * B) + C on 16 packed floats with
 * rounding mode R.  U is the write mask; per the Intel Intrinsics Guide,
 * lanes whose mask bit is clear are zeroed. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002809
2810
/* Fused multiply-subtract (A * B) - C on 16 packed floats with rounding
 * mode R.  Built on the fmadd builtin by negating C; the all-ones mask
 * enables every lane. */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002816
2817
/* Masked fused multiply-subtract (A * B) - C on 16 packed floats with
 * rounding mode R.  Built on the fmadd _mask builtin by negating C; U is the
 * write mask (per the Intel Intrinsics Guide, unselected lanes come from A). */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002823
2824
/* Zero-masked fused multiply-subtract (A * B) - C on 16 packed floats with
 * rounding mode R.  Built on the fmadd _maskz builtin by negating C; U is the
 * write mask (per the Intel Intrinsics Guide, unselected lanes are zeroed). */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002830
2831
/* Fused negated multiply-add -(A * B) + C on 16 packed floats with rounding
 * mode R.  Built on the fmadd builtin by negating A; the all-ones mask
 * enables every lane. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002837
2838
/* Masked fused negated multiply-add -(A * B) + C on 16 packed floats with
 * rounding mode R.  Built on the fmadd _mask3 builtin by negating A; U is the
 * write mask (per the Intel Intrinsics Guide, unselected lanes come from C). */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002844
2845
/* Zero-masked fused negated multiply-add -(A * B) + C on 16 packed floats
 * with rounding mode R.  Built on the fmadd _maskz builtin by negating A; U
 * is the write mask (per the Intel Intrinsics Guide, unselected lanes are
 * zeroed). */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002851
2852
/* Fused negated multiply-subtract -(A * B) - C on 16 packed floats with
 * rounding mode R.  Built on the fmadd builtin by negating both A and C; the
 * all-ones mask enables every lane. */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002858
2859
/* Zero-masked fused negated multiply-subtract -(A * B) - C on 16 packed
 * floats with rounding mode R.  Built on the fmadd _maskz builtin by negating
 * both A and C; U is the write mask (per the Intel Intrinsics Guide,
 * unselected lanes are zeroed). */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002865
2866
Michael Kupersteine45af542015-06-30 13:36:19 +00002867static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002868_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2869{
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002870 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2871 (__v16sf) __B,
2872 (__v16sf) __C,
2873 (__mmask16) -1,
2874 _MM_FROUND_CUR_DIRECTION);
2875}
2876
Michael Kupersteine45af542015-06-30 13:36:19 +00002877static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002878_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2879{
2880 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2881 (__v16sf) __B,
2882 (__v16sf) __C,
2883 (__mmask16) __U,
2884 _MM_FROUND_CUR_DIRECTION);
2885}
2886
Michael Kupersteine45af542015-06-30 13:36:19 +00002887static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002888_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2889{
2890 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2891 (__v16sf) __B,
2892 (__v16sf) __C,
2893 (__mmask16) __U,
2894 _MM_FROUND_CUR_DIRECTION);
2895}
2896
Michael Kupersteine45af542015-06-30 13:36:19 +00002897static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002898_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2899{
2900 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2901 (__v16sf) __B,
2902 (__v16sf) __C,
2903 (__mmask16) __U,
2904 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002905}
2906
Michael Kupersteine45af542015-06-30 13:36:19 +00002907static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002908_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2909{
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002910 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2911 (__v16sf) __B,
2912 -(__v16sf) __C,
2913 (__mmask16) -1,
2914 _MM_FROUND_CUR_DIRECTION);
2915}
2916
Michael Kupersteine45af542015-06-30 13:36:19 +00002917static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002918_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2919{
2920 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2921 (__v16sf) __B,
2922 -(__v16sf) __C,
2923 (__mmask16) __U,
2924 _MM_FROUND_CUR_DIRECTION);
2925}
2926
Michael Kupersteine45af542015-06-30 13:36:19 +00002927static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002928_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2929{
2930 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2931 (__v16sf) __B,
2932 -(__v16sf) __C,
2933 (__mmask16) __U,
2934 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002935}
2936
Michael Kupersteine45af542015-06-30 13:36:19 +00002937static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002938_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2939{
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002940 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2941 (__v16sf) __B,
2942 (__v16sf) __C,
2943 (__mmask16) -1,
2944 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002945}
2946
Michael Kupersteine45af542015-06-30 13:36:19 +00002947static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002948_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2949{
2950 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2951 (__v16sf) __B,
2952 (__v16sf) __C,
2953 (__mmask16) __U,
2954 _MM_FROUND_CUR_DIRECTION);
2955}
2956
Michael Kupersteine45af542015-06-30 13:36:19 +00002957static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002958_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2959{
2960 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2961 (__v16sf) __B,
2962 (__v16sf) __C,
2963 (__mmask16) __U,
2964 _MM_FROUND_CUR_DIRECTION);
2965}
2966
Michael Kupersteine45af542015-06-30 13:36:19 +00002967static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002968_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2969{
2970 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2971 (__v16sf) __B,
2972 -(__v16sf) __C,
2973 (__mmask16) -1,
2974 _MM_FROUND_CUR_DIRECTION);
2975}
2976
Michael Kupersteine45af542015-06-30 13:36:19 +00002977static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002978_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2979{
2980 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2981 (__v16sf) __B,
2982 -(__v16sf) __C,
2983 (__mmask16) __U,
2984 _MM_FROUND_CUR_DIRECTION);
2985}
2986
/* Fused multiply with alternating add/subtract of C (fmaddsub) on 8 packed
 * doubles with rounding mode R; see the Intel Intrinsics Guide for which
 * lanes add and which subtract.  The all-ones mask enables every lane. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002992
2993
/* Masked fmaddsub (multiply, then alternating add/subtract of C) on 8 packed
 * doubles with rounding mode R.  U is the write mask; per the Intel
 * Intrinsics Guide, lanes whose mask bit is clear take their value from A. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002999
3000
/* Masked fmaddsub (multiply, then alternating add/subtract of C) on 8 packed
 * doubles with rounding mode R, using the _mask3 builtin.  U is the write
 * mask; per the Intel Intrinsics Guide, lanes whose mask bit is clear take
 * their value from C. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003006
3007
/* Zero-masked fmaddsub (multiply, then alternating add/subtract of C) on 8
 * packed doubles with rounding mode R.  U is the write mask; per the Intel
 * Intrinsics Guide, lanes whose mask bit is clear are zeroed. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003013
3014
/* fmsubadd on 8 packed doubles with rounding mode R.  Built on the fmaddsub
 * builtin by negating C, which swaps the lanes that add with the lanes that
 * subtract.  The all-ones mask enables every lane. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003020
3021
/* Masked fmsubadd on 8 packed doubles with rounding mode R.  Built on the
 * fmaddsub _mask builtin by negating C; U is the write mask (per the Intel
 * Intrinsics Guide, unselected lanes come from A). */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003027
3028
/* Zero-masked fmsubadd on 8 packed doubles with rounding mode R.  Built on
 * the fmaddsub _maskz builtin by negating C; U is the write mask (per the
 * Intel Intrinsics Guide, unselected lanes are zeroed). */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003034
3035
Michael Kupersteine45af542015-06-30 13:36:19 +00003036static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003037_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
3038{
3039 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3040 (__v8df) __B,
3041 (__v8df) __C,
3042 (__mmask8) -1,
3043 _MM_FROUND_CUR_DIRECTION);
3044}
3045
Michael Kupersteine45af542015-06-30 13:36:19 +00003046static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003047_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3048{
3049 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3050 (__v8df) __B,
3051 (__v8df) __C,
3052 (__mmask8) __U,
3053 _MM_FROUND_CUR_DIRECTION);
3054}
3055
Michael Kupersteine45af542015-06-30 13:36:19 +00003056static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003057_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3058{
3059 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3060 (__v8df) __B,
3061 (__v8df) __C,
3062 (__mmask8) __U,
3063 _MM_FROUND_CUR_DIRECTION);
3064}
3065
Michael Kupersteine45af542015-06-30 13:36:19 +00003066static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003067_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3068{
3069 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3070 (__v8df) __B,
3071 (__v8df) __C,
3072 (__mmask8) __U,
3073 _MM_FROUND_CUR_DIRECTION);
3074}
3075
Michael Kupersteine45af542015-06-30 13:36:19 +00003076static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003077_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
3078{
3079 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3080 (__v8df) __B,
3081 -(__v8df) __C,
3082 (__mmask8) -1,
3083 _MM_FROUND_CUR_DIRECTION);
3084}
3085
Michael Kupersteine45af542015-06-30 13:36:19 +00003086static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003087_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3088{
3089 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3090 (__v8df) __B,
3091 -(__v8df) __C,
3092 (__mmask8) __U,
3093 _MM_FROUND_CUR_DIRECTION);
3094}
3095
Michael Kupersteine45af542015-06-30 13:36:19 +00003096static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003097_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3098{
3099 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3100 (__v8df) __B,
3101 -(__v8df) __C,
3102 (__mmask8) __U,
3103 _MM_FROUND_CUR_DIRECTION);
3104}
3105
/* Fused multiply with alternating add/subtract of C (fmaddsub) on 16 packed
 * floats with rounding mode R; see the Intel Intrinsics Guide for which
 * lanes add and which subtract.  The all-ones mask enables every lane. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003111
3112
/* Masked fmaddsub (multiply, then alternating add/subtract of C) on 16
 * packed floats with rounding mode R.  U is the write mask; per the Intel
 * Intrinsics Guide, lanes whose mask bit is clear take their value from A. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003118
3119
/* Masked fmaddsub (multiply, then alternating add/subtract of C) on 16
 * packed floats with rounding mode R, using the _mask3 builtin.  U is the
 * write mask; per the Intel Intrinsics Guide, lanes whose mask bit is clear
 * take their value from C. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003125
3126
/* Zero-masked fmaddsub (multiply, then alternating add/subtract of C) on 16
 * packed floats with rounding mode R.  U is the write mask; per the Intel
 * Intrinsics Guide, lanes whose mask bit is clear are zeroed. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003132
3133
/* fmsubadd on 16 packed floats with rounding mode R.  Built on the fmaddsub
 * builtin by negating C, which swaps the lanes that add with the lanes that
 * subtract.  The all-ones mask enables every lane. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003139
3140
/* Masked fmsubadd on 16 packed floats with rounding mode R.  Built on the
 * fmaddsub _mask builtin by negating C; U is the write mask (per the Intel
 * Intrinsics Guide, unselected lanes come from A). */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003146
3147
/* Zero-masked fmsubadd on 16 packed floats with rounding mode R.  Built on
 * the fmaddsub _maskz builtin by negating C; U is the write mask (per the
 * Intel Intrinsics Guide, unselected lanes are zeroed). */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003153
3154
Michael Kupersteine45af542015-06-30 13:36:19 +00003155static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003156_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3157{
3158 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3159 (__v16sf) __B,
3160 (__v16sf) __C,
3161 (__mmask16) -1,
3162 _MM_FROUND_CUR_DIRECTION);
3163}
3164
Michael Kupersteine45af542015-06-30 13:36:19 +00003165static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003166_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3167{
3168 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3169 (__v16sf) __B,
3170 (__v16sf) __C,
3171 (__mmask16) __U,
3172 _MM_FROUND_CUR_DIRECTION);
3173}
3174
Michael Kupersteine45af542015-06-30 13:36:19 +00003175static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003176_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3177{
3178 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3179 (__v16sf) __B,
3180 (__v16sf) __C,
3181 (__mmask16) __U,
3182 _MM_FROUND_CUR_DIRECTION);
3183}
3184
Michael Kupersteine45af542015-06-30 13:36:19 +00003185static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003186_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3187{
3188 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3189 (__v16sf) __B,
3190 (__v16sf) __C,
3191 (__mmask16) __U,
3192 _MM_FROUND_CUR_DIRECTION);
3193}
3194
Michael Kupersteine45af542015-06-30 13:36:19 +00003195static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003196_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3197{
3198 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3199 (__v16sf) __B,
3200 -(__v16sf) __C,
3201 (__mmask16) -1,
3202 _MM_FROUND_CUR_DIRECTION);
3203}
3204
Michael Kupersteine45af542015-06-30 13:36:19 +00003205static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003206_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3207{
3208 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3209 (__v16sf) __B,
3210 -(__v16sf) __C,
3211 (__mmask16) __U,
3212 _MM_FROUND_CUR_DIRECTION);
3213}
3214
Michael Kupersteine45af542015-06-30 13:36:19 +00003215static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003216_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3217{
3218 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3219 (__v16sf) __B,
3220 -(__v16sf) __C,
3221 (__mmask16) __U,
3222 _MM_FROUND_CUR_DIRECTION);
3223}
3224
/* Masked fused multiply-subtract on 8 packed doubles with rounding mode R,
 * via the dedicated vfmsub _mask3 builtin.  NOTE(review): a dedicated builtin
 * is used instead of negating C, presumably because the _mask3 form passes C
 * through to unselected lanes and that value must stay un-negated. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003230
3231
Michael Kupersteine45af542015-06-30 13:36:19 +00003232static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003233_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3234{
3235 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3236 (__v8df) __B,
3237 (__v8df) __C,
3238 (__mmask8) __U,
3239 _MM_FROUND_CUR_DIRECTION);
3240}
3241
/* Masked fused multiply-subtract on 16 packed floats with rounding mode R,
 * via the dedicated vfmsub _mask3 builtin.  NOTE(review): a dedicated builtin
 * is used instead of negating C, presumably because the _mask3 form passes C
 * through to unselected lanes and that value must stay un-negated. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003247
3248
Michael Kupersteine45af542015-06-30 13:36:19 +00003249static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003250_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3251{
3252 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3253 (__v16sf) __B,
3254 (__v16sf) __C,
3255 (__mmask16) __U,
3256 _MM_FROUND_CUR_DIRECTION);
3257}
3258
/* Masked fmsubadd on 8 packed doubles with rounding mode R, via the
 * dedicated vfmsubadd _mask3 builtin.  NOTE(review): a dedicated builtin is
 * used instead of negating C, presumably because the _mask3 form passes C
 * through to unselected lanes and that value must stay un-negated. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003264
3265
Michael Kupersteine45af542015-06-30 13:36:19 +00003266static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003267_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3268{
3269 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3270 (__v8df) __B,
3271 (__v8df) __C,
3272 (__mmask8) __U,
3273 _MM_FROUND_CUR_DIRECTION);
3274}
3275
/* Masked fmsubadd on 16 packed floats with rounding mode R, via the
 * dedicated vfmsubadd _mask3 builtin.  NOTE(review): a dedicated builtin is
 * used instead of negating C, presumably because the _mask3 form passes C
 * through to unselected lanes and that value must stay un-negated. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003281
3282
Michael Kupersteine45af542015-06-30 13:36:19 +00003283static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003284_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3285{
3286 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3287 (__v16sf) __B,
3288 (__v16sf) __C,
3289 (__mmask16) __U,
3290 _MM_FROUND_CUR_DIRECTION);
3291}
3292
/* Masked fused negated multiply-add on 8 packed doubles with rounding mode
 * R, via the dedicated vfnmadd _mask builtin.  NOTE(review): a dedicated
 * builtin is used instead of negating A, presumably because the _mask form
 * passes A through to unselected lanes and that value must stay un-negated. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003298
3299
Michael Kupersteine45af542015-06-30 13:36:19 +00003300static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003301_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3302{
3303 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3304 (__v8df) __B,
3305 (__v8df) __C,
3306 (__mmask8) __U,
3307 _MM_FROUND_CUR_DIRECTION);
3308}
3309
/* Masked fused negated multiply-add on 16 packed floats with rounding mode
 * R, via the dedicated vfnmadd _mask builtin.  NOTE(review): a dedicated
 * builtin is used instead of negating A, presumably because the _mask form
 * passes A through to unselected lanes and that value must stay un-negated. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003315
3316
Michael Kupersteine45af542015-06-30 13:36:19 +00003317static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003318_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3319{
3320 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3321 (__v16sf) __B,
3322 (__v16sf) __C,
3323 (__mmask16) __U,
3324 _MM_FROUND_CUR_DIRECTION);
3325}
3326
/* Masked fused negated multiply-subtract on 8 packed doubles with rounding
 * mode R, via the dedicated vfnmsub _mask builtin.  NOTE(review): a dedicated
 * builtin is used instead of negating A, presumably because the _mask form
 * passes A through to unselected lanes and that value must stay un-negated. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003332
3333
3334#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
Craig Topper8c18e112016-05-17 04:41:50 +00003335 (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
3336 (__v8df)(__m512d)(B), \
3337 (__v8df)(__m512d)(C), \
3338 (__mmask8)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003339
3340
Michael Kupersteine45af542015-06-30 13:36:19 +00003341static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003342_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3343{
3344 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3345 (__v8df) __B,
3346 (__v8df) __C,
3347 (__mmask8) __U,
3348 _MM_FROUND_CUR_DIRECTION);
3349}
3350
Michael Kupersteine45af542015-06-30 13:36:19 +00003351static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003352_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3353{
3354 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3355 (__v8df) __B,
3356 (__v8df) __C,
3357 (__mmask8) __U,
3358 _MM_FROUND_CUR_DIRECTION);
3359}
3360
3361#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
Craig Topper8c18e112016-05-17 04:41:50 +00003362 (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
3363 (__v16sf)(__m512)(B), \
3364 (__v16sf)(__m512)(C), \
3365 (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003366
3367
3368#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
Craig Topper8c18e112016-05-17 04:41:50 +00003369 (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
3370 (__v16sf)(__m512)(B), \
3371 (__v16sf)(__m512)(C), \
3372 (__mmask16)(U), (int)(R)); })
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003373
3374
Michael Kupersteine45af542015-06-30 13:36:19 +00003375static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003376_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3377{
3378 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3379 (__v16sf) __B,
3380 (__v16sf) __C,
3381 (__mmask16) __U,
3382 _MM_FROUND_CUR_DIRECTION);
3383}
3384
Michael Kupersteine45af542015-06-30 13:36:19 +00003385static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003386_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3387{
3388 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3389 (__v16sf) __B,
3390 (__v16sf) __C,
3391 (__mmask16) __U,
3392 _MM_FROUND_CUR_DIRECTION);
3393}
3394
3395
3396
/* Vector permutations */
3398
Michael Kupersteine45af542015-06-30 13:36:19 +00003399static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003400_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3401{
3402 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3403 /* idx */ ,
3404 (__v16si) __A,
3405 (__v16si) __B,
3406 (__mmask16) -1);
3407}
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003408
3409static __inline__ __m512i __DEFAULT_FN_ATTRS
3410_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
3411 __m512i __I, __m512i __B)
3412{
3413 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3414 /* idx */ ,
3415 (__v16si) __A,
3416 (__v16si) __B,
3417 (__mmask16) __U);
3418}
3419
3420static __inline__ __m512i __DEFAULT_FN_ATTRS
3421_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
3422 __m512i __I, __m512i __B)
3423{
3424 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
3425 /* idx */ ,
3426 (__v16si) __A,
3427 (__v16si) __B,
3428 (__mmask16) __U);
3429}
3430
Michael Kupersteine45af542015-06-30 13:36:19 +00003431static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003432_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3433{
3434 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3435 /* idx */ ,
3436 (__v8di) __A,
3437 (__v8di) __B,
3438 (__mmask8) -1);
3439}
3440
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003441static __inline__ __m512i __DEFAULT_FN_ATTRS
3442_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
3443 __m512i __B)
Adam Nemet0d5bb552014-07-28 17:14:40 +00003444{
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003445 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
Adam Nemet0d5bb552014-07-28 17:14:40 +00003446 /* idx */ ,
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003447 (__v8di) __A,
3448 (__v8di) __B,
3449 (__mmask8) __U);
3450}
3451
3452
3453static __inline__ __m512i __DEFAULT_FN_ATTRS
3454_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
3455 __m512i __I, __m512i __B)
3456{
3457 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
3458 /* idx */ ,
3459 (__v8di) __A,
3460 (__v8di) __B,
3461 (__mmask8) __U);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003462}
3463
/* Shuffles the pair (B, A) -- B is the first shuffle operand -- and picks
 * eight consecutive 64-bit lanes starting at index (I & 0x7).  I is
 * expanded eight times, so it must be free of side effects. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
                                   (__v8di)(__m512i)(A), \
                                   ((int)(I) & 0x7) + 0, \
                                   ((int)(I) & 0x7) + 1, \
                                   ((int)(I) & 0x7) + 2, \
                                   ((int)(I) & 0x7) + 3, \
                                   ((int)(I) & 0x7) + 4, \
                                   ((int)(I) & 0x7) + 5, \
                                   ((int)(I) & 0x7) + 6, \
                                   ((int)(I) & 0x7) + 7); })

/* Blends the _mm512_alignr_epi64 result with W through
 * __builtin_ia32_selectq_512 under mask U. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
      (__v8di)(__m512i)(W)); })

/* As the masked form, but the alternative operand is an all-zero vector. */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()); })
Michael Zuckerman533e0652016-04-28 12:47:30 +00003485
/* Shuffles the pair (B, A) -- B is the first shuffle operand -- and picks
 * sixteen consecutive 32-bit lanes starting at index (I & 0xf).  I is
 * expanded sixteen times, so it must be free of side effects. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
                                   (__v16si)(__m512i)(A), \
                                   ((int)(I) & 0xf) + 0, \
                                   ((int)(I) & 0xf) + 1, \
                                   ((int)(I) & 0xf) + 2, \
                                   ((int)(I) & 0xf) + 3, \
                                   ((int)(I) & 0xf) + 4, \
                                   ((int)(I) & 0xf) + 5, \
                                   ((int)(I) & 0xf) + 6, \
                                   ((int)(I) & 0xf) + 7, \
                                   ((int)(I) & 0xf) + 8, \
                                   ((int)(I) & 0xf) + 9, \
                                   ((int)(I) & 0xf) + 10, \
                                   ((int)(I) & 0xf) + 11, \
                                   ((int)(I) & 0xf) + 12, \
                                   ((int)(I) & 0xf) + 13, \
                                   ((int)(I) & 0xf) + 14, \
                                   ((int)(I) & 0xf) + 15); })

/* Blends the _mm512_alignr_epi32 result with W through
 * __builtin_ia32_selectd_512 under mask U. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
      (__v16si)(__m512i)(W)); })

/* As the masked form, but the alternative operand is an all-zero vector. */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()); })
/* Vector Extract */
3516
/* Extracts four doubles from A: lanes 4..7 when bit 0 of I is set, lanes
 * 0..3 otherwise.  The second shuffle operand is an undefined vector and is
 * never indexed.  I is expanded four times. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   ((I) & 1) ? 4 : 0, \
                                   ((I) & 1) ? 5 : 1, \
                                   ((I) & 1) ? 6 : 2, \
                                   ((I) & 1) ? 7 : 3); })

/* Blends the extracted half with W through __builtin_ia32_selectpd_256
 * under mask U.  W is first normalised to __m256d, matching how the other
 * masked macros in this header treat their pass-through operand. */
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
      (__v4df)(__m256d)(W)); })

/* As the masked form, but the alternative operand is an all-zero vector. */
#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
      (__v4df)_mm256_setzero_pd()); })
Michael Zuckerman2564d2f2016-05-10 10:14:50 +00003534
/* Extracts four floats from A: lanes 4*(I & 3) .. 4*(I & 3) + 3.  The
 * second shuffle operand is an undefined vector and is never indexed.  I is
 * expanded four times. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  0 + ((I) & 0x3) * 4, \
                                  1 + ((I) & 0x3) * 4, \
                                  2 + ((I) & 0x3) * 4, \
                                  3 + ((I) & 0x3) * 4); })

/* Blends the extracted quarter with W through __builtin_ia32_selectps_128
 * under mask U.  W is first normalised to __m128, matching how the other
 * masked macros in this header treat their pass-through operand. */
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
      (__v4sf)(__m128)(W)); })

/* As the masked form, but the alternative operand is an all-zero vector. */
#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
      (__v4sf)_mm_setzero_ps()); })
3552
/* Vector Blend */
3554
Michael Kupersteine45af542015-06-30 13:36:19 +00003555static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003556_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3557{
Igor Bregeraadb8762016-06-08 13:59:20 +00003558 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
Adam Nemet0d5bb552014-07-28 17:14:40 +00003559 (__v8df) __W,
Igor Bregeraadb8762016-06-08 13:59:20 +00003560 (__v8df) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003561}
3562
Michael Kupersteine45af542015-06-30 13:36:19 +00003563static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003564_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3565{
Igor Bregeraadb8762016-06-08 13:59:20 +00003566 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
Adam Nemet0d5bb552014-07-28 17:14:40 +00003567 (__v16sf) __W,
Igor Bregeraadb8762016-06-08 13:59:20 +00003568 (__v16sf) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003569}
3570
Michael Kupersteine45af542015-06-30 13:36:19 +00003571static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003572_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3573{
Igor Bregeraadb8762016-06-08 13:59:20 +00003574 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
Adam Nemet0d5bb552014-07-28 17:14:40 +00003575 (__v8di) __W,
Igor Bregeraadb8762016-06-08 13:59:20 +00003576 (__v8di) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003577}
3578
Michael Kupersteine45af542015-06-30 13:36:19 +00003579static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003580_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3581{
Igor Bregeraadb8762016-06-08 13:59:20 +00003582 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
Adam Nemet0d5bb552014-07-28 17:14:40 +00003583 (__v16si) __W,
Igor Bregeraadb8762016-06-08 13:59:20 +00003584 (__v16si) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003585}
3586
/* Compare */
3588
/* Float compare producing a 16-bit mask.  P is the comparison predicate and
 * R the rounding/exception argument handed to __builtin_ia32_cmpps512_mask;
 * the unmasked form passes an all-ones input mask. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R)); })

/* Non-_round_ forms fix R to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate shorthands; each pair binds one _CMP_* constant. */
#define _mm512_cmpeq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3643
/* Double compare producing an 8-bit mask.  P is the comparison predicate and
 * R the rounding/exception argument handed to __builtin_ia32_cmppd512_mask;
 * the unmasked form passes an all-ones input mask. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R)); })

/* Non-_round_ forms fix R to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate shorthands; each pair binds one _CMP_* constant. */
#define _mm512_cmpeq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3698
/* Conversion */
3700
/* All three forms call __builtin_ia32_cvttps2udq512_mask(A, src, mask, R);
 * they differ only in the src/mask pair:
 *   plain - undefined src, all-ones mask
 *   mask  - caller's W and U
 *   maskz - zero src, caller's U */
#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_undefined_epi32(), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
Michael Zuckerman6170c152016-06-01 14:41:41 +00003715
3716
Michael Kupersteine45af542015-06-30 13:36:19 +00003717static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003718_mm512_cvttps_epu32(__m512 __A)
3719{
3720 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3721 (__v16si)
3722 _mm512_setzero_si512 (),
3723 (__mmask16) -1,
3724 _MM_FROUND_CUR_DIRECTION);
3725}
3726
Michael Zuckermanf1544752016-05-09 10:32:51 +00003727static __inline__ __m512i __DEFAULT_FN_ATTRS
3728_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3729{
3730 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3731 (__v16si) __W,
3732 (__mmask16) __U,
3733 _MM_FROUND_CUR_DIRECTION);
3734}
3735
3736static __inline__ __m512i __DEFAULT_FN_ATTRS
3737_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3738{
3739 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3740 (__v16si) _mm512_setzero_si512 (),
3741 (__mmask16) __U,
3742 _MM_FROUND_CUR_DIRECTION);
3743}
3744
/* All three forms call __builtin_ia32_cvtdq2ps512_mask(A, src, mask, R);
 * they differ only in the src/mask pair:
 *   plain - zero src, all-ones mask
 *   mask  - caller's W and U
 *   maskz - zero src, caller's U */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)); })
Michael Zuckerman186d8672016-05-31 11:27:34 +00003759
/* All three forms call __builtin_ia32_cvtudq2ps512_mask(A, src, mask, R);
 * they differ only in the src/mask pair:
 *   plain - zero src, all-ones mask
 *   mask  - caller's W and U
 *   maskz - zero src, caller's U */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)); })
Michael Zuckerman186d8672016-05-31 11:27:34 +00003774
Michael Zuckermanf1544752016-05-09 10:32:51 +00003775static __inline__ __m512 __DEFAULT_FN_ATTRS
3776_mm512_cvtepu32_ps (__m512i __A)
3777{
Craig Topper842171d2018-05-21 20:19:17 +00003778 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003779}
3780
3781static __inline__ __m512 __DEFAULT_FN_ATTRS
3782_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3783{
Craig Topper842171d2018-05-21 20:19:17 +00003784 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3785 (__v16sf)_mm512_cvtepu32_ps(__A),
3786 (__v16sf)__W);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003787}
3788
3789static __inline__ __m512 __DEFAULT_FN_ATTRS
3790_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3791{
Craig Topper842171d2018-05-21 20:19:17 +00003792 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3793 (__v16sf)_mm512_cvtepu32_ps(__A),
3794 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanf1544752016-05-09 10:32:51 +00003795}
3796
Michael Kupersteine45af542015-06-30 13:36:19 +00003797static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003798_mm512_cvtepi32_pd(__m256i __A)
3799{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003800 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003801}
3802
Michael Zuckermanf1544752016-05-09 10:32:51 +00003803static __inline__ __m512d __DEFAULT_FN_ATTRS
3804_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3805{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003806 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3807 (__v8df)_mm512_cvtepi32_pd(__A),
3808 (__v8df)__W);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003809}
3810
3811static __inline__ __m512d __DEFAULT_FN_ATTRS
3812_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3813{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003814 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3815 (__v8df)_mm512_cvtepi32_pd(__A),
3816 (__v8df)_mm512_setzero_pd());
Michael Zuckermanf1544752016-05-09 10:32:51 +00003817}
3818
Ayman Musa2e250e82016-09-27 14:06:32 +00003819static __inline__ __m512d __DEFAULT_FN_ATTRS
3820_mm512_cvtepi32lo_pd(__m512i __A)
3821{
3822 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3823}
3824
3825static __inline__ __m512d __DEFAULT_FN_ATTRS
3826_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3827{
3828 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3829}
3830
Michael Zuckermanf1544752016-05-09 10:32:51 +00003831static __inline__ __m512 __DEFAULT_FN_ATTRS
3832_mm512_cvtepi32_ps (__m512i __A)
3833{
Craig Topper842171d2018-05-21 20:19:17 +00003834 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003835}
3836
3837static __inline__ __m512 __DEFAULT_FN_ATTRS
3838_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3839{
Craig Topper842171d2018-05-21 20:19:17 +00003840 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3841 (__v16sf)_mm512_cvtepi32_ps(__A),
3842 (__v16sf)__W);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003843}
3844
3845static __inline__ __m512 __DEFAULT_FN_ATTRS
3846_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3847{
Craig Topper842171d2018-05-21 20:19:17 +00003848 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3849 (__v16sf)_mm512_cvtepi32_ps(__A),
3850 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanf1544752016-05-09 10:32:51 +00003851}
3852
Michael Kupersteine45af542015-06-30 13:36:19 +00003853static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003854_mm512_cvtepu32_pd(__m256i __A)
3855{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003856 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003857}
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003858
Michael Zuckermanf1544752016-05-09 10:32:51 +00003859static __inline__ __m512d __DEFAULT_FN_ATTRS
3860_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3861{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003862 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3863 (__v8df)_mm512_cvtepu32_pd(__A),
3864 (__v8df)__W);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003865}
3866
3867static __inline__ __m512d __DEFAULT_FN_ATTRS
3868_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3869{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003870 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3871 (__v8df)_mm512_cvtepu32_pd(__A),
3872 (__v8df)_mm512_setzero_pd());
Michael Zuckermanf1544752016-05-09 10:32:51 +00003873}
3874
Ayman Musa2e250e82016-09-27 14:06:32 +00003875static __inline__ __m512d __DEFAULT_FN_ATTRS
3876_mm512_cvtepu32lo_pd(__m512i __A)
3877{
3878 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3879}
3880
3881static __inline__ __m512d __DEFAULT_FN_ATTRS
3882_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3883{
3884 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3885}
3886
/* All three forms call __builtin_ia32_cvtpd2ps512_mask(A, src, mask, R) and
 * yield a __m256; they differ only in the src/mask pair:
 *   plain - zero src, all-ones mask
 *   mask  - caller's W and U
 *   maskz - zero src, caller's U */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), (int)(R)); })
3901
Michael Zuckermanf1544752016-05-09 10:32:51 +00003902static __inline__ __m256 __DEFAULT_FN_ATTRS
3903_mm512_cvtpd_ps (__m512d __A)
3904{
3905 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3906 (__v8sf) _mm256_undefined_ps (),
3907 (__mmask8) -1,
3908 _MM_FROUND_CUR_DIRECTION);
3909}
3910
3911static __inline__ __m256 __DEFAULT_FN_ATTRS
3912_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3913{
3914 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3915 (__v8sf) __W,
3916 (__mmask8) __U,
3917 _MM_FROUND_CUR_DIRECTION);
3918}
3919
3920static __inline__ __m256 __DEFAULT_FN_ATTRS
3921_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3922{
3923 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3924 (__v8sf) _mm256_setzero_ps (),
3925 (__mmask8) __U,
3926 _MM_FROUND_CUR_DIRECTION);
3927}
Michael Zuckerman2564d2f2016-05-10 10:14:50 +00003928
Ayman Musa2e250e82016-09-27 14:06:32 +00003929static __inline__ __m512 __DEFAULT_FN_ATTRS
3930_mm512_cvtpd_pslo (__m512d __A)
3931{
3932 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3933 (__v8sf) _mm256_setzero_ps (),
3934 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3935}
3936
3937static __inline__ __m512 __DEFAULT_FN_ATTRS
3938_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3939{
3940 return (__m512) __builtin_shufflevector (
3941 (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
3942 __U, __A),
3943 (__v8sf) _mm256_setzero_ps (),
3944 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3945}
3946
/* All three forms call __builtin_ia32_vcvtps2ph512_mask(A, I, src, mask) and
 * yield a __m256i; they differ only in the src/mask pair:
 *   plain - undefined src, all-ones mask
 *   mask  - caller's W (vector) and U (mask)
 *   maskz - zero src, caller's U
 * Macro parameters follow the header-wide convention: W is the pass-through
 * vector, U is the mask.  Argument positions are unchanged. */
#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)); })

#define _mm512_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })
Michael Zuckerman6170c152016-06-01 14:41:41 +00003961
/* All three forms call __builtin_ia32_vcvtps2ph512_mask(A, I, src, mask) and
 * yield a __m256i; they differ only in the src/mask pair:
 *   plain - zero src, all-ones mask
 *   mask  - caller's W (vector) and U (mask)
 *   maskz - zero src, caller's U
 * Macro parameters follow the header-wide convention: W is the pass-through
 * vector, U is the mask.  Argument positions are unchanged. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvtps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)); })

#define _mm512_maskz_cvtps_ph(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })
Michael Zuckerman6170c152016-06-01 14:41:41 +00003976
/* All three forms call __builtin_ia32_vcvtph2ps512_mask(A, src, mask, R);
 * they differ only in the src/mask pair:
 *   plain - undefined src, all-ones mask
 *   mask  - caller's W and U
 *   maskz - zero src, caller's U */
#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)); })
3991
3992
Michael Kupersteine45af542015-06-30 13:36:19 +00003993static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00003994_mm512_cvtph_ps(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003995{
3996 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00003997 (__v16sf)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003998 _mm512_setzero_ps (),
3999 (__mmask16) -1,
4000 _MM_FROUND_CUR_DIRECTION);
4001}
4002
Michael Zuckermanf1544752016-05-09 10:32:51 +00004003static __inline__ __m512 __DEFAULT_FN_ATTRS
4004_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004005{
Michael Zuckermanf1544752016-05-09 10:32:51 +00004006 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4007 (__v16sf) __W,
4008 (__mmask16) __U,
4009 _MM_FROUND_CUR_DIRECTION);
4010}
4011
4012static __inline__ __m512 __DEFAULT_FN_ATTRS
4013_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
4014{
4015 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4016 (__v16sf) _mm512_setzero_ps (),
4017 (__mmask16) __U,
4018 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004019}
4020
/* Unmasked form: cvttpd2dq512 builtin on A with a zeroed __v8si vector,
 * an all-ones 8-bit mask and (int)R as the final argument. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)); })
4025
/* Masked form: W is forwarded as the vector operand preceding the mask,
 * U as the mask and R as the final int argument. */
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)); })
4030
/* Zero-masked form: a zeroed __v8si vector is forwarded ahead of mask U;
 * R is the final int argument. */
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)); })
4035
Michael Kupersteine45af542015-06-30 13:36:19 +00004036static __inline __m256i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00004037_mm512_cvttpd_epi32(__m512d __a)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004038{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00004039 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004040 (__v8si)_mm256_setzero_si256(),
4041 (__mmask8) -1,
4042 _MM_FROUND_CUR_DIRECTION);
4043}
4044
Michael Zuckermanf1544752016-05-09 10:32:51 +00004045static __inline__ __m256i __DEFAULT_FN_ATTRS
4046_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4047{
4048 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4049 (__v8si) __W,
4050 (__mmask8) __U,
4051 _MM_FROUND_CUR_DIRECTION);
4052}
4053
4054static __inline__ __m256i __DEFAULT_FN_ATTRS
4055_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
4056{
4057 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4058 (__v8si) _mm256_setzero_si256 (),
4059 (__mmask8) __U,
4060 _MM_FROUND_CUR_DIRECTION);
4061}
4062
/* Unmasked form: cvttps2dq512 builtin on A with a zeroed __v16si vector,
 * an all-ones 16-bit mask and (int)R as the final argument. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)); })
Michael Zuckerman2564d2f2016-05-10 10:14:50 +00004067
/* Masked form: W is forwarded as the vector operand preceding the mask,
 * U as the mask and R as the final int argument. */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })
4072
/* Zero-masked form: a zeroed __v16si vector is forwarded ahead of mask U;
 * R is the final int argument. */
#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
4077
Michael Zuckermanf1544752016-05-09 10:32:51 +00004078static __inline __m512i __DEFAULT_FN_ATTRS
4079_mm512_cvttps_epi32(__m512 __a)
4080{
4081 return (__m512i)
4082 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
4083 (__v16si) _mm512_setzero_si512 (),
4084 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
4085}
4086
4087static __inline__ __m512i __DEFAULT_FN_ATTRS
4088_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4089{
4090 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4091 (__v16si) __W,
4092 (__mmask16) __U,
4093 _MM_FROUND_CUR_DIRECTION);
4094}
4095
4096static __inline__ __m512i __DEFAULT_FN_ATTRS
4097_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
4098{
4099 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4100 (__v16si) _mm512_setzero_si512 (),
4101 (__mmask16) __U,
4102 _MM_FROUND_CUR_DIRECTION);
4103}
Craig Topper72c7d512015-02-01 07:35:35 +00004104
/* Unmasked form: cvtps2dq512 builtin on A with a zeroed __v16si vector,
 * an all-ones 16-bit mask and (int)R as the final argument. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)); })
Craig Topper72c7d512015-02-01 07:35:35 +00004109
/* Masked form: W is forwarded as the vector operand preceding the mask,
 * U as the mask and R as the final int argument. */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })
Michael Zuckerman186d8672016-05-31 11:27:34 +00004114
/* Zero-masked form: a zeroed __v16si vector is forwarded ahead of mask U;
 * R is the final int argument. */
#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
Michael Zuckerman186d8672016-05-31 11:27:34 +00004119
Michael Zuckermanf1544752016-05-09 10:32:51 +00004120static __inline__ __m512i __DEFAULT_FN_ATTRS
4121_mm512_cvtps_epi32 (__m512 __A)
4122{
4123 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4124 (__v16si) _mm512_undefined_epi32 (),
4125 (__mmask16) -1,
4126 _MM_FROUND_CUR_DIRECTION);
4127}
4128
4129static __inline__ __m512i __DEFAULT_FN_ATTRS
4130_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4131{
4132 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4133 (__v16si) __W,
4134 (__mmask16) __U,
4135 _MM_FROUND_CUR_DIRECTION);
4136}
4137
4138static __inline__ __m512i __DEFAULT_FN_ATTRS
4139_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
4140{
4141 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4142 (__v16si)
4143 _mm512_setzero_si512 (),
4144 (__mmask16) __U,
4145 _MM_FROUND_CUR_DIRECTION);
4146}
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00004147
/* Unmasked form: cvtpd2dq512 builtin on A with a zeroed __v8si vector,
 * an all-ones 8-bit mask and (int)R as the final argument. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)); })
Craig Topper72c7d512015-02-01 07:35:35 +00004152
/* Masked form: W is forwarded as the vector operand preceding the mask,
 * U as the mask and R as the final int argument. */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)); })
4157
/* Zero-masked form: a zeroed __v8si vector is forwarded ahead of mask U;
 * R is the final int argument. */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)); })
4162
Michael Zuckermanf1544752016-05-09 10:32:51 +00004163static __inline__ __m256i __DEFAULT_FN_ATTRS
4164_mm512_cvtpd_epi32 (__m512d __A)
4165{
4166 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4167 (__v8si)
4168 _mm256_undefined_si256 (),
4169 (__mmask8) -1,
4170 _MM_FROUND_CUR_DIRECTION);
4171}
4172
4173static __inline__ __m256i __DEFAULT_FN_ATTRS
4174_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4175{
4176 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4177 (__v8si) __W,
4178 (__mmask8) __U,
4179 _MM_FROUND_CUR_DIRECTION);
4180}
4181
4182static __inline__ __m256i __DEFAULT_FN_ATTRS
4183_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4184{
4185 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4186 (__v8si)
4187 _mm256_setzero_si256 (),
4188 (__mmask8) __U,
4189 _MM_FROUND_CUR_DIRECTION);
4190}
4191
/* Unmasked form: cvtps2udq512 builtin on A with a zeroed __v16si vector,
 * an all-ones 16-bit mask and (int)R as the final argument. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)); })
Craig Topper72c7d512015-02-01 07:35:35 +00004196
/* Masked form: W is forwarded as the vector operand preceding the mask,
 * U as the mask and R as the final int argument. */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })
Michael Zuckerman186d8672016-05-31 11:27:34 +00004201
/* Zero-masked form: a zeroed __v16si vector is forwarded ahead of mask U;
 * R is the final int argument. */
#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
Michael Zuckerman186d8672016-05-31 11:27:34 +00004206
Michael Zuckermanf1544752016-05-09 10:32:51 +00004207static __inline__ __m512i __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00004208_mm512_cvtps_epu32 ( __m512 __A)
Michael Zuckermanf1544752016-05-09 10:32:51 +00004209{
4210 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4211 (__v16si)\
4212 _mm512_undefined_epi32 (),\
4213 (__mmask16) -1,\
4214 _MM_FROUND_CUR_DIRECTION);\
4215}
4216
4217static __inline__ __m512i __DEFAULT_FN_ATTRS
4218_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4219{
4220 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4221 (__v16si) __W,
4222 (__mmask16) __U,
4223 _MM_FROUND_CUR_DIRECTION);
4224}
4225
Michael Zuckerman9fcf3552016-05-30 13:22:12 +00004226static __inline__ __m512i __DEFAULT_FN_ATTRS
4227_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4228{
4229 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00004230 (__v16si)
Michael Zuckerman9fcf3552016-05-30 13:22:12 +00004231 _mm512_setzero_si512 (),
4232 (__mmask16) __U ,
4233 _MM_FROUND_CUR_DIRECTION);
4234}
4235
/* Unmasked form: cvtpd2udq512 builtin on A with a zeroed __v8si vector,
 * an all-ones 8-bit mask and (int)R as the final argument. */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)); })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004240
/* Masked form of _mm512_cvt_roundpd_epu32.
 *   W - 256-bit vector forwarded as the operand preceding the mask.
 *   U - mask, cast to __mmask8.
 *   A - 512-bit double input.
 *   R - forwarded as the builtin's final int argument.
 * W goes through the same (__m256i) intermediate cast as in the sibling
 * _mm512_mask_cvt*_roundpd_epi32 macros before being reinterpreted as
 * __v8si, so the argument is handled uniformly across the family. */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)); })
Michael Zuckerman186d8672016-05-31 11:27:34 +00004245
/* Zero-masked form: a zeroed __v8si vector is forwarded ahead of mask U;
 * R is the final int argument. */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)); })
4250
Michael Zuckermanf1544752016-05-09 10:32:51 +00004251static __inline__ __m256i __DEFAULT_FN_ATTRS
4252_mm512_cvtpd_epu32 (__m512d __A)
4253{
4254 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4255 (__v8si)
4256 _mm256_undefined_si256 (),
4257 (__mmask8) -1,
4258 _MM_FROUND_CUR_DIRECTION);
4259}
4260
4261static __inline__ __m256i __DEFAULT_FN_ATTRS
4262_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4263{
4264 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4265 (__v8si) __W,
4266 (__mmask8) __U,
4267 _MM_FROUND_CUR_DIRECTION);
4268}
4269
4270static __inline__ __m256i __DEFAULT_FN_ATTRS
4271_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4272{
4273 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4274 (__v8si)
4275 _mm256_setzero_si256 (),
4276 (__mmask8) __U,
4277 _MM_FROUND_CUR_DIRECTION);
4278}
Craig Topper79f53ca2016-06-23 06:36:42 +00004279
Simon Pilgrim60e92492017-03-21 12:46:13 +00004280static __inline__ double __DEFAULT_FN_ATTRS
4281_mm512_cvtsd_f64(__m512d __a)
4282{
4283 return __a[0];
4284}
4285
4286static __inline__ float __DEFAULT_FN_ATTRS
4287_mm512_cvtss_f32(__m512 __a)
4288{
4289 return __a[0];
4290}
4291
/* Unpack and Interleave */
Craig Topper79f53ca2016-06-23 06:36:42 +00004293
Michael Kupersteine45af542015-06-30 13:36:19 +00004294static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00004295_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4296{
Craig Topper79f53ca2016-06-23 06:36:42 +00004297 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4298 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4299}
4300
4301static __inline__ __m512d __DEFAULT_FN_ATTRS
4302_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4303{
4304 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4305 (__v8df)_mm512_unpackhi_pd(__A, __B),
4306 (__v8df)__W);
4307}
4308
4309static __inline__ __m512d __DEFAULT_FN_ATTRS
4310_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4311{
4312 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4313 (__v8df)_mm512_unpackhi_pd(__A, __B),
4314 (__v8df)_mm512_setzero_pd());
Adam Nemet63a951e2015-01-14 01:31:17 +00004315}
4316
Michael Kupersteine45af542015-06-30 13:36:19 +00004317static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00004318_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4319{
Craig Topper79f53ca2016-06-23 06:36:42 +00004320 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4321 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4322}
4323
4324static __inline__ __m512d __DEFAULT_FN_ATTRS
4325_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4326{
4327 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4328 (__v8df)_mm512_unpacklo_pd(__A, __B),
4329 (__v8df)__W);
4330}
4331
4332static __inline__ __m512d __DEFAULT_FN_ATTRS
4333_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4334{
4335 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4336 (__v8df)_mm512_unpacklo_pd(__A, __B),
4337 (__v8df)_mm512_setzero_pd());
Adam Nemet63a951e2015-01-14 01:31:17 +00004338}
4339
Michael Kupersteine45af542015-06-30 13:36:19 +00004340static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00004341_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4342{
Craig Topper79f53ca2016-06-23 06:36:42 +00004343 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4344 2, 18, 3, 19,
4345 2+4, 18+4, 3+4, 19+4,
4346 2+8, 18+8, 3+8, 19+8,
4347 2+12, 18+12, 3+12, 19+12);
4348}
4349
4350static __inline__ __m512 __DEFAULT_FN_ATTRS
4351_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4352{
4353 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4354 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4355 (__v16sf)__W);
4356}
4357
4358static __inline__ __m512 __DEFAULT_FN_ATTRS
4359_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4360{
4361 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4362 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4363 (__v16sf)_mm512_setzero_ps());
Adam Nemet63a951e2015-01-14 01:31:17 +00004364}
4365
Michael Kupersteine45af542015-06-30 13:36:19 +00004366static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00004367_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4368{
Craig Topper79f53ca2016-06-23 06:36:42 +00004369 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4370 0, 16, 1, 17,
4371 0+4, 16+4, 1+4, 17+4,
4372 0+8, 16+8, 1+8, 17+8,
4373 0+12, 16+12, 1+12, 17+12);
4374}
4375
4376static __inline__ __m512 __DEFAULT_FN_ATTRS
4377_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4378{
4379 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4380 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4381 (__v16sf)__W);
4382}
4383
4384static __inline__ __m512 __DEFAULT_FN_ATTRS
4385_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4386{
4387 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4388 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4389 (__v16sf)_mm512_setzero_ps());
4390}
4391
4392static __inline__ __m512i __DEFAULT_FN_ATTRS
4393_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4394{
4395 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4396 2, 18, 3, 19,
4397 2+4, 18+4, 3+4, 19+4,
4398 2+8, 18+8, 3+8, 19+8,
4399 2+12, 18+12, 3+12, 19+12);
4400}
4401
4402static __inline__ __m512i __DEFAULT_FN_ATTRS
4403_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4404{
4405 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4406 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4407 (__v16si)__W);
4408}
4409
4410static __inline__ __m512i __DEFAULT_FN_ATTRS
4411_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4412{
4413 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4414 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4415 (__v16si)_mm512_setzero_si512());
4416}
4417
4418static __inline__ __m512i __DEFAULT_FN_ATTRS
4419_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4420{
4421 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4422 0, 16, 1, 17,
4423 0+4, 16+4, 1+4, 17+4,
4424 0+8, 16+8, 1+8, 17+8,
4425 0+12, 16+12, 1+12, 17+12);
4426}
4427
4428static __inline__ __m512i __DEFAULT_FN_ATTRS
4429_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4430{
4431 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4432 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4433 (__v16si)__W);
4434}
4435
4436static __inline__ __m512i __DEFAULT_FN_ATTRS
4437_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4438{
4439 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4440 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4441 (__v16si)_mm512_setzero_si512());
4442}
4443
4444static __inline__ __m512i __DEFAULT_FN_ATTRS
4445_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4446{
4447 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4448 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4449}
4450
4451static __inline__ __m512i __DEFAULT_FN_ATTRS
4452_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4453{
4454 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4455 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4456 (__v8di)__W);
4457}
4458
4459static __inline__ __m512i __DEFAULT_FN_ATTRS
4460_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4461{
4462 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4463 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4464 (__v8di)_mm512_setzero_si512());
4465}
4466
4467static __inline__ __m512i __DEFAULT_FN_ATTRS
4468_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4469{
4470 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4471 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4472}
4473
4474static __inline__ __m512i __DEFAULT_FN_ATTRS
4475_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4476{
4477 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4478 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4479 (__v8di)__W);
4480}
4481
4482static __inline__ __m512i __DEFAULT_FN_ATTRS
4483_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4484{
4485 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4486 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4487 (__v8di)_mm512_setzero_si512());
Adam Nemet63a951e2015-01-14 01:31:17 +00004488}
4489
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00004490
/* SIMD load ops */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004492
Michael Kupersteine45af542015-06-30 13:36:19 +00004493static __inline __m512i __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004494_mm512_loadu_si512 (void const *__P)
4495{
Craig Topper4537ea72016-05-14 06:03:13 +00004496 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004497 (__v16si)
4498 _mm512_setzero_si512 (),
4499 (__mmask16) -1);
4500}
4501
4502static __inline __m512i __DEFAULT_FN_ATTRS
4503_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4504{
Craig Topper4537ea72016-05-14 06:03:13 +00004505 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004506 (__v16si) __W,
4507 (__mmask16) __U);
4508}
4509
4510
4511static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004512_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004513{
Craig Topper4537ea72016-05-14 06:03:13 +00004514 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004515 (__v16si)
4516 _mm512_setzero_si512 (),
4517 (__mmask16) __U);
4518}
4519
Michael Kupersteine45af542015-06-30 13:36:19 +00004520static __inline __m512i __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004521_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4522{
Craig Topper4537ea72016-05-14 06:03:13 +00004523 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004524 (__v8di) __W,
4525 (__mmask8) __U);
4526}
4527
4528static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004529_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004530{
Craig Topper4537ea72016-05-14 06:03:13 +00004531 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004532 (__v8di)
4533 _mm512_setzero_si512 (),
4534 (__mmask8) __U);
4535}
4536
Michael Kupersteine45af542015-06-30 13:36:19 +00004537static __inline __m512 __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004538_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4539{
Craig Topper4537ea72016-05-14 06:03:13 +00004540 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004541 (__v16sf) __W,
4542 (__mmask16) __U);
4543}
4544
4545static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004546_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004547{
Craig Topper4537ea72016-05-14 06:03:13 +00004548 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004549 (__v16sf)
4550 _mm512_setzero_ps (),
4551 (__mmask16) __U);
4552}
4553
Michael Kupersteine45af542015-06-30 13:36:19 +00004554static __inline __m512d __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004555_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004556{
Craig Topper4537ea72016-05-14 06:03:13 +00004557 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004558 (__v8df) __W,
4559 (__mmask8) __U);
Adam Nemetc0cff242015-01-16 18:51:50 +00004560}
4561
Michael Kupersteine45af542015-06-30 13:36:19 +00004562static __inline __m512d __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004563_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
Adam Nemetc0cff242015-01-16 18:51:50 +00004564{
Craig Topper4537ea72016-05-14 06:03:13 +00004565 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
Adam Nemetc0cff242015-01-16 18:51:50 +00004566 (__v8df)
4567 _mm512_setzero_pd (),
4568 (__mmask8) __U);
4569}
4570
Michael Kupersteine45af542015-06-30 13:36:19 +00004571static __inline __m512d __DEFAULT_FN_ATTRS
Craig Topper6afc4362017-03-17 05:59:25 +00004572_mm512_loadu_pd(void const *__p)
Adam Nemetda82bcc2014-07-31 04:00:39 +00004573{
4574 struct __loadu_pd {
4575 __m512d __v;
David Majnemer1cf22e62015-02-04 00:26:10 +00004576 } __attribute__((__packed__, __may_alias__));
Adam Nemetda82bcc2014-07-31 04:00:39 +00004577 return ((struct __loadu_pd*)__p)->__v;
4578}
4579
Michael Kupersteine45af542015-06-30 13:36:19 +00004580static __inline __m512 __DEFAULT_FN_ATTRS
Craig Topper6afc4362017-03-17 05:59:25 +00004581_mm512_loadu_ps(void const *__p)
Adam Nemetda82bcc2014-07-31 04:00:39 +00004582{
4583 struct __loadu_ps {
4584 __m512 __v;
David Majnemer1cf22e62015-02-04 00:26:10 +00004585 } __attribute__((__packed__, __may_alias__));
Adam Nemetda82bcc2014-07-31 04:00:39 +00004586 return ((struct __loadu_ps*)__p)->__v;
4587}
4588
Michael Kupersteine45af542015-06-30 13:36:19 +00004589static __inline __m512 __DEFAULT_FN_ATTRS
Craig Topper6afc4362017-03-17 05:59:25 +00004590_mm512_load_ps(void const *__p)
Adam Nemetc0cff242015-01-16 18:51:50 +00004591{
4592 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
4593 (__v16sf)
4594 _mm512_setzero_ps (),
4595 (__mmask16) -1);
4596}
4597
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004598static __inline __m512 __DEFAULT_FN_ATTRS
4599_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4600{
4601 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4602 (__v16sf) __W,
4603 (__mmask16) __U);
4604}
4605
4606static __inline __m512 __DEFAULT_FN_ATTRS
4607_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4608{
4609 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4610 (__v16sf)
4611 _mm512_setzero_ps (),
4612 (__mmask16) __U);
4613}
4614
Michael Kupersteine45af542015-06-30 13:36:19 +00004615static __inline __m512d __DEFAULT_FN_ATTRS
Craig Topper6afc4362017-03-17 05:59:25 +00004616_mm512_load_pd(void const *__p)
Adam Nemetc0cff242015-01-16 18:51:50 +00004617{
4618 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
4619 (__v8df)
4620 _mm512_setzero_pd (),
4621 (__mmask8) -1);
4622}
4623
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004624static __inline __m512d __DEFAULT_FN_ATTRS
4625_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4626{
4627 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4628 (__v8df) __W,
4629 (__mmask8) __U);
4630}
4631
4632static __inline __m512d __DEFAULT_FN_ATTRS
4633_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4634{
4635 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4636 (__v8df)
4637 _mm512_setzero_pd (),
4638 (__mmask8) __U);
4639}
4640
4641static __inline __m512i __DEFAULT_FN_ATTRS
4642_mm512_load_si512 (void const *__P)
4643{
4644 return *(__m512i *) __P;
4645}
4646
4647static __inline __m512i __DEFAULT_FN_ATTRS
4648_mm512_load_epi32 (void const *__P)
4649{
4650 return *(__m512i *) __P;
4651}
4652
4653static __inline __m512i __DEFAULT_FN_ATTRS
4654_mm512_load_epi64 (void const *__P)
4655{
4656 return *(__m512i *) __P;
4657}
4658
/* SIMD store ops */
4660
Michael Kupersteine45af542015-06-30 13:36:19 +00004661static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004662_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004663{
Craig Topper4537ea72016-05-14 06:03:13 +00004664 __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004665 (__mmask8) __U);
4666}
4667
Michael Kupersteine45af542015-06-30 13:36:19 +00004668static __inline void __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004669_mm512_storeu_si512 (void *__P, __m512i __A)
4670{
Craig Topper4537ea72016-05-14 06:03:13 +00004671 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004672 (__mmask16) -1);
4673}
4674
4675static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004676_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004677{
Craig Topper4537ea72016-05-14 06:03:13 +00004678 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004679 (__mmask16) __U);
4680}
4681
Michael Kupersteine45af542015-06-30 13:36:19 +00004682static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004683_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004684{
Craig Topper4537ea72016-05-14 06:03:13 +00004685 __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004686}
4687
Michael Kupersteine45af542015-06-30 13:36:19 +00004688static __inline void __DEFAULT_FN_ATTRS
Adam Nemetfce1ad02014-07-28 17:14:45 +00004689_mm512_storeu_pd(void *__P, __m512d __A)
4690{
Craig Topper4537ea72016-05-14 06:03:13 +00004691 __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
Adam Nemetfce1ad02014-07-28 17:14:45 +00004692}
4693
Michael Kupersteine45af542015-06-30 13:36:19 +00004694static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004695_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004696{
Craig Topper4537ea72016-05-14 06:03:13 +00004697 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004698 (__mmask16) __U);
4699}
4700
Michael Kupersteine45af542015-06-30 13:36:19 +00004701static __inline void __DEFAULT_FN_ATTRS
Adam Nemetfce1ad02014-07-28 17:14:45 +00004702_mm512_storeu_ps(void *__P, __m512 __A)
4703{
Craig Topper4537ea72016-05-14 06:03:13 +00004704 __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
Adam Nemetfce1ad02014-07-28 17:14:45 +00004705}
4706
Michael Kupersteine45af542015-06-30 13:36:19 +00004707static __inline void __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00004708_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
Adam Nemetfce1ad02014-07-28 17:14:45 +00004709{
Adam Nemetc0cff242015-01-16 18:51:50 +00004710 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
Adam Nemetfce1ad02014-07-28 17:14:45 +00004711}
4712
Michael Kupersteine45af542015-06-30 13:36:19 +00004713static __inline void __DEFAULT_FN_ATTRS
Adam Nemetfce1ad02014-07-28 17:14:45 +00004714_mm512_store_pd(void *__P, __m512d __A)
4715{
4716 *(__m512d*)__P = __A;
4717}
4718
Michael Kupersteine45af542015-06-30 13:36:19 +00004719static __inline void __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00004720_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4721{
4722 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4723 (__mmask16) __U);
4724}
4725
Michael Kupersteine45af542015-06-30 13:36:19 +00004726static __inline void __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00004727_mm512_store_ps(void *__P, __m512 __A)
4728{
4729 *(__m512*)__P = __A;
4730}
4731
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004732static __inline void __DEFAULT_FN_ATTRS
4733_mm512_store_si512 (void *__P, __m512i __A)
4734{
4735 *(__m512i *) __P = __A;
4736}
4737
4738static __inline void __DEFAULT_FN_ATTRS
4739_mm512_store_epi32 (void *__P, __m512i __A)
4740{
4741 *(__m512i *) __P = __A;
4742}
4743
4744static __inline void __DEFAULT_FN_ATTRS
4745_mm512_store_epi64 (void *__P, __m512i __A)
4746{
4747 *(__m512i *) __P = __A;
4748}
4749
/* Mask ops */
4751
Michael Kupersteine45af542015-06-30 13:36:19 +00004752static __inline __mmask16 __DEFAULT_FN_ATTRS
Adam Nemet2db1d2f2014-07-30 16:51:27 +00004753_mm512_knot(__mmask16 __M)
4754{
4755 return __builtin_ia32_knothi(__M);
4756}
4757
/* Integer compare */
4759
/* Fixed-predicate comparisons of signed 32-bit elements.  Each macro forwards
   to _mm512_cmp_epi32_mask (or, for the _mask_ forms taking a leading mask k,
   to _mm512_mask_cmp_epi32_mask) with the matching _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
Robert Khasanovb9f3a912014-10-08 17:18:13 +00004784
/* Fixed-predicate comparisons of unsigned 32-bit elements.  Each macro
   forwards to _mm512_cmp_epu32_mask (or, for the _mask_ forms taking a
   leading mask k, to _mm512_mask_cmp_epu32_mask) with the matching
   _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
Robert Khasanovb9f3a912014-10-08 17:18:13 +00004809
/* Fixed-predicate comparisons of signed 64-bit elements.  Each macro forwards
   to _mm512_cmp_epi64_mask (or, for the _mask_ forms taking a leading mask k,
   to _mm512_mask_cmp_epi64_mask) with the matching _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
Craig Topper4cac1c22015-01-25 23:30:07 +00004834
/* Fixed-predicate comparisons of unsigned 64-bit elements.  Each macro
   forwards to _mm512_cmp_epu64_mask (or, for the _mask_ forms taking a
   leading mask k, to _mm512_mask_cmp_epu64_mask) with the matching
   _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
Craig Topper4cac1c22015-01-25 23:30:07 +00004859
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004860static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004861_mm512_cvtepi8_epi32(__m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004862{
Craig Topper0c5da262016-10-23 07:35:47 +00004863 /* This function always performs a signed extension, but __v16qi is a char
4864 which may be signed or unsigned, so use __v16qs. */
4865 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004866}
4867
4868static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004869_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004870{
Craig Topper0c5da262016-10-23 07:35:47 +00004871 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4872 (__v16si)_mm512_cvtepi8_epi32(__A),
4873 (__v16si)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004874}
4875
4876static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004877_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004878{
Craig Topper0c5da262016-10-23 07:35:47 +00004879 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4880 (__v16si)_mm512_cvtepi8_epi32(__A),
4881 (__v16si)_mm512_setzero_si512());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004882}
4883
4884static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004885_mm512_cvtepi8_epi64(__m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004886{
Craig Topper0c5da262016-10-23 07:35:47 +00004887 /* This function always performs a signed extension, but __v16qi is a char
4888 which may be signed or unsigned, so use __v16qs. */
4889 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004890}
4891
4892static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004893_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004894{
Craig Topper0c5da262016-10-23 07:35:47 +00004895 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4896 (__v8di)_mm512_cvtepi8_epi64(__A),
4897 (__v8di)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004898}
4899
4900static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004901_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004902{
Craig Topper0c5da262016-10-23 07:35:47 +00004903 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4904 (__v8di)_mm512_cvtepi8_epi64(__A),
4905 (__v8di)_mm512_setzero_si512 ());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004906}
4907
4908static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004909_mm512_cvtepi32_epi64(__m256i __X)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004910{
Craig Topper0c5da262016-10-23 07:35:47 +00004911 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004912}
4913
4914static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004915_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004916{
Craig Topper0c5da262016-10-23 07:35:47 +00004917 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4918 (__v8di)_mm512_cvtepi32_epi64(__X),
4919 (__v8di)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004920}
4921
4922static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004923_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004924{
Craig Topper0c5da262016-10-23 07:35:47 +00004925 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4926 (__v8di)_mm512_cvtepi32_epi64(__X),
4927 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004928}
4929
4930static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004931_mm512_cvtepi16_epi32(__m256i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004932{
Craig Topper0c5da262016-10-23 07:35:47 +00004933 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004934}
4935
4936static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004937_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004938{
Craig Topper0c5da262016-10-23 07:35:47 +00004939 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4940 (__v16si)_mm512_cvtepi16_epi32(__A),
4941 (__v16si)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004942}
4943
4944static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004945_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004946{
Craig Topper0c5da262016-10-23 07:35:47 +00004947 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4948 (__v16si)_mm512_cvtepi16_epi32(__A),
4949 (__v16si)_mm512_setzero_si512 ());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004950}
4951
4952static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004953_mm512_cvtepi16_epi64(__m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004954{
Craig Topper0c5da262016-10-23 07:35:47 +00004955 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004956}
4957
4958static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004959_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004960{
Craig Topper0c5da262016-10-23 07:35:47 +00004961 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4962 (__v8di)_mm512_cvtepi16_epi64(__A),
4963 (__v8di)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004964}
4965
4966static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004967_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004968{
Craig Topper0c5da262016-10-23 07:35:47 +00004969 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4970 (__v8di)_mm512_cvtepi16_epi64(__A),
4971 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004972}
4973
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004974static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004975_mm512_cvtepu8_epi32(__m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004976{
Craig Topper0c5da262016-10-23 07:35:47 +00004977 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004978}
4979
4980static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004981_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004982{
Craig Topper0c5da262016-10-23 07:35:47 +00004983 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4984 (__v16si)_mm512_cvtepu8_epi32(__A),
4985 (__v16si)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004986}
4987
4988static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004989_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004990{
Craig Topper0c5da262016-10-23 07:35:47 +00004991 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4992 (__v16si)_mm512_cvtepu8_epi32(__A),
4993 (__v16si)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004994}
4995
4996static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004997_mm512_cvtepu8_epi64(__m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004998{
Craig Topper0c5da262016-10-23 07:35:47 +00004999 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005000}
5001
5002static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005003_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005004{
Craig Topper0c5da262016-10-23 07:35:47 +00005005 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5006 (__v8di)_mm512_cvtepu8_epi64(__A),
5007 (__v8di)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005008}
5009
5010static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005011_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005012{
Craig Topper0c5da262016-10-23 07:35:47 +00005013 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5014 (__v8di)_mm512_cvtepu8_epi64(__A),
5015 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005016}
5017
5018static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005019_mm512_cvtepu32_epi64(__m256i __X)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005020{
Craig Topper0c5da262016-10-23 07:35:47 +00005021 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005022}
5023
5024static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005025_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005026{
Craig Topper0c5da262016-10-23 07:35:47 +00005027 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5028 (__v8di)_mm512_cvtepu32_epi64(__X),
5029 (__v8di)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005030}
5031
5032static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005033_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005034{
Craig Topper0c5da262016-10-23 07:35:47 +00005035 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5036 (__v8di)_mm512_cvtepu32_epi64(__X),
5037 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005038}
5039
5040static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005041_mm512_cvtepu16_epi32(__m256i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005042{
Craig Topper0c5da262016-10-23 07:35:47 +00005043 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005044}
5045
5046static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005047_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005048{
Craig Topper0c5da262016-10-23 07:35:47 +00005049 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5050 (__v16si)_mm512_cvtepu16_epi32(__A),
5051 (__v16si)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005052}
5053
5054static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005055_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005056{
Craig Topper0c5da262016-10-23 07:35:47 +00005057 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5058 (__v16si)_mm512_cvtepu16_epi32(__A),
5059 (__v16si)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005060}
5061
5062static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005063_mm512_cvtepu16_epi64(__m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005064{
Craig Topper0c5da262016-10-23 07:35:47 +00005065 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005066}
5067
5068static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005069_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005070{
Craig Topper0c5da262016-10-23 07:35:47 +00005071 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5072 (__v8di)_mm512_cvtepu16_epi64(__A),
5073 (__v8di)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005074}
5075
5076static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005077_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005078{
Craig Topper0c5da262016-10-23 07:35:47 +00005079 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5080 (__v8di)_mm512_cvtepu16_epi64(__A),
5081 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005082}
5083
Michael Zuckermane98cc742016-02-23 15:59:47 +00005084static __inline__ __m512i __DEFAULT_FN_ATTRS
5085_mm512_rorv_epi32 (__m512i __A, __m512i __B)
5086{
5087 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5088 (__v16si) __B,
5089 (__v16si)
5090 _mm512_setzero_si512 (),
5091 (__mmask16) -1);
5092}
5093
5094static __inline__ __m512i __DEFAULT_FN_ATTRS
5095_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5096{
5097 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5098 (__v16si) __B,
5099 (__v16si) __W,
5100 (__mmask16) __U);
5101}
5102
5103static __inline__ __m512i __DEFAULT_FN_ATTRS
5104_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5105{
5106 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5107 (__v16si) __B,
5108 (__v16si)
5109 _mm512_setzero_si512 (),
5110 (__mmask16) __U);
5111}
5112
5113static __inline__ __m512i __DEFAULT_FN_ATTRS
5114_mm512_rorv_epi64 (__m512i __A, __m512i __B)
5115{
5116 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5117 (__v8di) __B,
5118 (__v8di)
5119 _mm512_setzero_si512 (),
5120 (__mmask8) -1);
5121}
5122
5123static __inline__ __m512i __DEFAULT_FN_ATTRS
5124_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5125{
5126 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5127 (__v8di) __B,
5128 (__v8di) __W,
5129 (__mmask8) __U);
5130}
5131
5132static __inline__ __m512i __DEFAULT_FN_ATTRS
5133_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5134{
5135 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5136 (__v8di) __B,
5137 (__v8di)
5138 _mm512_setzero_si512 (),
5139 (__mmask8) __U);
5140}
5141
5142
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005143
/* Generic integer compares.  a and b are the operands and p is the comparison
   predicate, forwarded to the builtin as an int.  The signed forms use the
   cmpd/cmpq builtins and the unsigned forms use ucmpd/ucmpq.  The unmasked
   forms pass an all-ones mask. */
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1); })

#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1); })

#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1); })

/* Masked forms: m is forwarded as the builtin's mask operand in place of the
   all-ones mask. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m)); })

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)); })

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)); })
Eric Christopher4d1851682015-06-17 07:09:20 +00005183
/* Rotate-left with a count b that is forwarded to the prold/prolq builtins as
   an int.  The plain forms pass a zero pass-through vector with an all-ones
   mask, the mask forms pass W and U, and the maskz forms pass a zero vector
   and U. */
#define _mm512_rol_epi32(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_rol_epi64(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
Michael Zuckerman0231f162016-02-23 13:41:13 +00005212static __inline__ __m512i __DEFAULT_FN_ATTRS
5213_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5214{
5215 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5216 (__v16si) __B,
5217 (__v16si)
5218 _mm512_setzero_si512 (),
5219 (__mmask16) -1);
5220}
5221
5222static __inline__ __m512i __DEFAULT_FN_ATTRS
5223_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5224{
5225 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5226 (__v16si) __B,
5227 (__v16si) __W,
5228 (__mmask16) __U);
5229}
5230
5231static __inline__ __m512i __DEFAULT_FN_ATTRS
5232_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5233{
5234 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5235 (__v16si) __B,
5236 (__v16si)
5237 _mm512_setzero_si512 (),
5238 (__mmask16) __U);
5239}
5240
5241static __inline__ __m512i __DEFAULT_FN_ATTRS
5242_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5243{
5244 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5245 (__v8di) __B,
5246 (__v8di)
5247 _mm512_setzero_si512 (),
5248 (__mmask8) -1);
5249}
5250
5251static __inline__ __m512i __DEFAULT_FN_ATTRS
5252_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5253{
5254 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5255 (__v8di) __B,
5256 (__v8di) __W,
5257 (__mmask8) __U);
5258}
5259
5260static __inline__ __m512i __DEFAULT_FN_ATTRS
5261_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5262{
5263 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5264 (__v8di) __B,
5265 (__v8di)
5266 _mm512_setzero_si512 (),
5267 (__mmask8) __U);
5268}
5269
/* Rotate-right with a count B that is forwarded to the prord/prorq builtins
   as an int.  The plain forms pass a zero pass-through vector with an
   all-ones mask, the mask forms pass W and U, and the maskz forms pass a zero
   vector and U. */
#define _mm512_ror_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_ror_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
Michael Zuckerman38a27272016-02-22 09:05:41 +00005298
Craig Topper1a441932016-11-12 07:16:59 +00005299static __inline__ __m512i __DEFAULT_FN_ATTRS
5300_mm512_slli_epi32(__m512i __A, int __B)
5301{
5302 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5303}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005304
Craig Topper1a441932016-11-12 07:16:59 +00005305static __inline__ __m512i __DEFAULT_FN_ATTRS
5306_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5307{
Craig Topperd7e5b212016-11-13 07:26:31 +00005308 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5309 (__v16si)_mm512_slli_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005310 (__v16si)__W);
5311}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005312
Craig Topper1a441932016-11-12 07:16:59 +00005313static __inline__ __m512i __DEFAULT_FN_ATTRS
5314_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
Craig Topperd7e5b212016-11-13 07:26:31 +00005315 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5316 (__v16si)_mm512_slli_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005317 (__v16si)_mm512_setzero_si512());
5318}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005319
Craig Topper1a441932016-11-12 07:16:59 +00005320static __inline__ __m512i __DEFAULT_FN_ATTRS
5321_mm512_slli_epi64(__m512i __A, int __B)
5322{
5323 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5324}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005325
Craig Topper1a441932016-11-12 07:16:59 +00005326static __inline__ __m512i __DEFAULT_FN_ATTRS
5327_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5328{
Craig Topperd7e5b212016-11-13 07:26:31 +00005329 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5330 (__v8di)_mm512_slli_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005331 (__v8di)__W);
5332}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005333
Craig Topper1a441932016-11-12 07:16:59 +00005334static __inline__ __m512i __DEFAULT_FN_ATTRS
5335_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5336{
Craig Topperd7e5b212016-11-13 07:26:31 +00005337 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5338 (__v8di)_mm512_slli_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005339 (__v8di)_mm512_setzero_si512());
5340}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005341
Craig Topper1a441932016-11-12 07:16:59 +00005342static __inline__ __m512i __DEFAULT_FN_ATTRS
5343_mm512_srli_epi32(__m512i __A, int __B)
5344{
5345 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5346}
Michael Zuckerman38a27272016-02-22 09:05:41 +00005347
Craig Topper1a441932016-11-12 07:16:59 +00005348static __inline__ __m512i __DEFAULT_FN_ATTRS
5349_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5350{
Craig Topperd7e5b212016-11-13 07:26:31 +00005351 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5352 (__v16si)_mm512_srli_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005353 (__v16si)__W);
5354}
Michael Zuckermand176d742016-03-01 17:49:03 +00005355
Craig Topper1a441932016-11-12 07:16:59 +00005356static __inline__ __m512i __DEFAULT_FN_ATTRS
5357_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
Craig Topperd7e5b212016-11-13 07:26:31 +00005358 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5359 (__v16si)_mm512_srli_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005360 (__v16si)_mm512_setzero_si512());
5361}
Michael Zuckermand176d742016-03-01 17:49:03 +00005362
Craig Topper1a441932016-11-12 07:16:59 +00005363static __inline__ __m512i __DEFAULT_FN_ATTRS
5364_mm512_srli_epi64(__m512i __A, int __B)
5365{
5366 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5367}
Michael Zuckermand176d742016-03-01 17:49:03 +00005368
Craig Topper1a441932016-11-12 07:16:59 +00005369static __inline__ __m512i __DEFAULT_FN_ATTRS
5370_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5371{
Craig Topperd7e5b212016-11-13 07:26:31 +00005372 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5373 (__v8di)_mm512_srli_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005374 (__v8di)__W);
5375}
Michael Zuckermand176d742016-03-01 17:49:03 +00005376
Craig Topper1a441932016-11-12 07:16:59 +00005377static __inline__ __m512i __DEFAULT_FN_ATTRS
5378_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5379{
Craig Topperd7e5b212016-11-13 07:26:31 +00005380 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5381 (__v8di)_mm512_srli_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005382 (__v8di)_mm512_setzero_si512());
5383}
Michael Zuckermand176d742016-03-01 17:49:03 +00005384
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00005385static __inline__ __m512i __DEFAULT_FN_ATTRS
5386_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5387{
5388 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5389 (__v16si) __W,
5390 (__mmask16) __U);
5391}
5392
5393static __inline__ __m512i __DEFAULT_FN_ATTRS
5394_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5395{
5396 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5397 (__v16si)
5398 _mm512_setzero_si512 (),
5399 (__mmask16) __U);
5400}
5401
5402static __inline__ void __DEFAULT_FN_ATTRS
5403_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5404{
5405 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5406 (__mmask16) __U);
5407}
5408
5409static __inline__ __m512i __DEFAULT_FN_ATTRS
Michael Zuckermane6542002016-05-23 08:01:48 +00005410_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5411{
Igor Bregeraadb8762016-06-08 13:59:20 +00005412 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5413 (__v16si) __A,
5414 (__v16si) __W);
Michael Zuckermane6542002016-05-23 08:01:48 +00005415}
5416
5417static __inline__ __m512i __DEFAULT_FN_ATTRS
5418_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5419{
Igor Bregeraadb8762016-06-08 13:59:20 +00005420 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5421 (__v16si) __A,
5422 (__v16si) _mm512_setzero_si512 ());
Michael Zuckermane6542002016-05-23 08:01:48 +00005423}
5424
5425static __inline__ __m512i __DEFAULT_FN_ATTRS
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00005426_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5427{
Igor Bregeraadb8762016-06-08 13:59:20 +00005428 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5429 (__v8di) __A,
5430 (__v8di) __W);
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00005431}
5432
5433static __inline__ __m512i __DEFAULT_FN_ATTRS
5434_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5435{
Igor Bregeraadb8762016-06-08 13:59:20 +00005436 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5437 (__v8di) __A,
5438 (__v8di) _mm512_setzero_si512 ());
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00005439}
5440
5441static __inline__ __m512i __DEFAULT_FN_ATTRS
5442_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5443{
5444 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5445 (__v8di) __W,
5446 (__mmask8) __U);
5447}
5448
5449static __inline__ __m512i __DEFAULT_FN_ATTRS
5450_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5451{
5452 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5453 (__v8di)
5454 _mm512_setzero_si512 (),
5455 (__mmask8) __U);
5456}
5457
5458static __inline__ void __DEFAULT_FN_ATTRS
5459_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5460{
5461 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5462 (__mmask8) __U);
5463}
5464
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005465static __inline__ __m512d __DEFAULT_FN_ATTRS
5466_mm512_movedup_pd (__m512d __A)
5467{
Simon Pilgrim275d7212016-07-02 17:16:25 +00005468 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5469 0, 0, 2, 2, 4, 4, 6, 6);
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005470}
5471
5472static __inline__ __m512d __DEFAULT_FN_ATTRS
5473_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5474{
Simon Pilgrim275d7212016-07-02 17:16:25 +00005475 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5476 (__v8df)_mm512_movedup_pd(__A),
5477 (__v8df)__W);
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005478}
5479
5480static __inline__ __m512d __DEFAULT_FN_ATTRS
5481_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5482{
Simon Pilgrim275d7212016-07-02 17:16:25 +00005483 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5484 (__v8df)_mm512_movedup_pd(__A),
5485 (__v8df)_mm512_setzero_pd());
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005486}
5487
/* 512-bit double-precision fixupimm family.
 *
 * Each macro expands to one call of __builtin_ia32_fixupimmpd512_mask (or the
 * _maskz builtin for the maskz forms) with A and B as __v8df, C as __v8di and
 * imm as int. Forms without a U parameter pass (__mmask8)-1 as the mask;
 * forms without an R parameter pass _MM_FROUND_CUR_DIRECTION as the rounding
 * argument. */

/* Unmasked, explicit rounding argument R. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      (int)(R)); \
})

/* Mask U, explicit rounding argument R. */
#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); \
})

/* Unmasked, current rounding direction. */
#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})

/* Mask U, current rounding direction. */
#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})

/* maskz builtin, mask U, explicit rounding argument R. */
#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); \
})

/* maskz builtin, mask U, current rounding direction. */
#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
Michael Zuckermandef78752016-03-28 12:23:09 +00005527
/* 512-bit single-precision fixupimm family.
 *
 * Each macro expands to one call of __builtin_ia32_fixupimmps512_mask (or the
 * _maskz builtin for the maskz forms) with A and B as __v16sf, C as __v16si
 * and imm as int. Forms without a U parameter pass (__mmask16)-1 as the mask;
 * forms without an R parameter pass _MM_FROUND_CUR_DIRECTION as the rounding
 * argument. */

/* Unmasked, explicit rounding argument R. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)-1, \
      (int)(R)); \
})

/* Mask U, explicit rounding argument R. */
#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      (int)(R)); \
})

/* Unmasked, current rounding direction. */
#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})

/* Mask U, current rounding direction. */
#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})

/* maskz builtin, mask U, explicit rounding argument R. */
#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      (int)(R)); \
})

/* maskz builtin, mask U, current rounding direction. */
#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
Michael Zuckermandef78752016-03-28 12:23:09 +00005567
/* Scalar double-precision fixupimm family.
 *
 * Each macro expands to one call of __builtin_ia32_fixupimmsd_mask (or the
 * _maskz builtin for the maskz forms) with A and B as __v2df, C as __v2di and
 * imm as int. Forms without a U parameter pass (__mmask8)-1 as the mask;
 * forms without an R parameter pass _MM_FROUND_CUR_DIRECTION as the rounding
 * argument. */

/* Unmasked, explicit rounding argument R. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      (int)(R)); \
})

/* Mask U, explicit rounding argument R. */
#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); \
})

/* Unmasked, current rounding direction. */
#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})

/* Mask U, current rounding direction. */
#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})

/* maskz builtin, mask U, explicit rounding argument R. */
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); \
})

/* maskz builtin, mask U, current rounding direction. */
#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
Michael Zuckermandef78752016-03-28 12:23:09 +00005606
/* Scalar single-precision fixupimm family.
 *
 * Each macro expands to one call of __builtin_ia32_fixupimmss_mask (or the
 * _maskz builtin for the maskz forms) with A and B as __v4sf, C as __v4si and
 * imm as int. Forms without a U parameter pass (__mmask8)-1 as the mask;
 * forms without an R parameter pass _MM_FROUND_CUR_DIRECTION as the rounding
 * argument. */

/* Unmasked, explicit rounding argument R. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      (int)(R)); \
})

/* Mask U, explicit rounding argument R. */
#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); \
})

/* Unmasked, current rounding direction. */
#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})

/* Mask U, current rounding direction. */
#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})

/* maskz builtin, mask U, explicit rounding argument R. */
#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); \
})

/* maskz builtin, mask U, current rounding direction. */
#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
Michael Zuckermandef78752016-03-28 12:23:09 +00005645
/* Unmasked scalar-double getexp with explicit rounding argument R: calls
 * __builtin_ia32_getexpsd128_round_mask with a zero pass-through vector and
 * (__mmask8)-1 as the mask. */
#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); \
})
Michael Zuckermandef78752016-03-28 12:23:09 +00005651
5652
5653static __inline__ __m128d __DEFAULT_FN_ATTRS
5654_mm_getexp_sd (__m128d __A, __m128d __B)
5655{
5656 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5657 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5658}
5659
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005660static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00005661_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005662{
5663 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5664 (__v2df) __B,
5665 (__v2df) __W,
5666 (__mmask8) __U,
5667 _MM_FROUND_CUR_DIRECTION);
5668}
5669
/* Merge-masked scalar-double getexp with explicit rounding argument R: W is
 * the pass-through vector and U the mask handed to
 * __builtin_ia32_getexpsd128_round_mask. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005675
5676static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00005677_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005678{
5679 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5680 (__v2df) __B,
5681 (__v2df) _mm_setzero_pd (),
5682 (__mmask8) __U,
5683 _MM_FROUND_CUR_DIRECTION);
5684}
5685
/* Zero-masked scalar-double getexp with explicit rounding argument R: a zero
 * pass-through vector and mask U are handed to
 * __builtin_ia32_getexpsd128_round_mask. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005691
/* Unmasked scalar-float getexp with explicit rounding argument R: calls
 * __builtin_ia32_getexpss128_round_mask with a zero pass-through vector and
 * (__mmask8)-1 as the mask. */
#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); \
})
Michael Zuckermandef78752016-03-28 12:23:09 +00005697
5698static __inline__ __m128 __DEFAULT_FN_ATTRS
5699_mm_getexp_ss (__m128 __A, __m128 __B)
5700{
5701 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5702 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5703}
5704
Craig Topper58187d32016-05-17 04:41:29 +00005705static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00005706_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005707{
Craig Topper58187d32016-05-17 04:41:29 +00005708 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005709 (__v4sf) __B,
5710 (__v4sf) __W,
5711 (__mmask8) __U,
5712 _MM_FROUND_CUR_DIRECTION);
5713}
5714
/* Merge-masked scalar-float getexp with explicit rounding argument R: W is
 * the pass-through vector and U the mask handed to
 * __builtin_ia32_getexpss128_round_mask. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005720
Craig Topper58187d32016-05-17 04:41:29 +00005721static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00005722_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005723{
Craig Topper58187d32016-05-17 04:41:29 +00005724 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005725 (__v4sf) __B,
5726 (__v4sf) _mm_setzero_pd (),
5727 (__mmask8) __U,
5728 _MM_FROUND_CUR_DIRECTION);
5729}
5730
/* Zero-masked scalar-float getexp with explicit rounding argument R: a zero
 * pass-through vector and mask U are handed to
 * __builtin_ia32_getexpss128_round_mask. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005736
/* Scalar-double getmant macros. Each expands to one call of
 * __builtin_ia32_getmantsd_round_mask; the builtin's integer control operand
 * is assembled from the C and D arguments as ((D) << 2) | (C). */

/* Unmasked (zero pass-through, mask (__mmask8)-1), explicit rounding R. */
#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); \
})

/* Unmasked (zero pass-through, mask (__mmask8)-1), current rounding
 * direction. */
#define _mm_getmant_sd(A, B, C, D) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})

/* Pass-through W under mask U, current rounding direction. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005759
/* Merge-masked scalar-double getmant with explicit rounding argument R.
 * Expands to __builtin_ia32_getmantsd_round_mask with W as the pass-through
 * vector and U as the mask; the control operand is ((D) << 2) | (C).
 *
 * The statement expression is prefixed with __extension__, like the other
 * macros in this file, so that pedantic builds do not diagnose the GNU
 * extension at every use site. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)); })
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005766
/* Zero-masked scalar-double getmant macros. Both expand to
 * __builtin_ia32_getmantsd_round_mask with a zero pass-through vector and
 * mask U; the control operand is ((D) << 2) | (C). */

/* Current rounding direction. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})

/* Explicit rounding argument R. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005781
/* Scalar-float getmant macros. Each expands to one call of
 * __builtin_ia32_getmantss_round_mask; the builtin's integer control operand
 * is assembled from the C and D arguments as ((D) << 2) | (C). */

/* Unmasked (zero pass-through, mask (__mmask8)-1), explicit rounding R. */
#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); \
})

/* Unmasked (zero pass-through, mask (__mmask8)-1), current rounding
 * direction. */
#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})

/* Pass-through W under mask U, current rounding direction. */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005804
/* Merge-masked scalar-float getmant with explicit rounding argument R.
 * Expands to __builtin_ia32_getmantss_round_mask with W as the pass-through
 * vector and U as the mask; the control operand is ((D) << 2) | (C).
 *
 * The statement expression is prefixed with __extension__, like the other
 * macros in this file, so that pedantic builds do not diagnose the GNU
 * extension at every use site. */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)); })
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005811
/* Zero-masked scalar-float getmant, current rounding direction. Expands to
 * __builtin_ia32_getmantss_round_mask with a zero pass-through vector and
 * mask U; the control operand is ((D) << 2) | (C).
 *
 * The zero vector comes from _mm_setzero_ps() (not the _pd variant) so that
 * its element type matches the single-precision operands, as in
 * _mm_maskz_getmant_round_ss. */
#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005819
/* Zero-masked scalar-float getmant with explicit rounding argument R.
 * Expands to __builtin_ia32_getmantss_round_mask with a zero pass-through
 * vector and mask U; the control operand is ((D) << 2) | (C). */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckermandef78752016-03-28 12:23:09 +00005826
5827static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5828_mm512_kmov (__mmask16 __A)
5829{
5830 return __A;
5831}
5832
/* Expands to __builtin_ia32_vcomisd on A and B (as __v2df) with P and R
 * passed as int operands; the result is cast to int. */
#define _mm_comi_round_sd(A, B, P, R) __extension__ ({ \
  (int)__builtin_ia32_vcomisd( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(P), \
      (int)(R)); \
})
Michael Zuckermane71d59f2016-03-07 19:15:00 +00005836
/* Expands to __builtin_ia32_vcomiss on A and B (as __v4sf) with P and R
 * passed as int operands; the result is cast to int. */
#define _mm_comi_round_ss(A, B, P, R) __extension__ ({ \
  (int)__builtin_ia32_vcomiss( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(P), \
      (int)(R)); \
})
Michael Zuckermane71d59f2016-03-07 19:15:00 +00005840
#ifdef __x86_64__
/* Only defined for x86-64 targets. Expands to __builtin_ia32_vcvtsd2si64 on
 * A (as __v2df) with R as the int operand; the result is cast to long long. */
#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64( \
      (__v2df)(__m128d)(A), \
      (int)(R)); \
})
#endif
Simon Pilgrim427154d2016-07-04 21:30:47 +00005845
Michael Zuckermand8d2f622016-04-11 07:15:34 +00005846static __inline__ __m512i __DEFAULT_FN_ATTRS
5847_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5848 __mmask16 __U, __m512i __B)
5849{
5850 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5851 (__v16si) __I
5852 /* idx */ ,
5853 (__v16si) __B,
5854 (__mmask16) __U);
5855}
Michael Zuckermancdd54c82016-04-10 12:54:23 +00005856
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005857static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005858_mm512_sll_epi32(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005859{
Craig Topper1a441932016-11-12 07:16:59 +00005860 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005861}
5862
5863static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005864_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005865{
Craig Topperd7e5b212016-11-13 07:26:31 +00005866 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5867 (__v16si)_mm512_sll_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005868 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005869}
5870
5871static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005872_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005873{
Craig Topperd7e5b212016-11-13 07:26:31 +00005874 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5875 (__v16si)_mm512_sll_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005876 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005877}
5878
5879static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005880_mm512_sll_epi64(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005881{
Craig Topper1a441932016-11-12 07:16:59 +00005882 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005883}
5884
5885static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005886_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005887{
Craig Topperd7e5b212016-11-13 07:26:31 +00005888 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5889 (__v8di)_mm512_sll_epi64(__A, __B),
5890 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005891}
5892
5893static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005894_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005895{
Craig Topperd7e5b212016-11-13 07:26:31 +00005896 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5897 (__v8di)_mm512_sll_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005898 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005899}
5900
5901static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005902_mm512_sllv_epi32(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005903{
Craig Topper5e0709d2016-11-13 07:26:34 +00005904 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005905}
5906
5907static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005908_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005909{
Craig Topper5e0709d2016-11-13 07:26:34 +00005910 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5911 (__v16si)_mm512_sllv_epi32(__X, __Y),
5912 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005913}
5914
5915static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005916_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005917{
Craig Topper5e0709d2016-11-13 07:26:34 +00005918 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5919 (__v16si)_mm512_sllv_epi32(__X, __Y),
5920 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005921}
5922
5923static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005924_mm512_sllv_epi64(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005925{
Craig Topper5e0709d2016-11-13 07:26:34 +00005926 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005927}
5928
5929static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005930_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005931{
Craig Topper5e0709d2016-11-13 07:26:34 +00005932 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5933 (__v8di)_mm512_sllv_epi64(__X, __Y),
5934 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005935}
5936
5937static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005938_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005939{
Craig Topper5e0709d2016-11-13 07:26:34 +00005940 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5941 (__v8di)_mm512_sllv_epi64(__X, __Y),
5942 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005943}
5944
5945static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005946_mm512_sra_epi32(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005947{
Craig Topper1a441932016-11-12 07:16:59 +00005948 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005949}
5950
5951static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005952_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005953{
Craig Topperd7e5b212016-11-13 07:26:31 +00005954 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5955 (__v16si)_mm512_sra_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005956 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005957}
5958
5959static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005960_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005961{
Craig Topperd7e5b212016-11-13 07:26:31 +00005962 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5963 (__v16si)_mm512_sra_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005964 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005965}
5966
5967static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005968_mm512_sra_epi64(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005969{
Craig Topper1a441932016-11-12 07:16:59 +00005970 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005971}
5972
5973static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005974_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005975{
Craig Topperd7e5b212016-11-13 07:26:31 +00005976 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5977 (__v8di)_mm512_sra_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005978 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005979}
5980
5981static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005982_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005983{
Craig Topperd7e5b212016-11-13 07:26:31 +00005984 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5985 (__v8di)_mm512_sra_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005986 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005987}
5988
5989static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005990_mm512_srav_epi32(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005991{
Craig Topper5e0709d2016-11-13 07:26:34 +00005992 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005993}
5994
5995static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005996_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005997{
Craig Topper5e0709d2016-11-13 07:26:34 +00005998 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5999 (__v16si)_mm512_srav_epi32(__X, __Y),
6000 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006001}
6002
6003static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006004_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006005{
Craig Topper5e0709d2016-11-13 07:26:34 +00006006 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6007 (__v16si)_mm512_srav_epi32(__X, __Y),
6008 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006009}
6010
6011static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006012_mm512_srav_epi64(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006013{
Craig Topper5e0709d2016-11-13 07:26:34 +00006014 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006015}
6016
6017static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006018_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006019{
Craig Topper5e0709d2016-11-13 07:26:34 +00006020 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6021 (__v8di)_mm512_srav_epi64(__X, __Y),
6022 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006023}
6024
6025static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006026_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006027{
Craig Topper5e0709d2016-11-13 07:26:34 +00006028 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6029 (__v8di)_mm512_srav_epi64(__X, __Y),
6030 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006031}
6032
6033static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006034_mm512_srl_epi32(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006035{
Craig Topper1a441932016-11-12 07:16:59 +00006036 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006037}
6038
6039static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006040_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006041{
Craig Topperd7e5b212016-11-13 07:26:31 +00006042 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6043 (__v16si)_mm512_srl_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006044 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006045}
6046
6047static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006048_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006049{
Craig Topperd7e5b212016-11-13 07:26:31 +00006050 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6051 (__v16si)_mm512_srl_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006052 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006053}
6054
6055static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006056_mm512_srl_epi64(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006057{
Craig Topper1a441932016-11-12 07:16:59 +00006058 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006059}
6060
6061static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006062_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006063{
Craig Topperd7e5b212016-11-13 07:26:31 +00006064 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6065 (__v8di)_mm512_srl_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006066 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006067}
6068
6069static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006070_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006071{
Craig Topperd7e5b212016-11-13 07:26:31 +00006072 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6073 (__v8di)_mm512_srl_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006074 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006075}
6076
6077static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006078_mm512_srlv_epi32(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006079{
Craig Topper5e0709d2016-11-13 07:26:34 +00006080 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006081}
6082
6083static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006084_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006085{
Craig Topper5e0709d2016-11-13 07:26:34 +00006086 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6087 (__v16si)_mm512_srlv_epi32(__X, __Y),
6088 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006089}
6090
6091static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006092_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006093{
Craig Topper5e0709d2016-11-13 07:26:34 +00006094 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6095 (__v16si)_mm512_srlv_epi32(__X, __Y),
6096 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006097}
6098
6099static __inline__ __m512i __DEFAULT_FN_ATTRS
6100_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
6101{
Craig Topper5e0709d2016-11-13 07:26:34 +00006102 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006103}
6104
6105static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006106_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006107{
Craig Topper5e0709d2016-11-13 07:26:34 +00006108 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6109 (__v8di)_mm512_srlv_epi64(__X, __Y),
6110 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006111}
6112
6113static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006114_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006115{
Craig Topper5e0709d2016-11-13 07:26:34 +00006116 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6117 (__v8di)_mm512_srlv_epi64(__X, __Y),
6118 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006119}
6120
/* Ternary-logic intrinsics on __v16si operands.  A, B, C and the int
 * immediate imm are forwarded to __builtin_ia32_pternlogd512_mask; the
 * unmasked form supplies an all-ones __mmask16. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)-1))

/* Same builtin as the unmasked form, with the caller's mask U. */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U)))

/* Uses the separate __builtin_ia32_pternlogd512_maskz builtin with mask U. */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                              (__v16si)(__m512i)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U)))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006138
/* Ternary-logic intrinsics on __v8di operands.  A, B, C and the int
 * immediate imm are forwarded to __builtin_ia32_pternlogq512_mask; the
 * unmasked form supplies an all-ones __mmask8. */
#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), \
                                             (int)(imm), (__mmask8)-1))

/* Same builtin as the unmasked form, with the caller's mask U. */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), \
                                             (int)(imm), (__mmask8)(U)))

/* Uses the separate __builtin_ia32_pternlogq512_maskz builtin with mask U. */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                              (__v8di)(__m512i)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U)))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006156
#ifdef __x86_64__
/* Passes A (as __v2df) and the rounding argument R to
 * __builtin_ia32_vcvtsd2si64; the result is cast to long long.  Only defined
 * when __x86_64__ is set. */
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006161
/* Passes A (as __v2df) and the rounding argument R to
 * __builtin_ia32_vcvtsd2si32; the result is cast to int. */
#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

/* Alternate spelling; expands to the same builtin call. */
#define _mm_cvt_roundsd_i32(A, R) _mm_cvt_roundsd_si32((A), (R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006167
Craig Topper8c18e112016-05-17 04:41:50 +00006168#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
6169 (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006170
6171static __inline__ unsigned __DEFAULT_FN_ATTRS
6172_mm_cvtsd_u32 (__m128d __A)
6173{
6174 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6175 _MM_FROUND_CUR_DIRECTION);
6176}
6177
Craig Topper45db56c2016-07-21 07:38:39 +00006178#ifdef __x86_64__
Craig Topper8c18e112016-05-17 04:41:50 +00006179#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
6180 (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
6181 (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006182
6183static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6184_mm_cvtsd_u64 (__m128d __A)
6185{
6186 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6187 __A,
6188 _MM_FROUND_CUR_DIRECTION);
6189}
Craig Topper45db56c2016-07-21 07:38:39 +00006190#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006191
/* Passes A (as __v4sf) and the rounding argument R to
 * __builtin_ia32_vcvtss2si32; the result is cast to int. */
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

/* Alternate spelling; expands to the same builtin call. */
#define _mm_cvt_roundss_i32(A, R) _mm_cvt_roundss_si32((A), (R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006197
#ifdef __x86_64__
/* Passes A (as __v4sf) and the rounding argument R to
 * __builtin_ia32_vcvtss2si64; the result is cast to long long. */
#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

/* Alternate spelling; expands to the same builtin call. */
#define _mm_cvt_roundss_i64(A, R) _mm_cvt_roundss_si64((A), (R))
#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006205
Craig Topper8c18e112016-05-17 04:41:50 +00006206#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
6207 (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006208
6209static __inline__ unsigned __DEFAULT_FN_ATTRS
6210_mm_cvtss_u32 (__m128 __A)
6211{
6212 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6213 _MM_FROUND_CUR_DIRECTION);
6214}
6215
Craig Topper45db56c2016-07-21 07:38:39 +00006216#ifdef __x86_64__
Craig Topper8c18e112016-05-17 04:41:50 +00006217#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
6218 (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6219 (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006220
6221static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6222_mm_cvtss_u64 (__m128 __A)
6223{
6224 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6225 __A,
6226 _MM_FROUND_CUR_DIRECTION);
6227}
Craig Topper45db56c2016-07-21 07:38:39 +00006228#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006229
Craig Topper8c18e112016-05-17 04:41:50 +00006230#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
6231 (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006232
Craig Topper8c18e112016-05-17 04:41:50 +00006233#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
6234 (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006235
6236static __inline__ int __DEFAULT_FN_ATTRS
6237_mm_cvttsd_i32 (__m128d __A)
6238{
6239 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6240 _MM_FROUND_CUR_DIRECTION);
6241}
6242
Craig Topper45db56c2016-07-21 07:38:39 +00006243#ifdef __x86_64__
Craig Topper8c18e112016-05-17 04:41:50 +00006244#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
6245 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006246
Craig Topper8c18e112016-05-17 04:41:50 +00006247#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
6248 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006249
6250static __inline__ long long __DEFAULT_FN_ATTRS
6251_mm_cvttsd_i64 (__m128d __A)
6252{
6253 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6254 _MM_FROUND_CUR_DIRECTION);
6255}
Craig Topper45db56c2016-07-21 07:38:39 +00006256#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006257
Craig Topper8c18e112016-05-17 04:41:50 +00006258#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
6259 (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006260
6261static __inline__ unsigned __DEFAULT_FN_ATTRS
6262_mm_cvttsd_u32 (__m128d __A)
6263{
6264 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6265 _MM_FROUND_CUR_DIRECTION);
6266}
6267
Craig Topper45db56c2016-07-21 07:38:39 +00006268#ifdef __x86_64__
Craig Topper8c18e112016-05-17 04:41:50 +00006269#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
6270 (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6271 (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006272
6273static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6274_mm_cvttsd_u64 (__m128d __A)
6275{
6276 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6277 __A,
6278 _MM_FROUND_CUR_DIRECTION);
6279}
Craig Topper45db56c2016-07-21 07:38:39 +00006280#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006281
Craig Topper8c18e112016-05-17 04:41:50 +00006282#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
6283 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006284
Craig Topper8c18e112016-05-17 04:41:50 +00006285#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
6286 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006287
6288static __inline__ int __DEFAULT_FN_ATTRS
6289_mm_cvttss_i32 (__m128 __A)
6290{
6291 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6292 _MM_FROUND_CUR_DIRECTION);
6293}
6294
Craig Topper45db56c2016-07-21 07:38:39 +00006295#ifdef __x86_64__
Craig Topper8c18e112016-05-17 04:41:50 +00006296#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
6297 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006298
Craig Topper8c18e112016-05-17 04:41:50 +00006299#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
6300 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006301
6302static __inline__ long long __DEFAULT_FN_ATTRS
6303_mm_cvttss_i64 (__m128 __A)
6304{
6305 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6306 _MM_FROUND_CUR_DIRECTION);
6307}
Craig Topper45db56c2016-07-21 07:38:39 +00006308#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006309
Craig Topper8c18e112016-05-17 04:41:50 +00006310#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
6311 (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006312
6313static __inline__ unsigned __DEFAULT_FN_ATTRS
6314_mm_cvttss_u32 (__m128 __A)
6315{
6316 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6317 _MM_FROUND_CUR_DIRECTION);
6318}
6319
Craig Topper45db56c2016-07-21 07:38:39 +00006320#ifdef __x86_64__
Craig Topper8c18e112016-05-17 04:41:50 +00006321#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
6322 (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6323 (int)(R)); })
Michael Zuckerman8d161992016-04-10 17:24:03 +00006324
6325static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6326_mm_cvttss_u64 (__m128 __A)
6327{
6328 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6329 __A,
6330 _MM_FROUND_CUR_DIRECTION);
6331}
Craig Topper45db56c2016-07-21 07:38:39 +00006332#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006333
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006334static __inline__ __m512d __DEFAULT_FN_ATTRS
6335_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6336 __m512d __B)
6337{
6338 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6339 (__v8di) __I
6340 /* idx */ ,
6341 (__v8df) __B,
6342 (__mmask8) __U);
6343}
6344
6345static __inline__ __m512 __DEFAULT_FN_ATTRS
6346_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6347 __m512 __B)
6348{
6349 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6350 (__v16si) __I
6351 /* idx */ ,
6352 (__v16sf) __B,
6353 (__mmask16) __U);
6354}
6355
6356static __inline__ __m512i __DEFAULT_FN_ATTRS
6357_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6358 __mmask8 __U, __m512i __B)
6359{
6360 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6361 (__v8di) __I
6362 /* idx */ ,
6363 (__v8di) __B,
6364 (__mmask8) __U);
6365}
6366
/* Shuffle of X built on __builtin_shufflevector.  Result element i takes its
 * index from bit i of C, added to the even base 0, 0, 2, 2, 4, 4, 6, 6, so
 * every index is below 8.  The second shuffle operand is
 * _mm512_undefined_pd(). */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                    (__v8df)_mm512_undefined_pd(), \
                                    0 + (((C) >> 0) & 0x1), 0 + (((C) >> 1) & 0x1), \
                                    2 + (((C) >> 2) & 0x1), 2 + (((C) >> 3) & 0x1), \
                                    4 + (((C) >> 4) & 0x1), 4 + (((C) >> 5) & 0x1), \
                                    6 + (((C) >> 6) & 0x1), 6 + (((C) >> 7) & 0x1)))

/* Masked form: the permuted vector and W go to __builtin_ia32_selectpd_512
 * together with the mask U. */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masked form: the alternative select operand is _mm512_setzero_pd(). */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006388
/* Shuffle of X built on __builtin_shufflevector.  The four 2-bit fields of C
 * (bits 0-1, 2-3, 4-5, 6-7) are reused for each group of four result
 * elements, offset by the group base 0, 4, 8 or 12, so every index is below
 * 16.  The second shuffle operand is _mm512_undefined_ps(). */
#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
                                   (__v16sf)_mm512_undefined_ps(), \
                                   0  + (((C) >> 0) & 0x3), 0  + (((C) >> 2) & 0x3), \
                                   0  + (((C) >> 4) & 0x3), 0  + (((C) >> 6) & 0x3), \
                                   4  + (((C) >> 0) & 0x3), 4  + (((C) >> 2) & 0x3), \
                                   4  + (((C) >> 4) & 0x3), 4  + (((C) >> 6) & 0x3), \
                                   8  + (((C) >> 0) & 0x3), 8  + (((C) >> 2) & 0x3), \
                                   8  + (((C) >> 4) & 0x3), 8  + (((C) >> 6) & 0x3), \
                                   12 + (((C) >> 0) & 0x3), 12 + (((C) >> 2) & 0x3), \
                                   12 + (((C) >> 4) & 0x3), 12 + (((C) >> 6) & 0x3)))

/* Masked form: the permuted vector and W go to __builtin_ia32_selectps_512
 * together with the mask U. */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

/* Zero-masked form: the alternative select operand is _mm512_setzero_ps(). */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006418
6419static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006420_mm512_permutevar_pd(__m512d __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006421{
Craig Topper678b07f2016-12-11 01:26:52 +00006422 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006423}
6424
6425static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006426_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006427{
Craig Topper678b07f2016-12-11 01:26:52 +00006428 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6429 (__v8df)_mm512_permutevar_pd(__A, __C),
6430 (__v8df)__W);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006431}
6432
6433static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006434_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006435{
Craig Topper678b07f2016-12-11 01:26:52 +00006436 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6437 (__v8df)_mm512_permutevar_pd(__A, __C),
6438 (__v8df)_mm512_setzero_pd());
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006439}
6440
6441static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006442_mm512_permutevar_ps(__m512 __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006443{
Craig Topper678b07f2016-12-11 01:26:52 +00006444 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006445}
6446
6447static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006448_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006449{
Craig Topper678b07f2016-12-11 01:26:52 +00006450 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6451 (__v16sf)_mm512_permutevar_ps(__A, __C),
6452 (__v16sf)__W);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006453}
6454
6455static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006456_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006457{
Craig Topper678b07f2016-12-11 01:26:52 +00006458 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6459 (__v16sf)_mm512_permutevar_ps(__A, __C),
6460 (__v16sf)_mm512_setzero_ps());
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006461}
6462
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006463static __inline __m512d __DEFAULT_FN_ATTRS
6464_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006465{
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006466 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6467 /* idx */ ,
6468 (__v8df) __A,
6469 (__v8df) __B,
6470 (__mmask8) -1);
6471}
6472
6473static __inline__ __m512d __DEFAULT_FN_ATTRS
6474_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6475{
6476 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6477 /* idx */ ,
6478 (__v8df) __A,
6479 (__v8df) __B,
6480 (__mmask8) __U);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006481}
6482
6483static __inline__ __m512d __DEFAULT_FN_ATTRS
6484_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6485 __m512d __B)
6486{
6487 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006488 /* idx */ ,
6489 (__v8df) __A,
6490 (__v8df) __B,
6491 (__mmask8) __U);
6492}
6493
6494static __inline __m512 __DEFAULT_FN_ATTRS
6495_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6496{
6497 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6498 /* idx */ ,
6499 (__v16sf) __A,
6500 (__v16sf) __B,
6501 (__mmask16) -1);
6502}
6503
6504static __inline__ __m512 __DEFAULT_FN_ATTRS
6505_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6506{
6507 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6508 /* idx */ ,
6509 (__v16sf) __A,
6510 (__v16sf) __B,
6511 (__mmask16) __U);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006512}
6513
6514static __inline__ __m512 __DEFAULT_FN_ATTRS
6515_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6516 __m512 __B)
6517{
6518 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006519 /* idx */ ,
6520 (__v16sf) __A,
6521 (__v16sf) __B,
6522 (__mmask16) __U);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006523}
6524
Michael Zuckerman07525092016-04-11 10:22:07 +00006525
/* Forwards A (as __v8df) and the int argument R to
 * __builtin_ia32_cvttpd2udq512_mask.  The unmasked form supplies
 * _mm256_undefined_si256() as the second operand and an all-ones __mmask8. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))

/* Same builtin, with W as the second operand and the caller's mask U. */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))

/* Same builtin, with _mm256_setzero_si256() as the second operand and the
 * caller's mask U. */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))
Michael Zuckerman138fc5b2016-05-03 11:05:24 +00006540
6541static __inline__ __m256i __DEFAULT_FN_ATTRS
6542_mm512_cvttpd_epu32 (__m512d __A)
6543{
6544 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6545 (__v8si)
6546 _mm256_undefined_si256 (),
6547 (__mmask8) -1,
6548 _MM_FROUND_CUR_DIRECTION);
6549}
6550
6551static __inline__ __m256i __DEFAULT_FN_ATTRS
6552_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6553{
6554 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6555 (__v8si) __W,
6556 (__mmask8) __U,
6557 _MM_FROUND_CUR_DIRECTION);
6558}
6559
6560static __inline__ __m256i __DEFAULT_FN_ATTRS
6561_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6562{
6563 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6564 (__v8si)
6565 _mm256_setzero_si256 (),
6566 (__mmask8) __U,
6567 _MM_FROUND_CUR_DIRECTION);
6568}
Michael Zuckerman07525092016-04-11 10:22:07 +00006569
/* Unmasked forms of __builtin_ia32_rndscalesd_round_mask: the third operand
 * is _mm_setzero_pd() and the mask is all ones.  imm is forwarded as the int
 * immediate; the _round variant also forwards R as the last argument. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

/* Same call with the last argument fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006583
/* Masked forms of __builtin_ia32_rndscalesd_round_mask: W is the third
 * operand and U the mask.  This variant fixes the last argument to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

/* Same call with the caller's R as the last argument. */
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006597
/* Zero-masked form: merge operand is _mm_setzero_pd(), U is the mask; uses
   _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(I), \
      _MM_FROUND_CUR_DIRECTION); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006604
/* Zero-masked form with explicit rounding: merge operand is _mm_setzero_pd(),
   U is the mask and R is forwarded as the final (rounding) argument. */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(I), \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006611
/* Unmasked single-precision form with explicit rounding: expands to
   __builtin_ia32_rndscaless_round_mask with a zero merge operand, an all-ones
   mask, the immediate imm and R as the final (rounding) argument. */
#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(imm), \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006618
/* Unmasked single-precision form: zero merge operand, all-ones mask and
   _MM_FROUND_CUR_DIRECTION. */
#define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(imm), \
      _MM_FROUND_CUR_DIRECTION); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006625
/* Masked single-precision form: W is the merge operand and U the mask; uses
   _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(I), \
      _MM_FROUND_CUR_DIRECTION); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006632
/* Masked single-precision form with explicit rounding: W is the merge
   operand, U the mask, I the immediate and R the final (rounding) argument. */
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(I), \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006639
/* Zero-masked single-precision form: merge operand is _mm_setzero_ps(), U is
   the mask; uses _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(I), \
      _MM_FROUND_CUR_DIRECTION); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006646
/* Zero-masked single-precision form with explicit rounding: merge operand is
   _mm_setzero_ps(), U is the mask and R the final (rounding) argument. */
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(I), \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006653
/* Unmasked form with explicit rounding: expands to
   __builtin_ia32_scalefpd512_mask with an undefined merge operand, an
   all-ones mask and R as the final (rounding) argument. */
#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006659
/* Masked form with explicit rounding: W is the merge operand, U the mask and
   R the final (rounding) argument of __builtin_ia32_scalefpd512_mask. */
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006665
/* Zero-masked form with explicit rounding: merge operand is
   _mm512_setzero_pd(), U is the mask and R the final (rounding) argument. */
#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006671
6672static __inline__ __m512d __DEFAULT_FN_ATTRS
6673_mm512_scalef_pd (__m512d __A, __m512d __B)
6674{
6675 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6676 (__v8df) __B,
6677 (__v8df)
6678 _mm512_undefined_pd (),
6679 (__mmask8) -1,
6680 _MM_FROUND_CUR_DIRECTION);
6681}
6682
6683static __inline__ __m512d __DEFAULT_FN_ATTRS
6684_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6685{
6686 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6687 (__v8df) __B,
6688 (__v8df) __W,
6689 (__mmask8) __U,
6690 _MM_FROUND_CUR_DIRECTION);
6691}
6692
6693static __inline__ __m512d __DEFAULT_FN_ATTRS
6694_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6695{
6696 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6697 (__v8df) __B,
6698 (__v8df)
6699 _mm512_setzero_pd (),
6700 (__mmask8) __U,
6701 _MM_FROUND_CUR_DIRECTION);
6702}
6703
/* Unmasked single-precision form with explicit rounding: expands to
   __builtin_ia32_scalefps512_mask with an undefined merge operand, an
   all-ones mask and R as the final (rounding) argument. */
#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006709
/* Masked single-precision form with explicit rounding: W is the merge
   operand, U the mask and R the final (rounding) argument. */
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006715
/* Zero-masked single-precision form with explicit rounding: merge operand is
   _mm512_setzero_ps(), U is the mask and R the final (rounding) argument. */
#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006721
6722static __inline__ __m512 __DEFAULT_FN_ATTRS
6723_mm512_scalef_ps (__m512 __A, __m512 __B)
6724{
6725 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6726 (__v16sf) __B,
6727 (__v16sf)
6728 _mm512_undefined_ps (),
6729 (__mmask16) -1,
6730 _MM_FROUND_CUR_DIRECTION);
6731}
6732
6733static __inline__ __m512 __DEFAULT_FN_ATTRS
6734_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6735{
6736 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6737 (__v16sf) __B,
6738 (__v16sf) __W,
6739 (__mmask16) __U,
6740 _MM_FROUND_CUR_DIRECTION);
6741}
6742
6743static __inline__ __m512 __DEFAULT_FN_ATTRS
6744_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6745{
6746 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6747 (__v16sf) __B,
6748 (__v16sf)
6749 _mm512_setzero_ps (),
6750 (__mmask16) __U,
6751 _MM_FROUND_CUR_DIRECTION);
6752}
6753
/* Unmasked scalar double form with explicit rounding: expands to
   __builtin_ia32_scalefsd_round_mask with a zero merge operand, an all-ones
   mask and R as the final (rounding) argument. */
#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006759
6760static __inline__ __m128d __DEFAULT_FN_ATTRS
6761_mm_scalef_sd (__m128d __A, __m128d __B)
6762{
6763 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6764 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6765 (__mmask8) -1,
6766 _MM_FROUND_CUR_DIRECTION);
6767}
6768
6769static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006770_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006771{
6772 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6773 (__v2df) __B,
6774 (__v2df) __W,
6775 (__mmask8) __U,
6776 _MM_FROUND_CUR_DIRECTION);
6777}
6778
/* Masked scalar double form with explicit rounding: W is the merge operand,
   U the mask and R the final (rounding) argument. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006784
6785static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006786_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006787{
6788 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6789 (__v2df) __B,
6790 (__v2df) _mm_setzero_pd (),
6791 (__mmask8) __U,
6792 _MM_FROUND_CUR_DIRECTION);
6793}
6794
/* Zero-masked scalar double form with explicit rounding: merge operand is
   _mm_setzero_pd(), U is the mask and R the final (rounding) argument. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006800
/* Unmasked scalar single form with explicit rounding: expands to
   __builtin_ia32_scalefss_round_mask with a zero merge operand, an all-ones
   mask and R as the final (rounding) argument. */
#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006806
6807static __inline__ __m128 __DEFAULT_FN_ATTRS
6808_mm_scalef_ss (__m128 __A, __m128 __B)
6809{
6810 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6811 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6812 (__mmask8) -1,
6813 _MM_FROUND_CUR_DIRECTION);
6814}
6815
6816static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006817_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006818{
6819 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6820 (__v4sf) __B,
6821 (__v4sf) __W,
6822 (__mmask8) __U,
6823 _MM_FROUND_CUR_DIRECTION);
6824}
6825
/* Masked scalar single form with explicit rounding: W is the merge operand,
   U the mask and R the final (rounding) argument. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006831
6832static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006833_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006834{
6835 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6836 (__v4sf) __B,
6837 (__v4sf) _mm_setzero_ps (),
6838 (__mmask8) __U,
6839 _MM_FROUND_CUR_DIRECTION);
6840}
6841
/* Zero-masked scalar single form with explicit rounding: merge operand is
   _mm_setzero_ps(), U is the mask and R is forwarded as the final (rounding)
   argument of __builtin_ia32_scalefss_round_mask.

   The R parameter must be passed through to the builtin; previously it was
   ignored in favour of _MM_FROUND_CUR_DIRECTION, unlike every other
   *_round_* macro in this family (e.g. _mm_maskz_scalef_round_sd). */
#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)); })
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006848
Craig Topper1a441932016-11-12 07:16:59 +00006849static __inline__ __m512i __DEFAULT_FN_ATTRS
6850_mm512_srai_epi32(__m512i __A, int __B)
6851{
6852 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6853}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006854
Craig Topper1a441932016-11-12 07:16:59 +00006855static __inline__ __m512i __DEFAULT_FN_ATTRS
6856_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
6857{
6858 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
6859 (__v16si)_mm512_srai_epi32(__A, __B), \
6860 (__v16si)__W);
6861}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006862
Craig Topper1a441932016-11-12 07:16:59 +00006863static __inline__ __m512i __DEFAULT_FN_ATTRS
6864_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
6865 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
6866 (__v16si)_mm512_srai_epi32(__A, __B), \
6867 (__v16si)_mm512_setzero_si512());
6868}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006869
Craig Topper1a441932016-11-12 07:16:59 +00006870static __inline__ __m512i __DEFAULT_FN_ATTRS
6871_mm512_srai_epi64(__m512i __A, int __B)
6872{
6873 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6874}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006875
Craig Topper1a441932016-11-12 07:16:59 +00006876static __inline__ __m512i __DEFAULT_FN_ATTRS
6877_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
6878{
6879 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
6880 (__v8di)_mm512_srai_epi64(__A, __B), \
6881 (__v8di)__W);
6882}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006883
Craig Topper1a441932016-11-12 07:16:59 +00006884static __inline__ __m512i __DEFAULT_FN_ATTRS
6885_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
6886{
6887 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
6888 (__v8di)_mm512_srai_epi64(__A, __B), \
6889 (__v8di)_mm512_setzero_si512());
6890}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006891
/* Shuffles groups of four floats. Result elements 0-3 and 4-7 are taken from
   A, selected by imm bits [1:0] and [3:2]; elements 8-11 and 12-15 are taken
   from B (shuffle indices 16 and up), selected by imm bits [5:4] and [7:6]. */
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      0 + ((((imm) >> 0) & 0x3) * 4), 1 + ((((imm) >> 0) & 0x3) * 4), \
      2 + ((((imm) >> 0) & 0x3) * 4), 3 + ((((imm) >> 0) & 0x3) * 4), \
      0 + ((((imm) >> 2) & 0x3) * 4), 1 + ((((imm) >> 2) & 0x3) * 4), \
      2 + ((((imm) >> 2) & 0x3) * 4), 3 + ((((imm) >> 2) & 0x3) * 4), \
      16 + ((((imm) >> 4) & 0x3) * 4), 17 + ((((imm) >> 4) & 0x3) * 4), \
      18 + ((((imm) >> 4) & 0x3) * 4), 19 + ((((imm) >> 4) & 0x3) * 4), \
      16 + ((((imm) >> 6) & 0x3) * 4), 17 + ((((imm) >> 6) & 0x3) * 4), \
      18 + ((((imm) >> 6) & 0x3) * 4), 19 + ((((imm) >> 6) & 0x3) * 4)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006911
/* Masked form: combines _mm512_shuffle_f32x4(A, B, imm) with W through
   __builtin_ia32_selectps_512 under mask U. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
      (__v16sf)(__m512)(W)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006916
/* Zero-masked form: combines _mm512_shuffle_f32x4(A, B, imm) with an all-zero
   vector through __builtin_ia32_selectps_512 under mask U. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006921
/* Shuffles pairs of doubles. Result elements 0-1 and 2-3 are taken from A,
   selected by imm bits [1:0] and [3:2]; elements 4-5 and 6-7 are taken from B
   (shuffle indices 8 and up), selected by imm bits [5:4] and [7:6]. */
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      0 + ((((imm) >> 0) & 0x3) * 2), 1 + ((((imm) >> 0) & 0x3) * 2), \
      0 + ((((imm) >> 2) & 0x3) * 2), 1 + ((((imm) >> 2) & 0x3) * 2), \
      8 + ((((imm) >> 4) & 0x3) * 2), 9 + ((((imm) >> 4) & 0x3) * 2), \
      8 + ((((imm) >> 6) & 0x3) * 2), 9 + ((((imm) >> 6) & 0x3) * 2)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006933
/* Masked form: combines _mm512_shuffle_f64x2(A, B, imm) with W through
   __builtin_ia32_selectpd_512 under mask U. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
      (__v8df)(__m512d)(W)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006938
/* Zero-masked form: combines _mm512_shuffle_f64x2(A, B, imm) with an all-zero
   vector through __builtin_ia32_selectpd_512 under mask U. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006943
/* Shuffles 128-bit groups of integers. The operands are viewed as 8 x 64-bit
   so each group is a pair of elements: result pairs 0 and 1 come from A
   (imm bits [1:0], [3:2]) and pairs 2 and 3 come from B (shuffle indices 8
   and up; imm bits [5:4], [7:6]). */
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), \
      0 + ((((imm) >> 0) & 0x3) * 2), 1 + ((((imm) >> 0) & 0x3) * 2), \
      0 + ((((imm) >> 2) & 0x3) * 2), 1 + ((((imm) >> 2) & 0x3) * 2), \
      8 + ((((imm) >> 4) & 0x3) * 2), 9 + ((((imm) >> 4) & 0x3) * 2), \
      8 + ((((imm) >> 6) & 0x3) * 2), 9 + ((((imm) >> 6) & 0x3) * 2)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006955
/* Masked form: combines _mm512_shuffle_i32x4(A, B, imm), viewed as 16 x
   32-bit, with W through __builtin_ia32_selectd_512 under mask U. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
      (__v16si)(__m512i)(W)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006960
/* Zero-masked form: combines _mm512_shuffle_i32x4(A, B, imm), viewed as 16 x
   32-bit, with an all-zero vector through __builtin_ia32_selectd_512 under
   mask U. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006965
/* Shuffles pairs of 64-bit integers. Result elements 0-1 and 2-3 are taken
   from A, selected by imm bits [1:0] and [3:2]; elements 4-5 and 6-7 are
   taken from B (shuffle indices 8 and up), selected by imm bits [5:4] and
   [7:6]. */
#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), \
      0 + ((((imm) >> 0) & 0x3) * 2), 1 + ((((imm) >> 0) & 0x3) * 2), \
      0 + ((((imm) >> 2) & 0x3) * 2), 1 + ((((imm) >> 2) & 0x3) * 2), \
      8 + ((((imm) >> 4) & 0x3) * 2), 9 + ((((imm) >> 4) & 0x3) * 2), \
      8 + ((((imm) >> 6) & 0x3) * 2), 9 + ((((imm) >> 6) & 0x3) * 2)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006977
/* Masked form: combines _mm512_shuffle_i64x2(A, B, imm) with W through
   __builtin_ia32_selectq_512 under mask U. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
      (__v8di)(__m512i)(W)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006982
/* Zero-masked form: combines _mm512_shuffle_i64x2(A, B, imm) with an all-zero
   vector through __builtin_ia32_selectq_512 under mask U. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006987
/* Shuffles doubles within each pair of elements. Even result elements come
   from A and odd result elements from B (shuffle indices 8 and up); bit k of
   M picks the low or high element of the pair for result element k. */
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      0 + (((M) >> 0) & 0x1), 8 + (((M) >> 1) & 0x1), \
      2 + (((M) >> 2) & 0x1), 10 + (((M) >> 3) & 0x1), \
      4 + (((M) >> 4) & 0x1), 12 + (((M) >> 5) & 0x1), \
      6 + (((M) >> 6) & 0x1), 14 + (((M) >> 7) & 0x1)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006999
/* Masked form: combines _mm512_shuffle_pd(A, B, M) with W through
   __builtin_ia32_selectpd_512 under mask U. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)(__m512d)(W)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007004
/* Zero-masked form: combines _mm512_shuffle_pd(A, B, M) with an all-zero
   vector through __builtin_ia32_selectpd_512 under mask U. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)_mm512_setzero_pd()); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007009
/* Shuffles floats within each group of four elements. For each group (base
   offsets 0, 4, 8, 12) the first two result elements come from A, selected by
   M bits [1:0] and [3:2], and the last two come from B (shuffle indices 16
   and up), selected by M bits [5:4] and [7:6].

   The result is a vector of 16 floats, so it is cast to __m512; the previous
   (__m512d) cast gave the expression the wrong vector type. */
#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0  + (((M) >> 0) & 0x3), \
                                  0  + (((M) >> 2) & 0x3), \
                                  16 + (((M) >> 4) & 0x3), \
                                  16 + (((M) >> 6) & 0x3), \
                                  4  + (((M) >> 0) & 0x3), \
                                  4  + (((M) >> 2) & 0x3), \
                                  20 + (((M) >> 4) & 0x3), \
                                  20 + (((M) >> 6) & 0x3), \
                                  8  + (((M) >> 0) & 0x3), \
                                  8  + (((M) >> 2) & 0x3), \
                                  24 + (((M) >> 4) & 0x3), \
                                  24 + (((M) >> 6) & 0x3), \
                                  12 + (((M) >> 0) & 0x3), \
                                  12 + (((M) >> 2) & 0x3), \
                                  28 + (((M) >> 4) & 0x3), \
                                  28 + (((M) >> 6) & 0x3)); })
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007029
/* Masked form: combines _mm512_shuffle_ps(A, B, M) with W through
   __builtin_ia32_selectps_512 under mask U. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)(__m512)(W)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007034
/* Zero-masked form: combines _mm512_shuffle_ps(A, B, M) with an all-zero
   vector through __builtin_ia32_selectps_512 under mask U. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)_mm512_setzero_ps()); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007039
/* Unmasked scalar double form with explicit rounding: expands to
   __builtin_ia32_sqrtsd_round_mask with a zero merge operand, an all-ones
   mask and R as the final (rounding) argument. */
#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007045
7046static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007047_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007048{
Asaf Badouhf9cdb8d2016-07-05 11:36:21 +00007049 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7050 (__v2df) __B,
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007051 (__v2df) __W,
7052 (__mmask8) __U,
7053 _MM_FROUND_CUR_DIRECTION);
7054}
7055
/* Masked scalar double form with explicit rounding: W is the merge operand,
   U the mask and R the final (rounding) argument. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007061
7062static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007063_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007064{
Asaf Badouhf9cdb8d2016-07-05 11:36:21 +00007065 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7066 (__v2df) __B,
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007067 (__v2df) _mm_setzero_pd (),
7068 (__mmask8) __U,
7069 _MM_FROUND_CUR_DIRECTION);
7070}
7071
/* Zero-masked scalar double form with explicit rounding: merge operand is
   _mm_setzero_pd(), U is the mask and R the final (rounding) argument. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007077
/* Unmasked scalar single form with explicit rounding: expands to
   __builtin_ia32_sqrtss_round_mask with a zero merge operand, an all-ones
   mask and R as the final (rounding) argument. */
#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007083
7084static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007085_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007086{
Asaf Badouhf9cdb8d2016-07-05 11:36:21 +00007087 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7088 (__v4sf) __B,
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007089 (__v4sf) __W,
7090 (__mmask8) __U,
7091 _MM_FROUND_CUR_DIRECTION);
7092}
7093
/* Masked scalar single form with explicit rounding: W is the merge operand,
   U the mask and R the final (rounding) argument. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007099
7100static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007101_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007102{
7103 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7104 (__v4sf) __B,
7105 (__v4sf) _mm_setzero_ps (),
7106 (__mmask8) __U,
7107 _MM_FROUND_CUR_DIRECTION);
7108}
7109
/* Zero-masked scalar single form with explicit rounding: merge operand is
   _mm_setzero_ps(), U is the mask and R the final (rounding) argument. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); \
})
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00007115
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007116static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007117_mm512_broadcast_f32x4(__m128 __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007118{
Craig Topper367c86d2017-01-18 02:17:10 +00007119 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
7120 0, 1, 2, 3, 0, 1, 2, 3,
7121 0, 1, 2, 3, 0, 1, 2, 3);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007122}
7123
7124static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007125_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007126{
Craig Topper367c86d2017-01-18 02:17:10 +00007127 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7128 (__v16sf)_mm512_broadcast_f32x4(__A),
7129 (__v16sf)__O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007130}
7131
7132static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007133_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007134{
Craig Topper367c86d2017-01-18 02:17:10 +00007135 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7136 (__v16sf)_mm512_broadcast_f32x4(__A),
7137 (__v16sf)_mm512_setzero_ps());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007138}
7139
7140static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007141_mm512_broadcast_f64x4(__m256d __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007142{
Craig Topper367c86d2017-01-18 02:17:10 +00007143 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
7144 0, 1, 2, 3, 0, 1, 2, 3);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007145}
7146
7147static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007148_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007149{
Craig Topper367c86d2017-01-18 02:17:10 +00007150 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7151 (__v8df)_mm512_broadcast_f64x4(__A),
7152 (__v8df)__O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007153}
7154
7155static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007156_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007157{
Craig Topper367c86d2017-01-18 02:17:10 +00007158 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7159 (__v8df)_mm512_broadcast_f64x4(__A),
7160 (__v8df)_mm512_setzero_pd());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007161}
7162
7163static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007164_mm512_broadcast_i32x4(__m128i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007165{
Craig Topper367c86d2017-01-18 02:17:10 +00007166 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
7167 0, 1, 2, 3, 0, 1, 2, 3,
7168 0, 1, 2, 3, 0, 1, 2, 3);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007169}
7170
7171static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007172_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007173{
Craig Topper367c86d2017-01-18 02:17:10 +00007174 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7175 (__v16si)_mm512_broadcast_i32x4(__A),
7176 (__v16si)__O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007177}
7178
7179static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007180_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007181{
Craig Topper367c86d2017-01-18 02:17:10 +00007182 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7183 (__v16si)_mm512_broadcast_i32x4(__A),
7184 (__v16si)_mm512_setzero_si512());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007185}
7186
7187static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007188_mm512_broadcast_i64x4(__m256i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007189{
Craig Topper367c86d2017-01-18 02:17:10 +00007190 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
7191 0, 1, 2, 3, 0, 1, 2, 3);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007192}
7193
7194static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007195_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007196{
Craig Topper367c86d2017-01-18 02:17:10 +00007197 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7198 (__v8di)_mm512_broadcast_i64x4(__A),
7199 (__v8di)__O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007200}
7201
7202static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007203_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007204{
Craig Topper367c86d2017-01-18 02:17:10 +00007205 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7206 (__v8di)_mm512_broadcast_i64x4(__A),
7207 (__v8di)_mm512_setzero_si512());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007208}
7209
7210static __inline__ __m512d __DEFAULT_FN_ATTRS
7211_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
7212{
Simon Pilgrimf5a88372016-07-05 12:59:33 +00007213 return (__m512d)__builtin_ia32_selectpd_512(__M,
7214 (__v8df) _mm512_broadcastsd_pd(__A),
7215 (__v8df) __O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007216}
7217
7218static __inline__ __m512d __DEFAULT_FN_ATTRS
7219_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7220{
Simon Pilgrimf5a88372016-07-05 12:59:33 +00007221 return (__m512d)__builtin_ia32_selectpd_512(__M,
7222 (__v8df) _mm512_broadcastsd_pd(__A),
7223 (__v8df) _mm512_setzero_pd());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007224}
7225
7226static __inline__ __m512 __DEFAULT_FN_ATTRS
7227_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
7228{
Simon Pilgrimf5a88372016-07-05 12:59:33 +00007229 return (__m512)__builtin_ia32_selectps_512(__M,
7230 (__v16sf) _mm512_broadcastss_ps(__A),
7231 (__v16sf) __O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007232}
7233
7234static __inline__ __m512 __DEFAULT_FN_ATTRS
7235_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
7236{
Simon Pilgrimf5a88372016-07-05 12:59:33 +00007237 return (__m512)__builtin_ia32_selectps_512(__M,
7238 (__v16sf) _mm512_broadcastss_ps(__A),
7239 (__v16sf) _mm512_setzero_ps());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007240}
7241
Michael Zuckermane1680612016-04-13 15:02:04 +00007242static __inline__ __m128i __DEFAULT_FN_ATTRS
7243_mm512_cvtsepi32_epi8 (__m512i __A)
7244{
7245 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7246 (__v16qi) _mm_undefined_si128 (),
7247 (__mmask16) -1);
7248}
7249
7250static __inline__ __m128i __DEFAULT_FN_ATTRS
7251_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7252{
7253 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7254 (__v16qi) __O, __M);
7255}
7256
7257static __inline__ __m128i __DEFAULT_FN_ATTRS
7258_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
7259{
7260 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7261 (__v16qi) _mm_setzero_si128 (),
7262 __M);
7263}
7264
7265static __inline__ void __DEFAULT_FN_ATTRS
7266_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7267{
7268 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7269}
7270
7271static __inline__ __m256i __DEFAULT_FN_ATTRS
7272_mm512_cvtsepi32_epi16 (__m512i __A)
7273{
7274 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7275 (__v16hi) _mm256_undefined_si256 (),
7276 (__mmask16) -1);
7277}
7278
7279static __inline__ __m256i __DEFAULT_FN_ATTRS
7280_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7281{
7282 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7283 (__v16hi) __O, __M);
7284}
7285
7286static __inline__ __m256i __DEFAULT_FN_ATTRS
7287_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
7288{
7289 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7290 (__v16hi) _mm256_setzero_si256 (),
7291 __M);
7292}
7293
7294static __inline__ void __DEFAULT_FN_ATTRS
7295_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7296{
7297 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7298}
7299
7300static __inline__ __m128i __DEFAULT_FN_ATTRS
7301_mm512_cvtsepi64_epi8 (__m512i __A)
7302{
7303 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7304 (__v16qi) _mm_undefined_si128 (),
7305 (__mmask8) -1);
7306}
7307
7308static __inline__ __m128i __DEFAULT_FN_ATTRS
7309_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7310{
7311 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7312 (__v16qi) __O, __M);
7313}
7314
7315static __inline__ __m128i __DEFAULT_FN_ATTRS
7316_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7317{
7318 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7319 (__v16qi) _mm_setzero_si128 (),
7320 __M);
7321}
7322
7323static __inline__ void __DEFAULT_FN_ATTRS
7324_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7325{
7326 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7327}
7328
7329static __inline__ __m256i __DEFAULT_FN_ATTRS
7330_mm512_cvtsepi64_epi32 (__m512i __A)
7331{
Michael Zuckermane1680612016-04-13 15:02:04 +00007332 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7333 (__v8si) _mm256_undefined_si256 (),
7334 (__mmask8) -1);
7335}
7336
7337static __inline__ __m256i __DEFAULT_FN_ATTRS
7338_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7339{
7340 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7341 (__v8si) __O, __M);
7342}
7343
7344static __inline__ __m256i __DEFAULT_FN_ATTRS
7345_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7346{
7347 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7348 (__v8si) _mm256_setzero_si256 (),
7349 __M);
7350}
7351
7352static __inline__ void __DEFAULT_FN_ATTRS
7353_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7354{
7355 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7356}
7357
7358static __inline__ __m128i __DEFAULT_FN_ATTRS
7359_mm512_cvtsepi64_epi16 (__m512i __A)
7360{
7361 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7362 (__v8hi) _mm_undefined_si128 (),
7363 (__mmask8) -1);
7364}
7365
7366static __inline__ __m128i __DEFAULT_FN_ATTRS
7367_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7368{
7369 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7370 (__v8hi) __O, __M);
7371}
7372
7373static __inline__ __m128i __DEFAULT_FN_ATTRS
7374_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7375{
7376 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7377 (__v8hi) _mm_setzero_si128 (),
7378 __M);
7379}
7380
7381static __inline__ void __DEFAULT_FN_ATTRS
7382_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7383{
7384 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7385}
7386
Michael Zuckermand8715312016-04-14 06:48:09 +00007387static __inline__ __m128i __DEFAULT_FN_ATTRS
7388_mm512_cvtusepi32_epi8 (__m512i __A)
7389{
7390 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7391 (__v16qi) _mm_undefined_si128 (),
7392 (__mmask16) -1);
7393}
7394
7395static __inline__ __m128i __DEFAULT_FN_ATTRS
7396_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7397{
7398 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7399 (__v16qi) __O,
7400 __M);
7401}
7402
7403static __inline__ __m128i __DEFAULT_FN_ATTRS
7404_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7405{
7406 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7407 (__v16qi) _mm_setzero_si128 (),
7408 __M);
7409}
7410
7411static __inline__ void __DEFAULT_FN_ATTRS
7412_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7413{
7414 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7415}
7416
7417static __inline__ __m256i __DEFAULT_FN_ATTRS
7418_mm512_cvtusepi32_epi16 (__m512i __A)
7419{
7420 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7421 (__v16hi) _mm256_undefined_si256 (),
7422 (__mmask16) -1);
7423}
7424
7425static __inline__ __m256i __DEFAULT_FN_ATTRS
7426_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7427{
7428 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7429 (__v16hi) __O,
7430 __M);
7431}
7432
7433static __inline__ __m256i __DEFAULT_FN_ATTRS
7434_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7435{
7436 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7437 (__v16hi) _mm256_setzero_si256 (),
7438 __M);
7439}
7440
7441static __inline__ void __DEFAULT_FN_ATTRS
7442_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7443{
7444 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7445}
7446
7447static __inline__ __m128i __DEFAULT_FN_ATTRS
7448_mm512_cvtusepi64_epi8 (__m512i __A)
7449{
7450 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7451 (__v16qi) _mm_undefined_si128 (),
7452 (__mmask8) -1);
7453}
7454
7455static __inline__ __m128i __DEFAULT_FN_ATTRS
7456_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7457{
7458 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7459 (__v16qi) __O,
7460 __M);
7461}
7462
7463static __inline__ __m128i __DEFAULT_FN_ATTRS
7464_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7465{
7466 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7467 (__v16qi) _mm_setzero_si128 (),
7468 __M);
7469}
7470
7471static __inline__ void __DEFAULT_FN_ATTRS
7472_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7473{
7474 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7475}
7476
7477static __inline__ __m256i __DEFAULT_FN_ATTRS
7478_mm512_cvtusepi64_epi32 (__m512i __A)
7479{
7480 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7481 (__v8si) _mm256_undefined_si256 (),
7482 (__mmask8) -1);
7483}
7484
7485static __inline__ __m256i __DEFAULT_FN_ATTRS
7486_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7487{
7488 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7489 (__v8si) __O, __M);
7490}
7491
7492static __inline__ __m256i __DEFAULT_FN_ATTRS
7493_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7494{
7495 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7496 (__v8si) _mm256_setzero_si256 (),
7497 __M);
7498}
7499
7500static __inline__ void __DEFAULT_FN_ATTRS
7501_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7502{
7503 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7504}
7505
7506static __inline__ __m128i __DEFAULT_FN_ATTRS
7507_mm512_cvtusepi64_epi16 (__m512i __A)
7508{
7509 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7510 (__v8hi) _mm_undefined_si128 (),
7511 (__mmask8) -1);
7512}
7513
7514static __inline__ __m128i __DEFAULT_FN_ATTRS
7515_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7516{
7517 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7518 (__v8hi) __O, __M);
7519}
7520
7521static __inline__ __m128i __DEFAULT_FN_ATTRS
7522_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7523{
7524 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7525 (__v8hi) _mm_setzero_si128 (),
7526 __M);
7527}
7528
7529static __inline__ void __DEFAULT_FN_ATTRS
7530_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7531{
7532 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7533}
7534
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007535static __inline__ __m128i __DEFAULT_FN_ATTRS
7536_mm512_cvtepi32_epi8 (__m512i __A)
7537{
Craig Topper25de41c2018-05-14 17:50:40 +00007538 return (__m128i)__builtin_convertvector((__v16si)__A, __v16qi);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007539}
7540
7541static __inline__ __m128i __DEFAULT_FN_ATTRS
7542_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7543{
Craig Topper9d146bb2018-05-15 03:17:52 +00007544 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7545 (__v16qi) __O, __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007546}
7547
7548static __inline__ __m128i __DEFAULT_FN_ATTRS
7549_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7550{
Craig Topper9d146bb2018-05-15 03:17:52 +00007551 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7552 (__v16qi) _mm_setzero_si128 (),
7553 __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007554}
7555
7556static __inline__ void __DEFAULT_FN_ATTRS
7557_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7558{
7559 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7560}
7561
7562static __inline__ __m256i __DEFAULT_FN_ATTRS
7563_mm512_cvtepi32_epi16 (__m512i __A)
7564{
Craig Topper25de41c2018-05-14 17:50:40 +00007565 return (__m256i)__builtin_convertvector((__v16si)__A, __v16hi);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007566}
7567
7568static __inline__ __m256i __DEFAULT_FN_ATTRS
7569_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7570{
Craig Topper9d146bb2018-05-15 03:17:52 +00007571 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7572 (__v16hi) __O, __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007573}
7574
7575static __inline__ __m256i __DEFAULT_FN_ATTRS
7576_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7577{
Craig Topper9d146bb2018-05-15 03:17:52 +00007578 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7579 (__v16hi) _mm256_setzero_si256 (),
7580 __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007581}
7582
7583static __inline__ void __DEFAULT_FN_ATTRS
7584_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7585{
7586 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7587}
7588
7589static __inline__ __m128i __DEFAULT_FN_ATTRS
7590_mm512_cvtepi64_epi8 (__m512i __A)
7591{
7592 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7593 (__v16qi) _mm_undefined_si128 (),
7594 (__mmask8) -1);
7595}
7596
7597static __inline__ __m128i __DEFAULT_FN_ATTRS
7598_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7599{
7600 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7601 (__v16qi) __O, __M);
7602}
7603
7604static __inline__ __m128i __DEFAULT_FN_ATTRS
7605_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7606{
7607 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7608 (__v16qi) _mm_setzero_si128 (),
7609 __M);
7610}
7611
7612static __inline__ void __DEFAULT_FN_ATTRS
7613_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7614{
7615 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7616}
7617
7618static __inline__ __m256i __DEFAULT_FN_ATTRS
7619_mm512_cvtepi64_epi32 (__m512i __A)
7620{
Craig Topper25de41c2018-05-14 17:50:40 +00007621 return (__m256i)__builtin_convertvector((__v8di) __A, __v8si);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007622}
7623
7624static __inline__ __m256i __DEFAULT_FN_ATTRS
7625_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7626{
Craig Topper25de41c2018-05-14 17:50:40 +00007627 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
7628 (__v8si)_mm512_cvtepi64_epi32(__A),
7629 (__v8si)__O);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007630}
7631
7632static __inline__ __m256i __DEFAULT_FN_ATTRS
7633_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7634{
Craig Topper25de41c2018-05-14 17:50:40 +00007635 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
7636 (__v8si)_mm512_cvtepi64_epi32(__A),
7637 (__v8si)_mm256_setzero_si256());
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007638}
7639
7640static __inline__ void __DEFAULT_FN_ATTRS
7641_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7642{
7643 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7644}
7645
7646static __inline__ __m128i __DEFAULT_FN_ATTRS
7647_mm512_cvtepi64_epi16 (__m512i __A)
7648{
Craig Topper25de41c2018-05-14 17:50:40 +00007649 return (__m128i)__builtin_convertvector((__v8di)__A, __v8hi);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007650}
7651
7652static __inline__ __m128i __DEFAULT_FN_ATTRS
7653_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7654{
Craig Topper9d146bb2018-05-15 03:17:52 +00007655 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7656 (__v8hi) __O, __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007657}
7658
7659static __inline__ __m128i __DEFAULT_FN_ATTRS
7660_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7661{
Craig Topper9d146bb2018-05-15 03:17:52 +00007662 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7663 (__v8hi) _mm_setzero_si128 (),
7664 __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007665}
7666
7667static __inline__ void __DEFAULT_FN_ATTRS
7668_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7669{
7670 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7671}
7672
/* Yields four consecutive 32-bit elements of A as a 128-bit vector: elements
   4*(imm & 3) through 4*(imm & 3) + 3. Only the low two bits of imm are used. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                    (__v16si)_mm512_undefined_epi32(), \
                                    0 + ((imm) & 0x3) * 4, \
                                    1 + ((imm) & 0x3) * 4, \
                                    2 + ((imm) & 0x3) * 4, \
                                    3 + ((imm) & 0x3) * 4))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007680
/* Combines _mm512_extracti32x4_epi32(A, imm) with W under mask U via
   __builtin_ia32_selectd_128. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
      (__v4si)(W)))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007685
/* Combines _mm512_extracti32x4_epi32(A, imm) with an all-zero vector under
   mask U via __builtin_ia32_selectd_128. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
      (__v4si)_mm_setzero_si128()))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007690
/* Yields four 64-bit elements of A as a 256-bit vector: elements 4..7 when
   bit 0 of imm is set, elements 0..3 otherwise. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                    (__v8di)_mm512_undefined_epi32(), \
                                    ((imm) & 1) ? 4 : 0, \
                                    ((imm) & 1) ? 5 : 1, \
                                    ((imm) & 1) ? 6 : 2, \
                                    ((imm) & 1) ? 7 : 3))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007698
/* Combines _mm512_extracti64x4_epi64(A, imm) with W under mask U via
   __builtin_ia32_selectq_256. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
      (__v4di)(W)))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007703
/* Combines _mm512_extracti64x4_epi64(A, imm) with an all-zero vector under
   mask U via __builtin_ia32_selectq_256. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
      (__v4di)_mm256_setzero_si256()))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007708
/* Returns A with one 256-bit half replaced by B: the upper half (elements
   4..7) when bit 0 of imm is set, the lower half (elements 0..3) otherwise.
   Shuffle indices 8..11 name the four elements of B. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
      ((imm) & 0x1) ?  0 :  8, \
      ((imm) & 0x1) ?  1 :  9, \
      ((imm) & 0x1) ?  2 : 10, \
      ((imm) & 0x1) ?  3 : 11, \
      ((imm) & 0x1) ?  8 :  4, \
      ((imm) & 0x1) ?  9 :  5, \
      ((imm) & 0x1) ? 10 :  6, \
      ((imm) & 0x1) ? 11 :  7))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007720
/* Combines _mm512_insertf64x4(A, B, imm) with W under mask U via
   __builtin_ia32_selectpd_512. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)(W)))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007725
/* Combines _mm512_insertf64x4(A, B, imm) with an all-zero vector under mask U
   via __builtin_ia32_selectpd_512. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007730
/* Returns A with one 256-bit half replaced by B: the upper half (elements
   4..7) when bit 0 of imm is set, the lower half (elements 0..3) otherwise.
   Shuffle indices 8..11 name the four elements of B. */
#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_shufflevector( \
      (__v8di)(__m512i)(A), \
      (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
      ((imm) & 0x1) ?  0 :  8, \
      ((imm) & 0x1) ?  1 :  9, \
      ((imm) & 0x1) ?  2 : 10, \
      ((imm) & 0x1) ?  3 : 11, \
      ((imm) & 0x1) ?  8 :  4, \
      ((imm) & 0x1) ?  9 :  5, \
      ((imm) & 0x1) ? 10 :  6, \
      ((imm) & 0x1) ? 11 :  7))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007742
/* Combines _mm512_inserti64x4(A, B, imm) with W under mask U via
   __builtin_ia32_selectq_512. */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)(W)))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007747
/* Combines _mm512_inserti64x4(A, B, imm) with an all-zero vector under mask U
   via __builtin_ia32_selectq_512. */
#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007752
/* Returns A with the 128-bit group of four floats numbered (imm & 3)
   replaced by B. Shuffle indices 16..19 name the four elements of B; every
   other group keeps A's own elements. */
#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_shufflevector( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_castps128_ps512((__m128)(B)), \
      (((imm) & 0x3) == 0) ? 16 :  0, \
      (((imm) & 0x3) == 0) ? 17 :  1, \
      (((imm) & 0x3) == 0) ? 18 :  2, \
      (((imm) & 0x3) == 0) ? 19 :  3, \
      (((imm) & 0x3) == 1) ? 16 :  4, \
      (((imm) & 0x3) == 1) ? 17 :  5, \
      (((imm) & 0x3) == 1) ? 18 :  6, \
      (((imm) & 0x3) == 1) ? 19 :  7, \
      (((imm) & 0x3) == 2) ? 16 :  8, \
      (((imm) & 0x3) == 2) ? 17 :  9, \
      (((imm) & 0x3) == 2) ? 18 : 10, \
      (((imm) & 0x3) == 2) ? 19 : 11, \
      (((imm) & 0x3) == 3) ? 16 : 12, \
      (((imm) & 0x3) == 3) ? 17 : 13, \
      (((imm) & 0x3) == 3) ? 18 : 14, \
      (((imm) & 0x3) == 3) ? 19 : 15))
Craig Topperdca1f232016-05-15 21:26:20 +00007772
/* Combines _mm512_insertf32x4(A, B, imm) with W under mask U via
   __builtin_ia32_selectps_512. */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)(W)))
Craig Topperdca1f232016-05-15 21:26:20 +00007777
/* Combines _mm512_insertf32x4(A, B, imm) with an all-zero vector under mask U
   via __builtin_ia32_selectps_512. */
#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()))
Craig Topperdca1f232016-05-15 21:26:20 +00007782
/* Returns A with the 128-bit group of four 32-bit integers numbered
   (imm & 3) replaced by B. Shuffle indices 16..19 name the four elements of
   B; every other group keeps A's own elements. */
#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_shufflevector( \
      (__v16si)(__m512i)(A), \
      (__v16si)_mm512_castsi128_si512((__m128i)(B)), \
      (((imm) & 0x3) == 0) ? 16 :  0, \
      (((imm) & 0x3) == 0) ? 17 :  1, \
      (((imm) & 0x3) == 0) ? 18 :  2, \
      (((imm) & 0x3) == 0) ? 19 :  3, \
      (((imm) & 0x3) == 1) ? 16 :  4, \
      (((imm) & 0x3) == 1) ? 17 :  5, \
      (((imm) & 0x3) == 1) ? 18 :  6, \
      (((imm) & 0x3) == 1) ? 19 :  7, \
      (((imm) & 0x3) == 2) ? 16 :  8, \
      (((imm) & 0x3) == 2) ? 17 :  9, \
      (((imm) & 0x3) == 2) ? 18 : 10, \
      (((imm) & 0x3) == 2) ? 19 : 11, \
      (((imm) & 0x3) == 3) ? 16 : 12, \
      (((imm) & 0x3) == 3) ? 17 : 13, \
      (((imm) & 0x3) == 3) ? 18 : 14, \
      (((imm) & 0x3) == 3) ? 19 : 15))
Craig Topperdca1f232016-05-15 21:26:20 +00007802
/* Combines _mm512_inserti32x4(A, B, imm) with W under mask U via
   __builtin_ia32_selectd_512. */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)(W)))
Craig Topperdca1f232016-05-15 21:26:20 +00007807
/* Combines _mm512_inserti32x4(A, B, imm) with an all-zero vector under mask U
   via __builtin_ia32_selectd_512. */
#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
Craig Topperdca1f232016-05-15 21:26:20 +00007812
/* Calls __builtin_ia32_getmantpd512_mask on A with control word (C << 2) | B,
   an undefined pass-through operand, every mask bit set, and R as the final
   argument. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007818
/* Calls __builtin_ia32_getmantpd512_mask on A with control word (C << 2) | B,
   W as the pass-through operand, U as the mask, and R as the final
   argument. */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007824
/* Calls __builtin_ia32_getmantpd512_mask on A with control word (C << 2) | B,
   an all-zero pass-through operand, U as the mask, and R as the final
   argument. */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007830
/* Calls __builtin_ia32_getmantpd512_mask on A with control word (C << 2) | B
   and _MM_FROUND_CUR_DIRECTION as the final argument. Every mask bit is set,
   so the pass-through operand is given as an undefined vector, matching
   _mm512_getmant_round_pd and _mm512_getmant_ps, rather than materialising
   a zero vector. */
#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007837
/* Calls __builtin_ia32_getmantpd512_mask on A with control word (C << 2) | B,
   W as the pass-through operand, U as the mask, and
   _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007844
/* Calls __builtin_ia32_getmantpd512_mask on A with control word (C << 2) | B,
   an all-zero pass-through operand, U as the mask, and
   _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007851
/* Calls __builtin_ia32_getmantps512_mask on A with control word (C << 2) | B,
   an undefined pass-through operand, every mask bit set, and R as the final
   argument. */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007857
/* Calls __builtin_ia32_getmantps512_mask on A with control word (C << 2) | B,
   W as the pass-through operand, U as the mask, and R as the final
   argument. */
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007863
/* Calls __builtin_ia32_getmantps512_mask on A with control word (C << 2) | B,
   an all-zero pass-through operand, U as the mask, and R as the final
   argument. */
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007869
/* Calls __builtin_ia32_getmantps512_mask on A with control word (C << 2) | B,
   an undefined pass-through operand, every mask bit set, and
   _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C)<<2)|(B)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007876
/* Calls __builtin_ia32_getmantps512_mask on A with control word (C << 2) | B,
   W as the pass-through operand, U as the mask, and
   _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C)<<2)|(B)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007883
/* Calls __builtin_ia32_getmantps512_mask on A with control word (C << 2) | B,
   an all-zero pass-through operand, U as the mask, and
   _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C)<<2)|(B)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007890
/* Calls __builtin_ia32_getexppd512_mask on A with an undefined pass-through
   operand, every mask bit set, and R as the final argument. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007895
/* Calls __builtin_ia32_getexppd512_mask on A with W as the pass-through
   operand, U as the mask, and R as the final argument. */
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007900
/* Calls __builtin_ia32_getexppd512_mask on A with an all-zero pass-through
   operand, U as the mask, and R as the final argument. */
#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007905
7906static __inline__ __m512d __DEFAULT_FN_ATTRS
7907_mm512_getexp_pd (__m512d __A)
7908{
7909 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7910 (__v8df) _mm512_undefined_pd (),
7911 (__mmask8) -1,
7912 _MM_FROUND_CUR_DIRECTION);
7913}
7914
7915static __inline__ __m512d __DEFAULT_FN_ATTRS
7916_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7917{
7918 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7919 (__v8df) __W,
7920 (__mmask8) __U,
7921 _MM_FROUND_CUR_DIRECTION);
7922}
7923
7924static __inline__ __m512d __DEFAULT_FN_ATTRS
7925_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
7926{
7927 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7928 (__v8df) _mm512_setzero_pd (),
7929 (__mmask8) __U,
7930 _MM_FROUND_CUR_DIRECTION);
7931}
7932
/* Calls __builtin_ia32_getexpps512_mask on A with an undefined pass-through
   operand, every mask bit set, and R as the final argument. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007937
/* Calls __builtin_ia32_getexpps512_mask on A with W as the pass-through
   operand, U as the mask, and R as the final argument. */
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007942
/* Calls __builtin_ia32_getexpps512_mask on A with an all-zero pass-through
   operand, U as the mask, and R as the final argument. */
#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007947
7948static __inline__ __m512 __DEFAULT_FN_ATTRS
7949_mm512_getexp_ps (__m512 __A)
7950{
7951 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7952 (__v16sf) _mm512_undefined_ps (),
7953 (__mmask16) -1,
7954 _MM_FROUND_CUR_DIRECTION);
7955}
7956
7957static __inline__ __m512 __DEFAULT_FN_ATTRS
7958_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7959{
7960 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7961 (__v16sf) __W,
7962 (__mmask16) __U,
7963 _MM_FROUND_CUR_DIRECTION);
7964}
7965
7966static __inline__ __m512 __DEFAULT_FN_ATTRS
7967_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
7968{
7969 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7970 (__v16sf) _mm512_setzero_ps (),
7971 (__mmask16) __U,
7972 _MM_FROUND_CUR_DIRECTION);
7973}
7974
/* Calls __builtin_ia32_gatherdiv16sf with an undefined pass-through vector,
   addr as a float pointer, index as eight 64-bit values, every mask bit set,
   and scale. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf( \
      (__v8sf)_mm256_undefined_ps(), \
      (float const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00007980
/* Calls __builtin_ia32_gatherdiv16sf with v1_old as the pass-through vector,
   addr as a float pointer, index as eight 64-bit values, mask, and scale. */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf( \
      (__v8sf)(__m256)(v1_old), \
      (float const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00007986
/* Unmasked gather of 32-bit integers with 64-bit indices: forwards to
 * __builtin_ia32_gatherdiv16si with an undefined first operand, addr as
 * int const *, index as __v8di, (__mmask8)-1 as the mask and scale as int.
 * Yields an __m256i.
 * NOTE(review): the integer first operand is produced by casting
 * _mm256_undefined_ps(); an integer-typed undefined helper may be a more
 * natural choice if one is available. */
#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gatherdiv16si( \
      (__v8si)_mm256_undefined_ps(), \
      (int const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00007992
/* Masked gather of 32-bit integers with 64-bit indices: forwards to
 * __builtin_ia32_gatherdiv16si with v1_old as the first operand, addr as
 * int const *, index as __v8di, mask as __mmask8 and scale as int.
 * Yields an __m256i. */
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gatherdiv16si( \
      (__v8si)(__m256i)(v1_old), \
      (int const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00007998
/* Unmasked gather of doubles with 64-bit indices: forwards to
 * __builtin_ia32_gatherdiv8df with an undefined __v8df first operand, addr
 * as double const *, index as __v8di, (__mmask8)-1 as the mask and scale as
 * int. Yields an __m512d. */
#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({ \
  (__m512d)__builtin_ia32_gatherdiv8df( \
      (__v8df)_mm512_undefined_pd(), \
      (double const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008004
/* Masked gather of doubles with 64-bit indices: forwards to
 * __builtin_ia32_gatherdiv8df with v1_old as the first operand, addr as
 * double const *, index as __v8di, mask as __mmask8 and scale as int.
 * Yields an __m512d. */
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512d)__builtin_ia32_gatherdiv8df( \
      (__v8df)(__m512d)(v1_old), \
      (double const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008010
/* Unmasked gather of 64-bit integers with 64-bit indices: forwards to
 * __builtin_ia32_gatherdiv8di with an undefined first operand, addr as
 * long long const *, index as __v8di, (__mmask8)-1 as the mask and scale as
 * int. Yields an __m512i.
 * The undefined operand comes from _mm512_undefined_epi32(), the same
 * integer-typed helper _mm512_i32gather_epi64 uses, rather than from a
 * floating-point vector cast to __v8di. */
#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gatherdiv8di( \
      (__v8di)_mm512_undefined_epi32(), \
      (long long const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008016
/* Masked gather of 64-bit integers with 64-bit indices: forwards to
 * __builtin_ia32_gatherdiv8di with v1_old as the first operand, addr as
 * long long const *, index as __v8di, mask as __mmask8 and scale as int.
 * Yields an __m512i. */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gatherdiv8di( \
      (__v8di)(__m512i)(v1_old), \
      (long long const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008022
/* Unmasked gather of floats with 32-bit indices: forwards to
 * __builtin_ia32_gathersiv16sf with an undefined __v16sf first operand, addr
 * as float const *, (__mmask16)-1 as the mask and scale as int.
 * Yields an __m512.
 * NOTE(review): index is cast through __m512/__v16sf here, whereas
 * _mm512_i32gather_epi32 and _mm512_i32scatter_ps cast it through
 * __m512i/__v16si - confirm this matches the builtin's declared index type. */
#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({ \
  (__m512)__builtin_ia32_gathersiv16sf( \
      (__v16sf)_mm512_undefined_ps(), \
      (float const *)(addr), \
      (__v16sf)(__m512)(index), \
      (__mmask16)-1, \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008028
/* Masked gather of floats with 32-bit indices: forwards to
 * __builtin_ia32_gathersiv16sf with v1_old as the first operand, addr as
 * float const *, mask as __mmask16 and scale as int. Yields an __m512.
 * NOTE(review): index is cast through __m512/__v16sf here, whereas
 * _mm512_mask_i32gather_epi32 and _mm512_mask_i32scatter_ps cast it through
 * __m512i/__v16si - confirm this matches the builtin's declared index type. */
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512)__builtin_ia32_gathersiv16sf( \
      (__v16sf)(__m512)(v1_old), \
      (float const *)(addr), \
      (__v16sf)(__m512)(index), \
      (__mmask16)(mask), \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008034
/* Unmasked gather of 32-bit integers with 32-bit indices: forwards to
 * __builtin_ia32_gathersiv16si with an undefined __v16si first operand, addr
 * as int const *, index as __v16si, (__mmask16)-1 as the mask and scale as
 * int. Yields an __m512i. */
#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gathersiv16si( \
      (__v16si)_mm512_undefined_epi32(), \
      (int const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)-1, \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008040
/* Masked gather of 32-bit integers with 32-bit indices: forwards to
 * __builtin_ia32_gathersiv16si with v1_old as the first operand, addr as
 * int const *, index as __v16si, mask as __mmask16 and scale as int.
 * Yields an __m512i. */
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gathersiv16si( \
      (__v16si)(__m512i)(v1_old), \
      (int const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)(mask), \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008046
/* Unmasked gather of doubles with 32-bit indices: forwards to
 * __builtin_ia32_gathersiv8df with an undefined __v8df first operand, addr
 * as double const *, index as __v8si (a 256-bit vector), (__mmask8)-1 as the
 * mask and scale as int. Yields an __m512d. */
#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({ \
  (__m512d)__builtin_ia32_gathersiv8df( \
      (__v8df)_mm512_undefined_pd(), \
      (double const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008052
/* Masked gather of doubles with 32-bit indices: forwards to
 * __builtin_ia32_gathersiv8df with v1_old as the first operand, addr as
 * double const *, index as __v8si, mask as __mmask8 and scale as int.
 * Yields an __m512d. */
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512d)__builtin_ia32_gathersiv8df( \
      (__v8df)(__m512d)(v1_old), \
      (double const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008058
/* Unmasked gather of 64-bit integers with 32-bit indices: forwards to
 * __builtin_ia32_gathersiv8di with an undefined first operand, addr as
 * long long const *, index as __v8si, (__mmask8)-1 as the mask and scale as
 * int. Yields an __m512i. */
#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gathersiv8di( \
      (__v8di)_mm512_undefined_epi32(), \
      (long long const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008064
/* Masked gather of 64-bit integers with 32-bit indices: forwards to
 * __builtin_ia32_gathersiv8di with v1_old as the first operand, addr as
 * long long const *, index as __v8si, mask as __mmask8 and scale as int.
 * Yields an __m512i. */
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gathersiv8di( \
      (__v8di)(__m512i)(v1_old), \
      (long long const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008070
/* Unmasked scatter of floats with 64-bit indices: forwards to
 * __builtin_ia32_scatterdiv16sf with addr as float *, (__mmask8)-1 as the
 * mask, index as __v8di, v1 as __v8sf and scale as int. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16sf( \
      (float *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008075
/* Masked scatter of floats with 64-bit indices: forwards to
 * __builtin_ia32_scatterdiv16sf with addr as float *, mask as __mmask8,
 * index as __v8di, v1 as __v8sf and scale as int. */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16sf( \
      (float *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008080
/* Unmasked scatter of 32-bit integers with 64-bit indices: forwards to
 * __builtin_ia32_scatterdiv16si with addr as int *, (__mmask8)-1 as the
 * mask, index as __v8di, v1 as __v8si and scale as int. */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16si( \
      (int *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008085
/* Masked scatter of 32-bit integers with 64-bit indices: forwards to
 * __builtin_ia32_scatterdiv16si with addr as int *, mask as __mmask8,
 * index as __v8di, v1 as __v8si and scale as int. */
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16si( \
      (int *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008090
/* Unmasked scatter of doubles with 64-bit indices: forwards to
 * __builtin_ia32_scatterdiv8df with addr as double *, (__mmask8)-1 as the
 * mask, index as __v8di, v1 as __v8df and scale as int. */
#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8df( \
      (double *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008095
/* Masked scatter of doubles with 64-bit indices: forwards to
 * __builtin_ia32_scatterdiv8df with addr as double *, mask as __mmask8,
 * index as __v8di, v1 as __v8df and scale as int. */
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8df( \
      (double *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008100
/* Unmasked scatter of 64-bit integers with 64-bit indices: forwards to
 * __builtin_ia32_scatterdiv8di with addr as long long *, (__mmask8)-1 as the
 * mask, index as __v8di, v1 as __v8di and scale as int. */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8di( \
      (long long *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008105
/* Masked scatter of 64-bit integers with 64-bit indices: forwards to
 * __builtin_ia32_scatterdiv8di with addr as long long *, mask as __mmask8,
 * index as __v8di, v1 as __v8di and scale as int. */
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8di( \
      (long long *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008110
/* Unmasked scatter of floats with 32-bit indices: forwards to
 * __builtin_ia32_scattersiv16sf with addr as float *, (__mmask16)-1 as the
 * mask, index as __v16si, v1 as __v16sf and scale as int. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv16sf( \
      (float *)(addr), \
      (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008115
/* Masked scatter of floats with 32-bit indices: forwards to
 * __builtin_ia32_scattersiv16sf with addr as float *, mask as __mmask16,
 * index as __v16si, v1 as __v16sf and scale as int. */
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv16sf( \
      (float *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008120
/* Unmasked scatter of 32-bit integers with 32-bit indices: forwards to
 * __builtin_ia32_scattersiv16si with addr as int *, (__mmask16)-1 as the
 * mask, index as __v16si, v1 as __v16si and scale as int. */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv16si( \
      (int *)(addr), \
      (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008125
/* Masked scatter of 32-bit integers with 32-bit indices: forwards to
 * __builtin_ia32_scattersiv16si with addr as int *, mask as __mmask16,
 * index as __v16si, v1 as __v16si and scale as int. */
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv16si( \
      (int *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008130
/* Unmasked scatter of doubles with 32-bit indices: forwards to
 * __builtin_ia32_scattersiv8df with addr as double *, (__mmask8)-1 as the
 * mask, index as __v8si, v1 as __v8df and scale as int. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8df( \
      (double *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008135
/* Masked scatter of doubles with 32-bit indices: forwards to
 * __builtin_ia32_scattersiv8df with addr as double *, mask as __mmask8,
 * index as __v8si, v1 as __v8df and scale as int. */
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8df( \
      (double *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008140
/* Unmasked scatter of 64-bit integers with 32-bit indices: forwards to
 * __builtin_ia32_scattersiv8di with addr as long long *, (__mmask8)-1 as the
 * mask, index as __v8si, v1 as __v8di and scale as int. */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8di( \
      (long long *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008145
/* Masked scatter of 64-bit integers with 32-bit indices: forwards to
 * __builtin_ia32_scattersiv8di with addr as long long *, mask as __mmask8,
 * index as __v8si, v1 as __v8di and scale as int. */
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8di( \
      (long long *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale)); })
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008150
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008151static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008152_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008153{
Craig Topperf2023652016-10-26 05:35:38 +00008154 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8155 (__v4sf) __A,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008156 (__v4sf) __B,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008157 (__mmask8) __U,
8158 _MM_FROUND_CUR_DIRECTION);
8159}
8160
/* Masked scalar float fused multiply-add with explicit rounding: passes W, A
 * and B unchanged to __builtin_ia32_vfmaddss3_mask with mask U and R as the
 * integer rounding argument. */
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008166
8167static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008168_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008169{
8170 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8171 (__v4sf) __B,
8172 (__v4sf) __C,
8173 (__mmask8) __U,
8174 _MM_FROUND_CUR_DIRECTION);
8175}
8176
/* Zero-masked scalar float fused multiply-add with explicit rounding: passes
 * A, B and C unchanged to __builtin_ia32_vfmaddss3_maskz with mask U.
 * R is forwarded as the rounding argument; substituting
 * _MM_FROUND_CUR_DIRECTION would silently discard the caller's rounding
 * choice and make this macro identical to _mm_maskz_fmadd_ss. */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(C), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008182
8183static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008184_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008185{
8186 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
8187 (__v4sf) __X,
8188 (__v4sf) __Y,
8189 (__mmask8) __U,
8190 _MM_FROUND_CUR_DIRECTION);
8191}
8192
/* mask3 scalar float fused multiply-add with explicit rounding: passes W, X
 * and Y unchanged to __builtin_ia32_vfmaddss3_mask3 with mask U and R as the
 * integer rounding argument. */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008198
8199static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008200_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008201{
Craig Topperf2023652016-10-26 05:35:38 +00008202 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8203 (__v4sf) __A,
Craig Topper9864c592016-05-15 20:10:03 +00008204 -(__v4sf) __B,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008205 (__mmask8) __U,
8206 _MM_FROUND_CUR_DIRECTION);
8207}
8208
/* Masked scalar float fused multiply-subtract with explicit rounding,
 * expressed as a multiply-add with the addend negated:
 * __builtin_ia32_vfmaddss3_mask receives W, A and -B, with mask U and R as
 * the integer rounding argument.
 * B must be negated here, exactly as _mm_mask_fmsub_ss and
 * _mm_mask_fmsub_round_sd do; passing B as-is would compute a multiply-add. */
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008214
8215static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008216_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008217{
8218 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8219 (__v4sf) __B,
Craig Topper9864c592016-05-15 20:10:03 +00008220 -(__v4sf) __C,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008221 (__mmask8) __U,
8222 _MM_FROUND_CUR_DIRECTION);
8223}
8224
/* Zero-masked scalar float fused multiply-subtract with explicit rounding:
 * __builtin_ia32_vfmaddss3_maskz receives A, B and -C, with mask U and R as
 * the integer rounding argument. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      -(__v4sf)(__m128)(C), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008230
8231static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008232_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008233{
Craig Topper2c8f49e2016-11-12 23:24:34 +00008234 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008235 (__v4sf) __X,
Craig Topper2c8f49e2016-11-12 23:24:34 +00008236 (__v4sf) __Y,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008237 (__mmask8) __U,
8238 _MM_FROUND_CUR_DIRECTION);
8239}
8240
/* mask3 scalar float fused multiply-subtract with explicit rounding: passes
 * W, X and Y unchanged to __builtin_ia32_vfmsubss3_mask3 with mask U and R
 * as the integer rounding argument. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmsubss3_mask3( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008246
8247static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008248_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008249{
Craig Topperf2023652016-10-26 05:35:38 +00008250 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8251 -(__v4sf) __A,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008252 (__v4sf) __B,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008253 (__mmask8) __U,
8254 _MM_FROUND_CUR_DIRECTION);
8255}
8256
/* Masked scalar float negated multiply-add with explicit rounding:
 * __builtin_ia32_vfmaddss3_mask receives W, -A and B, with mask U and R as
 * the integer rounding argument. */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), \
      -(__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008262
8263static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008264_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008265{
Craig Topper9864c592016-05-15 20:10:03 +00008266 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008267 (__v4sf) __B,
8268 (__v4sf) __C,
8269 (__mmask8) __U,
8270 _MM_FROUND_CUR_DIRECTION);
8271}
8272
/* Zero-masked scalar float negated multiply-add with explicit rounding:
 * __builtin_ia32_vfmaddss3_maskz receives A, -B and C, with mask U and R as
 * the integer rounding argument.
 * The negation is applied to B rather than A: the sign of the product is the
 * same either way, and A reaches the builtin unmodified.
 * NOTE(review): Intel documents the upper three result elements as copied
 * from a (the first operand); negating A would flip their sign. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(C), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008278
8279static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008280_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008281{
Craig Topper9864c592016-05-15 20:10:03 +00008282 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008283 (__v4sf) __X,
8284 (__v4sf) __Y,
8285 (__mmask8) __U,
8286 _MM_FROUND_CUR_DIRECTION);
8287}
8288
/* mask3 scalar float negated multiply-add with explicit rounding:
 * __builtin_ia32_vfmaddss3_mask3 receives -W, X and Y, with mask U and R as
 * the integer rounding argument. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      -(__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008294
8295static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008296_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008297{
Craig Topperf2023652016-10-26 05:35:38 +00008298 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8299 -(__v4sf) __A,
Craig Topper9864c592016-05-15 20:10:03 +00008300 -(__v4sf) __B,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008301 (__mmask8) __U,
8302 _MM_FROUND_CUR_DIRECTION);
8303}
8304
/* Masked scalar float negated multiply-subtract with explicit rounding:
 * __builtin_ia32_vfmaddss3_mask receives W, -A and -B, with mask U and R as
 * the integer rounding argument. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), \
      -(__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008310
8311static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008312_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008313{
Craig Topper9864c592016-05-15 20:10:03 +00008314 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008315 (__v4sf) __B,
Craig Topper9864c592016-05-15 20:10:03 +00008316 -(__v4sf) __C,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008317 (__mmask8) __U,
8318 _MM_FROUND_CUR_DIRECTION);
8319}
8320
/* Zero-masked scalar float negated multiply-subtract with explicit rounding:
 * __builtin_ia32_vfmaddss3_maskz receives A, -B and -C, with mask U.
 * R is forwarded as the rounding argument; substituting
 * _MM_FROUND_CUR_DIRECTION would silently discard the caller's rounding
 * choice.
 * The multiplicand negation is applied to B rather than A: the sign of the
 * product is the same either way, and A reaches the builtin unmodified.
 * NOTE(review): Intel documents the upper three result elements as copied
 * from a (the first operand); negating A would flip their sign. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      -(__v4sf)(__m128)(C), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008326
8327static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008328_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008329{
Craig Topper2c8f49e2016-11-12 23:24:34 +00008330 return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008331 (__v4sf) __X,
Craig Topper2c8f49e2016-11-12 23:24:34 +00008332 (__v4sf) __Y,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008333 (__mmask8) __U,
8334 _MM_FROUND_CUR_DIRECTION);
8335}
8336
/* mask3 scalar float negated multiply-subtract with explicit rounding:
 * passes W, X and Y unchanged to __builtin_ia32_vfnmsubss3_mask3 with mask U
 * and R as the integer rounding argument. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfnmsubss3_mask3( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008342
Craig Topper58187d32016-05-17 04:41:29 +00008343static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008344_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008345{
Craig Topperf2023652016-10-26 05:35:38 +00008346 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8347 (__v2df) __A,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008348 (__v2df) __B,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008349 (__mmask8) __U,
8350 _MM_FROUND_CUR_DIRECTION);
8351}
8352
/* Masked scalar double fused multiply-add with explicit rounding: passes W,
 * A and B unchanged to __builtin_ia32_vfmaddsd3_mask with mask U and R as
 * the integer rounding argument. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008358
Craig Topper58187d32016-05-17 04:41:29 +00008359static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008360_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008361{
Craig Topper58187d32016-05-17 04:41:29 +00008362 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008363 (__v2df) __B,
8364 (__v2df) __C,
8365 (__mmask8) __U,
8366 _MM_FROUND_CUR_DIRECTION);
8367}
8368
/* Zero-masked scalar double fused multiply-add with explicit rounding:
 * passes A, B and C unchanged to __builtin_ia32_vfmaddsd3_maskz with mask U.
 * R is forwarded as the rounding argument; substituting
 * _MM_FROUND_CUR_DIRECTION would silently discard the caller's rounding
 * choice and make this macro identical to _mm_maskz_fmadd_sd. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008374
Craig Topper58187d32016-05-17 04:41:29 +00008375static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008376_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008377{
Craig Topper58187d32016-05-17 04:41:29 +00008378 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008379 (__v2df) __X,
8380 (__v2df) __Y,
8381 (__mmask8) __U,
8382 _MM_FROUND_CUR_DIRECTION);
8383}
8384
/* mask3 scalar double fused multiply-add with explicit rounding: passes W, X
 * and Y unchanged to __builtin_ia32_vfmaddsd3_mask3 with mask U and R as the
 * integer rounding argument. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008390
Craig Topper58187d32016-05-17 04:41:29 +00008391static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008392_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008393{
Craig Topperf2023652016-10-26 05:35:38 +00008394 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8395 (__v2df) __A,
Craig Topper9864c592016-05-15 20:10:03 +00008396 -(__v2df) __B,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008397 (__mmask8) __U,
8398 _MM_FROUND_CUR_DIRECTION);
8399}
8400
/* Masked scalar double fused multiply-subtract with explicit rounding:
 * __builtin_ia32_vfmaddsd3_mask receives W, A and -B, with mask U and R as
 * the integer rounding argument. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008406
Craig Topper58187d32016-05-17 04:41:29 +00008407static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008408_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008409{
Craig Topper58187d32016-05-17 04:41:29 +00008410 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008411 (__v2df) __B,
Craig Topper9864c592016-05-15 20:10:03 +00008412 -(__v2df) __C,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008413 (__mmask8) __U,
8414 _MM_FROUND_CUR_DIRECTION);
8415}
8416
/* Zero-masked scalar double fused multiply-subtract with explicit rounding:
 * __builtin_ia32_vfmaddsd3_maskz receives A, B and -C, with mask U and R as
 * the integer rounding argument. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      -(__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008422
Craig Topper58187d32016-05-17 04:41:29 +00008423static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008424_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008425{
Craig Topper2c8f49e2016-11-12 23:24:34 +00008426 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008427 (__v2df) __X,
Craig Topper2c8f49e2016-11-12 23:24:34 +00008428 (__v2df) __Y,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008429 (__mmask8) __U,
8430 _MM_FROUND_CUR_DIRECTION);
8431}
8432
/* mask3 scalar double fused multiply-subtract with explicit rounding: passes
 * W, X and Y unchanged to __builtin_ia32_vfmsubsd3_mask3 with mask U and R
 * as the integer rounding argument. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmsubsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008438
Craig Topper58187d32016-05-17 04:41:29 +00008439static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008440_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008441{
Craig Topperf2023652016-10-26 05:35:38 +00008442 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8443 -(__v2df) __A,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008444 (__v2df) __B,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008445 (__mmask8) __U,
8446 _MM_FROUND_CUR_DIRECTION);
8447}
8448
/* Masked scalar double negated multiply-add with explicit rounding:
 * __builtin_ia32_vfmaddsd3_mask receives W, -A and B, with mask U and R as
 * the integer rounding argument. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      -(__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008454
Craig Topper58187d32016-05-17 04:41:29 +00008455static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008456_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008457{
Craig Topper58187d32016-05-17 04:41:29 +00008458 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008459 (__v2df) __B,
8460 (__v2df) __C,
8461 (__mmask8) __U,
8462 _MM_FROUND_CUR_DIRECTION);
8463}
8464
/* Zero-masked scalar double negated multiply-add with explicit rounding:
 * __builtin_ia32_vfmaddsd3_maskz receives A, -B and C, with mask U and R as
 * the integer rounding argument.
 * The negation is applied to B rather than A: the sign of the product is the
 * same either way, and A reaches the builtin unmodified.
 * NOTE(review): Intel documents the upper result element as copied from a
 * (the first operand); negating A would flip its sign. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008470
Craig Topper58187d32016-05-17 04:41:29 +00008471static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008472_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008473{
Craig Topper58187d32016-05-17 04:41:29 +00008474 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008475 (__v2df) __X,
8476 (__v2df) __Y,
8477 (__mmask8) __U,
8478 _MM_FROUND_CUR_DIRECTION);
8479}
8480
/* mask3 scalar double negated multiply-add with explicit rounding:
 * __builtin_ia32_vfmaddsd3_mask3 receives -W, X and Y, with mask U and R as
 * the integer rounding argument. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      -(__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008486
Craig Topper58187d32016-05-17 04:41:29 +00008487static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008488_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008489{
Craig Topperf2023652016-10-26 05:35:38 +00008490 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8491 -(__v2df) __A,
Craig Topper9864c592016-05-15 20:10:03 +00008492 -(__v2df) __B,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008493 (__mmask8) __U,
8494 _MM_FROUND_CUR_DIRECTION);
8495}
8496
/* Masked scalar double negated multiply-subtract with explicit rounding:
 * __builtin_ia32_vfmaddsd3_mask receives W, -A and -B, with mask U and R as
 * the integer rounding argument. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      -(__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008502
Craig Topper58187d32016-05-17 04:41:29 +00008503static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008504_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008505{
Craig Topper58187d32016-05-17 04:41:29 +00008506 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008507 (__v2df) __B,
Craig Topper9864c592016-05-15 20:10:03 +00008508 -(__v2df) __C,
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008509 (__mmask8) __U,
8510 _MM_FROUND_CUR_DIRECTION);
8511}
8512
/* Zero-masked scalar double negated multiply-subtract with explicit
 * rounding: __builtin_ia32_vfmaddsd3_maskz receives A, -B and -C, with
 * mask U.
 * R is forwarded as the rounding argument; substituting
 * _MM_FROUND_CUR_DIRECTION would silently discard the caller's rounding
 * choice.
 * The multiplicand negation is applied to B rather than A: the sign of the
 * product is the same either way, and A reaches the builtin unmodified.
 * NOTE(review): Intel documents the upper result element as copied from a
 * (the first operand); negating A would flip its sign. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      -(__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008519
Craig Topper58187d32016-05-17 04:41:29 +00008520static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008521_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008522{
Craig Topper2c8f49e2016-11-12 23:24:34 +00008523 return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008524 (__v2df) __X,
Craig Topper2c8f49e2016-11-12 23:24:34 +00008525 (__v2df) (__Y),
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008526 (__mmask8) __U,
8527 _MM_FROUND_CUR_DIRECTION);
8528}
8529
/* mask3 form of scalar-double fnmsub with explicit rounding: forwards W, X
 * and Y to the vfnmsubsd3_mask3 builtin with mask U and rounding R. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfnmsubsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008535
/* Shuffles the doubles of X under the immediate C. Each 2-bit field of C
 * picks one element; the same four selections are applied to elements 0-3
 * and, offset by 4, to elements 4-7. */
#define _mm512_permutex_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(X), (__v8df)_mm512_undefined_pd(), \
      0 + (((C) >> 0) & 0x3), 0 + (((C) >> 2) & 0x3), \
      0 + (((C) >> 4) & 0x3), 0 + (((C) >> 6) & 0x3), \
      4 + (((C) >> 0) & 0x3), 4 + (((C) >> 2) & 0x3), \
      4 + (((C) >> 4) & 0x3), 4 + (((C) >> 6) & 0x3)); })
Michael Zuckerman8938e832016-04-25 05:32:35 +00008547
/* Merge-masked _mm512_permutex_pd: the select builtin blends the permuted
 * X with W under mask U. */
#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)(__m512d)(W)); })
Michael Zuckerman8938e832016-04-25 05:32:35 +00008552
/* Zero-masked _mm512_permutex_pd: the select builtin blends the permuted X
 * with an all-zero vector under mask U. */
#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()); })
Michael Zuckerman8938e832016-04-25 05:32:35 +00008557
/* Shuffles the 64-bit integers of X under the immediate C. Each 2-bit field
 * of C picks one element; the same four selections are applied to elements
 * 0-3 and, offset by 4, to elements 4-7. */
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v8di)(__m512i)(X), (__v8di)_mm512_undefined_epi32(), \
      0 + (((C) >> 0) & 0x3), 0 + (((C) >> 2) & 0x3), \
      0 + (((C) >> 4) & 0x3), 0 + (((C) >> 6) & 0x3), \
      4 + (((C) >> 0) & 0x3), 4 + (((C) >> 2) & 0x3), \
      4 + (((C) >> 4) & 0x3), 4 + (((C) >> 6) & 0x3)); })
Michael Zuckerman8938e832016-04-25 05:32:35 +00008569
/* Merge-masked _mm512_permutex_epi64: the select builtin blends the
 * permuted X with W under mask U. */
#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)(__m512i)(W)); })
Michael Zuckerman8938e832016-04-25 05:32:35 +00008574
/* Zero-masked _mm512_permutex_epi64: the select builtin blends the permuted
 * X with an all-zero vector under mask U. */
#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)_mm512_setzero_si512()); })
Michael Zuckerman8938e832016-04-25 05:32:35 +00008579
8580static __inline__ __m512d __DEFAULT_FN_ATTRS
8581_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8582{
Craig Topper55b40672018-05-20 23:34:10 +00008583 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008584}
8585
8586static __inline__ __m512d __DEFAULT_FN_ATTRS
8587_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8588{
Craig Topper55b40672018-05-20 23:34:10 +00008589 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8590 (__v8df)_mm512_permutexvar_pd(__X, __Y),
8591 (__v8df)__W);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008592}
8593
8594static __inline__ __m512d __DEFAULT_FN_ATTRS
8595_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8596{
Craig Topper55b40672018-05-20 23:34:10 +00008597 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8598 (__v8df)_mm512_permutexvar_pd(__X, __Y),
8599 (__v8df)_mm512_setzero_pd());
Michael Zuckerman8938e832016-04-25 05:32:35 +00008600}
8601
8602static __inline__ __m512i __DEFAULT_FN_ATTRS
8603_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8604{
Craig Topper55b40672018-05-20 23:34:10 +00008605 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8606}
8607
8608static __inline__ __m512i __DEFAULT_FN_ATTRS
8609_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8610{
8611 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8612 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8613 (__v8di)_mm512_setzero_si512());
Michael Zuckerman8938e832016-04-25 05:32:35 +00008614}
8615
8616static __inline__ __m512i __DEFAULT_FN_ATTRS
8617_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8618 __m512i __Y)
8619{
Craig Topper55b40672018-05-20 23:34:10 +00008620 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8621 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8622 (__v8di)__W);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008623}
8624
8625static __inline__ __m512 __DEFAULT_FN_ATTRS
8626_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8627{
Craig Topper55b40672018-05-20 23:34:10 +00008628 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008629}
8630
8631static __inline__ __m512 __DEFAULT_FN_ATTRS
8632_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8633{
Craig Topper55b40672018-05-20 23:34:10 +00008634 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8635 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8636 (__v16sf)__W);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008637}
8638
8639static __inline__ __m512 __DEFAULT_FN_ATTRS
8640_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8641{
Craig Topper55b40672018-05-20 23:34:10 +00008642 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8643 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8644 (__v16sf)_mm512_setzero_ps());
Michael Zuckerman8938e832016-04-25 05:32:35 +00008645}
8646
8647static __inline__ __m512i __DEFAULT_FN_ATTRS
8648_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8649{
Craig Topper55b40672018-05-20 23:34:10 +00008650 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008651}
8652
/* Alternate spelling: expands to _mm512_permutexvar_epi32. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8654
Michael Zuckerman8938e832016-04-25 05:32:35 +00008655static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper55b40672018-05-20 23:34:10 +00008656_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8657{
8658 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8659 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8660 (__v16si)_mm512_setzero_si512());
8661}
8662
8663static __inline__ __m512i __DEFAULT_FN_ATTRS
Michael Zuckerman8938e832016-04-25 05:32:35 +00008664_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8665 __m512i __Y)
8666{
Craig Topper55b40672018-05-20 23:34:10 +00008667 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8668 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8669 (__v16si)__W);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008670}
8671
/* Alternate spelling: expands to _mm512_mask_permutexvar_epi32. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8673
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008674static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8675_mm512_kand (__mmask16 __A, __mmask16 __B)
8676{
8677 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8678}
Michael Zuckerman8938e832016-04-25 05:32:35 +00008679
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008680static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8681_mm512_kandn (__mmask16 __A, __mmask16 __B)
8682{
8683 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8684}
8685
8686static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8687_mm512_kor (__mmask16 __A, __mmask16 __B)
8688{
8689 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8690}
8691
8692static __inline__ int __DEFAULT_FN_ATTRS
8693_mm512_kortestc (__mmask16 __A, __mmask16 __B)
8694{
Craig Topper91f23d92016-05-16 01:09:16 +00008695 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008696}
8697
8698static __inline__ int __DEFAULT_FN_ATTRS
8699_mm512_kortestz (__mmask16 __A, __mmask16 __B)
8700{
Craig Topper91f23d92016-05-16 01:09:16 +00008701 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008702}
8703
8704static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8705_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8706{
Craig Topperf517f1a2018-01-14 19:23:50 +00008707 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008708}
8709
8710static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8711_mm512_kxnor (__mmask16 __A, __mmask16 __B)
8712{
8713 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8714}
8715
8716static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8717_mm512_kxor (__mmask16 __A, __mmask16 __B)
8718{
8719 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8720}
Michael Zuckerman8938e832016-04-25 05:32:35 +00008721
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008722static __inline__ void __DEFAULT_FN_ATTRS
8723_mm512_stream_si512 (__m512i * __P, __m512i __A)
8724{
Simon Pilgrimc14865c2017-07-29 15:33:34 +00008725 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8726 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008727}
8728
8729static __inline__ __m512i __DEFAULT_FN_ATTRS
Simon Pilgrim1ba2bf22017-09-05 10:06:41 +00008730_mm512_stream_load_si512 (void const *__P)
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008731{
Simon Pilgrimc14865c2017-07-29 15:33:34 +00008732 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8733 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008734}
8735
8736static __inline__ void __DEFAULT_FN_ATTRS
8737_mm512_stream_pd (double *__P, __m512d __A)
8738{
Simon Pilgrimc14865c2017-07-29 15:33:34 +00008739 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8740 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008741}
8742
8743static __inline__ void __DEFAULT_FN_ATTRS
8744_mm512_stream_ps (float *__P, __m512 __A)
8745{
Simon Pilgrimc14865c2017-07-29 15:33:34 +00008746 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8747 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008748}
8749
Michael Zuckerman41f5a372016-04-29 08:52:02 +00008750static __inline__ __m512d __DEFAULT_FN_ATTRS
8751_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8752{
8753 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8754 (__v8df) __W,
8755 (__mmask8) __U);
8756}
8757
8758static __inline__ __m512d __DEFAULT_FN_ATTRS
8759_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
8760{
8761 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8762 (__v8df)
8763 _mm512_setzero_pd (),
8764 (__mmask8) __U);
8765}
8766
8767static __inline__ __m512i __DEFAULT_FN_ATTRS
8768_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8769{
8770 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8771 (__v8di) __W,
8772 (__mmask8) __U);
8773}
8774
8775static __inline__ __m512i __DEFAULT_FN_ATTRS
8776_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
8777{
8778 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8779 (__v8di)
8780 _mm512_setzero_si512 (),
8781 (__mmask8) __U);
8782}
8783
8784static __inline__ __m512 __DEFAULT_FN_ATTRS
8785_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8786{
8787 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8788 (__v16sf) __W,
8789 (__mmask16) __U);
8790}
8791
8792static __inline__ __m512 __DEFAULT_FN_ATTRS
8793_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
8794{
8795 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8796 (__v16sf)
8797 _mm512_setzero_ps (),
8798 (__mmask16) __U);
8799}
8800
8801static __inline__ __m512i __DEFAULT_FN_ATTRS
8802_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8803{
8804 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8805 (__v16si) __W,
8806 (__mmask16) __U);
8807}
8808
8809static __inline__ __m512i __DEFAULT_FN_ATTRS
8810_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
8811{
8812 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8813 (__v16si)
8814 _mm512_setzero_si512 (),
8815 (__mmask16) __U);
8816}
8817
/* Scalar-float compare of X and Y with predicate P and rounding/SAE
 * argument R; the mask operand passed to the builtin is all ones. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(P), (__mmask8)-1, (int)(R)); })
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008822
/* Scalar-float compare of X and Y with predicate P and rounding/SAE
 * argument R; M is passed as the mask operand of the builtin. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(P), (__mmask8)(M), (int)(R)); })
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008827
/* Scalar-float compare of X and Y with predicate P, using the current
 * rounding direction; the mask operand passed to the builtin is all ones. */
#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(P), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008833
/* Scalar-float compare of X and Y with predicate P, using the current
 * rounding direction; M is passed as the mask operand of the builtin. */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(P), (__mmask8)(M), \
      _MM_FROUND_CUR_DIRECTION); })
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008839
/* Scalar-double compare of X and Y with predicate P and rounding/SAE
 * argument R; the mask operand passed to the builtin is all ones. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(P), (__mmask8)-1, (int)(R)); })
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008844
/* Scalar-double compare of X and Y with predicate P and rounding/SAE
 * argument R; M is passed as the mask operand of the builtin. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(P), (__mmask8)(M), (int)(R)); })
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008849
/* Scalar-double compare of X and Y with predicate P, using the current
 * rounding direction; the mask operand passed to the builtin is all ones. */
#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(P), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008855
/* Scalar-double compare of X and Y with predicate P, using the current
 * rounding direction; M is passed as the mask operand of the builtin. */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(P), (__mmask8)(M), \
      _MM_FROUND_CUR_DIRECTION); })
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008861
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008862/* Bit Test */
8863
8864static __inline __mmask16 __DEFAULT_FN_ATTRS
8865_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8866{
8867 return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
8868 _mm512_setzero_epi32());
8869}
8870
8871static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8872_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8873{
8874 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8875 _mm512_setzero_epi32());
8876}
8877
8878static __inline __mmask8 __DEFAULT_FN_ATTRS
8879_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8880{
8881 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8882 _mm512_setzero_epi32());
8883}
8884
8885static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8886_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8887{
8888 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8889 _mm512_setzero_epi32());
8890}
8891
8892static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8893_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8894{
8895 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8896 _mm512_setzero_epi32());
8897}
8898
8899static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8900_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8901{
8902 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8903 _mm512_setzero_epi32());
8904}
8905
8906static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8907_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8908{
8909 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8910 _mm512_setzero_epi32());
8911}
8912
8913static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8914_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8915{
8916 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8917 _mm512_setzero_epi32());
8918}
8919
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008920static __inline__ __m512 __DEFAULT_FN_ATTRS
8921_mm512_movehdup_ps (__m512 __A)
8922{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008923 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8924 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008925}
8926
8927static __inline__ __m512 __DEFAULT_FN_ATTRS
8928_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8929{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008930 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8931 (__v16sf)_mm512_movehdup_ps(__A),
8932 (__v16sf)__W);
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008933}
8934
8935static __inline__ __m512 __DEFAULT_FN_ATTRS
8936_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
8937{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008938 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8939 (__v16sf)_mm512_movehdup_ps(__A),
8940 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008941}
8942
8943static __inline__ __m512 __DEFAULT_FN_ATTRS
8944_mm512_moveldup_ps (__m512 __A)
8945{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008946 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8947 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008948}
8949
8950static __inline__ __m512 __DEFAULT_FN_ATTRS
8951_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8952{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008953 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8954 (__v16sf)_mm512_moveldup_ps(__A),
8955 (__v16sf)__W);
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008956}
8957
8958static __inline__ __m512 __DEFAULT_FN_ATTRS
8959_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
8960{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008961 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8962 (__v16sf)_mm512_moveldup_ps(__A),
8963 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008964}
8965
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00008966static __inline__ __m128 __DEFAULT_FN_ATTRS
8967_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8968{
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00008969 __m128 res = __A;
Ayman Musae60a41c2016-11-08 12:00:30 +00008970 res[0] = (__U & 1) ? __B[0] : __W[0];
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00008971 return res;
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00008972}
8973
8974static __inline__ __m128 __DEFAULT_FN_ATTRS
8975_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
8976{
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00008977 __m128 res = __A;
8978 res[0] = (__U & 1) ? __B[0] : 0;
8979 return res;
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00008980}
8981
8982static __inline__ __m128d __DEFAULT_FN_ATTRS
8983_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8984{
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00008985 __m128d res = __A;
Ayman Musae60a41c2016-11-08 12:00:30 +00008986 res[0] = (__U & 1) ? __B[0] : __W[0];
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00008987 return res;
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00008988}
8989
8990static __inline__ __m128d __DEFAULT_FN_ATTRS
8991_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
8992{
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00008993 __m128d res = __A;
8994 res[0] = (__U & 1) ? __B[0] : 0;
8995 return res;
Ayman Musae60a41c2016-11-08 12:00:30 +00008996}
8997
8998static __inline__ void __DEFAULT_FN_ATTRS
8999_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
9000{
Craig Topper74ac0ed2018-05-10 05:43:43 +00009001 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00009002}
9003
9004static __inline__ void __DEFAULT_FN_ATTRS
9005_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
9006{
Craig Topper74ac0ed2018-05-10 05:43:43 +00009007 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00009008}
9009
9010static __inline__ __m128 __DEFAULT_FN_ATTRS
9011_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
9012{
9013 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
9014 (__v4sf) {0.0, 0.0, 0.0, 0.0},
9015 0, 4, 4, 4);
9016
Craig Topper74ac0ed2018-05-10 05:43:43 +00009017 return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00009018}
9019
9020static __inline__ __m128 __DEFAULT_FN_ATTRS
9021_mm_maskz_load_ss (__mmask8 __U, const float* __A)
9022{
Craig Topper74ac0ed2018-05-10 05:43:43 +00009023 return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A,
9024 (__v4sf) _mm_setzero_ps(),
9025 __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00009026}
9027
9028static __inline__ __m128d __DEFAULT_FN_ATTRS
9029_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
9030{
9031 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
9032 (__v2df) {0.0, 0.0}, 0, 2);
9033
Craig Topper74ac0ed2018-05-10 05:43:43 +00009034 return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00009035}
9036
9037static __inline__ __m128d __DEFAULT_FN_ATTRS
9038_mm_maskz_load_sd (__mmask8 __U, const double* __A)
9039{
Craig Topper74ac0ed2018-05-10 05:43:43 +00009040 return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A,
9041 (__v2df) _mm_setzero_pd(),
9042 __U & 1);
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00009043}
9044
/* Shuffles the 32-bit integers of A under the immediate I. Each 2-bit field
 * of I picks one element within a group of four; the same four selections
 * are applied to the groups starting at elements 0, 4, 8 and 12. */
#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v16si)(__m512i)(A), (__v16si)_mm512_undefined_epi32(), \
      0  + (((I) >> 0) & 0x3), 0  + (((I) >> 2) & 0x3), \
      0  + (((I) >> 4) & 0x3), 0  + (((I) >> 6) & 0x3), \
      4  + (((I) >> 0) & 0x3), 4  + (((I) >> 2) & 0x3), \
      4  + (((I) >> 4) & 0x3), 4  + (((I) >> 6) & 0x3), \
      8  + (((I) >> 0) & 0x3), 8  + (((I) >> 2) & 0x3), \
      8  + (((I) >> 4) & 0x3), 8  + (((I) >> 6) & 0x3), \
      12 + (((I) >> 0) & 0x3), 12 + (((I) >> 2) & 0x3), \
      12 + (((I) >> 4) & 0x3), 12 + (((I) >> 6) & 0x3)); })
Michael Zuckermanc62f27e2016-05-02 07:35:27 +00009064
/* Merge-masked _mm512_shuffle_epi32: the select builtin blends the shuffled
 * A with W under mask U. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)(__m512i)(W)); })
Michael Zuckermanc62f27e2016-05-02 07:35:27 +00009069
/* Zero-masked _mm512_shuffle_epi32: the select builtin blends the shuffled
 * A with an all-zero vector under mask U. */
#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)_mm512_setzero_si512()); })
Michael Zuckermanc62f27e2016-05-02 07:35:27 +00009074
Michael Zuckerman6a0e0872016-05-02 08:36:41 +00009075static __inline__ __m512d __DEFAULT_FN_ATTRS
9076_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9077{
9078 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9079 (__v8df) __W,
9080 (__mmask8) __U);
9081}
9082
9083static __inline__ __m512d __DEFAULT_FN_ATTRS
9084_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9085{
9086 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9087 (__v8df) _mm512_setzero_pd (),
9088 (__mmask8) __U);
9089}
9090
9091static __inline__ __m512i __DEFAULT_FN_ATTRS
9092_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9093{
9094 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9095 (__v8di) __W,
9096 (__mmask8) __U);
9097}
9098
9099static __inline__ __m512i __DEFAULT_FN_ATTRS
9100_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
9101{
9102 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9103 (__v8di) _mm512_setzero_pd (),
9104 (__mmask8) __U);
9105}
9106
9107static __inline__ __m512d __DEFAULT_FN_ATTRS
9108_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
9109{
9110 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9111 (__v8df) __W,
9112 (__mmask8) __U);
9113}
9114
9115static __inline__ __m512d __DEFAULT_FN_ATTRS
9116_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
9117{
9118 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9119 (__v8df) _mm512_setzero_pd(),
9120 (__mmask8) __U);
9121}
9122
9123static __inline__ __m512i __DEFAULT_FN_ATTRS
9124_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
9125{
9126 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9127 (__v8di) __W,
9128 (__mmask8) __U);
9129}
9130
9131static __inline__ __m512i __DEFAULT_FN_ATTRS
9132_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
9133{
9134 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9135 (__v8di) _mm512_setzero_pd(),
9136 (__mmask8) __U);
9137}
9138
9139static __inline__ __m512 __DEFAULT_FN_ATTRS
9140_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
9141{
9142 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9143 (__v16sf) __W,
9144 (__mmask16) __U);
9145}
9146
9147static __inline__ __m512 __DEFAULT_FN_ATTRS
9148_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
9149{
9150 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9151 (__v16sf) _mm512_setzero_ps(),
9152 (__mmask16) __U);
9153}
9154
9155static __inline__ __m512i __DEFAULT_FN_ATTRS
9156_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
9157{
9158 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9159 (__v16si) __W,
9160 (__mmask16) __U);
9161}
9162
9163static __inline__ __m512i __DEFAULT_FN_ATTRS
9164_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
9165{
9166 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9167 (__v16si) _mm512_setzero_ps(),
9168 (__mmask16) __U);
9169}
9170
9171static __inline__ __m512 __DEFAULT_FN_ATTRS
9172_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9173{
9174 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9175 (__v16sf) __W,
9176 (__mmask16) __U);
9177}
9178
9179static __inline__ __m512 __DEFAULT_FN_ATTRS
9180_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9181{
9182 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9183 (__v16sf) _mm512_setzero_ps(),
9184 (__mmask16) __U);
9185}
9186
9187static __inline__ __m512i __DEFAULT_FN_ATTRS
9188_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9189{
9190 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9191 (__v16si) __W,
9192 (__mmask16) __U);
9193}
9194
9195static __inline__ __m512i __DEFAULT_FN_ATTRS
9196_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9197{
9198 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9199 (__v16si) _mm512_setzero_ps(),
9200 (__mmask16) __U);
9201}
9202
/* Converts the eight floats of A to doubles via the cvtps2pd512_mask
 * builtin with rounding/SAE argument R, an undefined pass-through vector
 * and an all-ones mask. */
#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00009207
/* Merge-masked float-to-double conversion: passes A, pass-through W, mask U
 * and rounding/SAE argument R to the cvtps2pd512_mask builtin. */
#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00009212
/* Zero-masked float-to-double conversion: passes A, an all-zero
 * pass-through vector, mask U and rounding/SAE argument R to the
 * cvtps2pd512_mask builtin. */
#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00009217
9218static __inline__ __m512d __DEFAULT_FN_ATTRS
9219_mm512_cvtps_pd (__m256 __A)
9220{
Craig Topperdaaf1052018-05-14 04:05:06 +00009221 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00009222}
9223
9224static __inline__ __m512d __DEFAULT_FN_ATTRS
9225_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
9226{
Craig Topper8cb261e2018-05-14 04:57:46 +00009227 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
9228 (__v8df)_mm512_cvtps_pd(__A),
9229 (__v8df)__W);
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00009230}
9231
9232static __inline__ __m512d __DEFAULT_FN_ATTRS
9233_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
9234{
Craig Topper8cb261e2018-05-14 04:57:46 +00009235 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
9236 (__v8df)_mm512_cvtps_pd(__A),
9237 (__v8df)_mm512_setzero_pd());
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00009238}
9239
Ayman Musa2e250e82016-09-27 14:06:32 +00009240static __inline__ __m512 __DEFAULT_FN_ATTRS
Ayman Musa17a28192016-09-27 15:37:31 +00009241_mm512_cvtpslo_pd (__m512 __A)
Ayman Musa2e250e82016-09-27 14:06:32 +00009242{
9243 return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
9244}
9245
9246static __inline__ __m512 __DEFAULT_FN_ATTRS
Ayman Musa17a28192016-09-27 15:37:31 +00009247_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
Ayman Musa2e250e82016-09-27 14:06:32 +00009248{
9249 return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
9250}
9251
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00009252static __inline__ __m512d __DEFAULT_FN_ATTRS
9253_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
9254{
Igor Bregeraadb8762016-06-08 13:59:20 +00009255 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9256 (__v8df) __A,
9257 (__v8df) __W);
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00009258}
9259
9260static __inline__ __m512d __DEFAULT_FN_ATTRS
9261_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
9262{
Igor Bregeraadb8762016-06-08 13:59:20 +00009263 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9264 (__v8df) __A,
9265 (__v8df) _mm512_setzero_pd ());
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00009266}
9267
9268static __inline__ __m512 __DEFAULT_FN_ATTRS
9269_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
9270{
Igor Bregeraadb8762016-06-08 13:59:20 +00009271 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9272 (__v16sf) __A,
9273 (__v16sf) __W);
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00009274}
9275
9276static __inline__ __m512 __DEFAULT_FN_ATTRS
9277_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
9278{
Igor Bregeraadb8762016-06-08 13:59:20 +00009279 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9280 (__v16sf) __A,
9281 (__v16sf) _mm512_setzero_ps ());
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00009282}
9283
Michael Zuckerman708e7592016-05-03 10:42:46 +00009284static __inline__ void __DEFAULT_FN_ATTRS
9285_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9286{
9287 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9288 (__mmask8) __U);
9289}
9290
9291static __inline__ void __DEFAULT_FN_ATTRS
9292_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9293{
9294 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9295 (__mmask8) __U);
9296}
9297
9298static __inline__ void __DEFAULT_FN_ATTRS
9299_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9300{
9301 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9302 (__mmask16) __U);
9303}
9304
9305static __inline__ void __DEFAULT_FN_ATTRS
9306_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9307{
9308 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9309 (__mmask16) __U);
9310}
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00009311
/* Scalar double -> float conversion with an explicit rounding argument R.
   All three forms call the same builtin and differ only in its third and
   fourth operands:
     plain : undefined vector, all-ones mask
     mask  : W,                mask U
     maskz : zero vector,      mask U */
#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), (__v2df)(__m128d)(B), \
      (__v4sf)_mm_undefined_ps(), (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), (__v2df)(__m128d)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), (__v2df)(__m128d)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
Michael Zuckermane6f73892016-05-04 08:55:11 +00009329
Asaf Badouh89f65762016-06-02 08:11:35 +00009330static __inline__ __m128 __DEFAULT_FN_ATTRS
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009331_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
Asaf Badouh89f65762016-06-02 08:11:35 +00009332{
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009333 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9334 (__v2df)(__B),
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009335 (__v4sf)(__W),
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009336 (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
Asaf Badouh89f65762016-06-02 08:11:35 +00009337}
9338
9339static __inline__ __m128 __DEFAULT_FN_ATTRS
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009340_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
Asaf Badouh89f65762016-06-02 08:11:35 +00009341{
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009342 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9343 (__v2df)(__B),
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009344 (__v4sf)_mm_setzero_ps(),
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009345 (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
Asaf Badouh89f65762016-06-02 08:11:35 +00009346}
9347
/* "i32"/"i64" spellings are plain aliases for the existing "si32"/"si64"
   scalar conversion intrinsics.  The 64-bit aliases are only defined on
   x86-64. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss

#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
Asaf Badouh89f65762016-06-02 08:11:35 +00009358
/* Signed integer -> scalar float/double conversions with an explicit
   rounding argument R.  The "i" and "si" spellings expand to the same
   builtin call. */
#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), \
                                    (int)(R)); })

#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), \
                                    (int)(R)); })

/* The 64-bit integer source forms are only defined on x86-64. */
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), \
                                     (long long)(B), (int)(R)); })

#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), \
                                     (long long)(B), (int)(R)); })

#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), \
                                    (long long)(B), (int)(R)); })

#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), \
                                    (long long)(B), (int)(R)); })
#endif
Michael Zuckermane6f73892016-05-04 08:55:11 +00009384
/* Scalar float -> double conversion with an explicit rounding argument R.
   All three forms call the same builtin and differ only in its third and
   fourth operands:
     plain : undefined vector, all-ones mask
     mask  : W,                mask U
     maskz : zero vector,      mask U */
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), (__v4sf)(__m128)(B), \
      (__v2df)_mm_undefined_pd(), (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), (__v4sf)(__m128)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), (__v4sf)(__m128)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
Michael Zuckermane6f73892016-05-04 08:55:11 +00009402
9403static __inline__ __m128d __DEFAULT_FN_ATTRS
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009404_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
Asaf Badouh89f65762016-06-02 08:11:35 +00009405{
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009406 return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9407 (__v4sf)(__B),
9408 (__v2df)(__W),
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009409 (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
Asaf Badouh89f65762016-06-02 08:11:35 +00009410}
9411
9412static __inline__ __m128d __DEFAULT_FN_ATTRS
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009413_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
Asaf Badouh89f65762016-06-02 08:11:35 +00009414{
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009415 return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9416 (__v4sf)(__B),
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009417 (__v2df)_mm_setzero_pd(),
9418 (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
Asaf Badouh89f65762016-06-02 08:11:35 +00009419}
9420
9421static __inline__ __m128d __DEFAULT_FN_ATTRS
Michael Zuckermane6f73892016-05-04 08:55:11 +00009422_mm_cvtu32_sd (__m128d __A, unsigned __B)
9423{
Craig Topper6fa91252018-05-13 23:03:30 +00009424 __A[0] = __B;
9425 return __A;
Michael Zuckermane6f73892016-05-04 08:55:11 +00009426}
9427
Craig Topper45db56c2016-07-21 07:38:39 +00009428#ifdef __x86_64__
Craig Topper8c18e112016-05-17 04:41:50 +00009429#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
9430 (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
9431 (unsigned long long)(B), (int)(R)); })
Michael Zuckermane6f73892016-05-04 08:55:11 +00009432
9433static __inline__ __m128d __DEFAULT_FN_ATTRS
9434_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9435{
Craig Topper6fa91252018-05-13 23:03:30 +00009436 __A[0] = __B;
9437 return __A;
Michael Zuckermane6f73892016-05-04 08:55:11 +00009438}
Craig Topper45db56c2016-07-21 07:38:39 +00009439#endif
Michael Zuckermane6f73892016-05-04 08:55:11 +00009440
Craig Topper8c18e112016-05-17 04:41:50 +00009441#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
9442 (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
9443 (int)(R)); })
Michael Zuckermane6f73892016-05-04 08:55:11 +00009444
9445static __inline__ __m128 __DEFAULT_FN_ATTRS
9446_mm_cvtu32_ss (__m128 __A, unsigned __B)
9447{
Craig Topper6fa91252018-05-13 23:03:30 +00009448 __A[0] = __B;
9449 return __A;
Michael Zuckermane6f73892016-05-04 08:55:11 +00009450}
9451
Craig Topper45db56c2016-07-21 07:38:39 +00009452#ifdef __x86_64__
Craig Topper8c18e112016-05-17 04:41:50 +00009453#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
9454 (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
9455 (unsigned long long)(B), (int)(R)); })
Michael Zuckermane6f73892016-05-04 08:55:11 +00009456
9457static __inline__ __m128 __DEFAULT_FN_ATTRS
9458_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9459{
Craig Topper6fa91252018-05-13 23:03:30 +00009460 __A[0] = __B;
9461 return __A;
Michael Zuckermane6f73892016-05-04 08:55:11 +00009462}
Craig Topper45db56c2016-07-21 07:38:39 +00009463#endif
Michael Zuckermane6f73892016-05-04 08:55:11 +00009464
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009465static __inline__ __m512i __DEFAULT_FN_ATTRS
9466_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9467{
Jina Nahias3ad702a2017-09-19 11:00:27 +00009468 return (__m512i) __builtin_ia32_selectd_512(__M,
9469 (__v16si) _mm512_set1_epi32(__A),
9470 (__v16si) __O);
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009471}
9472
9473static __inline__ __m512i __DEFAULT_FN_ATTRS
9474_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9475{
Jina Nahias3ad702a2017-09-19 11:00:27 +00009476 return (__m512i) __builtin_ia32_selectq_512(__M,
9477 (__v8di) _mm512_set1_epi64(__A),
9478 (__v8di) __O);
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009479}
9480
Igor Bregerf050b792017-03-19 08:27:16 +00009481static __inline __m512i __DEFAULT_FN_ATTRS
9482_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9483 char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9484 char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9485 char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9486 char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9487 char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9488 char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9489 char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9490 char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9491 char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9492 char __e4, char __e3, char __e2, char __e1, char __e0) {
9493
9494 return __extension__ (__m512i)(__v64qi)
9495 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9496 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9497 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9498 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9499 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9500 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9501 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9502 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9503}
9504
9505static __inline __m512i __DEFAULT_FN_ATTRS
9506_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9507 short __e27, short __e26, short __e25, short __e24, short __e23,
9508 short __e22, short __e21, short __e20, short __e19, short __e18,
9509 short __e17, short __e16, short __e15, short __e14, short __e13,
9510 short __e12, short __e11, short __e10, short __e9, short __e8,
9511 short __e7, short __e6, short __e5, short __e4, short __e3,
9512 short __e2, short __e1, short __e0) {
9513 return __extension__ (__m512i)(__v32hi)
9514 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9515 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9516 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9517 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9518}
9519
Michael Zuckerman178113e2016-05-19 12:07:49 +00009520static __inline __m512i __DEFAULT_FN_ATTRS
9521_mm512_set_epi32 (int __A, int __B, int __C, int __D,
9522 int __E, int __F, int __G, int __H,
9523 int __I, int __J, int __K, int __L,
9524 int __M, int __N, int __O, int __P)
9525{
9526 return __extension__ (__m512i)(__v16si)
9527 { __P, __O, __N, __M, __L, __K, __J, __I,
9528 __H, __G, __F, __E, __D, __C, __B, __A };
9529}
9530
9531#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
9532 e8,e9,e10,e11,e12,e13,e14,e15) \
Craig Topper60589492016-06-08 06:08:04 +00009533 _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
9534 (e5),(e4),(e3),(e2),(e1),(e0))
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00009535
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009536static __inline__ __m512i __DEFAULT_FN_ATTRS
9537_mm512_set_epi64 (long long __A, long long __B, long long __C,
9538 long long __D, long long __E, long long __F,
9539 long long __G, long long __H)
9540{
9541 return __extension__ (__m512i) (__v8di)
9542 { __H, __G, __F, __E, __D, __C, __B, __A };
9543}
9544
Michael Zuckerman178113e2016-05-19 12:07:49 +00009545#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
Craig Topper60589492016-06-08 06:08:04 +00009546 _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
Michael Zuckerman178113e2016-05-19 12:07:49 +00009547
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009548static __inline__ __m512d __DEFAULT_FN_ATTRS
9549_mm512_set_pd (double __A, double __B, double __C, double __D,
9550 double __E, double __F, double __G, double __H)
9551{
9552 return __extension__ (__m512d)
9553 { __H, __G, __F, __E, __D, __C, __B, __A };
9554}
9555
Michael Zuckerman178113e2016-05-19 12:07:49 +00009556#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
Craig Topper60589492016-06-08 06:08:04 +00009557 _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
Michael Zuckerman178113e2016-05-19 12:07:49 +00009558
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009559static __inline__ __m512 __DEFAULT_FN_ATTRS
9560_mm512_set_ps (float __A, float __B, float __C, float __D,
9561 float __E, float __F, float __G, float __H,
9562 float __I, float __J, float __K, float __L,
9563 float __M, float __N, float __O, float __P)
9564{
9565 return __extension__ (__m512)
9566 { __P, __O, __N, __M, __L, __K, __J, __I,
9567 __H, __G, __F, __E, __D, __C, __B, __A };
9568}
9569
Michael Zuckerman178113e2016-05-19 12:07:49 +00009570#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
Craig Topper60589492016-06-08 06:08:04 +00009571 _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
9572 (e4),(e3),(e2),(e1),(e0))
Michael Zuckerman178113e2016-05-19 12:07:49 +00009573
Asaf Badouh13633282016-07-05 12:24:14 +00009574static __inline__ __m512 __DEFAULT_FN_ATTRS
Asaf Badouh356bb762016-08-21 07:56:47 +00009575_mm512_abs_ps(__m512 __A)
Asaf Badouh13633282016-07-05 12:24:14 +00009576{
Asaf Badouh356bb762016-08-21 07:56:47 +00009577 return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
Asaf Badouh13633282016-07-05 12:24:14 +00009578}
9579
9580static __inline__ __m512 __DEFAULT_FN_ATTRS
Asaf Badouh356bb762016-08-21 07:56:47 +00009581_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
Asaf Badouh13633282016-07-05 12:24:14 +00009582{
Asaf Badouh356bb762016-08-21 07:56:47 +00009583 return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
Asaf Badouh13633282016-07-05 12:24:14 +00009584}
9585
9586static __inline__ __m512d __DEFAULT_FN_ATTRS
Asaf Badouh356bb762016-08-21 07:56:47 +00009587_mm512_abs_pd(__m512d __A)
Asaf Badouh13633282016-07-05 12:24:14 +00009588{
Asaf Badouh356bb762016-08-21 07:56:47 +00009589 return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
Asaf Badouh13633282016-07-05 12:24:14 +00009590}
9591
9592static __inline__ __m512d __DEFAULT_FN_ATTRS
Asaf Badouh356bb762016-08-21 07:56:47 +00009593_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
Asaf Badouh13633282016-07-05 12:24:14 +00009594{
Asaf Badouh356bb762016-08-21 07:56:47 +00009595 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
Asaf Badouh13633282016-07-05 12:24:14 +00009596}
9597
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009598// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
9599// outputs. This class of vector operation forms the basis of many scientific
// computations. In vector-reduction arithmetic, the evaluation is
9601// independent of the order of the input elements of V.
9602
// Uses the bisection method. At each step, we split the vector from the
// previous step in half, and the operation is performed on its two halves.
// This takes log2(n) steps, where n is the number of elements in the vector.
9606
9607// Vec512 - Vector with size 512.
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009608// Operator - Can be one of following: +,*,&,|
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009609// T2 - Can get 'i' for int and 'f' for float.
9610// T1 - Can get 'i' for int and 'd' for double.
9611
9612#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1) \
9613 __extension__({ \
9614 __m256##T1 Vec256 = __builtin_shufflevector( \
9615 (__v8d##T2)Vec512, \
9616 (__v8d##T2)Vec512, \
9617 0, 1, 2, 3) \
9618 Operator \
9619 __builtin_shufflevector( \
9620 (__v8d##T2)Vec512, \
9621 (__v8d##T2)Vec512, \
9622 4, 5, 6, 7); \
9623 __m128##T1 Vec128 = __builtin_shufflevector( \
9624 (__v4d##T2)Vec256, \
9625 (__v4d##T2)Vec256, \
9626 0, 1) \
9627 Operator \
9628 __builtin_shufflevector( \
9629 (__v4d##T2)Vec256, \
9630 (__v4d##T2)Vec256, \
9631 2, 3); \
9632 Vec128 = __builtin_shufflevector((__v2d##T2)Vec128, \
9633 (__v2d##T2)Vec128, 0, -1) \
9634 Operator \
9635 __builtin_shufflevector((__v2d##T2)Vec128, \
9636 (__v2d##T2)Vec128, 1, -1); \
9637 return Vec128[0]; \
9638 })
9639
9640static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
9641 _mm512_reduce_operator_64bit(__W, +, i, i);
9642}
9643
9644static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
9645 _mm512_reduce_operator_64bit(__W, *, i, i);
9646}
9647
9648static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
9649 _mm512_reduce_operator_64bit(__W, &, i, i);
9650}
9651
9652static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
9653 _mm512_reduce_operator_64bit(__W, |, i, i);
9654}
9655
9656static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
9657 _mm512_reduce_operator_64bit(__W, +, f, d);
9658}
9659
9660static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
9661 _mm512_reduce_operator_64bit(__W, *, f, d);
9662}
9663
9664// Vec512 - Vector with size 512.
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009665// Vec512Neutral - All vector elements set to the identity element.
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009666// Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
9667// Operator - Can be one of following: +,*,&,|
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009668// Mask - Intrinsic Mask
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009669// T2 - Can get 'i' for int and 'f' for float.
9670// T1 - Can get 'i' for int and 'd' for packed double-precision.
// T3 - Builtin name suffix: 'pd' for packed double or 'q' for q-word.
9672
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009673#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator, \
9674 Mask, T2, T1, T3) \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009675 __extension__({ \
9676 Vec512 = __builtin_ia32_select##T3##_512( \
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009677 (__mmask8)Mask, \
9678 (__v8d##T2)Vec512, \
9679 (__v8d##T2)Vec512Neutral); \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009680 _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1); \
9681 })
9682
9683static __inline__ long long __DEFAULT_FN_ATTRS
9684_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009685 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009686}
9687
9688static __inline__ long long __DEFAULT_FN_ATTRS
9689_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009690 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009691}
9692
9693static __inline__ long long __DEFAULT_FN_ATTRS
9694_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009695 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009696 &, __M, i, i, q);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009697}
9698
9699static __inline__ long long __DEFAULT_FN_ATTRS
9700_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009701 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009702 i, i, q);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009703}
9704
9705static __inline__ double __DEFAULT_FN_ATTRS
9706_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009707 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009708 f, d, pd);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009709}
9710
9711static __inline__ double __DEFAULT_FN_ATTRS
9712_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009713 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
9714 f, d, pd);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009715}
Craig Topper9bed2e62018-05-23 06:31:36 +00009716#undef _mm512_reduce_operator_64bit
9717#undef _mm512_mask_reduce_operator_64bit
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009718
9719// Vec512 - Vector with size 512.
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009720// Operator - Can be one of following: +,*,&,|
// T2 - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and ' ' (empty) for packed single.
9723
9724#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
9725 __m256##T1 Vec256 = \
Michael Zuckermand3436972016-10-30 14:54:05 +00009726 (__m256##T1)(__builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009727 (__v16s##T2)Vec512, \
9728 (__v16s##T2)Vec512, \
9729 0, 1, 2, 3, 4, 5, 6, 7) \
9730 Operator \
Michael Zuckermand3436972016-10-30 14:54:05 +00009731 __builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009732 (__v16s##T2)Vec512, \
9733 (__v16s##T2)Vec512, \
Michael Zuckermand3436972016-10-30 14:54:05 +00009734 8, 9, 10, 11, 12, 13, 14, 15)); \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009735 __m128##T1 Vec128 = \
Michael Zuckermand3436972016-10-30 14:54:05 +00009736 (__m128##T1)(__builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009737 (__v8s##T2)Vec256, \
9738 (__v8s##T2)Vec256, \
9739 0, 1, 2, 3) \
9740 Operator \
Michael Zuckermand3436972016-10-30 14:54:05 +00009741 __builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009742 (__v8s##T2)Vec256, \
9743 (__v8s##T2)Vec256, \
Michael Zuckermand3436972016-10-30 14:54:05 +00009744 4, 5, 6, 7)); \
9745 Vec128 = (__m128##T1)(__builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009746 (__v4s##T2)Vec128, \
9747 (__v4s##T2)Vec128, \
9748 0, 1, -1, -1) \
9749 Operator \
Michael Zuckermand3436972016-10-30 14:54:05 +00009750 __builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009751 (__v4s##T2)Vec128, \
9752 (__v4s##T2)Vec128, \
Michael Zuckermand3436972016-10-30 14:54:05 +00009753 2, 3, -1, -1)); \
9754 Vec128 = (__m128##T1)(__builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009755 (__v4s##T2)Vec128, \
9756 (__v4s##T2)Vec128, \
9757 0, -1, -1, -1) \
9758 Operator \
Michael Zuckermand3436972016-10-30 14:54:05 +00009759 __builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009760 (__v4s##T2)Vec128, \
9761 (__v4s##T2)Vec128, \
Michael Zuckermand3436972016-10-30 14:54:05 +00009762 1, -1, -1, -1)); \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009763 return Vec128[0]; \
9764 })
9765
9766static __inline__ int __DEFAULT_FN_ATTRS
9767_mm512_reduce_add_epi32(__m512i __W) {
9768 _mm512_reduce_operator_32bit(__W, +, i, i);
9769}
9770
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009771static __inline__ int __DEFAULT_FN_ATTRS
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009772_mm512_reduce_mul_epi32(__m512i __W) {
9773 _mm512_reduce_operator_32bit(__W, *, i, i);
9774}
9775
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009776static __inline__ int __DEFAULT_FN_ATTRS
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009777_mm512_reduce_and_epi32(__m512i __W) {
9778 _mm512_reduce_operator_32bit(__W, &, i, i);
9779}
9780
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009781static __inline__ int __DEFAULT_FN_ATTRS
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009782_mm512_reduce_or_epi32(__m512i __W) {
9783 _mm512_reduce_operator_32bit(__W, |, i, i);
9784}
9785
9786static __inline__ float __DEFAULT_FN_ATTRS
9787_mm512_reduce_add_ps(__m512 __W) {
9788 _mm512_reduce_operator_32bit(__W, +, f, );
9789}
9790
9791static __inline__ float __DEFAULT_FN_ATTRS
9792_mm512_reduce_mul_ps(__m512 __W) {
9793 _mm512_reduce_operator_32bit(__W, *, f, );
9794}
9795
9796// Vec512 - Vector with size 512.
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009797// Vec512Neutral - All vector elements set to the identity element.
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009798// Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
9799// Operator - Can be one of following: +,*,&,|
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009800// Mask - Intrinsic Mask
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009801// T2 - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and ' ' (empty) for packed single.
// T3 - Builtin name suffix: 'ps' for packed single or 'd' for d-word.
9804
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009805#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator, \
9806 Mask, T2, T1, T3) \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009807 __extension__({ \
9808 Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009809 (__mmask16)Mask, \
9810 (__v16s##T2)Vec512, \
9811 (__v16s##T2)Vec512Neutral); \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009812 _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1); \
9813 })
9814
9815static __inline__ int __DEFAULT_FN_ATTRS
9816_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009817 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009818}
9819
9820static __inline__ int __DEFAULT_FN_ATTRS
9821_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009822 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009823}
9824
9825static __inline__ int __DEFAULT_FN_ATTRS
9826_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009827 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009828 i, i, d);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009829}
9830
9831static __inline__ int __DEFAULT_FN_ATTRS
9832_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009833 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009834}
9835
9836static __inline__ float __DEFAULT_FN_ATTRS
9837_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009838 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009839}
9840
9841static __inline__ float __DEFAULT_FN_ATTRS
9842_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009843 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009844}
Craig Topper9bed2e62018-05-23 06:31:36 +00009845#undef _mm512_reduce_operator_32bit
9846#undef _mm512_mask_reduce_operator_32bit
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009847
Craig Topperf99532f2018-05-26 18:57:41 +00009848// Used bisection method. At each step, we partition the vector with previous
9849// step in half, and the operation is performed on its two halves.
9850// This takes log2(n) steps where n is the number of elements in the vector.
9851// This macro uses only intrinsics from the AVX512F feature.
9852
9853// Vec512 - Vector with size of 512.
9854// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
//              _mm512_max_epi64
9856// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
9857// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
9858
9859#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
9860 Vec512 = _mm512_##IntrinName( \
9861 (__m512##T1)__builtin_shufflevector( \
9862 (__v8d##T2)Vec512, \
9863 (__v8d##T2)Vec512, \
9864 0, 1, 2, 3, -1, -1, -1, -1), \
9865 (__m512##T1)__builtin_shufflevector( \
9866 (__v8d##T2)Vec512, \
9867 (__v8d##T2)Vec512, \
9868 4, 5, 6, 7, -1, -1, -1, -1)); \
9869 Vec512 = _mm512_##IntrinName( \
9870 (__m512##T1)__builtin_shufflevector( \
9871 (__v8d##T2)Vec512, \
9872 (__v8d##T2)Vec512, \
9873 0, 1, -1, -1, -1, -1, -1, -1),\
9874 (__m512##T1)__builtin_shufflevector( \
9875 (__v8d##T2)Vec512, \
9876 (__v8d##T2)Vec512, \
9877 2, 3, -1, -1, -1, -1, -1, \
9878 -1)); \
9879 Vec512 = _mm512_##IntrinName( \
9880 (__m512##T1)__builtin_shufflevector( \
9881 (__v8d##T2)Vec512, \
9882 (__v8d##T2)Vec512, \
9883 0, -1, -1, -1, -1, -1, -1, -1),\
9884 (__m512##T1)__builtin_shufflevector( \
9885 (__v8d##T2)Vec512, \
9886 (__v8d##T2)Vec512, \
9887 1, -1, -1, -1, -1, -1, -1, -1))\
9888 ; \
9889 return Vec512[0]; \
9890 })
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009891
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009892static __inline__ long long __DEFAULT_FN_ATTRS
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009893_mm512_reduce_max_epi64(__m512i __V) {
Craig Topperf99532f2018-05-26 18:57:41 +00009894 _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009895}
9896
9897static __inline__ unsigned long long __DEFAULT_FN_ATTRS
9898_mm512_reduce_max_epu64(__m512i __V) {
Craig Topperf99532f2018-05-26 18:57:41 +00009899 _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009900}
9901
Craig Topperf99532f2018-05-26 18:57:41 +00009902static __inline__ double __DEFAULT_FN_ATTRS
9903_mm512_reduce_max_pd(__m512d __V) {
9904 _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
9905}
9906
9907static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64
9908(__m512i __V) {
9909 _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009910}
9911
9912static __inline__ unsigned long long __DEFAULT_FN_ATTRS
9913_mm512_reduce_min_epu64(__m512i __V) {
Craig Topperf99532f2018-05-26 18:57:41 +00009914 _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
Craig Toppere0915232018-05-26 18:55:24 +00009915}
9916
9917static __inline__ double __DEFAULT_FN_ATTRS
9918_mm512_reduce_min_pd(__m512d __V) {
Craig Topperf99532f2018-05-26 18:57:41 +00009919 _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
9920}
9921
// Masked horizontal max/min reduction of eight 64-bit lanes.
//
// Vec512 is first blended with Vec512Neutral under Mask through
// __builtin_ia32_select{q|pd}_512, then handed to _mm512_reduce_maxMin_64bit,
// which performs the reduction and the return.
//
// Vec512        - Modifiable vector with size 512; overwritten by the macro.
// Vec512Neutral - A 512 length vector with elements set to the identity
//                 element of the operation.
//                 Identity element: {max_epi,0x8000000000000000}
//                                   {max_epu,0x0000000000000000}
//                                   {max_pd, 0xFFF0000000000000}
//                                   {min_epi,0x7FFFFFFFFFFFFFFF}
//                                   {min_epu,0xFFFFFFFFFFFFFFFF}
//                                   {min_pd, 0x7FF0000000000000}
// IntrinName    - One of {max|min}_{epi64|epu64|pd}. It is pasted onto
//                 "_mm512_", for example: _mm512_max_epi64
// T1            - 'i' for int and 'd' for double. [__m512{i|d}]
// T2            - 'i' for int and 'f' for double. [__v8d{i|f}]
// T3            - 'q' for quad word and 'pd' for packed double.
//                 [__builtin_ia32_select{q|pd}_512]
// Mask          - Intrinsic mask, converted to __mmask8.
//
// Mask and Vec512Neutral are parenthesized before being cast so that a
// compound expression is converted as a whole.

#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask) \
  __extension__({ \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \
        (__mmask8)(Mask), \
        (__v8d##T2)(Vec512), \
        (__v8d##T2)(Vec512Neutral)); \
    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2); \
  })
9948
9949static __inline__ long long __DEFAULT_FN_ATTRS
9950_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
9951 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
9952 max_epi64, i, i, q, __M);
9953}
9954
9955static __inline__ unsigned long long __DEFAULT_FN_ATTRS
9956_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
9957 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
9958 max_epu64, i, i, q, __M);
Craig Toppere0915232018-05-26 18:55:24 +00009959}
9960
9961static __inline__ double __DEFAULT_FN_ATTRS
9962_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
Craig Topperf99532f2018-05-26 18:57:41 +00009963 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(-__builtin_inf()),
9964 max_pd, d, f, pd, __M);
9965}
9966
9967static __inline__ long long __DEFAULT_FN_ATTRS
9968_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
9969 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
9970 min_epi64, i, i, q, __M);
9971}
9972
9973static __inline__ unsigned long long __DEFAULT_FN_ATTRS
9974_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
9975 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
9976 min_epu64, i, i, q, __M);
Craig Toppere0915232018-05-26 18:55:24 +00009977}
9978
9979static __inline__ double __DEFAULT_FN_ATTRS
9980_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
Craig Topperf99532f2018-05-26 18:57:41 +00009981 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
9982 min_pd, d, f, pd, __M);
Craig Toppere0915232018-05-26 18:55:24 +00009983}
Craig Topperf99532f2018-05-26 18:57:41 +00009984#undef _mm512_reduce_maxMin_64bit
9985#undef _mm512_mask_reduce_maxMin_64bit
Craig Toppere0915232018-05-26 18:55:24 +00009986
// Horizontal max/min reduction of a 512-bit vector viewed as sixteen 32-bit
// lanes.
//
// Every step applies the intrinsic to the lower and the upper half of the
// lanes that are still live, so sixteen lanes collapse to one in
// log2(16) = 4 steps. A shuffle index of -1 marks a lane whose content is
// irrelevant for the following steps.
//
// Vec512     - Modifiable vector with size 512; it is overwritten with the
//              partial result of every step.
// IntrinName - One of {max|min}_{epi32|epu32|ps}. It is pasted onto
//              "_mm512_", for example: _mm512_max_epi32
// T1         - 'i' for int and empty for float. [__m512{i|}]
// T2         - 'i' for int and 'f' for float.   [__v16s{i|f}]
//
// The expansion ends in a return statement that yields Vec512[0] indexed in
// the vector's own type, so the macro has to be the last statement of the
// function that uses it.
// NOTE(review): for __m512i arguments element 0 is presumably a 64-bit lane
// whose low half survives the conversion to the 32-bit return type - confirm
// against the __m512i typedef.
// Macro arguments are parenthesized wherever they are the operand of a cast.

#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \
    Vec512 = _mm512_##IntrinName( \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)(Vec512), (__v16s##T2)(Vec512), \
            0, 1, 2, 3, 4, 5, 6, 7, \
            -1, -1, -1, -1, -1, -1, -1, -1), \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)(Vec512), (__v16s##T2)(Vec512), \
            8, 9, 10, 11, 12, 13, 14, 15, \
            -1, -1, -1, -1, -1, -1, -1, -1)); \
    Vec512 = _mm512_##IntrinName( \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)(Vec512), (__v16s##T2)(Vec512), \
            0, 1, 2, 3, -1, -1, -1, -1, \
            -1, -1, -1, -1, -1, -1, -1, -1), \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)(Vec512), (__v16s##T2)(Vec512), \
            4, 5, 6, 7, -1, -1, -1, -1, \
            -1, -1, -1, -1, -1, -1, -1, -1)); \
    Vec512 = _mm512_##IntrinName( \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)(Vec512), (__v16s##T2)(Vec512), \
            0, 1, -1, -1, -1, -1, -1, -1, \
            -1, -1, -1, -1, -1, -1, -1, -1), \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)(Vec512), (__v16s##T2)(Vec512), \
            2, 3, -1, -1, -1, -1, -1, -1, \
            -1, -1, -1, -1, -1, -1, -1, -1)); \
    Vec512 = _mm512_##IntrinName( \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)(Vec512), (__v16s##T2)(Vec512), \
            0, -1, -1, -1, -1, -1, -1, -1, \
            -1, -1, -1, -1, -1, -1, -1, -1), \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)(Vec512), (__v16s##T2)(Vec512), \
            1, -1, -1, -1, -1, -1, -1, -1, \
            -1, -1, -1, -1, -1, -1, -1, -1)); \
    return (Vec512)[0]; \
  })
10040
10041static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) {
10042 _mm512_reduce_maxMin_32bit(a, max_epi32, i, i);
Craig Toppere0915232018-05-26 18:55:24 +000010043}
10044
Craig Topperf99532f2018-05-26 18:57:41 +000010045static __inline__ unsigned int __DEFAULT_FN_ATTRS
10046_mm512_reduce_max_epu32(__m512i a) {
10047 _mm512_reduce_maxMin_32bit(a, max_epu32, i, i);
10048}
10049
10050static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) {
10051 _mm512_reduce_maxMin_32bit(a, max_ps, , f);
10052}
10053
10054static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) {
10055 _mm512_reduce_maxMin_32bit(a, min_epi32, i, i);
10056}
10057
10058static __inline__ unsigned int __DEFAULT_FN_ATTRS
10059_mm512_reduce_min_epu32(__m512i a) {
10060 _mm512_reduce_maxMin_32bit(a, min_epu32, i, i);
10061}
10062
10063static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) {
10064 _mm512_reduce_maxMin_32bit(a, min_ps, , f);
10065}
10066
// Masked horizontal max/min reduction of sixteen 32-bit lanes.
//
// Vec512 is first blended with Vec512Neutral under Mask through
// __builtin_ia32_select{d|ps}_512, then handed to _mm512_reduce_maxMin_32bit,
// which performs the reduction and the return.
//
// Vec512        - Modifiable vector with size 512; overwritten by the macro.
// Vec512Neutral - A 512 length vector with elements set to the identity
//                 element of the operation.
//                 Identity element: {max_epi,0x80000000}
//                                   {max_epu,0x00000000}
//                                   {max_ps, 0xFF800000}
//                                   {min_epi,0x7FFFFFFF}
//                                   {min_epu,0xFFFFFFFF}
//                                   {min_ps, 0x7F800000}
// IntrinName    - One of {max|min}_{epi32|epu32|ps}. It is pasted onto
//                 "_mm512_", for example: _mm512_max_epi32
// T1            - 'i' for int and empty for float. [__m512{i|}]
// T2            - 'i' for int and 'f' for float.   [__v16s{i|f}]
// T3            - 'd' for double word and 'ps' for packed single.
//                 [__builtin_ia32_select{d|ps}_512]
// Mask          - Intrinsic mask, converted to __mmask16.
//
// Mask and Vec512Neutral are parenthesized before being cast so that a
// compound expression is converted as a whole.

#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask) \
  __extension__({ \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \
        (__mmask16)(Mask), \
        (__v16s##T2)(Vec512), \
        (__v16s##T2)(Vec512Neutral)); \
    _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2); \
  })
10093
10094static __inline__ int __DEFAULT_FN_ATTRS
10095_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
10096 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
10097 i, i, d, __M);
10098}
10099
10100static __inline__ unsigned int __DEFAULT_FN_ATTRS
10101_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
10102 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
10103 i, i, d, __M);
Craig Toppere0915232018-05-26 18:55:24 +000010104}
10105
10106static __inline__ float __DEFAULT_FN_ATTRS
10107_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
Craig Topperf99532f2018-05-26 18:57:41 +000010108 _mm512_mask_reduce_maxMin_32bit(__V,_mm512_set1_ps(-__builtin_inff()), max_ps, , f,
10109 ps, __M);
10110}
10111
10112static __inline__ int __DEFAULT_FN_ATTRS
10113_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
10114 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
10115 i, i, d, __M);
10116}
10117
10118static __inline__ unsigned int __DEFAULT_FN_ATTRS
10119_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
10120 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
10121 i, i, d, __M);
Michael Zuckerman25eb4202016-10-29 10:29:20 +000010122}
10123
10124static __inline__ float __DEFAULT_FN_ATTRS
10125_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
Craig Topperf99532f2018-05-26 18:57:41 +000010126 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f,
10127 ps, __M);
Michael Zuckerman25eb4202016-10-29 10:29:20 +000010128}
Craig Topperf99532f2018-05-26 18:57:41 +000010129#undef _mm512_reduce_maxMin_32bit
10130#undef _mm512_mask_reduce_maxMin_32bit
Michael Zuckerman25eb4202016-10-29 10:29:20 +000010131
Michael Kupersteine45af542015-06-30 13:36:19 +000010132#undef __DEFAULT_FN_ATTRS
Eric Christopher4d1851682015-06-17 07:09:20 +000010133
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000010134#endif // __AVX512FINTRIN_H