/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Element-typed views of a 64-byte (512-bit) vector: signed integers and
 * floating point. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types used in the intrinsic signatures. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Write-mask types: one bit per element for 8- and 16-element operations. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;

/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04

/* Constants for integer comparison predicates */
typedef enum {
  _MM_CMPINT_EQ,      /* Equal */
  _MM_CMPINT_LT,      /* Less than */
  _MM_CMPINT_LE,      /* Less than or Equal */
  _MM_CMPINT_UNUSED,
  _MM_CMPINT_NE,      /* Not Equal */
  _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
  _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;

/* Four-letter permutation selectors. Each letter is a 2-bit field
 * (A = 0, B = 1, C = 2, D = 3); the leftmost letter occupies bits 7:6 and
 * the rightmost letter bits 1:0 of the resulting 8-bit value. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;

/* Mantissa normalisation interval selectors. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Mantissa sign-control selectors. */
typedef enum
{
  _MM_MANT_SIGN_src,    /* sign = sign(SRC)              */
  _MM_MANT_SIGN_zero,   /* sign = 0                      */
  _MM_MANT_SIGN_nan     /* DEST = NaN if sign(SRC) = 1   */
} _MM_MANTISSA_SIGN_ENUM;

/* Define the default attributes for the functions in this file:
 * always inlined, no debug info, compiled for the "avx512f" target. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

Adam Nemet0d5bb552014-07-28 17:14:40 +0000178/* Create vectors with repeated elements */
179
Michael Kupersteine45af542015-06-30 13:36:19 +0000180static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000181_mm512_setzero_si512(void)
182{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000183 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
Adam Nemet0d5bb552014-07-28 17:14:40 +0000184}
185
Michael Zuckermanf36f6eb2016-06-05 15:12:52 +0000186#define _mm512_setzero_epi32 _mm512_setzero_si512
187
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000188static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +0000189_mm512_undefined_pd(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000190{
191 return (__m512d)__builtin_ia32_undef512();
192}
193
194static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +0000195_mm512_undefined(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000196{
197 return (__m512)__builtin_ia32_undef512();
198}
199
200static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +0000201_mm512_undefined_ps(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000202{
203 return (__m512)__builtin_ia32_undef512();
204}
205
206static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +0000207_mm512_undefined_epi32(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000208{
209 return (__m512i)__builtin_ia32_undef512();
210}
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000211
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000212static __inline__ __m512i __DEFAULT_FN_ATTRS
213_mm512_broadcastd_epi32 (__m128i __A)
214{
Craig Topper95ed88a2018-06-04 19:28:09 +0000215 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000216 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000217}
218
219static __inline__ __m512i __DEFAULT_FN_ATTRS
220_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
221{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000222 return (__m512i)__builtin_ia32_selectd_512(__M,
223 (__v16si) _mm512_broadcastd_epi32(__A),
224 (__v16si) __O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000225}
226
227static __inline__ __m512i __DEFAULT_FN_ATTRS
228_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
229{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000230 return (__m512i)__builtin_ia32_selectd_512(__M,
231 (__v16si) _mm512_broadcastd_epi32(__A),
232 (__v16si) _mm512_setzero_si512());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000233}
234
235static __inline__ __m512i __DEFAULT_FN_ATTRS
236_mm512_broadcastq_epi64 (__m128i __A)
237{
Craig Topper95ed88a2018-06-04 19:28:09 +0000238 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000239 0, 0, 0, 0, 0, 0, 0, 0);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000240}
241
242static __inline__ __m512i __DEFAULT_FN_ATTRS
243_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
244{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000245 return (__m512i)__builtin_ia32_selectq_512(__M,
246 (__v8di) _mm512_broadcastq_epi64(__A),
247 (__v8di) __O);
248
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000249}
250
251static __inline__ __m512i __DEFAULT_FN_ATTRS
252_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
253{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000254 return (__m512i)__builtin_ia32_selectq_512(__M,
255 (__v8di) _mm512_broadcastq_epi64(__A),
256 (__v8di) _mm512_setzero_si512());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000257}
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000258
Adam Nemet0d5bb552014-07-28 17:14:40 +0000259
Michael Kupersteine45af542015-06-30 13:36:19 +0000260static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +0000261_mm512_setzero_ps(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000262{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000263 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
264 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000265}
Michael Zuckermanf36f6eb2016-06-05 15:12:52 +0000266
267#define _mm512_setzero _mm512_setzero_ps
268
Michael Kupersteine45af542015-06-30 13:36:19 +0000269static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +0000270_mm512_setzero_pd(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000271{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000272 return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000273}
Adam Nemet0d5bb552014-07-28 17:14:40 +0000274
Michael Kupersteine45af542015-06-30 13:36:19 +0000275static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000276_mm512_set1_ps(float __w)
277{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000278 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
279 __w, __w, __w, __w, __w, __w, __w, __w };
Adam Nemetf42e7a22014-07-30 16:51:22 +0000280}
281
Michael Kupersteine45af542015-06-30 13:36:19 +0000282static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000283_mm512_set1_pd(double __w)
284{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000285 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
Adam Nemetf42e7a22014-07-30 16:51:22 +0000286}
287
Michael Kupersteine45af542015-06-30 13:36:19 +0000288static __inline __m512i __DEFAULT_FN_ATTRS
Michael Zuckerman6f08ceb2016-05-26 06:54:52 +0000289_mm512_set1_epi8(char __w)
290{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000291 return __extension__ (__m512i)(__v64qi){
292 __w, __w, __w, __w, __w, __w, __w, __w,
293 __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w,
297 __w, __w, __w, __w, __w, __w, __w, __w,
298 __w, __w, __w, __w, __w, __w, __w, __w,
299 __w, __w, __w, __w, __w, __w, __w, __w };
Michael Zuckerman6f08ceb2016-05-26 06:54:52 +0000300}
301
302static __inline __m512i __DEFAULT_FN_ATTRS
303_mm512_set1_epi16(short __w)
304{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000305 return __extension__ (__m512i)(__v32hi){
306 __w, __w, __w, __w, __w, __w, __w, __w,
307 __w, __w, __w, __w, __w, __w, __w, __w,
308 __w, __w, __w, __w, __w, __w, __w, __w,
309 __w, __w, __w, __w, __w, __w, __w, __w };
Michael Zuckerman6f08ceb2016-05-26 06:54:52 +0000310}
311
312static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000313_mm512_set1_epi32(int __s)
314{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000315 return __extension__ (__m512i)(__v16si){
316 __s, __s, __s, __s, __s, __s, __s, __s,
317 __s, __s, __s, __s, __s, __s, __s, __s };
Adam Nemetf42e7a22014-07-30 16:51:22 +0000318}
319
Michael Kupersteine45af542015-06-30 13:36:19 +0000320static __inline __m512i __DEFAULT_FN_ATTRS
Jina Nahias3ad702a2017-09-19 11:00:27 +0000321_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
322{
323 return (__m512i)__builtin_ia32_selectd_512(__M,
324 (__v16si)_mm512_set1_epi32(__A),
325 (__v16si)_mm512_setzero_si512());
326}
327
328static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000329_mm512_set1_epi64(long long __d)
330{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000331 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
Adam Nemetf42e7a22014-07-30 16:51:22 +0000332}
333
Jina Nahias3ad702a2017-09-19 11:00:27 +0000334static __inline __m512i __DEFAULT_FN_ATTRS
335_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
336{
337 return (__m512i)__builtin_ia32_selectq_512(__M,
338 (__v8di)_mm512_set1_epi64(__A),
339 (__v8di)_mm512_setzero_si512());
340}
Jina Nahias3ad702a2017-09-19 11:00:27 +0000341
Michael Kupersteine45af542015-06-30 13:36:19 +0000342static __inline__ __m512 __DEFAULT_FN_ATTRS
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000343_mm512_broadcastss_ps(__m128 __A)
Adam Nemet4abc07c2014-08-13 00:29:01 +0000344{
Craig Topper95ed88a2018-06-04 19:28:09 +0000345 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000346 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
Adam Nemet4abc07c2014-08-13 00:29:01 +0000347}
348
Michael Zuckerman95721ac2016-06-05 15:43:30 +0000349static __inline __m512i __DEFAULT_FN_ATTRS
350_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
351{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000352 return __extension__ (__m512i)(__v16si)
Michael Zuckerman95721ac2016-06-05 15:43:30 +0000353 { __D, __C, __B, __A, __D, __C, __B, __A,
354 __D, __C, __B, __A, __D, __C, __B, __A };
355}
356
357static __inline __m512i __DEFAULT_FN_ATTRS
358_mm512_set4_epi64 (long long __A, long long __B, long long __C,
359 long long __D)
360{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000361 return __extension__ (__m512i) (__v8di)
Michael Zuckerman95721ac2016-06-05 15:43:30 +0000362 { __D, __C, __B, __A, __D, __C, __B, __A };
363}
364
365static __inline __m512d __DEFAULT_FN_ATTRS
366_mm512_set4_pd (double __A, double __B, double __C, double __D)
367{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000368 return __extension__ (__m512d)
Michael Zuckerman95721ac2016-06-05 15:43:30 +0000369 { __D, __C, __B, __A, __D, __C, __B, __A };
370}
371
372static __inline __m512 __DEFAULT_FN_ATTRS
373_mm512_set4_ps (float __A, float __B, float __C, float __D)
374{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000375 return __extension__ (__m512)
Michael Zuckerman95721ac2016-06-05 15:43:30 +0000376 { __D, __C, __B, __A, __D, __C, __B, __A,
377 __D, __C, __B, __A, __D, __C, __B, __A };
378}
379
/* "Reversed" set4 variants: forward to the matching _mm512_set4_* with the
 * four arguments passed in the opposite order. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                  \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                  \
  _mm512_set4_ps((e3),(e2),(e1),(e0))

Michael Kupersteine45af542015-06-30 13:36:19 +0000392static __inline__ __m512d __DEFAULT_FN_ATTRS
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000393_mm512_broadcastsd_pd(__m128d __A)
Adam Nemet4abc07c2014-08-13 00:29:01 +0000394{
Craig Topper95ed88a2018-06-04 19:28:09 +0000395 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000396 0, 0, 0, 0, 0, 0, 0, 0);
Adam Nemet4abc07c2014-08-13 00:29:01 +0000397}
398
Adam Nemetc871ff92014-07-30 16:51:24 +0000399/* Cast between vector types */
400
Michael Kupersteine45af542015-06-30 13:36:19 +0000401static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000402_mm512_castpd256_pd512(__m256d __a)
403{
404 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
405}
406
Michael Kupersteine45af542015-06-30 13:36:19 +0000407static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000408_mm512_castps256_ps512(__m256 __a)
409{
410 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
411 -1, -1, -1, -1, -1, -1, -1, -1);
412}
413
Michael Kupersteine45af542015-06-30 13:36:19 +0000414static __inline __m128d __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000415_mm512_castpd512_pd128(__m512d __a)
416{
417 return __builtin_shufflevector(__a, __a, 0, 1);
418}
419
Michael Zuckermand5cc6cd2016-05-25 14:04:21 +0000420static __inline __m256d __DEFAULT_FN_ATTRS
421_mm512_castpd512_pd256 (__m512d __A)
422{
423 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
424}
425
Michael Kupersteine45af542015-06-30 13:36:19 +0000426static __inline __m128 __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000427_mm512_castps512_ps128(__m512 __a)
428{
429 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
430}
431
Michael Zuckermand5cc6cd2016-05-25 14:04:21 +0000432static __inline __m256 __DEFAULT_FN_ATTRS
433_mm512_castps512_ps256 (__m512 __A)
434{
435 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
436}
437
438static __inline __m512 __DEFAULT_FN_ATTRS
439_mm512_castpd_ps (__m512d __A)
440{
441 return (__m512) (__A);
442}
443
444static __inline __m512i __DEFAULT_FN_ATTRS
445_mm512_castpd_si512 (__m512d __A)
446{
447 return (__m512i) (__A);
448}
Michael Zuckermanc6677032016-05-03 14:26:52 +0000449
450static __inline__ __m512d __DEFAULT_FN_ATTRS
451_mm512_castpd128_pd512 (__m128d __A)
452{
453 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
454}
455
Michael Zuckermand5cc6cd2016-05-25 14:04:21 +0000456static __inline __m512d __DEFAULT_FN_ATTRS
457_mm512_castps_pd (__m512 __A)
458{
459 return (__m512d) (__A);
460}
461
462static __inline __m512i __DEFAULT_FN_ATTRS
463_mm512_castps_si512 (__m512 __A)
464{
465 return (__m512i) (__A);
466}
467
Michael Zuckermanc6677032016-05-03 14:26:52 +0000468static __inline__ __m512 __DEFAULT_FN_ATTRS
469_mm512_castps128_ps512 (__m128 __A)
470{
471 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
472}
473
474static __inline__ __m512i __DEFAULT_FN_ATTRS
475_mm512_castsi128_si512 (__m128i __A)
476{
477 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
478}
479
480static __inline__ __m512i __DEFAULT_FN_ATTRS
481_mm512_castsi256_si512 (__m256i __A)
482{
483 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
484}
485
Michael Zuckermand5cc6cd2016-05-25 14:04:21 +0000486static __inline __m512 __DEFAULT_FN_ATTRS
487_mm512_castsi512_ps (__m512i __A)
488{
489 return (__m512) (__A);
490}
491
492static __inline __m512d __DEFAULT_FN_ATTRS
493_mm512_castsi512_pd (__m512i __A)
494{
495 return (__m512d) (__A);
496}
497
498static __inline __m128i __DEFAULT_FN_ATTRS
499_mm512_castsi512_si128 (__m512i __A)
500{
501 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
502}
503
Michael Zuckerman22c47e62016-05-26 14:32:11 +0000504static __inline __m256i __DEFAULT_FN_ATTRS
505_mm512_castsi512_si256 (__m512i __A)
506{
507 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
508}
509
Ayman Musae60a41c2016-11-08 12:00:30 +0000510static __inline__ __mmask16 __DEFAULT_FN_ATTRS
511_mm512_int2mask(int __a)
512{
513 return (__mmask16)__a;
514}
515
516static __inline__ int __DEFAULT_FN_ATTRS
517_mm512_mask2int(__mmask16 __a)
518{
519 return (int)__a;
520}
521
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000522/// Constructs a 512-bit floating-point vector of [8 x double] from a
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000523/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
524/// contain the value of the source vector. The upper 384 bits are set
525/// to zero.
526///
527/// \headerfile <x86intrin.h>
528///
529/// This intrinsic has no corresponding instruction.
530///
531/// \param __a
532/// A 128-bit vector of [2 x double].
533/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
534/// contain the value of the parameter. The upper 384 bits are set to zero.
535static __inline __m512d __DEFAULT_FN_ATTRS
536_mm512_zextpd128_pd512(__m128d __a)
537{
538 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
539}
540
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000541/// Constructs a 512-bit floating-point vector of [8 x double] from a
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000542/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
543/// contain the value of the source vector. The upper 256 bits are set
544/// to zero.
545///
546/// \headerfile <x86intrin.h>
547///
548/// This intrinsic has no corresponding instruction.
549///
550/// \param __a
551/// A 256-bit vector of [4 x double].
552/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
553/// contain the value of the parameter. The upper 256 bits are set to zero.
554static __inline __m512d __DEFAULT_FN_ATTRS
555_mm512_zextpd256_pd512(__m256d __a)
556{
557 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
558}
559
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000560/// Constructs a 512-bit floating-point vector of [16 x float] from a
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000561/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
562/// the value of the source vector. The upper 384 bits are set to zero.
563///
564/// \headerfile <x86intrin.h>
565///
566/// This intrinsic has no corresponding instruction.
567///
568/// \param __a
569/// A 128-bit vector of [4 x float].
570/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
571/// contain the value of the parameter. The upper 384 bits are set to zero.
572static __inline __m512 __DEFAULT_FN_ATTRS
573_mm512_zextps128_ps512(__m128 __a)
574{
575 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
576}
577
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000578/// Constructs a 512-bit floating-point vector of [16 x float] from a
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000579/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
580/// the value of the source vector. The upper 256 bits are set to zero.
581///
582/// \headerfile <x86intrin.h>
583///
584/// This intrinsic has no corresponding instruction.
585///
586/// \param __a
587/// A 256-bit vector of [8 x float].
588/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
589/// contain the value of the parameter. The upper 256 bits are set to zero.
590static __inline __m512 __DEFAULT_FN_ATTRS
591_mm512_zextps256_ps512(__m256 __a)
592{
593 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
594}
595
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000596/// Constructs a 512-bit integer vector from a 128-bit integer vector.
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000597/// The lower 128 bits contain the value of the source vector. The upper
598/// 384 bits are set to zero.
599///
600/// \headerfile <x86intrin.h>
601///
602/// This intrinsic has no corresponding instruction.
603///
604/// \param __a
605/// A 128-bit integer vector.
606/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
607/// the parameter. The upper 384 bits are set to zero.
608static __inline __m512i __DEFAULT_FN_ATTRS
609_mm512_zextsi128_si512(__m128i __a)
610{
611 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
612}
613
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000614/// Constructs a 512-bit integer vector from a 256-bit integer vector.
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000615/// The lower 256 bits contain the value of the source vector. The upper
616/// 256 bits are set to zero.
617///
618/// \headerfile <x86intrin.h>
619///
620/// This intrinsic has no corresponding instruction.
621///
622/// \param __a
623/// A 256-bit integer vector.
624/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
625/// the parameter. The upper 256 bits are set to zero.
626static __inline __m512i __DEFAULT_FN_ATTRS
627_mm512_zextsi256_si512(__m256i __a)
628{
629 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
630}
631
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000632/* Bitwise operators */
Michael Kupersteine45af542015-06-30 13:36:19 +0000633static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000634_mm512_and_epi32(__m512i __a, __m512i __b)
635{
Craig Topper6a77b622016-06-04 05:43:41 +0000636 return (__m512i)((__v16su)__a & (__v16su)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000637}
638
Michael Kupersteine45af542015-06-30 13:36:19 +0000639static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000640_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
641{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000642 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
Asaf Badouh13633282016-07-05 12:24:14 +0000643 (__v16si) _mm512_and_epi32(__a, __b),
644 (__v16si) __src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000645}
Asaf Badouh13633282016-07-05 12:24:14 +0000646
Michael Kupersteine45af542015-06-30 13:36:19 +0000647static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000648_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
649{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000650 return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
Asaf Badouh13633282016-07-05 12:24:14 +0000651 __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000652}
653
Michael Kupersteine45af542015-06-30 13:36:19 +0000654static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000655_mm512_and_epi64(__m512i __a, __m512i __b)
656{
Craig Topper6a77b622016-06-04 05:43:41 +0000657 return (__m512i)((__v8du)__a & (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000658}
659
Michael Kupersteine45af542015-06-30 13:36:19 +0000660static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000661_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
662{
Asaf Badouh13633282016-07-05 12:24:14 +0000663 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
664 (__v8di) _mm512_and_epi64(__a, __b),
665 (__v8di) __src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000666}
Asaf Badouh13633282016-07-05 12:24:14 +0000667
Michael Kupersteine45af542015-06-30 13:36:19 +0000668static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000669_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
670{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000671 return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
Asaf Badouh13633282016-07-05 12:24:14 +0000672 __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000673}
674
Michael Kupersteine45af542015-06-30 13:36:19 +0000675static __inline__ __m512i __DEFAULT_FN_ATTRS
Michael Zuckerman2cacc352016-05-18 15:25:53 +0000676_mm512_andnot_si512 (__m512i __A, __m512i __B)
677{
Craig Toppercbf39292018-05-31 01:24:40 +0000678 return (__m512i)(~(__v8du)__A & (__v8du)__B);
Michael Zuckerman2cacc352016-05-18 15:25:53 +0000679}
680
681static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000682_mm512_andnot_epi32 (__m512i __A, __m512i __B)
683{
Craig Toppercbf39292018-05-31 01:24:40 +0000684 return (__m512i)(~(__v16su)__A & (__v16su)__B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000685}
686
Michael Kupersteine45af542015-06-30 13:36:19 +0000687static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000688_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000689{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000690 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
691 (__v16si)_mm512_andnot_epi32(__A, __B),
692 (__v16si)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000693}
694
Michael Kupersteine45af542015-06-30 13:36:19 +0000695static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000696_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000697{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000698 return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
699 __U, __A, __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000700}
701
Michael Kupersteine45af542015-06-30 13:36:19 +0000702static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000703_mm512_andnot_epi64(__m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000704{
Craig Toppercbf39292018-05-31 01:24:40 +0000705 return (__m512i)(~(__v8du)__A & (__v8du)__B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000706}
707
Michael Kupersteine45af542015-06-30 13:36:19 +0000708static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000709_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000710{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000711 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
712 (__v8di)_mm512_andnot_epi64(__A, __B),
713 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000714}
715
Michael Kupersteine45af542015-06-30 13:36:19 +0000716static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000717_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000718{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000719 return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
720 __U, __A, __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000721}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000722
Michael Kupersteine45af542015-06-30 13:36:19 +0000723static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000724_mm512_or_epi32(__m512i __a, __m512i __b)
725{
Craig Topper6a77b622016-06-04 05:43:41 +0000726 return (__m512i)((__v16su)__a | (__v16su)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000727}
728
Michael Kupersteine45af542015-06-30 13:36:19 +0000729static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000730_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
731{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000732 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
733 (__v16si)_mm512_or_epi32(__a, __b),
734 (__v16si)__src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000735}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000736
Michael Kupersteine45af542015-06-30 13:36:19 +0000737static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000738_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
739{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000740 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000741}
742
Michael Kupersteine45af542015-06-30 13:36:19 +0000743static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000744_mm512_or_epi64(__m512i __a, __m512i __b)
745{
Craig Topper6a77b622016-06-04 05:43:41 +0000746 return (__m512i)((__v8du)__a | (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000747}
748
Michael Kupersteine45af542015-06-30 13:36:19 +0000749static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000750_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
751{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000752 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
753 (__v8di)_mm512_or_epi64(__a, __b),
754 (__v8di)__src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000755}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000756
Michael Kupersteine45af542015-06-30 13:36:19 +0000757static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000758_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
759{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000760 return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000761}
762
Michael Kupersteine45af542015-06-30 13:36:19 +0000763static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000764_mm512_xor_epi32(__m512i __a, __m512i __b)
765{
Craig Topper6a77b622016-06-04 05:43:41 +0000766 return (__m512i)((__v16su)__a ^ (__v16su)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000767}
768
Michael Kupersteine45af542015-06-30 13:36:19 +0000769static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000770_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
771{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000772 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
773 (__v16si)_mm512_xor_epi32(__a, __b),
774 (__v16si)__src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000775}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000776
Michael Kupersteine45af542015-06-30 13:36:19 +0000777static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000778_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
779{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000780 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000781}
782
Michael Kupersteine45af542015-06-30 13:36:19 +0000783static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000784_mm512_xor_epi64(__m512i __a, __m512i __b)
785{
Craig Topper6a77b622016-06-04 05:43:41 +0000786 return (__m512i)((__v8du)__a ^ (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000787}
788
Michael Kupersteine45af542015-06-30 13:36:19 +0000789static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000790_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
791{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000792 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
793 (__v8di)_mm512_xor_epi64(__a, __b),
794 (__v8di)__src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000795}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000796
Michael Kupersteine45af542015-06-30 13:36:19 +0000797static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000798_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
799{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000800 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000801}
802
Michael Kupersteine45af542015-06-30 13:36:19 +0000803static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000804_mm512_and_si512(__m512i __a, __m512i __b)
805{
Craig Topper6a77b622016-06-04 05:43:41 +0000806 return (__m512i)((__v8du)__a & (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000807}
808
Michael Kupersteine45af542015-06-30 13:36:19 +0000809static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000810_mm512_or_si512(__m512i __a, __m512i __b)
811{
Craig Topper6a77b622016-06-04 05:43:41 +0000812 return (__m512i)((__v8du)__a | (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000813}
814
Michael Kupersteine45af542015-06-30 13:36:19 +0000815static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000816_mm512_xor_si512(__m512i __a, __m512i __b)
817{
Craig Topper6a77b622016-06-04 05:43:41 +0000818 return (__m512i)((__v8du)__a ^ (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000819}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000820
Adam Nemet0d5bb552014-07-28 17:14:40 +0000821/* Arithmetic */
822
Michael Kupersteine45af542015-06-30 13:36:19 +0000823static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000824_mm512_add_pd(__m512d __a, __m512d __b)
825{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000826 return (__m512d)((__v8df)__a + (__v8df)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000827}
828
Michael Kupersteine45af542015-06-30 13:36:19 +0000829static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000830_mm512_add_ps(__m512 __a, __m512 __b)
831{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000832 return (__m512)((__v16sf)__a + (__v16sf)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000833}
834
Michael Kupersteine45af542015-06-30 13:36:19 +0000835static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000836_mm512_mul_pd(__m512d __a, __m512d __b)
837{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000838 return (__m512d)((__v8df)__a * (__v8df)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000839}
840
Michael Kupersteine45af542015-06-30 13:36:19 +0000841static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000842_mm512_mul_ps(__m512 __a, __m512 __b)
843{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000844 return (__m512)((__v16sf)__a * (__v16sf)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000845}
846
Michael Kupersteine45af542015-06-30 13:36:19 +0000847static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000848_mm512_sub_pd(__m512d __a, __m512d __b)
849{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000850 return (__m512d)((__v8df)__a - (__v8df)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000851}
852
Michael Kupersteine45af542015-06-30 13:36:19 +0000853static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000854_mm512_sub_ps(__m512 __a, __m512 __b)
855{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000856 return (__m512)((__v16sf)__a - (__v16sf)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000857}
858
Michael Kupersteine45af542015-06-30 13:36:19 +0000859static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000860_mm512_add_epi64 (__m512i __A, __m512i __B)
861{
Craig Topper6a77b622016-06-04 05:43:41 +0000862 return (__m512i) ((__v8du) __A + (__v8du) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000863}
864
Michael Kupersteine45af542015-06-30 13:36:19 +0000865static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000866_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000867{
Craig Topper0e189762016-09-03 18:29:35 +0000868 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
869 (__v8di)_mm512_add_epi64(__A, __B),
870 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000871}
872
Michael Kupersteine45af542015-06-30 13:36:19 +0000873static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000874_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000875{
Craig Topper0e189762016-09-03 18:29:35 +0000876 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
877 (__v8di)_mm512_add_epi64(__A, __B),
878 (__v8di)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000879}
880
Michael Kupersteine45af542015-06-30 13:36:19 +0000881static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000882_mm512_sub_epi64 (__m512i __A, __m512i __B)
883{
Craig Topper6a77b622016-06-04 05:43:41 +0000884 return (__m512i) ((__v8du) __A - (__v8du) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000885}
886
Michael Kupersteine45af542015-06-30 13:36:19 +0000887static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000888_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000889{
Craig Topper0e189762016-09-03 18:29:35 +0000890 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
891 (__v8di)_mm512_sub_epi64(__A, __B),
892 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000893}
894
Michael Kupersteine45af542015-06-30 13:36:19 +0000895static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000896_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000897{
Craig Topper0e189762016-09-03 18:29:35 +0000898 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
899 (__v8di)_mm512_sub_epi64(__A, __B),
900 (__v8di)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000901}
902
Michael Kupersteine45af542015-06-30 13:36:19 +0000903static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000904_mm512_add_epi32 (__m512i __A, __m512i __B)
905{
Craig Topper6a77b622016-06-04 05:43:41 +0000906 return (__m512i) ((__v16su) __A + (__v16su) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000907}
908
Michael Kupersteine45af542015-06-30 13:36:19 +0000909static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000910_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000911{
Craig Topper0e189762016-09-03 18:29:35 +0000912 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
913 (__v16si)_mm512_add_epi32(__A, __B),
914 (__v16si)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000915}
916
Michael Kupersteine45af542015-06-30 13:36:19 +0000917static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000918_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
919{
Craig Topper0e189762016-09-03 18:29:35 +0000920 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
921 (__v16si)_mm512_add_epi32(__A, __B),
922 (__v16si)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000923}
924
Michael Kupersteine45af542015-06-30 13:36:19 +0000925static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000926_mm512_sub_epi32 (__m512i __A, __m512i __B)
927{
Craig Topper6a77b622016-06-04 05:43:41 +0000928 return (__m512i) ((__v16su) __A - (__v16su) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000929}
930
Michael Kupersteine45af542015-06-30 13:36:19 +0000931static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000932_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000933{
Craig Topper0e189762016-09-03 18:29:35 +0000934 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
935 (__v16si)_mm512_sub_epi32(__A, __B),
936 (__v16si)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000937}
938
Michael Kupersteine45af542015-06-30 13:36:19 +0000939static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000940_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000941{
Craig Topper0e189762016-09-03 18:29:35 +0000942 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
943 (__v16si)_mm512_sub_epi32(__A, __B),
944 (__v16si)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000945}
946
/* Packed double-precision maximum of A and B; R is forwarded to the builtin
 * as its integer control operand.  The whole expansion is parenthesized so
 * that a postfix operator written after the macro call applies to the cast
 * result rather than to the builtin call. */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
950
/* Merge-masked form of _mm512_max_round_pd: the maximum is blended with W
 * by the packed-double select builtin under control of mask U.  The whole
 * expansion is parenthesized so the leading cast cannot be separated from
 * the call by a postfix operator at the use site. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)(W)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +0000955
/* Zero-masked form of _mm512_max_round_pd: the maximum is blended with an
 * all-zero vector under control of mask U.  The whole expansion is
 * parenthesized so the leading cast cannot be separated from the call by a
 * postfix operator at the use site. */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +0000960
Michael Kupersteine45af542015-06-30 13:36:19 +0000961static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000962_mm512_max_pd(__m512d __A, __m512d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000963{
Craig Topper2da60bc2018-06-21 05:01:01 +0000964 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
965 _MM_FROUND_CUR_DIRECTION);
Adam Nemet0d5bb552014-07-28 17:14:40 +0000966}
967
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +0000968static __inline__ __m512d __DEFAULT_FN_ATTRS
969_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
970{
Craig Topper2da60bc2018-06-21 05:01:01 +0000971 return (__m512d)__builtin_ia32_selectpd_512(__U,
972 (__v8df)_mm512_max_pd(__A, __B),
973 (__v8df)__W);
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +0000974}
975
976static __inline__ __m512d __DEFAULT_FN_ATTRS
977_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
978{
Craig Topper2da60bc2018-06-21 05:01:01 +0000979 return (__m512d)__builtin_ia32_selectpd_512(__U,
980 (__v8df)_mm512_max_pd(__A, __B),
981 (__v8df)_mm512_setzero_pd());
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +0000982}
983
/* Packed single-precision maximum of A and B; R is forwarded to the builtin
 * as its integer control operand.  The whole expansion is parenthesized so
 * that a postfix operator written after the macro call applies to the cast
 * result rather than to the builtin call. */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
987
/* Merge-masked form of _mm512_max_round_ps: the maximum is blended with W
 * by the packed-float select builtin under control of mask U.  The whole
 * expansion is parenthesized so the leading cast cannot be separated from
 * the call by a postfix operator at the use site. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)(W)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +0000992
/* Zero-masked form of _mm512_max_round_ps: the maximum is blended with an
 * all-zero vector under control of mask U.  The whole expansion is
 * parenthesized so the leading cast cannot be separated from the call by a
 * postfix operator at the use site. */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +0000997
Michael Kupersteine45af542015-06-30 13:36:19 +0000998static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000999_mm512_max_ps(__m512 __A, __m512 __B)
1000{
Craig Topper2da60bc2018-06-21 05:01:01 +00001001 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
1002 _MM_FROUND_CUR_DIRECTION);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001003}
1004
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001005static __inline__ __m512 __DEFAULT_FN_ATTRS
1006_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1007{
Craig Topper2da60bc2018-06-21 05:01:01 +00001008 return (__m512)__builtin_ia32_selectps_512(__U,
1009 (__v16sf)_mm512_max_ps(__A, __B),
1010 (__v16sf)__W);
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001011}
1012
1013static __inline__ __m512 __DEFAULT_FN_ATTRS
1014_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1015{
Craig Topper2da60bc2018-06-21 05:01:01 +00001016 return (__m512)__builtin_ia32_selectps_512(__U,
1017 (__v16sf)_mm512_max_ps(__A, __B),
1018 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001019}
1020
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001021static __inline__ __m128 __DEFAULT_FN_ATTRS
1022_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001023 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001024 (__v4sf) __B,
1025 (__v4sf) __W,
1026 (__mmask8) __U,
1027 _MM_FROUND_CUR_DIRECTION);
1028}
1029
1030static __inline__ __m128 __DEFAULT_FN_ATTRS
1031_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001032 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001033 (__v4sf) __B,
1034 (__v4sf) _mm_setzero_ps (),
1035 (__mmask8) __U,
1036 _MM_FROUND_CUR_DIRECTION);
1037}
1038
/* Scalar single-precision maximum of A and B with an all-ones mask and a
 * zero pass-through operand; R is forwarded as the builtin's integer control
 * operand.  The whole expansion is parenthesized so that a postfix operator
 * written after the macro call applies to the cast result. */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001044
/* Masked scalar single-precision maximum of A and B with W as the
 * pass-through operand and U as the mask; R is forwarded as the builtin's
 * integer control operand.  The whole expansion is parenthesized so that a
 * postfix operator written after the macro call applies to the cast result. */
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001050
/* Zero-masked scalar single-precision maximum of A and B: mask U with an
 * all-zero pass-through operand; R is forwarded as the builtin's integer
 * control operand.  The whole expansion is parenthesized so that a postfix
 * operator written after the macro call applies to the cast result. */
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001056
1057static __inline__ __m128d __DEFAULT_FN_ATTRS
1058_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001059 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001060 (__v2df) __B,
1061 (__v2df) __W,
1062 (__mmask8) __U,
1063 _MM_FROUND_CUR_DIRECTION);
1064}
1065
1066static __inline__ __m128d __DEFAULT_FN_ATTRS
1067_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001068 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001069 (__v2df) __B,
1070 (__v2df) _mm_setzero_pd (),
1071 (__mmask8) __U,
1072 _MM_FROUND_CUR_DIRECTION);
1073}
1074
/* Scalar double-precision maximum of A and B with an all-ones mask and a
 * zero pass-through operand; R is forwarded as the builtin's integer control
 * operand.  The whole expansion is parenthesized so that a postfix operator
 * written after the macro call applies to the cast result. */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001080
/* Masked scalar double-precision maximum of A and B with W as the
 * pass-through operand and U as the mask; R is forwarded as the builtin's
 * integer control operand.  The whole expansion is parenthesized so that a
 * postfix operator written after the macro call applies to the cast result. */
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001086
/* Zero-masked scalar double-precision maximum of A and B: mask U with an
 * all-zero pass-through operand; R is forwarded as the builtin's integer
 * control operand.  The whole expansion is parenthesized so that a postfix
 * operator written after the macro call applies to the cast result. */
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001092
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001093static __inline __m512i
Michael Kupersteine45af542015-06-30 13:36:19 +00001094__DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001095_mm512_max_epi32(__m512i __A, __m512i __B)
1096{
Craig Topperf2043b02018-05-23 04:51:54 +00001097 return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001098}
1099
Michael Zuckermande860e52016-05-10 11:34:19 +00001100static __inline__ __m512i __DEFAULT_FN_ATTRS
1101_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1102{
Craig Topperf2043b02018-05-23 04:51:54 +00001103 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1104 (__v16si)_mm512_max_epi32(__A, __B),
1105 (__v16si)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001106}
1107
1108static __inline__ __m512i __DEFAULT_FN_ATTRS
1109_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1110{
Craig Topperf2043b02018-05-23 04:51:54 +00001111 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1112 (__v16si)_mm512_max_epi32(__A, __B),
1113 (__v16si)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001114}
1115
Michael Kupersteine45af542015-06-30 13:36:19 +00001116static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001117_mm512_max_epu32(__m512i __A, __m512i __B)
1118{
Craig Topperf2043b02018-05-23 04:51:54 +00001119 return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001120}
1121
Michael Zuckermande860e52016-05-10 11:34:19 +00001122static __inline__ __m512i __DEFAULT_FN_ATTRS
1123_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1124{
Craig Topperf2043b02018-05-23 04:51:54 +00001125 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1126 (__v16si)_mm512_max_epu32(__A, __B),
1127 (__v16si)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001128}
1129
1130static __inline__ __m512i __DEFAULT_FN_ATTRS
1131_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1132{
Craig Topperf2043b02018-05-23 04:51:54 +00001133 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1134 (__v16si)_mm512_max_epu32(__A, __B),
1135 (__v16si)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001136}
1137
Michael Kupersteine45af542015-06-30 13:36:19 +00001138static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001139_mm512_max_epi64(__m512i __A, __m512i __B)
1140{
Craig Topperf2043b02018-05-23 04:51:54 +00001141 return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001142}
1143
Michael Zuckermande860e52016-05-10 11:34:19 +00001144static __inline__ __m512i __DEFAULT_FN_ATTRS
1145_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1146{
Craig Topperf2043b02018-05-23 04:51:54 +00001147 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1148 (__v8di)_mm512_max_epi64(__A, __B),
1149 (__v8di)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001150}
1151
1152static __inline__ __m512i __DEFAULT_FN_ATTRS
1153_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1154{
Craig Topperf2043b02018-05-23 04:51:54 +00001155 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1156 (__v8di)_mm512_max_epi64(__A, __B),
1157 (__v8di)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001158}
1159
Michael Kupersteine45af542015-06-30 13:36:19 +00001160static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001161_mm512_max_epu64(__m512i __A, __m512i __B)
1162{
Craig Topperf2043b02018-05-23 04:51:54 +00001163 return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001164}
1165
Michael Zuckermande860e52016-05-10 11:34:19 +00001166static __inline__ __m512i __DEFAULT_FN_ATTRS
1167_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1168{
Craig Topperf2043b02018-05-23 04:51:54 +00001169 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1170 (__v8di)_mm512_max_epu64(__A, __B),
1171 (__v8di)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001172}
1173
1174static __inline__ __m512i __DEFAULT_FN_ATTRS
1175_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1176{
Craig Topperf2043b02018-05-23 04:51:54 +00001177 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1178 (__v8di)_mm512_max_epu64(__A, __B),
1179 (__v8di)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001180}
1181
/* Packed double-precision minimum of A and B; R is forwarded to the builtin
 * as its integer control operand.  The whole expansion is parenthesized so
 * that a postfix operator written after the macro call applies to the cast
 * result rather than to the builtin call. */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
1185
/* Merge-masked form of _mm512_min_round_pd: the minimum is blended with W
 * by the packed-double select builtin under control of mask U.  The whole
 * expansion is parenthesized so the leading cast cannot be separated from
 * the call by a postfix operator at the use site. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)(W)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001190
/* Zero-masked form of _mm512_min_round_pd: the minimum is blended with an
 * all-zero vector under control of mask U.  The whole expansion is
 * parenthesized so the leading cast cannot be separated from the call by a
 * postfix operator at the use site. */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001195
Michael Kupersteine45af542015-06-30 13:36:19 +00001196static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001197_mm512_min_pd(__m512d __A, __m512d __B)
1198{
Craig Topper2da60bc2018-06-21 05:01:01 +00001199 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1200 _MM_FROUND_CUR_DIRECTION);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001201}
1202
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001203static __inline__ __m512d __DEFAULT_FN_ATTRS
1204_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1205{
Craig Topper2da60bc2018-06-21 05:01:01 +00001206 return (__m512d)__builtin_ia32_selectpd_512(__U,
1207 (__v8df)_mm512_min_pd(__A, __B),
1208 (__v8df)__W);
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001209}
1210
1211static __inline__ __m512d __DEFAULT_FN_ATTRS
1212_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1213{
Craig Topper2da60bc2018-06-21 05:01:01 +00001214 return (__m512d)__builtin_ia32_selectpd_512(__U,
1215 (__v8df)_mm512_min_pd(__A, __B),
1216 (__v8df)_mm512_setzero_pd());
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001217}
1218
/* Expands to __builtin_ia32_minps512 applied to A and B (each cast through
 * __m512 to __v16sf) with R forwarded as an int.  The expansion is fully
 * parenthesized so that a postfix operator written after the macro call
 * applies to the __m512 result instead of to the builtin call. */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
1222
/* Masked form of _mm512_min_round_ps: passes the _mm512_min_round_ps(A, B, R)
 * value and W to __builtin_ia32_selectps_512 under mask U.  W is cast through
 * __m512 like the other vector operands, and the expansion is fully
 * parenthesized so the outer cast cannot be split by a postfix operator. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                         (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                         (__v16sf)(__m512)(W)))
1227
/* Zero-fallback form of _mm512_min_round_ps: passes the
 * _mm512_min_round_ps(A, B, R) value and _mm512_setzero_ps() to
 * __builtin_ia32_selectps_512 under mask U.  The expansion is fully
 * parenthesized so the outer cast cannot be split by a postfix operator. */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                         (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                         (__v16sf)_mm512_setzero_ps()))
1232
Michael Kupersteine45af542015-06-30 13:36:19 +00001233static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001234_mm512_min_ps(__m512 __A, __m512 __B)
1235{
Craig Topper2da60bc2018-06-21 05:01:01 +00001236 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1237 _MM_FROUND_CUR_DIRECTION);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001238}
1239
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001240static __inline__ __m512 __DEFAULT_FN_ATTRS
1241_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1242{
Craig Topper2da60bc2018-06-21 05:01:01 +00001243 return (__m512)__builtin_ia32_selectps_512(__U,
1244 (__v16sf)_mm512_min_ps(__A, __B),
1245 (__v16sf)__W);
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001246}
1247
1248static __inline__ __m512 __DEFAULT_FN_ATTRS
1249_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1250{
Craig Topper2da60bc2018-06-21 05:01:01 +00001251 return (__m512)__builtin_ia32_selectps_512(__U,
1252 (__v16sf)_mm512_min_ps(__A, __B),
1253 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001254}
1255
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001256static __inline__ __m128 __DEFAULT_FN_ATTRS
1257_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001258 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001259 (__v4sf) __B,
1260 (__v4sf) __W,
1261 (__mmask8) __U,
1262 _MM_FROUND_CUR_DIRECTION);
1263}
1264
1265static __inline__ __m128 __DEFAULT_FN_ATTRS
1266_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001267 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001268 (__v4sf) __B,
1269 (__v4sf) _mm_setzero_ps (),
1270 (__mmask8) __U,
1271 _MM_FROUND_CUR_DIRECTION);
1272}
1273
/* Expands to __builtin_ia32_minss_round_mask on A and B with a zero
 * pass-through vector, an all-ones mask ((__mmask8)-1) and R forwarded as an
 * int.  The expansion is fully parenthesized so the outer cast cannot be
 * split by a postfix operator following the macro call. */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001279
/* Expands to __builtin_ia32_minss_round_mask on A and B with W as the
 * pass-through vector, U as the mask and R forwarded as an int.  The
 * expansion is fully parenthesized so the outer cast cannot be split by a
 * postfix operator following the macro call. */
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001285
/* Expands to __builtin_ia32_minss_round_mask on A and B with a zero
 * pass-through vector, U as the mask and R forwarded as an int.  The
 * expansion is fully parenthesized so the outer cast cannot be split by a
 * postfix operator following the macro call. */
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001291
1292static __inline__ __m128d __DEFAULT_FN_ATTRS
1293_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001294 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001295 (__v2df) __B,
1296 (__v2df) __W,
1297 (__mmask8) __U,
1298 _MM_FROUND_CUR_DIRECTION);
1299}
1300
1301static __inline__ __m128d __DEFAULT_FN_ATTRS
1302_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001303 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001304 (__v2df) __B,
1305 (__v2df) _mm_setzero_pd (),
1306 (__mmask8) __U,
1307 _MM_FROUND_CUR_DIRECTION);
1308}
1309
/* Expands to __builtin_ia32_minsd_round_mask on A and B with a zero
 * pass-through vector, an all-ones mask ((__mmask8)-1) and R forwarded as an
 * int.  The expansion is fully parenthesized so the outer cast cannot be
 * split by a postfix operator following the macro call. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001315
/* Expands to __builtin_ia32_minsd_round_mask on A and B with W as the
 * pass-through vector, U as the mask and R forwarded as an int.  The
 * expansion is fully parenthesized so the outer cast cannot be split by a
 * postfix operator following the macro call. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001321
/* Expands to __builtin_ia32_minsd_round_mask on A and B with a zero
 * pass-through vector, U as the mask and R forwarded as an int.  The
 * expansion is fully parenthesized so the outer cast cannot be split by a
 * postfix operator following the macro call. */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001327
Adam Nemet0d5bb552014-07-28 17:14:40 +00001328static __inline __m512i
Michael Kupersteine45af542015-06-30 13:36:19 +00001329__DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001330_mm512_min_epi32(__m512i __A, __m512i __B)
1331{
Craig Topperf2043b02018-05-23 04:51:54 +00001332 return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001333}
1334
Michael Zuckermande860e52016-05-10 11:34:19 +00001335static __inline__ __m512i __DEFAULT_FN_ATTRS
1336_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1337{
Craig Topperf2043b02018-05-23 04:51:54 +00001338 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1339 (__v16si)_mm512_min_epi32(__A, __B),
1340 (__v16si)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001341}
1342
1343static __inline__ __m512i __DEFAULT_FN_ATTRS
1344_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1345{
Craig Topperf2043b02018-05-23 04:51:54 +00001346 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1347 (__v16si)_mm512_min_epi32(__A, __B),
1348 (__v16si)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001349}
1350
Michael Kupersteine45af542015-06-30 13:36:19 +00001351static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001352_mm512_min_epu32(__m512i __A, __m512i __B)
1353{
Craig Topperf2043b02018-05-23 04:51:54 +00001354 return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001355}
1356
Michael Zuckermande860e52016-05-10 11:34:19 +00001357static __inline__ __m512i __DEFAULT_FN_ATTRS
1358_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1359{
Craig Topperf2043b02018-05-23 04:51:54 +00001360 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1361 (__v16si)_mm512_min_epu32(__A, __B),
1362 (__v16si)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001363}
1364
1365static __inline__ __m512i __DEFAULT_FN_ATTRS
1366_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1367{
Craig Topperf2043b02018-05-23 04:51:54 +00001368 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1369 (__v16si)_mm512_min_epu32(__A, __B),
1370 (__v16si)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001371}
1372
Michael Kupersteine45af542015-06-30 13:36:19 +00001373static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001374_mm512_min_epi64(__m512i __A, __m512i __B)
1375{
Craig Topperf2043b02018-05-23 04:51:54 +00001376 return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001377}
1378
Michael Zuckermande860e52016-05-10 11:34:19 +00001379static __inline__ __m512i __DEFAULT_FN_ATTRS
1380_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1381{
Craig Topperf2043b02018-05-23 04:51:54 +00001382 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1383 (__v8di)_mm512_min_epi64(__A, __B),
1384 (__v8di)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001385}
1386
1387static __inline__ __m512i __DEFAULT_FN_ATTRS
1388_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1389{
Craig Topperf2043b02018-05-23 04:51:54 +00001390 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1391 (__v8di)_mm512_min_epi64(__A, __B),
1392 (__v8di)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001393}
1394
Michael Kupersteine45af542015-06-30 13:36:19 +00001395static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001396_mm512_min_epu64(__m512i __A, __m512i __B)
1397{
Craig Topperf2043b02018-05-23 04:51:54 +00001398 return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001399}
1400
Michael Zuckermande860e52016-05-10 11:34:19 +00001401static __inline__ __m512i __DEFAULT_FN_ATTRS
1402_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1403{
Craig Topperf2043b02018-05-23 04:51:54 +00001404 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1405 (__v8di)_mm512_min_epu64(__A, __B),
1406 (__v8di)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001407}
1408
1409static __inline__ __m512i __DEFAULT_FN_ATTRS
1410_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1411{
Craig Topperf2043b02018-05-23 04:51:54 +00001412 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1413 (__v8di)_mm512_min_epu64(__A, __B),
1414 (__v8di)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001415}
1416
Michael Kupersteine45af542015-06-30 13:36:19 +00001417static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001418_mm512_mul_epi32(__m512i __X, __m512i __Y)
1419{
Craig Topper70536f42016-12-27 04:04:57 +00001420 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001421}
1422
Michael Kupersteine45af542015-06-30 13:36:19 +00001423static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topper70536f42016-12-27 04:04:57 +00001424_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001425{
Craig Topper70536f42016-12-27 04:04:57 +00001426 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1427 (__v8di)_mm512_mul_epi32(__X, __Y),
1428 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001429}
1430
Michael Kupersteine45af542015-06-30 13:36:19 +00001431static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topper70536f42016-12-27 04:04:57 +00001432_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001433{
Craig Topper70536f42016-12-27 04:04:57 +00001434 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1435 (__v8di)_mm512_mul_epi32(__X, __Y),
1436 (__v8di)_mm512_setzero_si512 ());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001437}
1438
Michael Kupersteine45af542015-06-30 13:36:19 +00001439static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001440_mm512_mul_epu32(__m512i __X, __m512i __Y)
1441{
Craig Topper70536f42016-12-27 04:04:57 +00001442 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001443}
1444
Michael Kupersteine45af542015-06-30 13:36:19 +00001445static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topper70536f42016-12-27 04:04:57 +00001446_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001447{
Craig Topper70536f42016-12-27 04:04:57 +00001448 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1449 (__v8di)_mm512_mul_epu32(__X, __Y),
1450 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001451}
1452
Michael Kupersteine45af542015-06-30 13:36:19 +00001453static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topper70536f42016-12-27 04:04:57 +00001454_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001455{
Craig Topper70536f42016-12-27 04:04:57 +00001456 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1457 (__v8di)_mm512_mul_epu32(__X, __Y),
1458 (__v8di)_mm512_setzero_si512 ());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001459}
1460
Michael Kupersteine45af542015-06-30 13:36:19 +00001461static __inline __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001462_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1463{
Craig Topper6a77b622016-06-04 05:43:41 +00001464 return (__m512i) ((__v16su) __A * (__v16su) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001465}
1466
Michael Kupersteine45af542015-06-30 13:36:19 +00001467static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topperf43e4a12016-09-03 19:19:49 +00001468_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001469{
Craig Topperf43e4a12016-09-03 19:19:49 +00001470 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1471 (__v16si)_mm512_mullo_epi32(__A, __B),
1472 (__v16si)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001473}
1474
Michael Kupersteine45af542015-06-30 13:36:19 +00001475static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topperf43e4a12016-09-03 19:19:49 +00001476_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001477{
Craig Topperf43e4a12016-09-03 19:19:49 +00001478 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1479 (__v16si)_mm512_mullo_epi32(__A, __B),
1480 (__v16si)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001481}
1482
Craig Toppere95bde32018-04-26 05:38:39 +00001483static __inline__ __m512i __DEFAULT_FN_ATTRS
1484_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1485 return (__m512i) ((__v8du) __A * (__v8du) __B);
1486}
1487
1488static __inline__ __m512i __DEFAULT_FN_ATTRS
1489_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1490 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1491 (__v8di)_mm512_mullox_epi64(__A, __B),
1492 (__v8di)__W);
1493}
1494
/* Expands to __builtin_ia32_sqrtpd512 applied to A (cast through __m512d to
 * __v8df) with R forwarded as an int.  The expansion is fully parenthesized
 * so that a postfix operator written after the macro call applies to the
 * __m512d result instead of to the builtin call. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
1497
/* Masked form of _mm512_sqrt_round_pd: passes the _mm512_sqrt_round_pd(A, R)
 * value and W to __builtin_ia32_selectpd_512 under mask U.  The expansion is
 * fully parenthesized so the outer cast cannot be split by a postfix
 * operator following the macro call. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                 (__v8df)(__m512d)(W)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001502
/* Zero-fallback form of _mm512_sqrt_round_pd: passes the
 * _mm512_sqrt_round_pd(A, R) value and _mm512_setzero_pd() to
 * __builtin_ia32_selectpd_512 under mask U.  The expansion is fully
 * parenthesized so the outer cast cannot be split by a postfix operator. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                 (__v8df)_mm512_setzero_pd()))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001507
Michael Kupersteine45af542015-06-30 13:36:19 +00001508static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper8bf793f2018-06-29 05:43:33 +00001509_mm512_sqrt_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001510{
Craig Topper8bf793f2018-06-29 05:43:33 +00001511 return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1512 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001513}
1514
Michael Zuckermancb856772016-05-16 11:42:01 +00001515static __inline__ __m512d __DEFAULT_FN_ATTRS
1516_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1517{
Craig Topper8bf793f2018-06-29 05:43:33 +00001518 return (__m512d)__builtin_ia32_selectpd_512(__U,
1519 (__v8df)_mm512_sqrt_pd(__A),
1520 (__v8df)__W);
Michael Zuckermancb856772016-05-16 11:42:01 +00001521}
1522
1523static __inline__ __m512d __DEFAULT_FN_ATTRS
1524_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1525{
Craig Topper8bf793f2018-06-29 05:43:33 +00001526 return (__m512d)__builtin_ia32_selectpd_512(__U,
1527 (__v8df)_mm512_sqrt_pd(__A),
1528 (__v8df)_mm512_setzero_pd());
Michael Zuckermancb856772016-05-16 11:42:01 +00001529}
1530
/* Expands to __builtin_ia32_sqrtps512 applied to A (cast through __m512 to
 * __v16sf) with R forwarded as an int.  The expansion is fully parenthesized
 * so that a postfix operator written after the macro call applies to the
 * __m512 result instead of to the builtin call. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
1533
/* Masked form of _mm512_sqrt_round_ps: passes the _mm512_sqrt_round_ps(A, R)
 * value and W to __builtin_ia32_selectps_512 under mask U.  The expansion is
 * fully parenthesized so the outer cast cannot be split by a postfix
 * operator following the macro call. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                (__v16sf)(__m512)(W)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001538
/* Zero-fallback form of _mm512_sqrt_round_ps: passes the
 * _mm512_sqrt_round_ps(A, R) value and _mm512_setzero_ps() to
 * __builtin_ia32_selectps_512 under mask U.  The expansion is fully
 * parenthesized so the outer cast cannot be split by a postfix operator. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                (__v16sf)_mm512_setzero_ps()))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001543
Michael Kupersteine45af542015-06-30 13:36:19 +00001544static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper8bf793f2018-06-29 05:43:33 +00001545_mm512_sqrt_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001546{
Craig Topper8bf793f2018-06-29 05:43:33 +00001547 return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1548 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001549}
1550
Michael Zuckermancb856772016-05-16 11:42:01 +00001551static __inline__ __m512 __DEFAULT_FN_ATTRS
Asaf Badouh880f0c22016-06-13 15:15:57 +00001552_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
Michael Zuckermancb856772016-05-16 11:42:01 +00001553{
Craig Topper8bf793f2018-06-29 05:43:33 +00001554 return (__m512)__builtin_ia32_selectps_512(__U,
1555 (__v16sf)_mm512_sqrt_ps(__A),
1556 (__v16sf)__W);
Michael Zuckermancb856772016-05-16 11:42:01 +00001557}
1558
1559static __inline__ __m512 __DEFAULT_FN_ATTRS
Asaf Badouh880f0c22016-06-13 15:15:57 +00001560_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
Michael Zuckermancb856772016-05-16 11:42:01 +00001561{
Craig Topper8bf793f2018-06-29 05:43:33 +00001562 return (__m512)__builtin_ia32_selectps_512(__U,
1563 (__v16sf)_mm512_sqrt_ps(__A),
1564 (__v16sf)_mm512_setzero_ps());
Michael Zuckermancb856772016-05-16 11:42:01 +00001565}
1566
Michael Kupersteine45af542015-06-30 13:36:19 +00001567static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001568_mm512_rsqrt14_pd(__m512d __A)
1569{
1570 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1571 (__v8df)
1572 _mm512_setzero_pd (),
1573 (__mmask8) -1);}
1574
Michael Zuckermancb856772016-05-16 11:42:01 +00001575static __inline__ __m512d __DEFAULT_FN_ATTRS
1576_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1577{
1578 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1579 (__v8df) __W,
1580 (__mmask8) __U);
1581}
1582
1583static __inline__ __m512d __DEFAULT_FN_ATTRS
1584_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1585{
1586 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1587 (__v8df)
1588 _mm512_setzero_pd (),
1589 (__mmask8) __U);
1590}
1591
Michael Kupersteine45af542015-06-30 13:36:19 +00001592static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001593_mm512_rsqrt14_ps(__m512 __A)
1594{
1595 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1596 (__v16sf)
1597 _mm512_setzero_ps (),
1598 (__mmask16) -1);
1599}
1600
Michael Zuckermancb856772016-05-16 11:42:01 +00001601static __inline__ __m512 __DEFAULT_FN_ATTRS
1602_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1603{
1604 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1605 (__v16sf) __W,
1606 (__mmask16) __U);
1607}
1608
1609static __inline__ __m512 __DEFAULT_FN_ATTRS
1610_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1611{
1612 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1613 (__v16sf)
1614 _mm512_setzero_ps (),
1615 (__mmask16) __U);
1616}
1617
Michael Kupersteine45af542015-06-30 13:36:19 +00001618static __inline__ __m128 __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001619_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1620{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001621 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001622 (__v4sf) __B,
1623 (__v4sf)
1624 _mm_setzero_ps (),
1625 (__mmask8) -1);
1626}
1627
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001628static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001629_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001630{
1631 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1632 (__v4sf) __B,
1633 (__v4sf) __W,
1634 (__mmask8) __U);
1635}
1636
1637static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001638_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001639{
1640 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1641 (__v4sf) __B,
1642 (__v4sf) _mm_setzero_ps (),
1643 (__mmask8) __U);
1644}
1645
Michael Kupersteine45af542015-06-30 13:36:19 +00001646static __inline__ __m128d __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001647_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1648{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001649 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001650 (__v2df) __B,
1651 (__v2df)
1652 _mm_setzero_pd (),
1653 (__mmask8) -1);
1654}
1655
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001656static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001657_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001658{
1659 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1660 (__v2df) __B,
1661 (__v2df) __W,
1662 (__mmask8) __U);
1663}
1664
1665static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001666_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001667{
1668 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1669 (__v2df) __B,
1670 (__v2df) _mm_setzero_pd (),
1671 (__mmask8) __U);
1672}
1673
Michael Kupersteine45af542015-06-30 13:36:19 +00001674static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001675_mm512_rcp14_pd(__m512d __A)
1676{
1677 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1678 (__v8df)
1679 _mm512_setzero_pd (),
1680 (__mmask8) -1);
1681}
1682
Michael Zuckermancb856772016-05-16 11:42:01 +00001683static __inline__ __m512d __DEFAULT_FN_ATTRS
1684_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1685{
1686 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1687 (__v8df) __W,
1688 (__mmask8) __U);
1689}
1690
1691static __inline__ __m512d __DEFAULT_FN_ATTRS
1692_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1693{
1694 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1695 (__v8df)
1696 _mm512_setzero_pd (),
1697 (__mmask8) __U);
1698}
1699
Michael Kupersteine45af542015-06-30 13:36:19 +00001700static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001701_mm512_rcp14_ps(__m512 __A)
1702{
1703 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1704 (__v16sf)
1705 _mm512_setzero_ps (),
1706 (__mmask16) -1);
1707}
Michael Zuckermancb856772016-05-16 11:42:01 +00001708
1709static __inline__ __m512 __DEFAULT_FN_ATTRS
1710_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1711{
1712 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1713 (__v16sf) __W,
1714 (__mmask16) __U);
1715}
1716
1717static __inline__ __m512 __DEFAULT_FN_ATTRS
1718_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1719{
1720 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1721 (__v16sf)
1722 _mm512_setzero_ps (),
1723 (__mmask16) __U);
1724}
1725
Michael Kupersteine45af542015-06-30 13:36:19 +00001726static __inline__ __m128 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00001727_mm_rcp14_ss(__m128 __A, __m128 __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001728{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001729 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001730 (__v4sf) __B,
1731 (__v4sf)
1732 _mm_setzero_ps (),
1733 (__mmask8) -1);
1734}
1735
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001736static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001737_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001738{
1739 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1740 (__v4sf) __B,
1741 (__v4sf) __W,
1742 (__mmask8) __U);
1743}
1744
1745static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001746_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001747{
1748 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1749 (__v4sf) __B,
1750 (__v4sf) _mm_setzero_ps (),
1751 (__mmask8) __U);
1752}
1753
Michael Kupersteine45af542015-06-30 13:36:19 +00001754static __inline__ __m128d __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00001755_mm_rcp14_sd(__m128d __A, __m128d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001756{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001757 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001758 (__v2df) __B,
1759 (__v2df)
1760 _mm_setzero_pd (),
1761 (__mmask8) -1);
1762}
1763
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001764static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001765_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001766{
1767 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1768 (__v2df) __B,
1769 (__v2df) __W,
1770 (__mmask8) __U);
1771}
1772
1773static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001774_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001775{
1776 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1777 (__v2df) __B,
1778 (__v2df) _mm_setzero_pd (),
1779 (__mmask8) __U);
1780}
1781
Michael Kupersteine45af542015-06-30 13:36:19 +00001782static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001783_mm512_floor_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001784{
Adam Nemet0d5bb552014-07-28 17:14:40 +00001785 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1786 _MM_FROUND_FLOOR,
1787 (__v16sf) __A, -1,
1788 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001789}
1790
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00001791static __inline__ __m512 __DEFAULT_FN_ATTRS
1792_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1793{
1794 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1795 _MM_FROUND_FLOOR,
1796 (__v16sf) __W, __U,
1797 _MM_FROUND_CUR_DIRECTION);
1798}
1799
Michael Kupersteine45af542015-06-30 13:36:19 +00001800static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001801_mm512_floor_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001802{
Adam Nemet0d5bb552014-07-28 17:14:40 +00001803 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1804 _MM_FROUND_FLOOR,
1805 (__v8df) __A, -1,
1806 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001807}
1808
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00001809static __inline__ __m512d __DEFAULT_FN_ATTRS
1810_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1811{
1812 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1813 _MM_FROUND_FLOOR,
1814 (__v8df) __W, __U,
1815 _MM_FROUND_CUR_DIRECTION);
1816}
1817
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00001818static __inline__ __m512 __DEFAULT_FN_ATTRS
1819_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1820{
1821 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1822 _MM_FROUND_CEIL,
1823 (__v16sf) __W, __U,
1824 _MM_FROUND_CUR_DIRECTION);
1825}
1826
Michael Kupersteine45af542015-06-30 13:36:19 +00001827static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001828_mm512_ceil_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001829{
Adam Nemet0d5bb552014-07-28 17:14:40 +00001830 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1831 _MM_FROUND_CEIL,
1832 (__v16sf) __A, -1,
1833 _MM_FROUND_CUR_DIRECTION);
1834}
1835
Michael Kupersteine45af542015-06-30 13:36:19 +00001836static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001837_mm512_ceil_pd(__m512d __A)
1838{
1839 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1840 _MM_FROUND_CEIL,
1841 (__v8df) __A, -1,
1842 _MM_FROUND_CUR_DIRECTION);
1843}
1844
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00001845static __inline__ __m512d __DEFAULT_FN_ATTRS
1846_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1847{
1848 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1849 _MM_FROUND_CEIL,
1850 (__v8df) __W, __U,
1851 _MM_FROUND_CUR_DIRECTION);
1852}
1853
Michael Kupersteine45af542015-06-30 13:36:19 +00001854static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001855_mm512_abs_epi64(__m512i __A)
1856{
Craig Topperf2043b02018-05-23 04:51:54 +00001857 return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001858}
1859
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001860static __inline__ __m512i __DEFAULT_FN_ATTRS
1861_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1862{
Craig Topperf2043b02018-05-23 04:51:54 +00001863 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1864 (__v8di)_mm512_abs_epi64(__A),
1865 (__v8di)__W);
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001866}
1867
1868static __inline__ __m512i __DEFAULT_FN_ATTRS
1869_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1870{
Craig Topperf2043b02018-05-23 04:51:54 +00001871 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1872 (__v8di)_mm512_abs_epi64(__A),
1873 (__v8di)_mm512_setzero_si512());
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001874}
1875
Michael Kupersteine45af542015-06-30 13:36:19 +00001876static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001877_mm512_abs_epi32(__m512i __A)
1878{
Craig Topperf2043b02018-05-23 04:51:54 +00001879 return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001880}
1881
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001882static __inline__ __m512i __DEFAULT_FN_ATTRS
1883_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1884{
Craig Topper26df8c42018-05-24 17:32:49 +00001885 return (__m512i)__builtin_ia32_selectd_512(__U,
Craig Topperf2043b02018-05-23 04:51:54 +00001886 (__v16si)_mm512_abs_epi32(__A),
1887 (__v16si)__W);
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001888}
1889
1890static __inline__ __m512i __DEFAULT_FN_ATTRS
1891_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1892{
Craig Topper26df8c42018-05-24 17:32:49 +00001893 return (__m512i)__builtin_ia32_selectd_512(__U,
Craig Topperf2043b02018-05-23 04:51:54 +00001894 (__v16si)_mm512_abs_epi32(__A),
1895 (__v16si)_mm512_setzero_si512());
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001896}
1897
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001898static __inline__ __m128 __DEFAULT_FN_ATTRS
1899_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Tomasz Krupa82aa42a2018-06-14 17:36:23 +00001900 __A = _mm_add_ss(__A, __B);
1901 __A[0] = (__U & 1) ? __A[0] : __W[0];
1902 return __A;
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001903}
1904
1905static __inline__ __m128 __DEFAULT_FN_ATTRS
1906_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Tomasz Krupa82aa42a2018-06-14 17:36:23 +00001907 __A = _mm_add_ss(__A, __B);
1908 __A[0] = (__U & 1) ? __A[0] : 0;
1909 return __A;
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001910}
1911
/* Scalar single-precision add with an explicit rounding operand R.
 *   _mm_add_round_ss        all-ones mask, zeroed pass-through
 *   _mm_mask_add_round_ss   mask U, pass-through W
 *   _mm_maskz_add_round_ss  mask U, zeroed pass-through
 * Every expansion is parenthesized as a whole so that a postfix operator at
 * the use site (e.g. a lane subscript) applies to the cast result rather
 * than to the builtin call. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001929
1930static __inline__ __m128d __DEFAULT_FN_ATTRS
1931_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Tomasz Krupa82aa42a2018-06-14 17:36:23 +00001932 __A = _mm_add_sd(__A, __B);
1933 __A[0] = (__U & 1) ? __A[0] : __W[0];
1934 return __A;
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001935}
1936
1937static __inline__ __m128d __DEFAULT_FN_ATTRS
1938_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Tomasz Krupa82aa42a2018-06-14 17:36:23 +00001939 __A = _mm_add_sd(__A, __B);
1940 __A[0] = (__U & 1) ? __A[0] : 0;
1941 return __A;
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001942}
/* Scalar double-precision add with an explicit rounding operand R.
 *   _mm_add_round_sd        all-ones mask, zeroed pass-through
 *   _mm_mask_add_round_sd   mask U, pass-through W
 *   _mm_maskz_add_round_sd  mask U, zeroed pass-through
 * Every expansion is parenthesized as a whole so that postfix operators at
 * the use site apply to the cast result rather than to the builtin call. */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001960
Asaf Badouhffeb6242015-07-21 15:27:28 +00001961static __inline__ __m512d __DEFAULT_FN_ATTRS
1962_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00001963 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1964 (__v8df)_mm512_add_pd(__A, __B),
1965 (__v8df)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00001966}
1967
1968static __inline__ __m512d __DEFAULT_FN_ATTRS
1969_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00001970 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1971 (__v8df)_mm512_add_pd(__A, __B),
1972 (__v8df)_mm512_setzero_pd());
Asaf Badouhffeb6242015-07-21 15:27:28 +00001973}
1974
1975static __inline__ __m512 __DEFAULT_FN_ATTRS
1976_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00001977 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1978 (__v16sf)_mm512_add_ps(__A, __B),
1979 (__v16sf)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00001980}
1981
1982static __inline__ __m512 __DEFAULT_FN_ATTRS
1983_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00001984 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1985 (__v16sf)_mm512_add_ps(__A, __B),
1986 (__v16sf)_mm512_setzero_ps());
Asaf Badouhffeb6242015-07-21 15:27:28 +00001987}
1988
/* Packed double-precision add with an explicit rounding operand R, plus its
 * merge-masked (W, U) and zero-masked (U) forms, which feed the rounded sum
 * through the pd select builtin.
 * The definitions must not end in ';' and are parenthesized as a whole:
 * they are expressions and have to remain usable as function arguments,
 * initializers and operands of postfix operators. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002002
/* Packed single-precision add with an explicit rounding operand R, plus its
 * merge-masked (W, U) and zero-masked (U) forms, which feed the rounded sum
 * through the ps select builtin.
 * The definitions must not end in ';' and are parenthesized as a whole:
 * they are expressions and have to remain usable as function arguments,
 * initializers and operands of postfix operators. */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002016
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002017static __inline__ __m128 __DEFAULT_FN_ATTRS
2018_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Tomasz Krupa82aa42a2018-06-14 17:36:23 +00002019 __A = _mm_sub_ss(__A, __B);
2020 __A[0] = (__U & 1) ? __A[0] : __W[0];
2021 return __A;
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002022}
2023
2024static __inline__ __m128 __DEFAULT_FN_ATTRS
2025_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Tomasz Krupa82aa42a2018-06-14 17:36:23 +00002026 __A = _mm_sub_ss(__A, __B);
2027 __A[0] = (__U & 1) ? __A[0] : 0;
2028 return __A;
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002029}
/* Scalar single-precision subtract with an explicit rounding operand R.
 *   _mm_sub_round_ss        all-ones mask, zeroed pass-through
 *   _mm_mask_sub_round_ss   mask U, pass-through W
 *   _mm_maskz_sub_round_ss  mask U, zeroed pass-through
 * Every expansion is parenthesized as a whole so that postfix operators at
 * the use site apply to the cast result rather than to the builtin call. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002047
2048static __inline__ __m128d __DEFAULT_FN_ATTRS
2049_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Tomasz Krupa82aa42a2018-06-14 17:36:23 +00002050 __A = _mm_sub_sd(__A, __B);
2051 __A[0] = (__U & 1) ? __A[0] : __W[0];
2052 return __A;
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002053}
2054
2055static __inline__ __m128d __DEFAULT_FN_ATTRS
2056_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Tomasz Krupa82aa42a2018-06-14 17:36:23 +00002057 __A = _mm_sub_sd(__A, __B);
2058 __A[0] = (__U & 1) ? __A[0] : 0;
2059 return __A;
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002060}
2061
/* Scalar double-precision subtract with an explicit rounding operand R.
 *   _mm_sub_round_sd        all-ones mask, zeroed pass-through
 *   _mm_mask_sub_round_sd   mask U, pass-through W
 *   _mm_maskz_sub_round_sd  mask U, zeroed pass-through
 * Every expansion is parenthesized as a whole so that postfix operators at
 * the use site apply to the cast result rather than to the builtin call. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002079
Asaf Badouhffeb6242015-07-21 15:27:28 +00002080static __inline__ __m512d __DEFAULT_FN_ATTRS
2081_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002082 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2083 (__v8df)_mm512_sub_pd(__A, __B),
2084 (__v8df)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002085}
2086
2087static __inline__ __m512d __DEFAULT_FN_ATTRS
2088_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002089 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2090 (__v8df)_mm512_sub_pd(__A, __B),
2091 (__v8df)_mm512_setzero_pd());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002092}
2093
2094static __inline__ __m512 __DEFAULT_FN_ATTRS
2095_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002096 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2097 (__v16sf)_mm512_sub_ps(__A, __B),
2098 (__v16sf)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002099}
2100
2101static __inline__ __m512 __DEFAULT_FN_ATTRS
2102_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002103 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2104 (__v16sf)_mm512_sub_ps(__A, __B),
2105 (__v16sf)_mm512_setzero_ps());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002106}
2107
/* Packed double-precision subtract with an explicit rounding operand R, plus
 * its merge-masked (W, U) and zero-masked (U) forms, which feed the rounded
 * difference through the pd select builtin.
 * The definitions must not end in ';' and are parenthesized as a whole:
 * they are expressions and have to remain usable as function arguments,
 * initializers and operands of postfix operators. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002121
/* Packed single-precision subtract with an explicit rounding operand R, plus
 * its merge-masked (W, U) and zero-masked (U) forms, which feed the rounded
 * difference through the ps select builtin.
 * The definitions must not end in ';' and are parenthesized as a whole:
 * they are expressions and have to remain usable as function arguments,
 * initializers and operands of postfix operators. */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002135
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002136static __inline__ __m128 __DEFAULT_FN_ATTRS
2137_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Tomasz Krupa82aa42a2018-06-14 17:36:23 +00002138 __A = _mm_mul_ss(__A, __B);
2139 __A[0] = (__U & 1) ? __A[0] : __W[0];
2140 return __A;
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002141}
2142
2143static __inline__ __m128 __DEFAULT_FN_ATTRS
2144_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Tomasz Krupa82aa42a2018-06-14 17:36:23 +00002145 __A = _mm_mul_ss(__A, __B);
2146 __A[0] = (__U & 1) ? __A[0] : 0;
2147 return __A;
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002148}
/* Scalar single-precision multiply with an explicit rounding operand R.
 *   _mm_mul_round_ss        all-ones mask, zeroed pass-through
 *   _mm_mask_mul_round_ss   mask U, pass-through W
 *   _mm_maskz_mul_round_ss  mask U, zeroed pass-through
 * Every expansion is parenthesized as a whole so that postfix operators at
 * the use site apply to the cast result rather than to the builtin call. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002166
2167static __inline__ __m128d __DEFAULT_FN_ATTRS
2168_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Tomasz Krupa82aa42a2018-06-14 17:36:23 +00002169 __A = _mm_mul_sd(__A, __B);
2170 __A[0] = (__U & 1) ? __A[0] : __W[0];
2171 return __A;
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002172}
2173
2174static __inline__ __m128d __DEFAULT_FN_ATTRS
2175_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Tomasz Krupa82aa42a2018-06-14 17:36:23 +00002176 __A = _mm_mul_sd(__A, __B);
2177 __A[0] = (__U & 1) ? __A[0] : 0;
2178 return __A;
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002179}
2180
/* Scalar double-precision multiply with an explicit rounding operand R.
 *   _mm_mul_round_sd        all-ones mask, zeroed pass-through
 *   _mm_mask_mul_round_sd   mask U, pass-through W
 *   _mm_maskz_mul_round_sd  mask U, zeroed pass-through
 * Every expansion is parenthesized as a whole so that postfix operators at
 * the use site apply to the cast result rather than to the builtin call. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002198
Asaf Badouhffeb6242015-07-21 15:27:28 +00002199static __inline__ __m512d __DEFAULT_FN_ATTRS
2200_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002201 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2202 (__v8df)_mm512_mul_pd(__A, __B),
2203 (__v8df)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002204}
2205
2206static __inline__ __m512d __DEFAULT_FN_ATTRS
2207_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002208 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2209 (__v8df)_mm512_mul_pd(__A, __B),
2210 (__v8df)_mm512_setzero_pd());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002211}
2212
2213static __inline__ __m512 __DEFAULT_FN_ATTRS
2214_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002215 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2216 (__v16sf)_mm512_mul_ps(__A, __B),
2217 (__v16sf)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002218}
2219
2220static __inline__ __m512 __DEFAULT_FN_ATTRS
2221_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002222 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2223 (__v16sf)_mm512_mul_ps(__A, __B),
2224 (__v16sf)_mm512_setzero_ps());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002225}
2226
/* Packed double-precision multiply with an explicit rounding operand R, plus
 * its merge-masked (W, U) and zero-masked (U) forms, which feed the rounded
 * product through the pd select builtin.
 * The definitions must not end in ';' and are parenthesized as a whole:
 * they are expressions and have to remain usable as function arguments,
 * initializers and operands of postfix operators. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002240
/* Packed single-precision multiply with an explicit rounding operand R, plus
 * its merge-masked (W, U) and zero-masked (U) forms, which feed the rounded
 * product through the ps select builtin.
 * The definitions must not end in ';' and are parenthesized as a whole:
 * they are expressions and have to remain usable as function arguments,
 * initializers and operands of postfix operators. */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002254
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002255static __inline__ __m128 __DEFAULT_FN_ATTRS
2256_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002257 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002258 (__v4sf) __B,
2259 (__v4sf) __W,
2260 (__mmask8) __U,
2261 _MM_FROUND_CUR_DIRECTION);
2262}
2263
2264static __inline__ __m128 __DEFAULT_FN_ATTRS
2265_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002266 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002267 (__v4sf) __B,
2268 (__v4sf) _mm_setzero_ps (),
2269 (__mmask8) __U,
2270 _MM_FROUND_CUR_DIRECTION);
2271}
2272
/* Scalar single-precision divide with an explicit rounding operand R.
 *   _mm_div_round_ss        all-ones mask, zeroed pass-through
 *   _mm_mask_div_round_ss   mask U, pass-through W
 *   _mm_maskz_div_round_ss  mask U, zeroed pass-through
 * Every expansion is parenthesized as a whole so that postfix operators at
 * the use site apply to the cast result rather than to the builtin call. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002290
2291static __inline__ __m128d __DEFAULT_FN_ATTRS
2292_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002293 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002294 (__v2df) __B,
2295 (__v2df) __W,
2296 (__mmask8) __U,
2297 _MM_FROUND_CUR_DIRECTION);
2298}
2299
2300static __inline__ __m128d __DEFAULT_FN_ATTRS
2301_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002302 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002303 (__v2df) __B,
2304 (__v2df) _mm_setzero_pd (),
2305 (__mmask8) __U,
2306 _MM_FROUND_CUR_DIRECTION);
2307}
2308
/* Scalar double-precision divide with an explicit rounding operand R.
 *   _mm_div_round_sd        all-ones mask, zeroed pass-through
 *   _mm_mask_div_round_sd   mask U, pass-through W
 *   _mm_maskz_div_round_sd  mask U, zeroed pass-through
 * Every expansion is parenthesized as a whole so that postfix operators at
 * the use site apply to the cast result rather than to the builtin call. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002326
Michael Zuckerman223676d2016-06-14 12:38:58 +00002327static __inline __m512d __DEFAULT_FN_ATTRS
2328_mm512_div_pd(__m512d __a, __m512d __b)
2329{
2330 return (__m512d)((__v8df)__a/(__v8df)__b);
2331}
2332
Asaf Badouhffeb6242015-07-21 15:27:28 +00002333static __inline__ __m512d __DEFAULT_FN_ATTRS
2334_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002335 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2336 (__v8df)_mm512_div_pd(__A, __B),
2337 (__v8df)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002338}
2339
2340static __inline__ __m512d __DEFAULT_FN_ATTRS
2341_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002342 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2343 (__v8df)_mm512_div_pd(__A, __B),
2344 (__v8df)_mm512_setzero_pd());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002345}
2346
Michael Zuckerman223676d2016-06-14 12:38:58 +00002347static __inline __m512 __DEFAULT_FN_ATTRS
2348_mm512_div_ps(__m512 __a, __m512 __b)
2349{
2350 return (__m512)((__v16sf)__a/(__v16sf)__b);
2351}
2352
Asaf Badouhffeb6242015-07-21 15:27:28 +00002353static __inline__ __m512 __DEFAULT_FN_ATTRS
2354_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002355 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2356 (__v16sf)_mm512_div_ps(__A, __B),
2357 (__v16sf)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002358}
2359
2360static __inline__ __m512 __DEFAULT_FN_ATTRS
2361_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002362 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2363 (__v16sf)_mm512_div_ps(__A, __B),
2364 (__v16sf)_mm512_setzero_ps());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002365}
2366
/* Packed double-precision divide with an explicit rounding operand R, plus
 * its merge-masked (W, U) and zero-masked (U) forms, which feed the rounded
 * quotient through the pd select builtin.
 * The definitions must not end in ';' and are parenthesized as a whole:
 * they are expressions and have to remain usable as function arguments,
 * initializers and operands of postfix operators. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002380
/* Packed single-precision divide with an explicit rounding operand R, plus
 * its merge-masked (W, U) and zero-masked (U) forms, which feed the rounded
 * quotient through the ps select builtin.
 * The definitions must not end in ';' and are parenthesized as a whole:
 * they are expressions and have to remain usable as function arguments,
 * initializers and operands of postfix operators. */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002394
/* Packed single-precision round-scale using _MM_FROUND_CUR_DIRECTION as the
 * final builtin operand.  Argument roles, as wired into the builtin:
 *   _mm512_roundscale_ps(A, B)               A = source, B = immediate;
 *                                            all-ones mask, undefined
 *                                            pass-through
 *   _mm512_mask_roundscale_ps(A, B, C, imm)  A = pass-through, B = mask,
 *                                            C = source
 *   _mm512_maskz_roundscale_ps(A, B, imm)    A = mask, B = source; zeroed
 *                                            pass-through
 * Every expansion is parenthesized as a whole so that postfix operators at
 * the use site apply to the cast result rather than to the builtin call. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), \
                                          (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))
Craig Topper8c18e112016-05-17 04:41:50 +00002411
/* Packed single-precision round-scale with a caller-supplied final operand R
 * in place of _MM_FROUND_CUR_DIRECTION.  Argument roles, as wired into the
 * builtin:
 *   _mm512_mask_roundscale_round_ps(A, B, C, imm, R)  A = pass-through,
 *                                                     B = mask, C = source
 *   _mm512_maskz_roundscale_round_ps(A, B, imm, R)    A = mask, B = source;
 *                                                     zeroed pass-through
 *   _mm512_roundscale_round_ps(A, imm, R)             A = source; all-ones
 *                                                     mask, undefined
 *                                                     pass-through
 * Every expansion is parenthesized as a whole so that postfix operators at
 * the use site apply to the cast result rather than to the builtin call. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), \
                                          (__mmask16)(B), (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002426
/* Packed double-precision round-scale using _MM_FROUND_CUR_DIRECTION as the
 * final builtin operand.  Argument roles, as wired into the builtin:
 *   _mm512_roundscale_pd(A, B)               A = source, B = immediate;
 *                                            all-ones mask, undefined
 *                                            pass-through
 *   _mm512_mask_roundscale_pd(A, B, C, imm)  A = pass-through, B = mask,
 *                                            C = source
 *   _mm512_maskz_roundscale_pd(A, B, imm)    A = mask, B = source; zeroed
 *                                            pass-through
 * Every expansion is parenthesized as a whole so that postfix operators at
 * the use site apply to the cast result rather than to the builtin call. */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), \
                                           (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))
Craig Topper8c18e112016-05-17 04:41:50 +00002443
/* Forwards C and imm to __builtin_ia32_rndscalepd_mask, passing A as the
 * third argument, B as the 8-bit mask and R as the rounding argument.
 * Parenthesized as a whole so the cast result is what any following postfix
 * operator applies to. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), \
                                           (__mmask8)(B), (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002448
/* Forwards B and imm to __builtin_ia32_rndscalepd_mask, passing
 * _mm512_setzero_pd() as the third argument, A as the 8-bit mask and R as
 * the rounding argument.  Parenthesized as a whole so the cast result is
 * what any following postfix operator applies to. */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002453
/* Forwards A and imm to __builtin_ia32_rndscalepd_mask, passing
 * _mm512_undefined_pd() as the third argument, an all-ones 8-bit mask and R
 * as the rounding argument.  Parenthesized as a whole so the cast result is
 * what any following postfix operator applies to. */
#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002458
/* Calls __builtin_ia32_vfmaddpd512_mask on A, B, C with an all-ones 8-bit
 * mask and rounding argument R.  Parenthesized as a whole so the cast result
 * is what any following postfix operator applies to. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002464
2465
/* Calls __builtin_ia32_vfmaddpd512_mask on A, B, C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002471
2472
/* Calls the _mask3 variant, __builtin_ia32_vfmaddpd512_mask3, on A, B, C
 * with mask U and rounding argument R.  Parenthesized as a whole so the cast
 * result is what any following postfix operator applies to. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002478
2479
/* Calls the _maskz variant, __builtin_ia32_vfmaddpd512_maskz, on A, B, C
 * with mask U and rounding argument R.  Parenthesized as a whole so the cast
 * result is what any following postfix operator applies to. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002485
2486
/* Built on the fmadd builtin with C negated: calls
 * __builtin_ia32_vfmaddpd512_mask on A, B, -C with an all-ones 8-bit mask
 * and rounding argument R.  Parenthesized as a whole so the cast result is
 * what any following postfix operator applies to. */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002492
2493
/* Built on the fmadd builtin with C negated: calls
 * __builtin_ia32_vfmaddpd512_mask on A, B, -C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002499
2500
/* Built on the fmadd builtin with C negated: calls
 * __builtin_ia32_vfmaddpd512_maskz on A, B, -C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002506
2507
/* Built on the fmadd builtin with A negated: calls
 * __builtin_ia32_vfmaddpd512_mask on -A, B, C with an all-ones 8-bit mask
 * and rounding argument R.  Parenthesized as a whole so the cast result is
 * what any following postfix operator applies to. */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002513
2514
/* Built on the fmadd builtin with A negated: calls
 * __builtin_ia32_vfmaddpd512_mask3 on -A, B, C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002520
2521
/* Built on the fmadd builtin with A negated: calls
 * __builtin_ia32_vfmaddpd512_maskz on -A, B, C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002527
2528
/* Built on the fmadd builtin with A and C negated: calls
 * __builtin_ia32_vfmaddpd512_mask on -A, B, -C with an all-ones 8-bit mask
 * and rounding argument R.  Parenthesized as a whole so the cast result is
 * what any following postfix operator applies to. */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002534
2535
/* Built on the fmadd builtin with A and C negated: calls
 * __builtin_ia32_vfmaddpd512_maskz on -A, B, -C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002541
2542
Michael Kupersteine45af542015-06-30 13:36:19 +00002543static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002544_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2545{
Craig Topperb92c77d2018-06-07 02:46:02 +00002546 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2547 (__v8df) __B,
2548 (__v8df) __C,
2549 (__mmask8) -1,
2550 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002551}
2552
Michael Kupersteine45af542015-06-30 13:36:19 +00002553static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002554_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2555{
Craig Topperb92c77d2018-06-07 02:46:02 +00002556 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2557 (__v8df) __B,
2558 (__v8df) __C,
2559 (__mmask8) __U,
2560 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002561}
2562
Michael Kupersteine45af542015-06-30 13:36:19 +00002563static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002564_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2565{
Craig Topperb92c77d2018-06-07 02:46:02 +00002566 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2567 (__v8df) __B,
2568 (__v8df) __C,
2569 (__mmask8) __U,
2570 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002571}
2572
Michael Kupersteine45af542015-06-30 13:36:19 +00002573static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002574_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2575{
Craig Topperb92c77d2018-06-07 02:46:02 +00002576 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2577 (__v8df) __B,
2578 (__v8df) __C,
2579 (__mmask8) __U,
2580 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002581}
2582
Michael Kupersteine45af542015-06-30 13:36:19 +00002583static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002584_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2585{
Craig Topperb92c77d2018-06-07 02:46:02 +00002586 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2587 (__v8df) __B,
2588 -(__v8df) __C,
2589 (__mmask8) -1,
2590 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002591}
2592
Michael Kupersteine45af542015-06-30 13:36:19 +00002593static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002594_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2595{
Craig Topperb92c77d2018-06-07 02:46:02 +00002596 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2597 (__v8df) __B,
2598 -(__v8df) __C,
2599 (__mmask8) __U,
2600 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002601}
2602
Michael Kupersteine45af542015-06-30 13:36:19 +00002603static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002604_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2605{
Craig Topperb92c77d2018-06-07 02:46:02 +00002606 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2607 (__v8df) __B,
2608 -(__v8df) __C,
2609 (__mmask8) __U,
2610 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002611}
2612
Michael Kupersteine45af542015-06-30 13:36:19 +00002613static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002614_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2615{
Craig Topperb92c77d2018-06-07 02:46:02 +00002616 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2617 -(__v8df) __B,
2618 (__v8df) __C,
2619 (__mmask8) -1,
2620 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002621}
2622
Michael Kupersteine45af542015-06-30 13:36:19 +00002623static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002624_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2625{
Craig Topperb92c77d2018-06-07 02:46:02 +00002626 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2627 (__v8df) __B,
2628 (__v8df) __C,
2629 (__mmask8) __U,
2630 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002631}
2632
Michael Kupersteine45af542015-06-30 13:36:19 +00002633static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002634_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2635{
Craig Topperb92c77d2018-06-07 02:46:02 +00002636 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2637 (__v8df) __B,
2638 (__v8df) __C,
2639 (__mmask8) __U,
2640 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002641}
2642
Michael Kupersteine45af542015-06-30 13:36:19 +00002643static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002644_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2645{
Craig Topperb92c77d2018-06-07 02:46:02 +00002646 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2647 -(__v8df) __B,
2648 -(__v8df) __C,
2649 (__mmask8) -1,
2650 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002651}
2652
Michael Kupersteine45af542015-06-30 13:36:19 +00002653static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002654_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2655{
Craig Topperb92c77d2018-06-07 02:46:02 +00002656 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2657 (__v8df) __B,
2658 -(__v8df) __C,
2659 (__mmask8) __U,
2660 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002661}
2662
/* Calls __builtin_ia32_vfmaddps512_mask on A, B, C with an all-ones 16-bit
 * mask and rounding argument R.  Parenthesized as a whole so the cast result
 * is what any following postfix operator applies to. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002668
2669
/* Calls __builtin_ia32_vfmaddps512_mask on A, B, C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002675
2676
/* Calls the _mask3 variant, __builtin_ia32_vfmaddps512_mask3, on A, B, C
 * with mask U and rounding argument R.  Parenthesized as a whole so the cast
 * result is what any following postfix operator applies to. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002682
2683
/* Calls the _maskz variant, __builtin_ia32_vfmaddps512_maskz, on A, B, C
 * with mask U and rounding argument R.  Parenthesized as a whole so the cast
 * result is what any following postfix operator applies to. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002689
2690
/* Built on the fmadd builtin with C negated: calls
 * __builtin_ia32_vfmaddps512_mask on A, B, -C with an all-ones 16-bit mask
 * and rounding argument R.  Parenthesized as a whole so the cast result is
 * what any following postfix operator applies to. */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002696
2697
/* Built on the fmadd builtin with C negated: calls
 * __builtin_ia32_vfmaddps512_mask on A, B, -C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002703
2704
/* Built on the fmadd builtin with C negated: calls
 * __builtin_ia32_vfmaddps512_maskz on A, B, -C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002710
2711
/* Built on the fmadd builtin with B negated: calls
 * __builtin_ia32_vfmaddps512_mask on A, -B, C with an all-ones 16-bit mask
 * and rounding argument R.  Parenthesized as a whole so the cast result is
 * what any following postfix operator applies to. */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002717
2718
/* Built on the fmadd builtin with A negated: calls
 * __builtin_ia32_vfmaddps512_mask3 on -A, B, C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002724
2725
/* Built on the fmadd builtin with A negated: calls
 * __builtin_ia32_vfmaddps512_maskz on -A, B, C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002731
2732
/* Built on the fmadd builtin with B and C negated: calls
 * __builtin_ia32_vfmaddps512_mask on A, -B, -C with an all-ones 16-bit mask
 * and rounding argument R.  Parenthesized as a whole so the cast result is
 * what any following postfix operator applies to. */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002738
2739
/* Built on the fmadd builtin with A and C negated: calls
 * __builtin_ia32_vfmaddps512_maskz on -A, B, -C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002745
2746
Michael Kupersteine45af542015-06-30 13:36:19 +00002747static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002748_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2749{
Craig Topperb92c77d2018-06-07 02:46:02 +00002750 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2751 (__v16sf) __B,
2752 (__v16sf) __C,
2753 (__mmask16) -1,
2754 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002755}
2756
Michael Kupersteine45af542015-06-30 13:36:19 +00002757static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002758_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2759{
Craig Topperb92c77d2018-06-07 02:46:02 +00002760 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2761 (__v16sf) __B,
2762 (__v16sf) __C,
2763 (__mmask16) __U,
2764 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002765}
2766
Michael Kupersteine45af542015-06-30 13:36:19 +00002767static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002768_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2769{
Craig Topperb92c77d2018-06-07 02:46:02 +00002770 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2771 (__v16sf) __B,
2772 (__v16sf) __C,
2773 (__mmask16) __U,
2774 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002775}
2776
Michael Kupersteine45af542015-06-30 13:36:19 +00002777static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002778_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2779{
Craig Topperb92c77d2018-06-07 02:46:02 +00002780 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2781 (__v16sf) __B,
2782 (__v16sf) __C,
2783 (__mmask16) __U,
2784 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002785}
2786
Michael Kupersteine45af542015-06-30 13:36:19 +00002787static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002788_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2789{
Craig Topperb92c77d2018-06-07 02:46:02 +00002790 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2791 (__v16sf) __B,
2792 -(__v16sf) __C,
2793 (__mmask16) -1,
2794 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002795}
2796
Michael Kupersteine45af542015-06-30 13:36:19 +00002797static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002798_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2799{
Craig Topperb92c77d2018-06-07 02:46:02 +00002800 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2801 (__v16sf) __B,
2802 -(__v16sf) __C,
2803 (__mmask16) __U,
2804 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002805}
2806
Michael Kupersteine45af542015-06-30 13:36:19 +00002807static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002808_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2809{
Craig Topperb92c77d2018-06-07 02:46:02 +00002810 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2811 (__v16sf) __B,
2812 -(__v16sf) __C,
2813 (__mmask16) __U,
2814 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002815}
2816
Michael Kupersteine45af542015-06-30 13:36:19 +00002817static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002818_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2819{
Craig Topperb92c77d2018-06-07 02:46:02 +00002820 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2821 -(__v16sf) __B,
2822 (__v16sf) __C,
2823 (__mmask16) -1,
2824 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002825}
2826
Michael Kupersteine45af542015-06-30 13:36:19 +00002827static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002828_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2829{
Craig Topperb92c77d2018-06-07 02:46:02 +00002830 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2831 (__v16sf) __B,
2832 (__v16sf) __C,
2833 (__mmask16) __U,
2834 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002835}
2836
Michael Kupersteine45af542015-06-30 13:36:19 +00002837static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002838_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2839{
Craig Topperb92c77d2018-06-07 02:46:02 +00002840 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2841 (__v16sf) __B,
2842 (__v16sf) __C,
2843 (__mmask16) __U,
2844 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002845}
2846
Michael Kupersteine45af542015-06-30 13:36:19 +00002847static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002848_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2849{
Craig Topperb92c77d2018-06-07 02:46:02 +00002850 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2851 -(__v16sf) __B,
2852 -(__v16sf) __C,
2853 (__mmask16) -1,
2854 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002855}
2856
Michael Kupersteine45af542015-06-30 13:36:19 +00002857static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002858_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2859{
Craig Topperb92c77d2018-06-07 02:46:02 +00002860 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2861 (__v16sf) __B,
2862 -(__v16sf) __C,
2863 (__mmask16) __U,
2864 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002865}
2866
/* Calls __builtin_ia32_vfmaddsubpd512_mask on A, B, C with an all-ones 8-bit
 * mask and rounding argument R.  Parenthesized as a whole so the cast result
 * is what any following postfix operator applies to. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002872
2873
/* Calls __builtin_ia32_vfmaddsubpd512_mask on A, B, C with mask U and
 * rounding argument R.  Parenthesized as a whole so the cast result is what
 * any following postfix operator applies to. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002879
2880
/* Calls the _mask3 variant, __builtin_ia32_vfmaddsubpd512_mask3, on A, B, C
 * with mask U and rounding argument R.  Parenthesized as a whole so the cast
 * result is what any following postfix operator applies to. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002886
2887
/* Calls the _maskz variant, __builtin_ia32_vfmaddsubpd512_maskz, on A, B, C
 * with mask U and rounding argument R.  Parenthesized as a whole so the cast
 * result is what any following postfix operator applies to. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002893
2894
/* Built on the fmaddsub builtin with C negated: calls
 * __builtin_ia32_vfmaddsubpd512_mask on A, B, -C with an all-ones 8-bit mask
 * and rounding argument R.  Parenthesized as a whole so the cast result is
 * what any following postfix operator applies to. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002900
2901
/* Built on the fmaddsub builtin with C negated: calls
 * __builtin_ia32_vfmaddsubpd512_mask on A, B, -C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002907
2908
/* Built on the fmaddsub builtin with C negated: calls
 * __builtin_ia32_vfmaddsubpd512_maskz on A, B, -C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002914
2915
Michael Kupersteine45af542015-06-30 13:36:19 +00002916static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002917_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2918{
Craig Topperb92c77d2018-06-07 02:46:02 +00002919 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2920 (__v8df) __B,
2921 (__v8df) __C,
2922 (__mmask8) -1,
2923 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002924}
2925
Michael Kupersteine45af542015-06-30 13:36:19 +00002926static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002927_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2928{
Craig Topperb92c77d2018-06-07 02:46:02 +00002929 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2930 (__v8df) __B,
2931 (__v8df) __C,
2932 (__mmask8) __U,
2933 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002934}
2935
Michael Kupersteine45af542015-06-30 13:36:19 +00002936static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002937_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2938{
Craig Topperb92c77d2018-06-07 02:46:02 +00002939 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2940 (__v8df) __B,
2941 (__v8df) __C,
2942 (__mmask8) __U,
2943 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002944}
2945
Michael Kupersteine45af542015-06-30 13:36:19 +00002946static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002947_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2948{
Craig Topperb92c77d2018-06-07 02:46:02 +00002949 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2950 (__v8df) __B,
2951 (__v8df) __C,
2952 (__mmask8) __U,
2953 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002954}
2955
Michael Kupersteine45af542015-06-30 13:36:19 +00002956static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002957_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2958{
Craig Topperb92c77d2018-06-07 02:46:02 +00002959 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2960 (__v8df) __B,
2961 -(__v8df) __C,
2962 (__mmask8) -1,
2963 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002964}
2965
Michael Kupersteine45af542015-06-30 13:36:19 +00002966static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002967_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2968{
Craig Topperb92c77d2018-06-07 02:46:02 +00002969 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2970 (__v8df) __B,
2971 -(__v8df) __C,
2972 (__mmask8) __U,
2973 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002974}
2975
Michael Kupersteine45af542015-06-30 13:36:19 +00002976static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002977_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2978{
Craig Topperb92c77d2018-06-07 02:46:02 +00002979 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2980 (__v8df) __B,
2981 -(__v8df) __C,
2982 (__mmask8) __U,
2983 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002984}
2985
/* Calls __builtin_ia32_vfmaddsubps512_mask on A, B, C with an all-ones
 * 16-bit mask and rounding argument R.  Parenthesized as a whole so the cast
 * result is what any following postfix operator applies to. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002991
2992
/* Calls __builtin_ia32_vfmaddsubps512_mask on A, B, C with mask U and
 * rounding argument R.  Parenthesized as a whole so the cast result is what
 * any following postfix operator applies to. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002998
2999
/* Calls the _mask3 variant, __builtin_ia32_vfmaddsubps512_mask3, on A, B, C
 * with mask U and rounding argument R.  Parenthesized as a whole so the cast
 * result is what any following postfix operator applies to. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003005
3006
/* Calls the _maskz variant, __builtin_ia32_vfmaddsubps512_maskz, on A, B, C
 * with mask U and rounding argument R.  Parenthesized as a whole so the cast
 * result is what any following postfix operator applies to. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003012
3013
/* Built on the fmaddsub builtin with C negated: calls
 * __builtin_ia32_vfmaddsubps512_mask on A, B, -C with an all-ones 16-bit
 * mask and rounding argument R.  Parenthesized as a whole so the cast result
 * is what any following postfix operator applies to. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003019
3020
/* Built on the fmaddsub builtin with C negated: calls
 * __builtin_ia32_vfmaddsubps512_mask on A, B, -C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003026
3027
/* Built on the fmaddsub builtin with C negated: calls
 * __builtin_ia32_vfmaddsubps512_maskz on A, B, -C with mask U and rounding
 * argument R.  Parenthesized as a whole so the cast result is what any
 * following postfix operator applies to. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003033
3034
Michael Kupersteine45af542015-06-30 13:36:19 +00003035static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003036_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3037{
Craig Topperb92c77d2018-06-07 02:46:02 +00003038 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3039 (__v16sf) __B,
3040 (__v16sf) __C,
3041 (__mmask16) -1,
3042 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003043}
3044
Michael Kupersteine45af542015-06-30 13:36:19 +00003045static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003046_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3047{
Craig Topperb92c77d2018-06-07 02:46:02 +00003048 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3049 (__v16sf) __B,
3050 (__v16sf) __C,
3051 (__mmask16) __U,
3052 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003053}
3054
Michael Kupersteine45af542015-06-30 13:36:19 +00003055static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003056_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3057{
Craig Topperb92c77d2018-06-07 02:46:02 +00003058 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3059 (__v16sf) __B,
3060 (__v16sf) __C,
3061 (__mmask16) __U,
3062 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003063}
3064
Michael Kupersteine45af542015-06-30 13:36:19 +00003065static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003066_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3067{
Craig Topperb92c77d2018-06-07 02:46:02 +00003068 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3069 (__v16sf) __B,
3070 (__v16sf) __C,
3071 (__mmask16) __U,
3072 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003073}
3074
Michael Kupersteine45af542015-06-30 13:36:19 +00003075static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003076_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3077{
Craig Topperb92c77d2018-06-07 02:46:02 +00003078 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3079 (__v16sf) __B,
3080 -(__v16sf) __C,
3081 (__mmask16) -1,
3082 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003083}
3084
Michael Kupersteine45af542015-06-30 13:36:19 +00003085static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003086_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3087{
Craig Topperb92c77d2018-06-07 02:46:02 +00003088 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3089 (__v16sf) __B,
3090 -(__v16sf) __C,
3091 (__mmask16) __U,
3092 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003093}
3094
Michael Kupersteine45af542015-06-30 13:36:19 +00003095static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003096_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3097{
Craig Topperb92c77d2018-06-07 02:46:02 +00003098 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3099 (__v16sf) __B,
3100 -(__v16sf) __C,
3101 (__mmask16) __U,
3102 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003103}
3104
/* mask3 double-precision fmsub with explicit rounding R: forwards A, B, C to
 * __builtin_ia32_vfmsubpd512_mask3 under mask U.
 * The expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003110
3111
Michael Kupersteine45af542015-06-30 13:36:19 +00003112static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003113_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3114{
Craig Topperb92c77d2018-06-07 02:46:02 +00003115 return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3116 (__v8df) __B,
3117 (__v8df) __C,
3118 (__mmask8) __U,
3119 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003120}
3121
/* mask3 single-precision fmsub with explicit rounding R: forwards A, B, C to
 * __builtin_ia32_vfmsubps512_mask3 under mask U.
 * The expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003127
Michael Kupersteine45af542015-06-30 13:36:19 +00003128static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003129_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3130{
Craig Topperb92c77d2018-06-07 02:46:02 +00003131 return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3132 (__v16sf) __B,
3133 (__v16sf) __C,
3134 (__mmask16) __U,
3135 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003136}
3137
/* mask3 double-precision fmsubadd with explicit rounding R: forwards A, B, C
 * to __builtin_ia32_vfmsubaddpd512_mask3 under mask U.
 * The expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003143
3144
Michael Kupersteine45af542015-06-30 13:36:19 +00003145static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003146_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3147{
Craig Topperb92c77d2018-06-07 02:46:02 +00003148 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3149 (__v8df) __B,
3150 (__v8df) __C,
3151 (__mmask8) __U,
3152 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003153}
3154
/* mask3 single-precision fmsubadd with explicit rounding R: forwards A, B, C
 * to __builtin_ia32_vfmsubaddps512_mask3 under mask U.
 * The expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003160
3161
Michael Kupersteine45af542015-06-30 13:36:19 +00003162static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003163_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3164{
Craig Topperb92c77d2018-06-07 02:46:02 +00003165 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3166 (__v16sf) __B,
3167 (__v16sf) __C,
3168 (__mmask16) __U,
3169 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003170}
3171
/* Masked double-precision fnmadd with explicit rounding R, expressed as
 * __builtin_ia32_vfmaddpd512_mask with B negated, under mask U.
 * The expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003177
3178
Michael Kupersteine45af542015-06-30 13:36:19 +00003179static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003180_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3181{
Craig Topperb92c77d2018-06-07 02:46:02 +00003182 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3183 -(__v8df) __B,
3184 (__v8df) __C,
3185 (__mmask8) __U,
3186 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003187}
3188
/* Masked single-precision fnmadd with explicit rounding R, expressed as
 * __builtin_ia32_vfmaddps512_mask with B negated, under mask U.
 * The expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003194
3195
Michael Kupersteine45af542015-06-30 13:36:19 +00003196static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003197_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3198{
Craig Topperb92c77d2018-06-07 02:46:02 +00003199 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3200 -(__v16sf) __B,
3201 (__v16sf) __C,
3202 (__mmask16) __U,
3203 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003204}
3205
/* Masked double-precision fnmsub with explicit rounding R, expressed as
 * __builtin_ia32_vfmaddpd512_mask with both B and C negated, under mask U.
 * The expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003211
3212
/* mask3 double-precision fnmsub with explicit rounding R, expressed as
 * __builtin_ia32_vfmsubpd512_mask3 with A negated, under mask U.
 * The expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003218
3219
Michael Kupersteine45af542015-06-30 13:36:19 +00003220static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003221_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3222{
Craig Topperb92c77d2018-06-07 02:46:02 +00003223 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3224 -(__v8df) __B,
3225 -(__v8df) __C,
3226 (__mmask8) __U,
3227 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003228}
3229
Michael Kupersteine45af542015-06-30 13:36:19 +00003230static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003231_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3232{
Craig Topperb92c77d2018-06-07 02:46:02 +00003233 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3234 (__v8df) __B,
3235 (__v8df) __C,
3236 (__mmask8) __U,
3237 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003238}
3239
/* Masked single-precision fnmsub with explicit rounding R, expressed as
 * __builtin_ia32_vfmaddps512_mask with both B and C negated, under mask U.
 * The expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003245
3246
/* mask3 single-precision fnmsub with explicit rounding R, expressed as
 * __builtin_ia32_vfmsubps512_mask3 with A negated, under mask U.
 * The expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003252
3253
Michael Kupersteine45af542015-06-30 13:36:19 +00003254static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003255_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3256{
Craig Topperb92c77d2018-06-07 02:46:02 +00003257 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3258 -(__v16sf) __B,
3259 -(__v16sf) __C,
3260 (__mmask16) __U,
3261 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003262}
3263
Michael Kupersteine45af542015-06-30 13:36:19 +00003264static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003265_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3266{
Craig Topperb92c77d2018-06-07 02:46:02 +00003267 return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3268 (__v16sf) __B,
3269 (__v16sf) __C,
3270 (__mmask16) __U,
3271 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003272}
3273
3274
3275
Adam Nemet0d5bb552014-07-28 17:14:40 +00003276/* Vector permutations */
3277
Michael Kupersteine45af542015-06-30 13:36:19 +00003278static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003279_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3280{
Craig Topper68a272d2018-05-29 03:26:38 +00003281 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3282 (__v16si) __B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003283}
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003284
3285static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00003286_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3287 __m512i __B)
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003288{
Craig Topper68a272d2018-05-29 03:26:38 +00003289 return (__m512i)__builtin_ia32_selectd_512(__U,
3290 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3291 (__v16si)__A);
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003292}
3293
3294static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00003295_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3296 __m512i __B)
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003297{
Craig Topper68a272d2018-05-29 03:26:38 +00003298 return (__m512i)__builtin_ia32_selectd_512(__U,
3299 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3300 (__v16si)__I);
3301}
3302
3303static __inline__ __m512i __DEFAULT_FN_ATTRS
3304_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3305 __m512i __B)
3306{
3307 return (__m512i)__builtin_ia32_selectd_512(__U,
3308 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3309 (__v16si)_mm512_setzero_si512());
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003310}
3311
Michael Kupersteine45af542015-06-30 13:36:19 +00003312static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003313_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3314{
Craig Topper68a272d2018-05-29 03:26:38 +00003315 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3316 (__v8di) __B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003317}
3318
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003319static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00003320_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3321 __m512i __B)
3322{
3323 return (__m512i)__builtin_ia32_selectq_512(__U,
3324 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3325 (__v8di)__A);
3326}
3327
3328static __inline__ __m512i __DEFAULT_FN_ATTRS
3329_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003330 __m512i __B)
Adam Nemet0d5bb552014-07-28 17:14:40 +00003331{
Craig Topper68a272d2018-05-29 03:26:38 +00003332 return (__m512i)__builtin_ia32_selectq_512(__U,
3333 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3334 (__v8di)__I);
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003335}
3336
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003337static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00003338_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3339 __m512i __B)
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003340{
Craig Topper68a272d2018-05-29 03:26:38 +00003341 return (__m512i)__builtin_ia32_selectq_512(__U,
3342 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3343 (__v8di)_mm512_setzero_si512());
Adam Nemet0d5bb552014-07-28 17:14:40 +00003344}
3345
/* 64-bit alignr family. The base macro forwards A, B and the immediate I to
 * __builtin_ia32_alignq512; the mask/maskz forms feed that result to
 * __builtin_ia32_selectq_512 together with W or an all-zero vector.
 * Every expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))
Michael Zuckerman533e0652016-04-28 12:47:30 +00003359
/* 32-bit alignr family. The base macro forwards A, B and the immediate I to
 * __builtin_ia32_alignd512; the mask/maskz forms feed that result to
 * __builtin_ia32_selectd_512 together with W or an all-zero vector.
 * Every expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
Adam Nemetf893ede2015-01-19 20:12:05 +00003373/* Vector Extract */
3374
/* Extract a 256-bit group of doubles from A, selected by an immediate, via
 * __builtin_ia32_extractf64x4_mask. The unmasked form passes an undefined
 * vector and an all-ones mask; the mask form passes W and U; the maskz form
 * passes an all-zero vector and U.
 * Every expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_undefined_pd(), \
                                             (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))
Michael Zuckerman2564d2f2016-05-10 10:14:50 +00003389
/* Extract a 128-bit group of floats from A, selected by an immediate, via
 * __builtin_ia32_extractf32x4_mask. The unmasked form passes an undefined
 * vector and an all-ones mask; the mask form passes W and U; the maskz form
 * passes an all-zero vector and U.
 * Every expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_undefined_ps(), \
                                            (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
Craig Topper93ffabd2016-10-31 04:30:56 +00003404
Adam Nemet0d5bb552014-07-28 17:14:40 +00003405/* Vector Blend */
3406
Michael Kupersteine45af542015-06-30 13:36:19 +00003407static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003408_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3409{
Igor Bregeraadb8762016-06-08 13:59:20 +00003410 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
Adam Nemet0d5bb552014-07-28 17:14:40 +00003411 (__v8df) __W,
Igor Bregeraadb8762016-06-08 13:59:20 +00003412 (__v8df) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003413}
3414
Michael Kupersteine45af542015-06-30 13:36:19 +00003415static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003416_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3417{
Igor Bregeraadb8762016-06-08 13:59:20 +00003418 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
Adam Nemet0d5bb552014-07-28 17:14:40 +00003419 (__v16sf) __W,
Igor Bregeraadb8762016-06-08 13:59:20 +00003420 (__v16sf) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003421}
3422
Michael Kupersteine45af542015-06-30 13:36:19 +00003423static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003424_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3425{
Igor Bregeraadb8762016-06-08 13:59:20 +00003426 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
Adam Nemet0d5bb552014-07-28 17:14:40 +00003427 (__v8di) __W,
Igor Bregeraadb8762016-06-08 13:59:20 +00003428 (__v8di) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003429}
3430
Michael Kupersteine45af542015-06-30 13:36:19 +00003431static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003432_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3433{
Igor Bregeraadb8762016-06-08 13:59:20 +00003434 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
Adam Nemet0d5bb552014-07-28 17:14:40 +00003435 (__v16si) __W,
Igor Bregeraadb8762016-06-08 13:59:20 +00003436 (__v16si) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003437}
3438
3439/* Compare */
3440
/* Single-precision compare of A and B with predicate P and rounding/SAE
 * argument R, via __builtin_ia32_cmpps512_mask. The unmasked form passes an
 * all-ones mask; the mask form passes U.
 * Both expansions are fully parenthesized so each macro behaves as a single
 * primary expression. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))
Craig Topper53565c62015-02-01 22:27:40 +00003450
/* Single-precision compares using _MM_FROUND_CUR_DIRECTION, followed by
 * convenience wrappers that fix the predicate to one of the _CMP_* constants.
 * Each expansion is parenthesized so the macro behaves as a single primary
 * expression regardless of how the macro it forwards to is written. */
#define _mm512_cmp_ps_mask(A, B, P) \
  (_mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  (_mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmpeq_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ))
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ))

#define _mm512_cmplt_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_LT_OS))
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS))

#define _mm512_cmple_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_LE_OS))
#define _mm512_mask_cmple_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS))

#define _mm512_cmpunord_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q))
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q))

#define _mm512_cmpneq_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ))
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ))

#define _mm512_cmpnlt_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_NLT_US))
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US))

#define _mm512_cmpnle_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_NLE_US))
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US))

#define _mm512_cmpord_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q))
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q))
3495
/* Double-precision compare of A and B with predicate P and rounding/SAE
 * argument R, via __builtin_ia32_cmppd512_mask. The unmasked form passes an
 * all-ones mask; the mask form passes U.
 * Both expansions are fully parenthesized so each macro behaves as a single
 * primary expression. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))
Craig Topper53565c62015-02-01 22:27:40 +00003505
/* Double-precision compares using _MM_FROUND_CUR_DIRECTION, followed by
 * convenience wrappers that fix the predicate to one of the _CMP_* constants.
 * Each expansion is parenthesized so the macro behaves as a single primary
 * expression regardless of how the macro it forwards to is written. */
#define _mm512_cmp_pd_mask(A, B, P) \
  (_mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  (_mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmpeq_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ))
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ))

#define _mm512_cmplt_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_LT_OS))
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS))

#define _mm512_cmple_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_LE_OS))
#define _mm512_mask_cmple_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS))

#define _mm512_cmpunord_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q))
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q))

#define _mm512_cmpneq_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ))
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ))

#define _mm512_cmpnlt_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_NLT_US))
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US))

#define _mm512_cmpnle_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_NLE_US))
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US))

#define _mm512_cmpord_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q))
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q))
3550
Adam Nemet0d5bb552014-07-28 17:14:40 +00003551/* Conversion */
3552
/* Float to unsigned 32-bit conversion with explicit rounding/SAE argument R,
 * via __builtin_ia32_cvttps2udq512_mask. The unmasked form passes an
 * undefined vector and an all-ones mask; the mask form passes W and U; the
 * maskz form passes an all-zero vector and U.
 * Every expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_undefined_epi32(), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)(__m512i)(W), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_setzero_si512(), \
                                              (__mmask16)(U), (int)(R)))
Michael Zuckerman6170c152016-06-01 14:41:41 +00003567
3568
Michael Kupersteine45af542015-06-30 13:36:19 +00003569static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003570_mm512_cvttps_epu32(__m512 __A)
3571{
3572 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3573 (__v16si)
3574 _mm512_setzero_si512 (),
3575 (__mmask16) -1,
3576 _MM_FROUND_CUR_DIRECTION);
3577}
3578
Michael Zuckermanf1544752016-05-09 10:32:51 +00003579static __inline__ __m512i __DEFAULT_FN_ATTRS
3580_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3581{
3582 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3583 (__v16si) __W,
3584 (__mmask16) __U,
3585 _MM_FROUND_CUR_DIRECTION);
3586}
3587
3588static __inline__ __m512i __DEFAULT_FN_ATTRS
3589_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3590{
3591 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3592 (__v16si) _mm512_setzero_si512 (),
3593 (__mmask16) __U,
3594 _MM_FROUND_CUR_DIRECTION);
3595}
3596
/* Signed 32-bit integer to float conversion with explicit rounding R, via
 * __builtin_ia32_cvtdq2ps512_mask. The unmasked form passes an all-zero
 * vector and an all-ones mask; the mask form passes W and U; the maskz form
 * passes an all-zero vector and U.
 * Every expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003611
/* Unsigned 32-bit integer to float conversion with explicit rounding R, via
 * __builtin_ia32_cvtudq2ps512_mask. The unmasked form passes an all-zero
 * vector and an all-ones mask; the mask form passes W and U; the maskz form
 * passes an all-zero vector and U.
 * Every expansion is fully parenthesized so the cast is applied before any
 * operator that follows the macro invocation. */
#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003626
Michael Zuckermanf1544752016-05-09 10:32:51 +00003627static __inline__ __m512 __DEFAULT_FN_ATTRS
3628_mm512_cvtepu32_ps (__m512i __A)
3629{
Craig Topper842171d2018-05-21 20:19:17 +00003630 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003631}
3632
3633static __inline__ __m512 __DEFAULT_FN_ATTRS
3634_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3635{
Craig Topper842171d2018-05-21 20:19:17 +00003636 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3637 (__v16sf)_mm512_cvtepu32_ps(__A),
3638 (__v16sf)__W);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003639}
3640
3641static __inline__ __m512 __DEFAULT_FN_ATTRS
3642_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3643{
Craig Topper842171d2018-05-21 20:19:17 +00003644 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3645 (__v16sf)_mm512_cvtepu32_ps(__A),
3646 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanf1544752016-05-09 10:32:51 +00003647}
3648
Michael Kupersteine45af542015-06-30 13:36:19 +00003649static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003650_mm512_cvtepi32_pd(__m256i __A)
3651{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003652 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003653}
3654
Michael Zuckermanf1544752016-05-09 10:32:51 +00003655static __inline__ __m512d __DEFAULT_FN_ATTRS
3656_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3657{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003658 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3659 (__v8df)_mm512_cvtepi32_pd(__A),
3660 (__v8df)__W);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003661}
3662
3663static __inline__ __m512d __DEFAULT_FN_ATTRS
3664_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3665{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003666 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3667 (__v8df)_mm512_cvtepi32_pd(__A),
3668 (__v8df)_mm512_setzero_pd());
Michael Zuckermanf1544752016-05-09 10:32:51 +00003669}
3670
Ayman Musa2e250e82016-09-27 14:06:32 +00003671static __inline__ __m512d __DEFAULT_FN_ATTRS
3672_mm512_cvtepi32lo_pd(__m512i __A)
3673{
3674 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3675}
3676
3677static __inline__ __m512d __DEFAULT_FN_ATTRS
3678_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3679{
3680 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3681}
3682
Michael Zuckermanf1544752016-05-09 10:32:51 +00003683static __inline__ __m512 __DEFAULT_FN_ATTRS
3684_mm512_cvtepi32_ps (__m512i __A)
3685{
Craig Topper842171d2018-05-21 20:19:17 +00003686 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003687}
3688
3689static __inline__ __m512 __DEFAULT_FN_ATTRS
3690_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3691{
Craig Topper842171d2018-05-21 20:19:17 +00003692 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3693 (__v16sf)_mm512_cvtepi32_ps(__A),
3694 (__v16sf)__W);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003695}
3696
3697static __inline__ __m512 __DEFAULT_FN_ATTRS
3698_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3699{
Craig Topper842171d2018-05-21 20:19:17 +00003700 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3701 (__v16sf)_mm512_cvtepi32_ps(__A),
3702 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanf1544752016-05-09 10:32:51 +00003703}
3704
Michael Kupersteine45af542015-06-30 13:36:19 +00003705static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003706_mm512_cvtepu32_pd(__m256i __A)
3707{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003708 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003709}
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003710
Michael Zuckermanf1544752016-05-09 10:32:51 +00003711static __inline__ __m512d __DEFAULT_FN_ATTRS
3712_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3713{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003714 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3715 (__v8df)_mm512_cvtepu32_pd(__A),
3716 (__v8df)__W);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003717}
3718
3719static __inline__ __m512d __DEFAULT_FN_ATTRS
3720_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3721{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003722 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3723 (__v8df)_mm512_cvtepu32_pd(__A),
3724 (__v8df)_mm512_setzero_pd());
Michael Zuckermanf1544752016-05-09 10:32:51 +00003725}
3726
Ayman Musa2e250e82016-09-27 14:06:32 +00003727static __inline__ __m512d __DEFAULT_FN_ATTRS
3728_mm512_cvtepu32lo_pd(__m512i __A)
3729{
3730 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3731}
3732
3733static __inline__ __m512d __DEFAULT_FN_ATTRS
3734_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3735{
3736 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3737}
3738
/* Forwards A (as __v8df) to __builtin_ia32_cvtpd2ps512_mask with a zero
 * pass-through vector, an all-ones mask and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
Craig Topper72c7d512015-02-01 07:35:35 +00003743
/* Forwards A (as __v8df) to __builtin_ia32_cvtpd2ps512_mask with W as the
 * pass-through vector, lane mask U and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003748
/* Forwards A (as __v8df) to __builtin_ia32_cvtpd2ps512_mask with a zero
 * pass-through vector, lane mask U and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003753
Michael Zuckermanf1544752016-05-09 10:32:51 +00003754static __inline__ __m256 __DEFAULT_FN_ATTRS
3755_mm512_cvtpd_ps (__m512d __A)
3756{
3757 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3758 (__v8sf) _mm256_undefined_ps (),
3759 (__mmask8) -1,
3760 _MM_FROUND_CUR_DIRECTION);
3761}
3762
3763static __inline__ __m256 __DEFAULT_FN_ATTRS
3764_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3765{
3766 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3767 (__v8sf) __W,
3768 (__mmask8) __U,
3769 _MM_FROUND_CUR_DIRECTION);
3770}
3771
3772static __inline__ __m256 __DEFAULT_FN_ATTRS
3773_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3774{
3775 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3776 (__v8sf) _mm256_setzero_ps (),
3777 (__mmask8) __U,
3778 _MM_FROUND_CUR_DIRECTION);
3779}
Michael Zuckerman2564d2f2016-05-10 10:14:50 +00003780
Ayman Musa2e250e82016-09-27 14:06:32 +00003781static __inline__ __m512 __DEFAULT_FN_ATTRS
3782_mm512_cvtpd_pslo (__m512d __A)
3783{
3784 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3785 (__v8sf) _mm256_setzero_ps (),
3786 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3787}
3788
3789static __inline__ __m512 __DEFAULT_FN_ATTRS
3790_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3791{
3792 return (__m512) __builtin_shufflevector (
3793 (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
3794 __U, __A),
3795 (__v8sf) _mm256_setzero_ps (),
3796 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3797}
3798
/* Forwards A (as __v16sf) and immediate I to __builtin_ia32_vcvtps2ph512_mask
 * with an undefined pass-through vector and an all-ones mask.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))
Michael Zuckerman6170c152016-06-01 14:41:41 +00003803
/* Forwards A (as __v16sf) and immediate I to __builtin_ia32_vcvtps2ph512_mask
 * with W as the pass-through vector and U as the lane mask.
 * Parameters are named W (vector) and U (mask) like the other masked macros
 * in this header; their positions are unchanged.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(W), \
                                             (__mmask16)(U)))
Michael Zuckerman6170c152016-06-01 14:41:41 +00003808
/* Forwards A (as __v16sf) and immediate I to __builtin_ia32_vcvtps2ph512_mask
 * with a zero pass-through vector and U as the lane mask.
 * The mask parameter is named U like the other masked macros in this header.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_maskz_cvt_roundps_ph(U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(U)))
Michael Zuckerman6170c152016-06-01 14:41:41 +00003813
/* Forwards A (as __v16sf) and immediate I to __builtin_ia32_vcvtps2ph512_mask
 * with a zero pass-through vector and an all-ones mask.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_cvtps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)-1))
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003818
/* Forwards A (as __v16sf) and immediate I to __builtin_ia32_vcvtps2ph512_mask
 * with W as the pass-through vector and U as the lane mask.
 * Parameters are named W (vector) and U (mask) like the other masked macros
 * in this header; their positions are unchanged.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_mask_cvtps_ph(W, U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(W), \
                                             (__mmask16)(U)))
Michael Zuckermanf1544752016-05-09 10:32:51 +00003823
/* Forwards A (as __v16sf) and immediate I to __builtin_ia32_vcvtps2ph512_mask
 * with a zero pass-through vector and U as the lane mask.
 * The mask parameter is named U like the other masked macros in this header.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_maskz_cvtps_ph(U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(U)))
Michael Zuckerman6170c152016-06-01 14:41:41 +00003828
/* Forwards A (as __v16hi) to __builtin_ia32_vcvtph2ps512_mask with an
 * undefined pass-through vector, an all-ones mask and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))
Michael Zuckerman6170c152016-06-01 14:41:41 +00003833
/* Forwards A (as __v16hi) to __builtin_ia32_vcvtph2ps512_mask with W as the
 * pass-through vector, lane mask U and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))
Michael Zuckerman6170c152016-06-01 14:41:41 +00003838
/* Forwards A (as __v16hi) to __builtin_ia32_vcvtph2ps512_mask with a zero
 * pass-through vector, lane mask U and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
Craig Topperf3efec62016-06-08 06:08:07 +00003843
3844
Michael Kupersteine45af542015-06-30 13:36:19 +00003845static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00003846_mm512_cvtph_ps(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003847{
3848 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00003849 (__v16sf)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003850 _mm512_setzero_ps (),
3851 (__mmask16) -1,
3852 _MM_FROUND_CUR_DIRECTION);
3853}
3854
Michael Zuckermanf1544752016-05-09 10:32:51 +00003855static __inline__ __m512 __DEFAULT_FN_ATTRS
3856_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003857{
Michael Zuckermanf1544752016-05-09 10:32:51 +00003858 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3859 (__v16sf) __W,
3860 (__mmask16) __U,
3861 _MM_FROUND_CUR_DIRECTION);
3862}
3863
3864static __inline__ __m512 __DEFAULT_FN_ATTRS
3865_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3866{
3867 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3868 (__v16sf) _mm512_setzero_ps (),
3869 (__mmask16) __U,
3870 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003871}
3872
/* Forwards A (as __v8df) to __builtin_ia32_cvttpd2dq512_mask with a zero
 * pass-through vector, an all-ones mask and control value R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003877
/* Forwards A (as __v8df) to __builtin_ia32_cvttpd2dq512_mask with W as the
 * pass-through vector, lane mask U and control value R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003882
/* Forwards A (as __v8df) to __builtin_ia32_cvttpd2dq512_mask with a zero
 * pass-through vector, lane mask U and control value R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003887
Michael Kupersteine45af542015-06-30 13:36:19 +00003888static __inline __m256i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003889_mm512_cvttpd_epi32(__m512d __a)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003890{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00003891 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003892 (__v8si)_mm256_setzero_si256(),
3893 (__mmask8) -1,
3894 _MM_FROUND_CUR_DIRECTION);
3895}
3896
Michael Zuckermanf1544752016-05-09 10:32:51 +00003897static __inline__ __m256i __DEFAULT_FN_ATTRS
3898_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3899{
3900 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3901 (__v8si) __W,
3902 (__mmask8) __U,
3903 _MM_FROUND_CUR_DIRECTION);
3904}
3905
3906static __inline__ __m256i __DEFAULT_FN_ATTRS
3907_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
3908{
3909 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3910 (__v8si) _mm256_setzero_si256 (),
3911 (__mmask8) __U,
3912 _MM_FROUND_CUR_DIRECTION);
3913}
3914
/* Forwards A (as __v16sf) to __builtin_ia32_cvttps2dq512_mask with a zero
 * pass-through vector, an all-ones mask and control value R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))
Michael Zuckerman2564d2f2016-05-10 10:14:50 +00003919
/* Forwards A (as __v16sf) to __builtin_ia32_cvttps2dq512_mask with W as the
 * pass-through vector, lane mask U and control value R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003924
/* Forwards A (as __v16sf) to __builtin_ia32_cvttps2dq512_mask with a zero
 * pass-through vector, lane mask U and control value R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003929
Michael Zuckermanf1544752016-05-09 10:32:51 +00003930static __inline __m512i __DEFAULT_FN_ATTRS
3931_mm512_cvttps_epi32(__m512 __a)
3932{
3933 return (__m512i)
3934 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3935 (__v16si) _mm512_setzero_si512 (),
3936 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
3937}
3938
3939static __inline__ __m512i __DEFAULT_FN_ATTRS
3940_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3941{
3942 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3943 (__v16si) __W,
3944 (__mmask16) __U,
3945 _MM_FROUND_CUR_DIRECTION);
3946}
3947
3948static __inline__ __m512i __DEFAULT_FN_ATTRS
3949_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
3950{
3951 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3952 (__v16si) _mm512_setzero_si512 (),
3953 (__mmask16) __U,
3954 _MM_FROUND_CUR_DIRECTION);
3955}
Craig Topper72c7d512015-02-01 07:35:35 +00003956
/* Forwards A (as __v16sf) to __builtin_ia32_cvtps2dq512_mask with a zero
 * pass-through vector, an all-ones mask and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))
Craig Topper72c7d512015-02-01 07:35:35 +00003961
/* Forwards A (as __v16sf) to __builtin_ia32_cvtps2dq512_mask with W as the
 * pass-through vector, lane mask U and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003966
/* Forwards A (as __v16sf) to __builtin_ia32_cvtps2dq512_mask with a zero
 * pass-through vector, lane mask U and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003971
Michael Zuckermanf1544752016-05-09 10:32:51 +00003972static __inline__ __m512i __DEFAULT_FN_ATTRS
3973_mm512_cvtps_epi32 (__m512 __A)
3974{
3975 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3976 (__v16si) _mm512_undefined_epi32 (),
3977 (__mmask16) -1,
3978 _MM_FROUND_CUR_DIRECTION);
3979}
3980
3981static __inline__ __m512i __DEFAULT_FN_ATTRS
3982_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3983{
3984 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3985 (__v16si) __W,
3986 (__mmask16) __U,
3987 _MM_FROUND_CUR_DIRECTION);
3988}
3989
3990static __inline__ __m512i __DEFAULT_FN_ATTRS
3991_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
3992{
3993 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3994 (__v16si)
3995 _mm512_setzero_si512 (),
3996 (__mmask16) __U,
3997 _MM_FROUND_CUR_DIRECTION);
3998}
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00003999
/* Forwards A (as __v8df) to __builtin_ia32_cvtpd2dq512_mask with a zero
 * pass-through vector, an all-ones mask and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))
Craig Topper72c7d512015-02-01 07:35:35 +00004004
/* Forwards A (as __v8df) to __builtin_ia32_cvtpd2dq512_mask with W as the
 * pass-through vector, lane mask U and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004009
/* Forwards A (as __v8df) to __builtin_ia32_cvtpd2dq512_mask with a zero
 * pass-through vector, lane mask U and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004014
Michael Zuckermanf1544752016-05-09 10:32:51 +00004015static __inline__ __m256i __DEFAULT_FN_ATTRS
4016_mm512_cvtpd_epi32 (__m512d __A)
4017{
4018 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4019 (__v8si)
4020 _mm256_undefined_si256 (),
4021 (__mmask8) -1,
4022 _MM_FROUND_CUR_DIRECTION);
4023}
4024
4025static __inline__ __m256i __DEFAULT_FN_ATTRS
4026_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4027{
4028 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4029 (__v8si) __W,
4030 (__mmask8) __U,
4031 _MM_FROUND_CUR_DIRECTION);
4032}
4033
4034static __inline__ __m256i __DEFAULT_FN_ATTRS
4035_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4036{
4037 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4038 (__v8si)
4039 _mm256_setzero_si256 (),
4040 (__mmask8) __U,
4041 _MM_FROUND_CUR_DIRECTION);
4042}
4043
/* Forwards A (as __v16sf) to __builtin_ia32_cvtps2udq512_mask with a zero
 * pass-through vector, an all-ones mask and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))
Craig Topper72c7d512015-02-01 07:35:35 +00004048
/* Forwards A (as __v16sf) to __builtin_ia32_cvtps2udq512_mask with W as the
 * pass-through vector, lane mask U and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004053
/* Forwards A (as __v16sf) to __builtin_ia32_cvtps2udq512_mask with a zero
 * pass-through vector, lane mask U and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004058
Michael Zuckermanf1544752016-05-09 10:32:51 +00004059static __inline__ __m512i __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00004060_mm512_cvtps_epu32 ( __m512 __A)
Michael Zuckermanf1544752016-05-09 10:32:51 +00004061{
4062 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4063 (__v16si)\
Craig Topper0a485d12018-07-07 22:03:19 +00004064 _mm512_undefined_epi32 (),
Michael Zuckermanf1544752016-05-09 10:32:51 +00004065 (__mmask16) -1,\
Craig Topper0a485d12018-07-07 22:03:19 +00004066 _MM_FROUND_CUR_DIRECTION);
Michael Zuckermanf1544752016-05-09 10:32:51 +00004067}
4068
4069static __inline__ __m512i __DEFAULT_FN_ATTRS
4070_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4071{
4072 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4073 (__v16si) __W,
4074 (__mmask16) __U,
4075 _MM_FROUND_CUR_DIRECTION);
4076}
4077
Michael Zuckerman9fcf3552016-05-30 13:22:12 +00004078static __inline__ __m512i __DEFAULT_FN_ATTRS
4079_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4080{
4081 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00004082 (__v16si)
Michael Zuckerman9fcf3552016-05-30 13:22:12 +00004083 _mm512_setzero_si512 (),
4084 (__mmask16) __U ,
4085 _MM_FROUND_CUR_DIRECTION);
4086}
4087
/* Forwards A (as __v8df) to __builtin_ia32_cvtpd2udq512_mask with a zero
 * pass-through vector, an all-ones mask and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004092
/* Forwards A (as __v8df) to __builtin_ia32_cvtpd2udq512_mask with W as the
 * pass-through vector, lane mask U and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004097
/* Forwards A (as __v8df) to __builtin_ia32_cvtpd2udq512_mask with a zero
 * pass-through vector, lane mask U and rounding control R.
 * The expansion is fully parenthesized so that postfix operators applied to
 * the macro result act on the cast value, not on the builtin call. */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004102
Michael Zuckermanf1544752016-05-09 10:32:51 +00004103static __inline__ __m256i __DEFAULT_FN_ATTRS
4104_mm512_cvtpd_epu32 (__m512d __A)
4105{
4106 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4107 (__v8si)
4108 _mm256_undefined_si256 (),
4109 (__mmask8) -1,
4110 _MM_FROUND_CUR_DIRECTION);
4111}
4112
4113static __inline__ __m256i __DEFAULT_FN_ATTRS
4114_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4115{
4116 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4117 (__v8si) __W,
4118 (__mmask8) __U,
4119 _MM_FROUND_CUR_DIRECTION);
4120}
4121
4122static __inline__ __m256i __DEFAULT_FN_ATTRS
4123_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4124{
4125 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4126 (__v8si)
4127 _mm256_setzero_si256 (),
4128 (__mmask8) __U,
4129 _MM_FROUND_CUR_DIRECTION);
4130}
Craig Topper79f53ca2016-06-23 06:36:42 +00004131
Simon Pilgrim60e92492017-03-21 12:46:13 +00004132static __inline__ double __DEFAULT_FN_ATTRS
4133_mm512_cvtsd_f64(__m512d __a)
4134{
4135 return __a[0];
4136}
4137
4138static __inline__ float __DEFAULT_FN_ATTRS
4139_mm512_cvtss_f32(__m512 __a)
4140{
4141 return __a[0];
4142}
4143
Adam Nemet63a951e2015-01-14 01:31:17 +00004144/* Unpack and Interleave */
Craig Topper79f53ca2016-06-23 06:36:42 +00004145
Michael Kupersteine45af542015-06-30 13:36:19 +00004146static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00004147_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4148{
Craig Topper79f53ca2016-06-23 06:36:42 +00004149 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4150 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4151}
4152
4153static __inline__ __m512d __DEFAULT_FN_ATTRS
4154_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4155{
4156 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4157 (__v8df)_mm512_unpackhi_pd(__A, __B),
4158 (__v8df)__W);
4159}
4160
4161static __inline__ __m512d __DEFAULT_FN_ATTRS
4162_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4163{
4164 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4165 (__v8df)_mm512_unpackhi_pd(__A, __B),
4166 (__v8df)_mm512_setzero_pd());
Adam Nemet63a951e2015-01-14 01:31:17 +00004167}
4168
Michael Kupersteine45af542015-06-30 13:36:19 +00004169static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00004170_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4171{
Craig Topper79f53ca2016-06-23 06:36:42 +00004172 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4173 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4174}
4175
4176static __inline__ __m512d __DEFAULT_FN_ATTRS
4177_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4178{
4179 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4180 (__v8df)_mm512_unpacklo_pd(__A, __B),
4181 (__v8df)__W);
4182}
4183
4184static __inline__ __m512d __DEFAULT_FN_ATTRS
4185_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4186{
4187 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4188 (__v8df)_mm512_unpacklo_pd(__A, __B),
4189 (__v8df)_mm512_setzero_pd());
Adam Nemet63a951e2015-01-14 01:31:17 +00004190}
4191
Michael Kupersteine45af542015-06-30 13:36:19 +00004192static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00004193_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4194{
Craig Topper79f53ca2016-06-23 06:36:42 +00004195 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4196 2, 18, 3, 19,
4197 2+4, 18+4, 3+4, 19+4,
4198 2+8, 18+8, 3+8, 19+8,
4199 2+12, 18+12, 3+12, 19+12);
4200}
4201
4202static __inline__ __m512 __DEFAULT_FN_ATTRS
4203_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4204{
4205 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4206 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4207 (__v16sf)__W);
4208}
4209
4210static __inline__ __m512 __DEFAULT_FN_ATTRS
4211_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4212{
4213 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4214 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4215 (__v16sf)_mm512_setzero_ps());
Adam Nemet63a951e2015-01-14 01:31:17 +00004216}
4217
Michael Kupersteine45af542015-06-30 13:36:19 +00004218static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00004219_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4220{
Craig Topper79f53ca2016-06-23 06:36:42 +00004221 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4222 0, 16, 1, 17,
4223 0+4, 16+4, 1+4, 17+4,
4224 0+8, 16+8, 1+8, 17+8,
4225 0+12, 16+12, 1+12, 17+12);
4226}
4227
4228static __inline__ __m512 __DEFAULT_FN_ATTRS
4229_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4230{
4231 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4232 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4233 (__v16sf)__W);
4234}
4235
4236static __inline__ __m512 __DEFAULT_FN_ATTRS
4237_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4238{
4239 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4240 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4241 (__v16sf)_mm512_setzero_ps());
4242}
4243
4244static __inline__ __m512i __DEFAULT_FN_ATTRS
4245_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4246{
4247 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4248 2, 18, 3, 19,
4249 2+4, 18+4, 3+4, 19+4,
4250 2+8, 18+8, 3+8, 19+8,
4251 2+12, 18+12, 3+12, 19+12);
4252}
4253
4254static __inline__ __m512i __DEFAULT_FN_ATTRS
4255_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4256{
4257 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4258 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4259 (__v16si)__W);
4260}
4261
4262static __inline__ __m512i __DEFAULT_FN_ATTRS
4263_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4264{
4265 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4266 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4267 (__v16si)_mm512_setzero_si512());
4268}
4269
4270static __inline__ __m512i __DEFAULT_FN_ATTRS
4271_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4272{
4273 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4274 0, 16, 1, 17,
4275 0+4, 16+4, 1+4, 17+4,
4276 0+8, 16+8, 1+8, 17+8,
4277 0+12, 16+12, 1+12, 17+12);
4278}
4279
4280static __inline__ __m512i __DEFAULT_FN_ATTRS
4281_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4282{
4283 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4284 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4285 (__v16si)__W);
4286}
4287
4288static __inline__ __m512i __DEFAULT_FN_ATTRS
4289_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4290{
4291 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4292 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4293 (__v16si)_mm512_setzero_si512());
4294}
4295
4296static __inline__ __m512i __DEFAULT_FN_ATTRS
4297_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4298{
4299 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4300 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4301}
4302
4303static __inline__ __m512i __DEFAULT_FN_ATTRS
4304_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4305{
4306 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4307 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4308 (__v8di)__W);
4309}
4310
4311static __inline__ __m512i __DEFAULT_FN_ATTRS
4312_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4313{
4314 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4315 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4316 (__v8di)_mm512_setzero_si512());
4317}
4318
4319static __inline__ __m512i __DEFAULT_FN_ATTRS
4320_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4321{
4322 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4323 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4324}
4325
4326static __inline__ __m512i __DEFAULT_FN_ATTRS
4327_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4328{
4329 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4330 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4331 (__v8di)__W);
4332}
4333
4334static __inline__ __m512i __DEFAULT_FN_ATTRS
4335_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4336{
4337 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4338 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4339 (__v8di)_mm512_setzero_si512());
Adam Nemet63a951e2015-01-14 01:31:17 +00004340}
4341
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00004342
Adam Nemet0d5bb552014-07-28 17:14:40 +00004343/* SIMD load ops */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004344
Michael Kupersteine45af542015-06-30 13:36:19 +00004345static __inline __m512i __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004346_mm512_loadu_si512 (void const *__P)
4347{
Craig Toppera6dd2fa2018-05-31 05:02:08 +00004348 struct __loadu_si512 {
4349 __m512i __v;
4350 } __attribute__((__packed__, __may_alias__));
4351 return ((struct __loadu_si512*)__P)->__v;
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004352}
4353
4354static __inline __m512i __DEFAULT_FN_ATTRS
4355_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4356{
Craig Topper4537ea72016-05-14 06:03:13 +00004357 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004358 (__v16si) __W,
4359 (__mmask16) __U);
4360}
4361
4362
4363static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004364_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004365{
Craig Topper4537ea72016-05-14 06:03:13 +00004366 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004367 (__v16si)
4368 _mm512_setzero_si512 (),
4369 (__mmask16) __U);
4370}
4371
Michael Kupersteine45af542015-06-30 13:36:19 +00004372static __inline __m512i __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004373_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4374{
Craig Topper4537ea72016-05-14 06:03:13 +00004375 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004376 (__v8di) __W,
4377 (__mmask8) __U);
4378}
4379
4380static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004381_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004382{
Craig Topper4537ea72016-05-14 06:03:13 +00004383 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004384 (__v8di)
4385 _mm512_setzero_si512 (),
4386 (__mmask8) __U);
4387}
4388
Michael Kupersteine45af542015-06-30 13:36:19 +00004389static __inline __m512 __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004390_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4391{
Craig Topper4537ea72016-05-14 06:03:13 +00004392 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004393 (__v16sf) __W,
4394 (__mmask16) __U);
4395}
4396
4397static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004398_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004399{
Craig Topper4537ea72016-05-14 06:03:13 +00004400 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004401 (__v16sf)
4402 _mm512_setzero_ps (),
4403 (__mmask16) __U);
4404}
4405
Michael Kupersteine45af542015-06-30 13:36:19 +00004406static __inline __m512d __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004407_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004408{
Craig Topper4537ea72016-05-14 06:03:13 +00004409 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004410 (__v8df) __W,
4411 (__mmask8) __U);
Adam Nemetc0cff242015-01-16 18:51:50 +00004412}
4413
Michael Kupersteine45af542015-06-30 13:36:19 +00004414static __inline __m512d __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004415_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
Adam Nemetc0cff242015-01-16 18:51:50 +00004416{
Craig Topper4537ea72016-05-14 06:03:13 +00004417 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
Adam Nemetc0cff242015-01-16 18:51:50 +00004418 (__v8df)
4419 _mm512_setzero_pd (),
4420 (__mmask8) __U);
4421}
4422
Michael Kupersteine45af542015-06-30 13:36:19 +00004423static __inline __m512d __DEFAULT_FN_ATTRS
Craig Topper6afc4362017-03-17 05:59:25 +00004424_mm512_loadu_pd(void const *__p)
Adam Nemetda82bcc2014-07-31 04:00:39 +00004425{
4426 struct __loadu_pd {
4427 __m512d __v;
David Majnemer1cf22e62015-02-04 00:26:10 +00004428 } __attribute__((__packed__, __may_alias__));
Adam Nemetda82bcc2014-07-31 04:00:39 +00004429 return ((struct __loadu_pd*)__p)->__v;
4430}
4431
Michael Kupersteine45af542015-06-30 13:36:19 +00004432static __inline __m512 __DEFAULT_FN_ATTRS
Craig Topper6afc4362017-03-17 05:59:25 +00004433_mm512_loadu_ps(void const *__p)
Adam Nemetda82bcc2014-07-31 04:00:39 +00004434{
4435 struct __loadu_ps {
4436 __m512 __v;
David Majnemer1cf22e62015-02-04 00:26:10 +00004437 } __attribute__((__packed__, __may_alias__));
Adam Nemetda82bcc2014-07-31 04:00:39 +00004438 return ((struct __loadu_ps*)__p)->__v;
4439}
4440
Michael Kupersteine45af542015-06-30 13:36:19 +00004441static __inline __m512 __DEFAULT_FN_ATTRS
Craig Topper6afc4362017-03-17 05:59:25 +00004442_mm512_load_ps(void const *__p)
Adam Nemetc0cff242015-01-16 18:51:50 +00004443{
Craig Toppera6dd2fa2018-05-31 05:02:08 +00004444 return *(__m512*)__p;
Adam Nemetc0cff242015-01-16 18:51:50 +00004445}
4446
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004447static __inline __m512 __DEFAULT_FN_ATTRS
4448_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4449{
4450 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4451 (__v16sf) __W,
4452 (__mmask16) __U);
4453}
4454
4455static __inline __m512 __DEFAULT_FN_ATTRS
4456_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4457{
4458 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4459 (__v16sf)
4460 _mm512_setzero_ps (),
4461 (__mmask16) __U);
4462}
4463
Michael Kupersteine45af542015-06-30 13:36:19 +00004464static __inline __m512d __DEFAULT_FN_ATTRS
Craig Topper6afc4362017-03-17 05:59:25 +00004465_mm512_load_pd(void const *__p)
Adam Nemetc0cff242015-01-16 18:51:50 +00004466{
Craig Toppera6dd2fa2018-05-31 05:02:08 +00004467 return *(__m512d*)__p;
Adam Nemetc0cff242015-01-16 18:51:50 +00004468}
4469
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004470static __inline __m512d __DEFAULT_FN_ATTRS
4471_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4472{
4473 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4474 (__v8df) __W,
4475 (__mmask8) __U);
4476}
4477
4478static __inline __m512d __DEFAULT_FN_ATTRS
4479_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4480{
4481 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4482 (__v8df)
4483 _mm512_setzero_pd (),
4484 (__mmask8) __U);
4485}
4486
4487static __inline __m512i __DEFAULT_FN_ATTRS
4488_mm512_load_si512 (void const *__P)
4489{
4490 return *(__m512i *) __P;
4491}
4492
4493static __inline __m512i __DEFAULT_FN_ATTRS
4494_mm512_load_epi32 (void const *__P)
4495{
4496 return *(__m512i *) __P;
4497}
4498
4499static __inline __m512i __DEFAULT_FN_ATTRS
4500_mm512_load_epi64 (void const *__P)
4501{
4502 return *(__m512i *) __P;
4503}
4504
Adam Nemet0d5bb552014-07-28 17:14:40 +00004505/* SIMD store ops */
4506
Michael Kupersteine45af542015-06-30 13:36:19 +00004507static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004508_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004509{
Craig Topper4537ea72016-05-14 06:03:13 +00004510 __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004511 (__mmask8) __U);
4512}
4513
Michael Kupersteine45af542015-06-30 13:36:19 +00004514static __inline void __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004515_mm512_storeu_si512 (void *__P, __m512i __A)
4516{
Craig Toppera6dd2fa2018-05-31 05:02:08 +00004517 struct __storeu_si512 {
4518 __m512i __v;
4519 } __attribute__((__packed__, __may_alias__));
4520 ((struct __storeu_si512*)__P)->__v = __A;
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004521}
4522
4523static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004524_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004525{
Craig Topper4537ea72016-05-14 06:03:13 +00004526 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004527 (__mmask16) __U);
4528}
4529
Michael Kupersteine45af542015-06-30 13:36:19 +00004530static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004531_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004532{
Craig Topper4537ea72016-05-14 06:03:13 +00004533 __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004534}
4535
Michael Kupersteine45af542015-06-30 13:36:19 +00004536static __inline void __DEFAULT_FN_ATTRS
Adam Nemetfce1ad02014-07-28 17:14:45 +00004537_mm512_storeu_pd(void *__P, __m512d __A)
4538{
Craig Toppera6dd2fa2018-05-31 05:02:08 +00004539 struct __storeu_pd {
4540 __m512d __v;
4541 } __attribute__((__packed__, __may_alias__));
4542 ((struct __storeu_pd*)__P)->__v = __A;
Adam Nemetfce1ad02014-07-28 17:14:45 +00004543}
4544
Michael Kupersteine45af542015-06-30 13:36:19 +00004545static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004546_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004547{
Craig Topper4537ea72016-05-14 06:03:13 +00004548 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004549 (__mmask16) __U);
4550}
4551
Michael Kupersteine45af542015-06-30 13:36:19 +00004552static __inline void __DEFAULT_FN_ATTRS
Adam Nemetfce1ad02014-07-28 17:14:45 +00004553_mm512_storeu_ps(void *__P, __m512 __A)
4554{
Craig Toppera6dd2fa2018-05-31 05:02:08 +00004555 struct __storeu_ps {
4556 __m512 __v;
4557 } __attribute__((__packed__, __may_alias__));
4558 ((struct __storeu_ps*)__P)->__v = __A;
Adam Nemetfce1ad02014-07-28 17:14:45 +00004559}
4560
Michael Kupersteine45af542015-06-30 13:36:19 +00004561static __inline void __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00004562_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
Adam Nemetfce1ad02014-07-28 17:14:45 +00004563{
Adam Nemetc0cff242015-01-16 18:51:50 +00004564 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
Adam Nemetfce1ad02014-07-28 17:14:45 +00004565}
4566
Michael Kupersteine45af542015-06-30 13:36:19 +00004567static __inline void __DEFAULT_FN_ATTRS
Adam Nemetfce1ad02014-07-28 17:14:45 +00004568_mm512_store_pd(void *__P, __m512d __A)
4569{
Reid Kleckner89fbd552018-06-04 21:39:20 +00004570 *(__m512d*)__P = __A;
Adam Nemetfce1ad02014-07-28 17:14:45 +00004571}
4572
Michael Kupersteine45af542015-06-30 13:36:19 +00004573static __inline void __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00004574_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4575{
4576 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4577 (__mmask16) __U);
4578}
4579
Michael Kupersteine45af542015-06-30 13:36:19 +00004580static __inline void __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00004581_mm512_store_ps(void *__P, __m512 __A)
4582{
Reid Kleckner89fbd552018-06-04 21:39:20 +00004583 *(__m512*)__P = __A;
Adam Nemetc0cff242015-01-16 18:51:50 +00004584}
4585
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004586static __inline void __DEFAULT_FN_ATTRS
4587_mm512_store_si512 (void *__P, __m512i __A)
4588{
4589 *(__m512i *) __P = __A;
4590}
4591
4592static __inline void __DEFAULT_FN_ATTRS
4593_mm512_store_epi32 (void *__P, __m512i __A)
4594{
4595 *(__m512i *) __P = __A;
4596}
4597
4598static __inline void __DEFAULT_FN_ATTRS
4599_mm512_store_epi64 (void *__P, __m512i __A)
4600{
4601 *(__m512i *) __P = __A;
4602}
4603
Adam Nemet2db1d2f2014-07-30 16:51:27 +00004604/* Mask ops */
4605
Michael Kupersteine45af542015-06-30 13:36:19 +00004606static __inline __mmask16 __DEFAULT_FN_ATTRS
Adam Nemet2db1d2f2014-07-30 16:51:27 +00004607_mm512_knot(__mmask16 __M)
4608{
4609 return __builtin_ia32_knothi(__M);
4610}
4611
Robert Khasanovb9f3a912014-10-08 17:18:13 +00004612/* Integer compare */
4613
/* epi32 compare shorthands.  Each macro forwards its operands to
   _mm512_cmp_epi32_mask, or to _mm512_mask_cmp_epi32_mask for the forms that
   take a leading mask m, with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
Robert Khasanovb9f3a912014-10-08 17:18:13 +00004638
/* epu32 compare shorthands.  Each macro forwards its operands to
   _mm512_cmp_epu32_mask, or to _mm512_mask_cmp_epu32_mask for the forms that
   take a leading mask m, with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
Robert Khasanovb9f3a912014-10-08 17:18:13 +00004663
/* epi64 compare shorthands.  Each macro forwards its operands to
   _mm512_cmp_epi64_mask, or to _mm512_mask_cmp_epi64_mask for the forms that
   take a leading mask m, with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
Craig Topper4cac1c22015-01-25 23:30:07 +00004688
/* epu64 compare shorthands.  Each macro forwards its operands to
   _mm512_cmp_epu64_mask, or to _mm512_mask_cmp_epu64_mask for the forms that
   take a leading mask m, with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
Craig Topper4cac1c22015-01-25 23:30:07 +00004713
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004714static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004715_mm512_cvtepi8_epi32(__m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004716{
Craig Topper0c5da262016-10-23 07:35:47 +00004717 /* This function always performs a signed extension, but __v16qi is a char
4718 which may be signed or unsigned, so use __v16qs. */
4719 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004720}
4721
4722static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004723_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004724{
Craig Topper0c5da262016-10-23 07:35:47 +00004725 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4726 (__v16si)_mm512_cvtepi8_epi32(__A),
4727 (__v16si)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004728}
4729
4730static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004731_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004732{
Craig Topper0c5da262016-10-23 07:35:47 +00004733 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4734 (__v16si)_mm512_cvtepi8_epi32(__A),
4735 (__v16si)_mm512_setzero_si512());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004736}
4737
4738static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004739_mm512_cvtepi8_epi64(__m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004740{
Craig Topper0c5da262016-10-23 07:35:47 +00004741 /* This function always performs a signed extension, but __v16qi is a char
4742 which may be signed or unsigned, so use __v16qs. */
4743 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004744}
4745
4746static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004747_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004748{
Craig Topper0c5da262016-10-23 07:35:47 +00004749 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4750 (__v8di)_mm512_cvtepi8_epi64(__A),
4751 (__v8di)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004752}
4753
4754static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004755_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004756{
Craig Topper0c5da262016-10-23 07:35:47 +00004757 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4758 (__v8di)_mm512_cvtepi8_epi64(__A),
4759 (__v8di)_mm512_setzero_si512 ());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004760}
4761
4762static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004763_mm512_cvtepi32_epi64(__m256i __X)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004764{
Craig Topper0c5da262016-10-23 07:35:47 +00004765 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004766}
4767
4768static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004769_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004770{
Craig Topper0c5da262016-10-23 07:35:47 +00004771 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4772 (__v8di)_mm512_cvtepi32_epi64(__X),
4773 (__v8di)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004774}
4775
4776static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004777_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004778{
Craig Topper0c5da262016-10-23 07:35:47 +00004779 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4780 (__v8di)_mm512_cvtepi32_epi64(__X),
4781 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004782}
4783
4784static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004785_mm512_cvtepi16_epi32(__m256i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004786{
Craig Topper0c5da262016-10-23 07:35:47 +00004787 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004788}
4789
4790static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004791_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004792{
Craig Topper0c5da262016-10-23 07:35:47 +00004793 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4794 (__v16si)_mm512_cvtepi16_epi32(__A),
4795 (__v16si)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004796}
4797
4798static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004799_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004800{
Craig Topper0c5da262016-10-23 07:35:47 +00004801 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4802 (__v16si)_mm512_cvtepi16_epi32(__A),
4803 (__v16si)_mm512_setzero_si512 ());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004804}
4805
4806static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004807_mm512_cvtepi16_epi64(__m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004808{
Craig Topper0c5da262016-10-23 07:35:47 +00004809 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004810}
4811
4812static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004813_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004814{
Craig Topper0c5da262016-10-23 07:35:47 +00004815 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4816 (__v8di)_mm512_cvtepi16_epi64(__A),
4817 (__v8di)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004818}
4819
4820static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004821_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004822{
Craig Topper0c5da262016-10-23 07:35:47 +00004823 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4824 (__v8di)_mm512_cvtepi16_epi64(__A),
4825 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004826}
4827
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004828static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004829_mm512_cvtepu8_epi32(__m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004830{
Craig Topper0c5da262016-10-23 07:35:47 +00004831 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004832}
4833
4834static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004835_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004836{
Craig Topper0c5da262016-10-23 07:35:47 +00004837 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4838 (__v16si)_mm512_cvtepu8_epi32(__A),
4839 (__v16si)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004840}
4841
4842static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004843_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004844{
Craig Topper0c5da262016-10-23 07:35:47 +00004845 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4846 (__v16si)_mm512_cvtepu8_epi32(__A),
4847 (__v16si)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004848}
4849
4850static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004851_mm512_cvtepu8_epi64(__m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004852{
Craig Topper0c5da262016-10-23 07:35:47 +00004853 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004854}
4855
4856static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004857_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004858{
Craig Topper0c5da262016-10-23 07:35:47 +00004859 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4860 (__v8di)_mm512_cvtepu8_epi64(__A),
4861 (__v8di)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004862}
4863
4864static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004865_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004866{
Craig Topper0c5da262016-10-23 07:35:47 +00004867 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4868 (__v8di)_mm512_cvtepu8_epi64(__A),
4869 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004870}
4871
4872static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004873_mm512_cvtepu32_epi64(__m256i __X)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004874{
Craig Topper0c5da262016-10-23 07:35:47 +00004875 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004876}
4877
4878static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004879_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004880{
Craig Topper0c5da262016-10-23 07:35:47 +00004881 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4882 (__v8di)_mm512_cvtepu32_epi64(__X),
4883 (__v8di)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004884}
4885
4886static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004887_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004888{
Craig Topper0c5da262016-10-23 07:35:47 +00004889 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4890 (__v8di)_mm512_cvtepu32_epi64(__X),
4891 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004892}
4893
4894static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004895_mm512_cvtepu16_epi32(__m256i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004896{
Craig Topper0c5da262016-10-23 07:35:47 +00004897 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004898}
4899
4900static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004901_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004902{
Craig Topper0c5da262016-10-23 07:35:47 +00004903 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4904 (__v16si)_mm512_cvtepu16_epi32(__A),
4905 (__v16si)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004906}
4907
4908static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004909_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004910{
Craig Topper0c5da262016-10-23 07:35:47 +00004911 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4912 (__v16si)_mm512_cvtepu16_epi32(__A),
4913 (__v16si)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004914}
4915
4916static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004917_mm512_cvtepu16_epi64(__m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004918{
Craig Topper0c5da262016-10-23 07:35:47 +00004919 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004920}
4921
4922static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004923_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004924{
Craig Topper0c5da262016-10-23 07:35:47 +00004925 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4926 (__v8di)_mm512_cvtepu16_epi64(__A),
4927 (__v8di)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004928}
4929
4930static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004931_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004932{
Craig Topper0c5da262016-10-23 07:35:47 +00004933 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4934 (__v8di)_mm512_cvtepu16_epi64(__A),
4935 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004936}
4937
Michael Zuckermane98cc742016-02-23 15:59:47 +00004938static __inline__ __m512i __DEFAULT_FN_ATTRS
4939_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4940{
Craig Topper0e9de762018-06-30 01:32:14 +00004941 return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
Michael Zuckermane98cc742016-02-23 15:59:47 +00004942}
4943
4944static __inline__ __m512i __DEFAULT_FN_ATTRS
4945_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4946{
Craig Topper0e9de762018-06-30 01:32:14 +00004947 return (__m512i)__builtin_ia32_selectd_512(__U,
4948 (__v16si)_mm512_rorv_epi32(__A, __B),
4949 (__v16si)__W);
Michael Zuckermane98cc742016-02-23 15:59:47 +00004950}
4951
4952static __inline__ __m512i __DEFAULT_FN_ATTRS
4953_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4954{
Craig Topper0e9de762018-06-30 01:32:14 +00004955 return (__m512i)__builtin_ia32_selectd_512(__U,
4956 (__v16si)_mm512_rorv_epi32(__A, __B),
4957 (__v16si)_mm512_setzero_si512());
Michael Zuckermane98cc742016-02-23 15:59:47 +00004958}
4959
4960static __inline__ __m512i __DEFAULT_FN_ATTRS
4961_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4962{
Craig Topper0e9de762018-06-30 01:32:14 +00004963 return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
Michael Zuckermane98cc742016-02-23 15:59:47 +00004964}
4965
4966static __inline__ __m512i __DEFAULT_FN_ATTRS
4967_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4968{
Craig Topper0e9de762018-06-30 01:32:14 +00004969 return (__m512i)__builtin_ia32_selectq_512(__U,
4970 (__v8di)_mm512_rorv_epi64(__A, __B),
4971 (__v8di)__W);
Michael Zuckermane98cc742016-02-23 15:59:47 +00004972}
4973
4974static __inline__ __m512i __DEFAULT_FN_ATTRS
4975_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4976{
Craig Topper0e9de762018-06-30 01:32:14 +00004977 return (__m512i)__builtin_ia32_selectq_512(__U,
4978 (__v8di)_mm512_rorv_epi64(__A, __B),
4979 (__v8di)_mm512_setzero_si512());
Michael Zuckermane98cc742016-02-23 15:59:47 +00004980}
4981
4982
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00004983
/* Unmasked integer compares.  Each macro forwards (a, b, p) to the matching
 * compare builtin with an all-ones mask argument.  They are macros rather
 * than functions so that p reaches the builtin unchanged.
 *
 * Every expansion is wrapped in an outer pair of parentheses so that the
 * result cast always applies to the complete builtin call, whatever operator
 * the caller writes next to the macro invocation. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))
Craig Topper4cac1c22015-01-25 23:30:07 +00005003
/* Masked integer compares.  Same builtins as the unmasked compare macros,
 * but the caller-supplied mask m is passed as the last argument instead of
 * an all-ones constant.
 *
 * Every expansion is wrapped in an outer pair of parentheses so that the
 * result cast always applies to the complete builtin call, whatever operator
 * the caller writes next to the macro invocation. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))
Eric Christopher4d1851682015-06-17 07:09:20 +00005023
/* 32-bit lane rotate macros built on __builtin_ia32_prold512.  The masked
 * variants feed the unmasked result plus either W or an all-zero vector to
 * __builtin_ia32_selectd_512.
 *
 * The expansions are fully parenthesized: without the outer parentheses a
 * postfix operator written after the macro (for example a vector subscript)
 * would bind to the builtin call before the (__m512i) cast is applied. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))
Michael Zuckerman38a27272016-02-22 09:05:41 +00005036
/* 64-bit lane rotate macros built on __builtin_ia32_prolq512.  The masked
 * variants feed the unmasked result plus either W or an all-zero vector to
 * __builtin_ia32_selectq_512.
 *
 * The expansions are fully parenthesized: without the outer parentheses a
 * postfix operator written after the macro (for example a vector subscript)
 * would bind to the builtin call before the (__m512i) cast is applied. */
#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
5049
Michael Zuckerman0231f162016-02-23 13:41:13 +00005050static __inline__ __m512i __DEFAULT_FN_ATTRS
5051_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5052{
Craig Topper0e9de762018-06-30 01:32:14 +00005053 return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
Michael Zuckerman0231f162016-02-23 13:41:13 +00005054}
5055
5056static __inline__ __m512i __DEFAULT_FN_ATTRS
5057_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5058{
Craig Topper0e9de762018-06-30 01:32:14 +00005059 return (__m512i)__builtin_ia32_selectd_512(__U,
5060 (__v16si)_mm512_rolv_epi32(__A, __B),
5061 (__v16si)__W);
Michael Zuckerman0231f162016-02-23 13:41:13 +00005062}
5063
5064static __inline__ __m512i __DEFAULT_FN_ATTRS
5065_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5066{
Craig Topper0e9de762018-06-30 01:32:14 +00005067 return (__m512i)__builtin_ia32_selectd_512(__U,
5068 (__v16si)_mm512_rolv_epi32(__A, __B),
5069 (__v16si)_mm512_setzero_si512());
Michael Zuckerman0231f162016-02-23 13:41:13 +00005070}
5071
5072static __inline__ __m512i __DEFAULT_FN_ATTRS
5073_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5074{
Craig Topper0e9de762018-06-30 01:32:14 +00005075 return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
Michael Zuckerman0231f162016-02-23 13:41:13 +00005076}
5077
5078static __inline__ __m512i __DEFAULT_FN_ATTRS
5079_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5080{
Craig Topper0e9de762018-06-30 01:32:14 +00005081 return (__m512i)__builtin_ia32_selectq_512(__U,
5082 (__v8di)_mm512_rolv_epi64(__A, __B),
5083 (__v8di)__W);
Michael Zuckerman0231f162016-02-23 13:41:13 +00005084}
5085
5086static __inline__ __m512i __DEFAULT_FN_ATTRS
5087_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5088{
Craig Topper0e9de762018-06-30 01:32:14 +00005089 return (__m512i)__builtin_ia32_selectq_512(__U,
5090 (__v8di)_mm512_rolv_epi64(__A, __B),
5091 (__v8di)_mm512_setzero_si512());
Michael Zuckerman0231f162016-02-23 13:41:13 +00005092}
5093
/* 32-bit lane rotate macros built on __builtin_ia32_prord512.  The masked
 * variants feed the unmasked result plus either W or an all-zero vector to
 * __builtin_ia32_selectd_512.
 *
 * The expansions are fully parenthesized: without the outer parentheses a
 * postfix operator written after the macro (for example a vector subscript)
 * would bind to the builtin call before the (__m512i) cast is applied. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))
Michael Zuckerman0231f162016-02-23 13:41:13 +00005106
/* 64-bit lane rotate macros built on __builtin_ia32_prorq512.  The masked
 * variants feed the unmasked result plus either W or an all-zero vector to
 * __builtin_ia32_selectq_512.
 *
 * The expansions are fully parenthesized: without the outer parentheses a
 * postfix operator written after the macro (for example a vector subscript)
 * would bind to the builtin call before the (__m512i) cast is applied. */
#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
Michael Zuckerman38a27272016-02-22 09:05:41 +00005119
Craig Topper1a441932016-11-12 07:16:59 +00005120static __inline__ __m512i __DEFAULT_FN_ATTRS
5121_mm512_slli_epi32(__m512i __A, int __B)
5122{
5123 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5124}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005125
Craig Topper1a441932016-11-12 07:16:59 +00005126static __inline__ __m512i __DEFAULT_FN_ATTRS
5127_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5128{
Craig Topperd7e5b212016-11-13 07:26:31 +00005129 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5130 (__v16si)_mm512_slli_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005131 (__v16si)__W);
5132}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005133
Craig Topper1a441932016-11-12 07:16:59 +00005134static __inline__ __m512i __DEFAULT_FN_ATTRS
5135_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
Craig Topperd7e5b212016-11-13 07:26:31 +00005136 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5137 (__v16si)_mm512_slli_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005138 (__v16si)_mm512_setzero_si512());
5139}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005140
Craig Topper1a441932016-11-12 07:16:59 +00005141static __inline__ __m512i __DEFAULT_FN_ATTRS
5142_mm512_slli_epi64(__m512i __A, int __B)
5143{
5144 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5145}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005146
Craig Topper1a441932016-11-12 07:16:59 +00005147static __inline__ __m512i __DEFAULT_FN_ATTRS
5148_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5149{
Craig Topperd7e5b212016-11-13 07:26:31 +00005150 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5151 (__v8di)_mm512_slli_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005152 (__v8di)__W);
5153}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005154
Craig Topper1a441932016-11-12 07:16:59 +00005155static __inline__ __m512i __DEFAULT_FN_ATTRS
5156_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5157{
Craig Topperd7e5b212016-11-13 07:26:31 +00005158 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5159 (__v8di)_mm512_slli_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005160 (__v8di)_mm512_setzero_si512());
5161}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005162
Craig Topper1a441932016-11-12 07:16:59 +00005163static __inline__ __m512i __DEFAULT_FN_ATTRS
5164_mm512_srli_epi32(__m512i __A, int __B)
5165{
5166 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5167}
Michael Zuckerman38a27272016-02-22 09:05:41 +00005168
Craig Topper1a441932016-11-12 07:16:59 +00005169static __inline__ __m512i __DEFAULT_FN_ATTRS
5170_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5171{
Craig Topperd7e5b212016-11-13 07:26:31 +00005172 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5173 (__v16si)_mm512_srli_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005174 (__v16si)__W);
5175}
Michael Zuckermand176d742016-03-01 17:49:03 +00005176
Craig Topper1a441932016-11-12 07:16:59 +00005177static __inline__ __m512i __DEFAULT_FN_ATTRS
5178_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
Craig Topperd7e5b212016-11-13 07:26:31 +00005179 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5180 (__v16si)_mm512_srli_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005181 (__v16si)_mm512_setzero_si512());
5182}
Michael Zuckermand176d742016-03-01 17:49:03 +00005183
Craig Topper1a441932016-11-12 07:16:59 +00005184static __inline__ __m512i __DEFAULT_FN_ATTRS
5185_mm512_srli_epi64(__m512i __A, int __B)
5186{
5187 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5188}
Michael Zuckermand176d742016-03-01 17:49:03 +00005189
Craig Topper1a441932016-11-12 07:16:59 +00005190static __inline__ __m512i __DEFAULT_FN_ATTRS
5191_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5192{
Craig Topperd7e5b212016-11-13 07:26:31 +00005193 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5194 (__v8di)_mm512_srli_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005195 (__v8di)__W);
5196}
Michael Zuckermand176d742016-03-01 17:49:03 +00005197
Craig Topper1a441932016-11-12 07:16:59 +00005198static __inline__ __m512i __DEFAULT_FN_ATTRS
5199_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5200{
Craig Topperd7e5b212016-11-13 07:26:31 +00005201 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5202 (__v8di)_mm512_srli_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005203 (__v8di)_mm512_setzero_si512());
5204}
Michael Zuckermand176d742016-03-01 17:49:03 +00005205
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00005206static __inline__ __m512i __DEFAULT_FN_ATTRS
5207_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5208{
5209 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5210 (__v16si) __W,
5211 (__mmask16) __U);
5212}
5213
5214static __inline__ __m512i __DEFAULT_FN_ATTRS
5215_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5216{
5217 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5218 (__v16si)
5219 _mm512_setzero_si512 (),
5220 (__mmask16) __U);
5221}
5222
5223static __inline__ void __DEFAULT_FN_ATTRS
5224_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5225{
5226 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5227 (__mmask16) __U);
5228}
5229
5230static __inline__ __m512i __DEFAULT_FN_ATTRS
Michael Zuckermane6542002016-05-23 08:01:48 +00005231_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5232{
Igor Bregeraadb8762016-06-08 13:59:20 +00005233 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5234 (__v16si) __A,
5235 (__v16si) __W);
Michael Zuckermane6542002016-05-23 08:01:48 +00005236}
5237
5238static __inline__ __m512i __DEFAULT_FN_ATTRS
5239_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5240{
Igor Bregeraadb8762016-06-08 13:59:20 +00005241 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5242 (__v16si) __A,
5243 (__v16si) _mm512_setzero_si512 ());
Michael Zuckermane6542002016-05-23 08:01:48 +00005244}
5245
5246static __inline__ __m512i __DEFAULT_FN_ATTRS
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00005247_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5248{
Igor Bregeraadb8762016-06-08 13:59:20 +00005249 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5250 (__v8di) __A,
5251 (__v8di) __W);
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00005252}
5253
5254static __inline__ __m512i __DEFAULT_FN_ATTRS
5255_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5256{
Igor Bregeraadb8762016-06-08 13:59:20 +00005257 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5258 (__v8di) __A,
5259 (__v8di) _mm512_setzero_si512 ());
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00005260}
5261
5262static __inline__ __m512i __DEFAULT_FN_ATTRS
5263_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5264{
5265 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5266 (__v8di) __W,
5267 (__mmask8) __U);
5268}
5269
5270static __inline__ __m512i __DEFAULT_FN_ATTRS
5271_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5272{
5273 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5274 (__v8di)
5275 _mm512_setzero_si512 (),
5276 (__mmask8) __U);
5277}
5278
5279static __inline__ void __DEFAULT_FN_ATTRS
5280_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5281{
5282 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5283 (__mmask8) __U);
5284}
5285
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005286static __inline__ __m512d __DEFAULT_FN_ATTRS
5287_mm512_movedup_pd (__m512d __A)
5288{
Simon Pilgrim275d7212016-07-02 17:16:25 +00005289 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5290 0, 0, 2, 2, 4, 4, 6, 6);
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005291}
5292
5293static __inline__ __m512d __DEFAULT_FN_ATTRS
5294_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5295{
Simon Pilgrim275d7212016-07-02 17:16:25 +00005296 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5297 (__v8df)_mm512_movedup_pd(__A),
5298 (__v8df)__W);
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005299}
5300
5301static __inline__ __m512d __DEFAULT_FN_ATTRS
5302_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5303{
Simon Pilgrim275d7212016-07-02 17:16:25 +00005304 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5305 (__v8df)_mm512_movedup_pd(__A),
5306 (__v8df)_mm512_setzero_pd());
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005307}
5308
/* Packed double-precision fixupimm macros.  The plain and mask variants use
 * __builtin_ia32_fixupimmpd512_mask, the maskz variants use
 * __builtin_ia32_fixupimmpd512_maskz.  Variants without an R parameter pass
 * _MM_FROUND_CUR_DIRECTION; variants without a U parameter pass an all-ones
 * mask.
 *
 * Each expansion is enclosed in outer parentheses so the (__m512d) cast
 * always applies to the whole builtin call, even when the caller follows the
 * macro with a postfix operator such as a vector subscript. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)-1, \
                                              (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              (int)(R)))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005348
/* Packed single-precision fixupimm macros.  The plain and mask variants use
 * __builtin_ia32_fixupimmps512_mask, the maskz variants use
 * __builtin_ia32_fixupimmps512_maskz.  Variants without an R parameter pass
 * _MM_FROUND_CUR_DIRECTION; variants without a U parameter pass an all-ones
 * mask.
 *
 * Each expansion is enclosed in outer parentheses so the (__m512) cast
 * always applies to the whole builtin call, even when the caller follows the
 * macro with a postfix operator such as a vector subscript. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)-1, \
                                             (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             (int)(R)))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005388
/* Scalar double-precision fixupimm macros.  The plain and mask variants use
 * __builtin_ia32_fixupimmsd_mask, the maskz variants use
 * __builtin_ia32_fixupimmsd_maskz.  Variants without an R parameter pass
 * _MM_FROUND_CUR_DIRECTION; variants without a U parameter pass an all-ones
 * mask.
 *
 * Each expansion is enclosed in outer parentheses so the (__m128d) cast
 * always applies to the whole builtin call, even when the caller follows the
 * macro with a postfix operator such as a vector subscript. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005427
/* Scalar single-precision fixupimm macros.  The plain and mask variants use
 * __builtin_ia32_fixupimmss_mask, the maskz variants use
 * __builtin_ia32_fixupimmss_maskz.  Variants without an R parameter pass
 * _MM_FROUND_CUR_DIRECTION; variants without a U parameter pass an all-ones
 * mask.
 *
 * Each expansion is enclosed in outer parentheses so the (__m128) cast
 * always applies to the whole builtin call, even when the caller follows the
 * macro with a postfix operator such as a vector subscript. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005466
/* Calls __builtin_ia32_getexpsd128_round_mask with an all-zero vector as the
 * third operand, an all-ones mask, and the caller's R argument.  The
 * expansion is enclosed in outer parentheses so the (__m128d) cast applies
 * to the whole call even when a postfix operator follows the macro. */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)-1, (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005472
5473
5474static __inline__ __m128d __DEFAULT_FN_ATTRS
5475_mm_getexp_sd (__m128d __A, __m128d __B)
5476{
5477 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5478 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5479}
5480
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005481static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00005482_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005483{
5484 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5485 (__v2df) __B,
5486 (__v2df) __W,
5487 (__mmask8) __U,
5488 _MM_FROUND_CUR_DIRECTION);
5489}
5490
/* Calls __builtin_ia32_getexpsd128_round_mask with W as the third operand,
 * the caller's mask U and the caller's R argument.  The expansion is
 * enclosed in outer parentheses so the (__m128d) cast applies to the whole
 * call even when a postfix operator follows the macro. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)(__m128d)(W), \
                                                  (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005496
5497static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00005498_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005499{
5500 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5501 (__v2df) __B,
5502 (__v2df) _mm_setzero_pd (),
5503 (__mmask8) __U,
5504 _MM_FROUND_CUR_DIRECTION);
5505}
5506
/* Calls __builtin_ia32_getexpsd128_round_mask with an all-zero third
 * operand, the caller's mask U and the caller's R argument.  The expansion
 * is enclosed in outer parentheses so the (__m128d) cast applies to the
 * whole call even when a postfix operator follows the macro. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005512
/* Calls __builtin_ia32_getexpss128_round_mask with an all-zero vector as the
 * third operand, an all-ones mask, and the caller's R argument.  The
 * expansion is enclosed in outer parentheses so the (__m128) cast applies to
 * the whole call even when a postfix operator follows the macro. */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)-1, (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005518
5519static __inline__ __m128 __DEFAULT_FN_ATTRS
5520_mm_getexp_ss (__m128 __A, __m128 __B)
5521{
5522 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5523 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5524}
5525
Craig Topper58187d32016-05-17 04:41:29 +00005526static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00005527_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005528{
Craig Topper58187d32016-05-17 04:41:29 +00005529 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005530 (__v4sf) __B,
5531 (__v4sf) __W,
5532 (__mmask8) __U,
5533 _MM_FROUND_CUR_DIRECTION);
5534}
5535
/* Merge-masked scalar-float getexp with caller-supplied rounding argument R:
 * calls __builtin_ia32_getexpss128_round_mask on A and B with W as the third
 * operand and mask U.  The expansion is fully parenthesized so the cast stays
 * bound to the call at the use site. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)(__m128)(W), \
                                                 (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005541
Craig Topper58187d32016-05-17 04:41:29 +00005542static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00005543_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005544{
Craig Topper58187d32016-05-17 04:41:29 +00005545 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005546 (__v4sf) __B,
Craig Topper5cbeeed2018-07-07 17:03:32 +00005547 (__v4sf) _mm_setzero_ps (),
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005548 (__mmask8) __U,
5549 _MM_FROUND_CUR_DIRECTION);
5550}
5551
/* Zero-masked scalar-float getexp with caller-supplied rounding argument R:
 * calls __builtin_ia32_getexpss128_round_mask on A and B with a zero vector
 * as the third operand and mask U.  The expansion is fully parenthesized so
 * the cast stays bound to the call at the use site. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005557
/* Unmasked scalar-double getmant with caller-supplied rounding argument R.
 * C and D are packed into one int immediate as ((D) << 2) | (C); the fourth
 * operand is a zero vector and the mask is all ones.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005564
/* Unmasked scalar-double getmant using _MM_FROUND_CUR_DIRECTION.
 * C and D are packed into one int immediate as ((D) << 2) | (C); the fourth
 * operand is a zero vector and the mask is all ones.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005572
/* Merge-masked scalar-double getmant using _MM_FROUND_CUR_DIRECTION.
 * C and D are packed into one int immediate as ((D) << 2) | (C); W is passed
 * as the fourth operand and U as the mask.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005580
/* Merge-masked scalar-double getmant with caller-supplied rounding argument R.
 * C and D are packed into one int immediate as ((D) << 2) | (C); W is passed
 * as the fourth operand and U as the mask.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005587
/* Zero-masked scalar-double getmant using _MM_FROUND_CUR_DIRECTION.
 * C and D are packed into one int immediate as ((D) << 2) | (C); the fourth
 * operand is a zero vector and U is the mask.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005595
/* Zero-masked scalar-double getmant with caller-supplied rounding argument R.
 * C and D are packed into one int immediate as ((D) << 2) | (C); the fourth
 * operand is a zero vector and U is the mask.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005602
/* Unmasked scalar-float getmant with caller-supplied rounding argument R.
 * C and D are packed into one int immediate as ((D) << 2) | (C); the fourth
 * operand is a zero vector and the mask is all ones.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005609
/* Unmasked scalar-float getmant using _MM_FROUND_CUR_DIRECTION.
 * C and D are packed into one int immediate as ((D) << 2) | (C); the fourth
 * operand is a zero vector and the mask is all ones.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005617
/* Merge-masked scalar-float getmant using _MM_FROUND_CUR_DIRECTION.
 * C and D are packed into one int immediate as ((D) << 2) | (C); W is passed
 * as the fourth operand and U as the mask.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005625
/* Merge-masked scalar-float getmant with caller-supplied rounding argument R.
 * C and D are packed into one int immediate as ((D) << 2) | (C); W is passed
 * as the fourth operand and U as the mask.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005632
/* Zero-masked scalar-float getmant using _MM_FROUND_CUR_DIRECTION.
 * C and D are packed into one int immediate as ((D) << 2) | (C); the fourth
 * operand is a zero vector and U is the mask.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005640
/* Zero-masked scalar-float getmant with caller-supplied rounding argument R.
 * C and D are packed into one int immediate as ((D) << 2) | (C); the fourth
 * operand is a zero vector and U is the mask.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005647
5648static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5649_mm512_kmov (__mmask16 __A)
5650{
5651 return __A;
5652}
5653
/* Calls __builtin_ia32_vcomisd on A and B (viewed as __v2df) with int
 * arguments P and R, yielding an int.  The expansion is fully parenthesized
 * so the cast stays bound to the call at the use site. */
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))
Michael Zuckermane71d59f2016-03-07 19:15:00 +00005657
/* Calls __builtin_ia32_vcomiss on A and B (viewed as __v4sf) with int
 * arguments P and R, yielding an int.  The expansion is fully parenthesized
 * so the cast stays bound to the call at the use site. */
#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))
Michael Zuckermane71d59f2016-03-07 19:15:00 +00005661
#ifdef __x86_64__
/* Only defined when __x86_64__ is set.  Calls __builtin_ia32_vcvtsd2si64 on A
 * (viewed as __v2df) with rounding argument R and yields a long long.  The
 * expansion is fully parenthesized so the cast stays bound to the call. */
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
Simon Pilgrim427154d2016-07-04 21:30:47 +00005666
Michael Zuckermand8d2f622016-04-11 07:15:34 +00005667static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005668_mm512_sll_epi32(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005669{
Craig Topper1a441932016-11-12 07:16:59 +00005670 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005671}
5672
5673static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005674_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005675{
Craig Topperd7e5b212016-11-13 07:26:31 +00005676 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5677 (__v16si)_mm512_sll_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005678 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005679}
5680
5681static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005682_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005683{
Craig Topperd7e5b212016-11-13 07:26:31 +00005684 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5685 (__v16si)_mm512_sll_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005686 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005687}
5688
5689static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005690_mm512_sll_epi64(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005691{
Craig Topper1a441932016-11-12 07:16:59 +00005692 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005693}
5694
5695static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005696_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005697{
Craig Topperd7e5b212016-11-13 07:26:31 +00005698 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5699 (__v8di)_mm512_sll_epi64(__A, __B),
5700 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005701}
5702
5703static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005704_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005705{
Craig Topperd7e5b212016-11-13 07:26:31 +00005706 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5707 (__v8di)_mm512_sll_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005708 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005709}
5710
5711static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005712_mm512_sllv_epi32(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005713{
Craig Topper5e0709d2016-11-13 07:26:34 +00005714 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005715}
5716
5717static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005718_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005719{
Craig Topper5e0709d2016-11-13 07:26:34 +00005720 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5721 (__v16si)_mm512_sllv_epi32(__X, __Y),
5722 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005723}
5724
5725static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005726_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005727{
Craig Topper5e0709d2016-11-13 07:26:34 +00005728 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5729 (__v16si)_mm512_sllv_epi32(__X, __Y),
5730 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005731}
5732
5733static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005734_mm512_sllv_epi64(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005735{
Craig Topper5e0709d2016-11-13 07:26:34 +00005736 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005737}
5738
5739static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005740_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005741{
Craig Topper5e0709d2016-11-13 07:26:34 +00005742 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5743 (__v8di)_mm512_sllv_epi64(__X, __Y),
5744 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005745}
5746
5747static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005748_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005749{
Craig Topper5e0709d2016-11-13 07:26:34 +00005750 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5751 (__v8di)_mm512_sllv_epi64(__X, __Y),
5752 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005753}
5754
5755static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005756_mm512_sra_epi32(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005757{
Craig Topper1a441932016-11-12 07:16:59 +00005758 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005759}
5760
5761static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005762_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005763{
Craig Topperd7e5b212016-11-13 07:26:31 +00005764 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5765 (__v16si)_mm512_sra_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005766 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005767}
5768
5769static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005770_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005771{
Craig Topperd7e5b212016-11-13 07:26:31 +00005772 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5773 (__v16si)_mm512_sra_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005774 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005775}
5776
5777static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005778_mm512_sra_epi64(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005779{
Craig Topper1a441932016-11-12 07:16:59 +00005780 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005781}
5782
5783static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005784_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005785{
Craig Topperd7e5b212016-11-13 07:26:31 +00005786 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5787 (__v8di)_mm512_sra_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005788 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005789}
5790
5791static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005792_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005793{
Craig Topperd7e5b212016-11-13 07:26:31 +00005794 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5795 (__v8di)_mm512_sra_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005796 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005797}
5798
5799static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005800_mm512_srav_epi32(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005801{
Craig Topper5e0709d2016-11-13 07:26:34 +00005802 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005803}
5804
5805static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005806_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005807{
Craig Topper5e0709d2016-11-13 07:26:34 +00005808 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5809 (__v16si)_mm512_srav_epi32(__X, __Y),
5810 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005811}
5812
5813static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005814_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005815{
Craig Topper5e0709d2016-11-13 07:26:34 +00005816 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5817 (__v16si)_mm512_srav_epi32(__X, __Y),
5818 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005819}
5820
5821static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005822_mm512_srav_epi64(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005823{
Craig Topper5e0709d2016-11-13 07:26:34 +00005824 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005825}
5826
5827static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005828_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005829{
Craig Topper5e0709d2016-11-13 07:26:34 +00005830 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5831 (__v8di)_mm512_srav_epi64(__X, __Y),
5832 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005833}
5834
5835static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005836_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005837{
Craig Topper5e0709d2016-11-13 07:26:34 +00005838 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5839 (__v8di)_mm512_srav_epi64(__X, __Y),
5840 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005841}
5842
5843static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005844_mm512_srl_epi32(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005845{
Craig Topper1a441932016-11-12 07:16:59 +00005846 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005847}
5848
5849static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005850_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005851{
Craig Topperd7e5b212016-11-13 07:26:31 +00005852 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5853 (__v16si)_mm512_srl_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005854 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005855}
5856
5857static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005858_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005859{
Craig Topperd7e5b212016-11-13 07:26:31 +00005860 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5861 (__v16si)_mm512_srl_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005862 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005863}
5864
5865static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005866_mm512_srl_epi64(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005867{
Craig Topper1a441932016-11-12 07:16:59 +00005868 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005869}
5870
5871static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005872_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005873{
Craig Topperd7e5b212016-11-13 07:26:31 +00005874 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5875 (__v8di)_mm512_srl_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005876 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005877}
5878
5879static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005880_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005881{
Craig Topperd7e5b212016-11-13 07:26:31 +00005882 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5883 (__v8di)_mm512_srl_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005884 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005885}
5886
5887static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005888_mm512_srlv_epi32(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005889{
Craig Topper5e0709d2016-11-13 07:26:34 +00005890 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005891}
5892
5893static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005894_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005895{
Craig Topper5e0709d2016-11-13 07:26:34 +00005896 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5897 (__v16si)_mm512_srlv_epi32(__X, __Y),
5898 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005899}
5900
5901static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005902_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005903{
Craig Topper5e0709d2016-11-13 07:26:34 +00005904 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5905 (__v16si)_mm512_srlv_epi32(__X, __Y),
5906 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005907}
5908
5909static __inline__ __m512i __DEFAULT_FN_ATTRS
5910_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5911{
Craig Topper5e0709d2016-11-13 07:26:34 +00005912 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005913}
5914
5915static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005916_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005917{
Craig Topper5e0709d2016-11-13 07:26:34 +00005918 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5919 (__v8di)_mm512_srlv_epi64(__X, __Y),
5920 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005921}
5922
5923static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005924_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005925{
Craig Topper5e0709d2016-11-13 07:26:34 +00005926 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5927 (__v8di)_mm512_srlv_epi64(__X, __Y),
5928 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005929}
5930
/* Unmasked form: calls __builtin_ia32_pternlogd512_mask on A, B and C (each
 * viewed as 16 ints) with int immediate imm and an all-ones 16-bit mask.
 * The expansion is fully parenthesized so the cast stays bound to the call
 * at the use site. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005936
/* Masked form: calls __builtin_ia32_pternlogd512_mask on A, B and C (each
 * viewed as 16 ints) with int immediate imm and 16-bit mask U.  Note the
 * parameter order (A, U, B, C, imm).  The expansion is fully parenthesized
 * so the cast stays bound to the call at the use site. */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U)))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005942
/* Zero-masked form: calls __builtin_ia32_pternlogd512_maskz on A, B and C
 * (each viewed as 16 ints) with int immediate imm and 16-bit mask U.  The
 * expansion is fully parenthesized so the cast stays bound to the call at
 * the use site. */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                              (__v16si)(__m512i)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U)))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005948
/* Unmasked form: calls __builtin_ia32_pternlogq512_mask on A, B and C (each
 * viewed as 8 long longs) with int immediate imm and an all-ones 8-bit mask.
 * The expansion is fully parenthesized so the cast stays bound to the call
 * at the use site. */
#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005954
/* Masked form: calls __builtin_ia32_pternlogq512_mask on A, B and C (each
 * viewed as 8 long longs) with int immediate imm and 8-bit mask U.  Note the
 * parameter order (A, U, B, C, imm).  The expansion is fully parenthesized
 * so the cast stays bound to the call at the use site. */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U)))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005960
/* Zero-masked form: calls __builtin_ia32_pternlogq512_maskz on A, B and C
 * (each viewed as 8 long longs) with int immediate imm and 8-bit mask U.
 * The expansion is fully parenthesized so the cast stays bound to the call
 * at the use site. */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                              (__v8di)(__m512i)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U)))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005966
#ifdef __x86_64__
/* Only defined when __x86_64__ is set.  Calls __builtin_ia32_vcvtsd2si64 on A
 * (viewed as __v2df) with rounding argument R and yields a long long.  The
 * expansion is fully parenthesized so the cast stays bound to the call. */
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00005971
/* Calls __builtin_ia32_vcvtsd2si32 on A (viewed as __v2df) with rounding
 * argument R and yields an int.  The expansion is fully parenthesized so the
 * cast stays bound to the call at the use site. */
#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00005974
/* Calls __builtin_ia32_vcvtsd2si32 on A (viewed as __v2df) with rounding
 * argument R and yields an int; same expansion as _mm_cvt_roundsd_si32.
 * Fully parenthesized so the cast stays bound to the call at the use site. */
#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00005977
/* Calls __builtin_ia32_vcvtsd2usi32 on A (viewed as __v2df) with rounding
 * argument R and yields an unsigned int.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00005980
5981static __inline__ unsigned __DEFAULT_FN_ATTRS
5982_mm_cvtsd_u32 (__m128d __A)
5983{
5984 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5985 _MM_FROUND_CUR_DIRECTION);
5986}
5987
Craig Topper45db56c2016-07-21 07:38:39 +00005988#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00005989#define _mm_cvt_roundsd_u64(A, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00005990 (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
Craig Topperc6338672018-05-31 00:51:20 +00005991 (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00005992
5993static __inline__ unsigned long long __DEFAULT_FN_ATTRS
5994_mm_cvtsd_u64 (__m128d __A)
5995{
5996 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5997 __A,
5998 _MM_FROUND_CUR_DIRECTION);
5999}
Craig Topper45db56c2016-07-21 07:38:39 +00006000#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006001
/* Calls __builtin_ia32_vcvtss2si32 on A (viewed as __v4sf) with rounding
 * argument R and yields an int.  The expansion is fully parenthesized so the
 * cast stays bound to the call at the use site. */
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006004
/* Calls __builtin_ia32_vcvtss2si32 on A (viewed as __v4sf) with rounding
 * argument R and yields an int; same expansion as _mm_cvt_roundss_si32.
 * Fully parenthesized so the cast stays bound to the call at the use site. */
#define _mm_cvt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006007
#ifdef __x86_64__
/* Only defined when __x86_64__ is set.  Both macros call
 * __builtin_ia32_vcvtss2si64 on A (viewed as __v4sf) with rounding argument R
 * and yield a long long.  Each expansion is fully parenthesized so the cast
 * stays bound to the call at the use site. */
#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006015
/* Calls __builtin_ia32_vcvtss2usi32 on A (viewed as __v4sf) with rounding
 * argument R and yields an unsigned int.  The expansion is fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_cvt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006018
6019static __inline__ unsigned __DEFAULT_FN_ATTRS
6020_mm_cvtss_u32 (__m128 __A)
6021{
6022 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6023 _MM_FROUND_CUR_DIRECTION);
6024}
6025
Craig Topper45db56c2016-07-21 07:38:39 +00006026#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00006027#define _mm_cvt_roundss_u64(A, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006028 (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
Craig Topperc6338672018-05-31 00:51:20 +00006029 (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006030
6031static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6032_mm_cvtss_u64 (__m128 __A)
6033{
6034 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6035 __A,
6036 _MM_FROUND_CUR_DIRECTION);
6037}
Craig Topper45db56c2016-07-21 07:38:39 +00006038#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006039
/* Calls __builtin_ia32_vcvttsd2si32 on A (viewed as __v2df) with int argument
 * R and yields an int.  The expansion is fully parenthesized so the cast
 * stays bound to the call at the use site. */
#define _mm_cvtt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006042
/* Calls __builtin_ia32_vcvttsd2si32 on A (viewed as __v2df) with int argument
 * R and yields an int; same expansion as _mm_cvtt_roundsd_i32.  Fully
 * parenthesized so the cast stays bound to the call at the use site. */
#define _mm_cvtt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006045
6046static __inline__ int __DEFAULT_FN_ATTRS
6047_mm_cvttsd_i32 (__m128d __A)
6048{
6049 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6050 _MM_FROUND_CUR_DIRECTION);
6051}
6052
Craig Topper45db56c2016-07-21 07:38:39 +00006053#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00006054#define _mm_cvtt_roundsd_si64(A, R) \
6055 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006056
Craig Topperc6338672018-05-31 00:51:20 +00006057#define _mm_cvtt_roundsd_i64(A, R) \
6058 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006059
6060static __inline__ long long __DEFAULT_FN_ATTRS
6061_mm_cvttsd_i64 (__m128d __A)
6062{
6063 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6064 _MM_FROUND_CUR_DIRECTION);
6065}
Craig Topper45db56c2016-07-21 07:38:39 +00006066#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006067
/* Forwards the double vector A and the immediate R to the vcvttsd2usi32
 * builtin and yields an unsigned int.  The expansion is wrapped in
 * parentheses so that sizeof and postfix operators apply to the whole
 * cast expression. */
#define _mm_cvtt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006070
6071static __inline__ unsigned __DEFAULT_FN_ATTRS
6072_mm_cvttsd_u32 (__m128d __A)
6073{
6074 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6075 _MM_FROUND_CUR_DIRECTION);
6076}
6077
Craig Topper45db56c2016-07-21 07:38:39 +00006078#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00006079#define _mm_cvtt_roundsd_u64(A, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006080 (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
Craig Topperc6338672018-05-31 00:51:20 +00006081 (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006082
6083static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6084_mm_cvttsd_u64 (__m128d __A)
6085{
6086 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6087 __A,
6088 _MM_FROUND_CUR_DIRECTION);
6089}
Craig Topper45db56c2016-07-21 07:38:39 +00006090#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006091
/* Both spellings forward the float vector A and the immediate R to the
 * vcvttss2si32 builtin and yield an int.  The expansions are wrapped in
 * parentheses so that sizeof and postfix operators apply to the whole
 * cast expression. */
#define _mm_cvtt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvtt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006097
6098static __inline__ int __DEFAULT_FN_ATTRS
6099_mm_cvttss_i32 (__m128 __A)
6100{
6101 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6102 _MM_FROUND_CUR_DIRECTION);
6103}
6104
Craig Topper45db56c2016-07-21 07:38:39 +00006105#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00006106#define _mm_cvtt_roundss_i64(A, R) \
6107 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006108
Craig Topperc6338672018-05-31 00:51:20 +00006109#define _mm_cvtt_roundss_si64(A, R) \
6110 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006111
6112static __inline__ long long __DEFAULT_FN_ATTRS
6113_mm_cvttss_i64 (__m128 __A)
6114{
6115 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6116 _MM_FROUND_CUR_DIRECTION);
6117}
Craig Topper45db56c2016-07-21 07:38:39 +00006118#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006119
/* Forwards the float vector A and the immediate R to the vcvttss2usi32
 * builtin and yields an unsigned int.  The expansion is wrapped in
 * parentheses so that sizeof and postfix operators apply to the whole
 * cast expression. */
#define _mm_cvtt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006122
6123static __inline__ unsigned __DEFAULT_FN_ATTRS
6124_mm_cvttss_u32 (__m128 __A)
6125{
6126 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6127 _MM_FROUND_CUR_DIRECTION);
6128}
6129
Craig Topper45db56c2016-07-21 07:38:39 +00006130#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00006131#define _mm_cvtt_roundss_u64(A, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006132 (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
Craig Topperc6338672018-05-31 00:51:20 +00006133 (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006134
6135static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6136_mm_cvttss_u64 (__m128 __A)
6137{
6138 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6139 __A,
6140 _MM_FROUND_CUR_DIRECTION);
6141}
Craig Topper45db56c2016-07-21 07:38:39 +00006142#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006143
/* _mm512_permute_pd passes X and the immediate C to the vpermilpd512
 * builtin.  The mask/maskz forms feed that result to selectpd_512 under
 * mask U, with W (mask) or an all-zero vector (maskz) as the other operand.
 * Every expansion is wrapped in parentheses so that sizeof and postfix
 * operators (e.g. a vector subscript) apply to the whole cast expression. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006156
/* _mm512_permute_ps passes X and the immediate C to the vpermilps512
 * builtin.  The mask/maskz forms feed that result to selectps_512 under
 * mask U, with W (mask) or an all-zero vector (maskz) as the other operand.
 * Every expansion is wrapped in parentheses so that sizeof and postfix
 * operators (e.g. a vector subscript) apply to the whole cast expression. */
#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006169
6170static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006171_mm512_permutevar_pd(__m512d __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006172{
Craig Topper678b07f2016-12-11 01:26:52 +00006173 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006174}
6175
6176static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006177_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006178{
Craig Topper678b07f2016-12-11 01:26:52 +00006179 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6180 (__v8df)_mm512_permutevar_pd(__A, __C),
6181 (__v8df)__W);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006182}
6183
6184static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006185_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006186{
Craig Topper678b07f2016-12-11 01:26:52 +00006187 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6188 (__v8df)_mm512_permutevar_pd(__A, __C),
6189 (__v8df)_mm512_setzero_pd());
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006190}
6191
6192static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006193_mm512_permutevar_ps(__m512 __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006194{
Craig Topper678b07f2016-12-11 01:26:52 +00006195 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006196}
6197
6198static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006199_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006200{
Craig Topper678b07f2016-12-11 01:26:52 +00006201 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6202 (__v16sf)_mm512_permutevar_ps(__A, __C),
6203 (__v16sf)__W);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006204}
6205
6206static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006207_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006208{
Craig Topper678b07f2016-12-11 01:26:52 +00006209 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6210 (__v16sf)_mm512_permutevar_ps(__A, __C),
6211 (__v16sf)_mm512_setzero_ps());
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006212}
6213
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006214static __inline __m512d __DEFAULT_FN_ATTRS
6215_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006216{
Craig Topper68a272d2018-05-29 03:26:38 +00006217 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6218 (__v8df)__B);
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006219}
6220
6221static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00006222_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006223{
Craig Topper68a272d2018-05-29 03:26:38 +00006224 return (__m512d)__builtin_ia32_selectpd_512(__U,
6225 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6226 (__v8df)__A);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006227}
6228
6229static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00006230_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
6231 __m512d __B)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006232{
Craig Topper68a272d2018-05-29 03:26:38 +00006233 return (__m512d)__builtin_ia32_selectpd_512(__U,
6234 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6235 (__v8df)(__m512d)__I);
6236}
6237
6238static __inline__ __m512d __DEFAULT_FN_ATTRS
6239_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
6240 __m512d __B)
6241{
6242 return (__m512d)__builtin_ia32_selectpd_512(__U,
6243 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6244 (__v8df)_mm512_setzero_pd());
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006245}
6246
6247static __inline __m512 __DEFAULT_FN_ATTRS
6248_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6249{
Craig Topper68a272d2018-05-29 03:26:38 +00006250 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6251 (__v16sf) __B);
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006252}
6253
6254static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00006255_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006256{
Craig Topper68a272d2018-05-29 03:26:38 +00006257 return (__m512)__builtin_ia32_selectps_512(__U,
6258 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6259 (__v16sf)__A);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006260}
6261
6262static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00006263_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006264{
Craig Topper68a272d2018-05-29 03:26:38 +00006265 return (__m512)__builtin_ia32_selectps_512(__U,
6266 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6267 (__v16sf)(__m512)__I);
6268}
6269
6270static __inline__ __m512 __DEFAULT_FN_ATTRS
6271_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
6272{
6273 return (__m512)__builtin_ia32_selectps_512(__U,
6274 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6275 (__v16sf)_mm512_setzero_ps());
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006276}
6277
Michael Zuckerman07525092016-04-11 10:22:07 +00006278
/* All three forms call the cvttpd2udq512_mask builtin on A with the
 * immediate R.  They differ only in the second and third operands:
 *   plain: an undefined 256-bit vector and an all-ones mask;
 *   mask:  W and mask U;
 *   maskz: an all-zero 256-bit vector and mask U.
 * Every expansion is wrapped in parentheses so that sizeof and postfix
 * operators apply to the whole cast expression. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))
Michael Zuckerman138fc5b2016-05-03 11:05:24 +00006293
6294static __inline__ __m256i __DEFAULT_FN_ATTRS
6295_mm512_cvttpd_epu32 (__m512d __A)
6296{
6297 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6298 (__v8si)
6299 _mm256_undefined_si256 (),
6300 (__mmask8) -1,
6301 _MM_FROUND_CUR_DIRECTION);
6302}
6303
6304static __inline__ __m256i __DEFAULT_FN_ATTRS
6305_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6306{
6307 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6308 (__v8si) __W,
6309 (__mmask8) __U,
6310 _MM_FROUND_CUR_DIRECTION);
6311}
6312
6313static __inline__ __m256i __DEFAULT_FN_ATTRS
6314_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6315{
6316 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6317 (__v8si)
6318 _mm256_setzero_si256 (),
6319 (__mmask8) __U,
6320 _MM_FROUND_CUR_DIRECTION);
6321}
Michael Zuckerman07525092016-04-11 10:22:07 +00006322
/* Scalar-double roundscale family.  Every macro calls the
 * rndscalesd_round_mask builtin with operands (A, B, third, mask, imm, R):
 *   - third operand: an all-zero vector, or W for the _mask_ forms;
 *   - mask: all ones for the unmasked forms, otherwise U;
 *   - last operand: R for the _round_ forms, otherwise
 *     _MM_FROUND_CUR_DIRECTION.
 * Every expansion is wrapped in parentheses so that sizeof and postfix
 * operators apply to the whole cast expression. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006364
/* Scalar-float roundscale family.  Every macro calls the
 * rndscaless_round_mask builtin with operands (A, B, third, mask, imm, R):
 *   - third operand: an all-zero vector, or W for the _mask_ forms;
 *   - mask: all ones for the unmasked forms, otherwise U;
 *   - last operand: R for the _round_ forms, otherwise
 *     _MM_FROUND_CUR_DIRECTION.
 * Every expansion is wrapped in parentheses so that sizeof and postfix
 * operators apply to the whole cast expression. */
#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006406
/* All three forms call the scalefpd512_mask builtin on (A, B) with the
 * immediate R.  They differ only in the third operand and mask:
 *   plain: an undefined vector and an all-ones mask;
 *   mask:  W and mask U;
 *   maskz: an all-zero vector and mask U.
 * Every expansion is wrapped in parentheses so that sizeof and postfix
 * operators apply to the whole cast expression. */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006424
6425static __inline__ __m512d __DEFAULT_FN_ATTRS
6426_mm512_scalef_pd (__m512d __A, __m512d __B)
6427{
6428 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6429 (__v8df) __B,
6430 (__v8df)
6431 _mm512_undefined_pd (),
6432 (__mmask8) -1,
6433 _MM_FROUND_CUR_DIRECTION);
6434}
6435
6436static __inline__ __m512d __DEFAULT_FN_ATTRS
6437_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6438{
6439 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6440 (__v8df) __B,
6441 (__v8df) __W,
6442 (__mmask8) __U,
6443 _MM_FROUND_CUR_DIRECTION);
6444}
6445
6446static __inline__ __m512d __DEFAULT_FN_ATTRS
6447_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6448{
6449 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6450 (__v8df) __B,
6451 (__v8df)
6452 _mm512_setzero_pd (),
6453 (__mmask8) __U,
6454 _MM_FROUND_CUR_DIRECTION);
6455}
6456
/* All three forms call the scalefps512_mask builtin on (A, B) with the
 * immediate R.  They differ only in the third operand and mask:
 *   plain: an undefined vector and an all-ones mask;
 *   mask:  W and mask U;
 *   maskz: an all-zero vector and mask U.
 * Every expansion is wrapped in parentheses so that sizeof and postfix
 * operators apply to the whole cast expression. */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006474
6475static __inline__ __m512 __DEFAULT_FN_ATTRS
6476_mm512_scalef_ps (__m512 __A, __m512 __B)
6477{
6478 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6479 (__v16sf) __B,
6480 (__v16sf)
6481 _mm512_undefined_ps (),
6482 (__mmask16) -1,
6483 _MM_FROUND_CUR_DIRECTION);
6484}
6485
6486static __inline__ __m512 __DEFAULT_FN_ATTRS
6487_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6488{
6489 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6490 (__v16sf) __B,
6491 (__v16sf) __W,
6492 (__mmask16) __U,
6493 _MM_FROUND_CUR_DIRECTION);
6494}
6495
6496static __inline__ __m512 __DEFAULT_FN_ATTRS
6497_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6498{
6499 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6500 (__v16sf) __B,
6501 (__v16sf)
6502 _mm512_setzero_ps (),
6503 (__mmask16) __U,
6504 _MM_FROUND_CUR_DIRECTION);
6505}
6506
Craig Topperc6338672018-05-31 00:51:20 +00006507#define _mm_scalef_round_sd(A, B, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006508 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6509 (__v2df)(__m128d)(B), \
6510 (__v2df)_mm_setzero_pd(), \
Craig Topperc6338672018-05-31 00:51:20 +00006511 (__mmask8)-1, (int)(R))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006512
6513static __inline__ __m128d __DEFAULT_FN_ATTRS
6514_mm_scalef_sd (__m128d __A, __m128d __B)
6515{
6516 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6517 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6518 (__mmask8) -1,
6519 _MM_FROUND_CUR_DIRECTION);
6520}
6521
6522static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006523_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006524{
6525 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6526 (__v2df) __B,
6527 (__v2df) __W,
6528 (__mmask8) __U,
6529 _MM_FROUND_CUR_DIRECTION);
6530}
6531
Craig Topperc6338672018-05-31 00:51:20 +00006532#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006533 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6534 (__v2df)(__m128d)(B), \
6535 (__v2df)(__m128d)(W), \
Craig Topperc6338672018-05-31 00:51:20 +00006536 (__mmask8)(U), (int)(R))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006537
6538static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006539_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006540{
6541 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6542 (__v2df) __B,
6543 (__v2df) _mm_setzero_pd (),
6544 (__mmask8) __U,
6545 _MM_FROUND_CUR_DIRECTION);
6546}
6547
Craig Topperc6338672018-05-31 00:51:20 +00006548#define _mm_maskz_scalef_round_sd(U, A, B, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006549 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6550 (__v2df)(__m128d)(B), \
6551 (__v2df)_mm_setzero_pd(), \
Craig Topperc6338672018-05-31 00:51:20 +00006552 (__mmask8)(U), (int)(R))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006553
Craig Topperc6338672018-05-31 00:51:20 +00006554#define _mm_scalef_round_ss(A, B, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006555 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6556 (__v4sf)(__m128)(B), \
6557 (__v4sf)_mm_setzero_ps(), \
Craig Topperc6338672018-05-31 00:51:20 +00006558 (__mmask8)-1, (int)(R))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006559
6560static __inline__ __m128 __DEFAULT_FN_ATTRS
6561_mm_scalef_ss (__m128 __A, __m128 __B)
6562{
6563 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6564 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6565 (__mmask8) -1,
6566 _MM_FROUND_CUR_DIRECTION);
6567}
6568
6569static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006570_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006571{
6572 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6573 (__v4sf) __B,
6574 (__v4sf) __W,
6575 (__mmask8) __U,
6576 _MM_FROUND_CUR_DIRECTION);
6577}
6578
Craig Topperc6338672018-05-31 00:51:20 +00006579#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006580 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6581 (__v4sf)(__m128)(B), \
6582 (__v4sf)(__m128)(W), \
Craig Topperc6338672018-05-31 00:51:20 +00006583 (__mmask8)(U), (int)(R))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006584
6585static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006586_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006587{
6588 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6589 (__v4sf) __B,
6590 (__v4sf) _mm_setzero_ps (),
6591 (__mmask8) __U,
6592 _MM_FROUND_CUR_DIRECTION);
6593}
6594
Craig Topperc6338672018-05-31 00:51:20 +00006595#define _mm_maskz_scalef_round_ss(U, A, B, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006596 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6597 (__v4sf)(__m128)(B), \
6598 (__v4sf)_mm_setzero_ps(), \
6599 (__mmask8)(U), \
Craig Topper3e720a32018-07-07 22:03:16 +00006600 (int)(R))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006601
Craig Topper1a441932016-11-12 07:16:59 +00006602static __inline__ __m512i __DEFAULT_FN_ATTRS
6603_mm512_srai_epi32(__m512i __A, int __B)
6604{
6605 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6606}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006607
Craig Topper1a441932016-11-12 07:16:59 +00006608static __inline__ __m512i __DEFAULT_FN_ATTRS
6609_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
6610{
Craig Topper0a485d12018-07-07 22:03:19 +00006611 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6612 (__v16si)_mm512_srai_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006613 (__v16si)__W);
6614}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006615
Craig Topper1a441932016-11-12 07:16:59 +00006616static __inline__ __m512i __DEFAULT_FN_ATTRS
6617_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
Craig Topper0a485d12018-07-07 22:03:19 +00006618 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6619 (__v16si)_mm512_srai_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006620 (__v16si)_mm512_setzero_si512());
6621}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006622
Craig Topper1a441932016-11-12 07:16:59 +00006623static __inline__ __m512i __DEFAULT_FN_ATTRS
6624_mm512_srai_epi64(__m512i __A, int __B)
6625{
6626 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6627}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006628
Craig Topper1a441932016-11-12 07:16:59 +00006629static __inline__ __m512i __DEFAULT_FN_ATTRS
6630_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
6631{
Craig Topper0a485d12018-07-07 22:03:19 +00006632 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6633 (__v8di)_mm512_srai_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006634 (__v8di)__W);
6635}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006636
Craig Topper1a441932016-11-12 07:16:59 +00006637static __inline__ __m512i __DEFAULT_FN_ATTRS
6638_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
6639{
Craig Topper0a485d12018-07-07 22:03:19 +00006640 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6641 (__v8di)_mm512_srai_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006642 (__v8di)_mm512_setzero_si512());
6643}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006644
/* _mm512_shuffle_f32x4 passes A, B and the immediate imm to the shuf_f32x4
 * builtin.  The mask/maskz forms feed that result to selectps_512 under
 * mask U, with W (mask) or an all-zero vector (maskz) as the other operand.
 * Every expansion is wrapped in parentheses so that sizeof and postfix
 * operators (e.g. a vector subscript) apply to the whole cast expression. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                              (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                              (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                              (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                              (__v16sf)_mm512_setzero_ps()))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006658
/* _mm512_shuffle_f64x2 passes A, B and the immediate imm to the shuf_f64x2
 * builtin.  The mask/maskz forms feed that result to selectpd_512 under
 * mask U, with W (mask) or an all-zero vector (maskz) as the other operand.
 * Every expansion is wrapped in parentheses so that sizeof and postfix
 * operators (e.g. a vector subscript) apply to the whole cast expression. */
#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                               (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                               (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                               (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                               (__v8df)_mm512_setzero_pd()))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006672
/* Forwards A and B (as __v16si) and the immediate selector imm to
   __builtin_ia32_shuf_i32x4 and yields the result as __m512i. The
   expansion is parenthesized as a whole so that a postfix operator at the
   call site cannot bind to the builtin call ahead of the result cast. */
#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006676
/* Masked form of _mm512_shuffle_i32x4: the shuffle of A and B (selector
   imm) and the vector W are combined by __builtin_ia32_selectd_512 under
   the 16-bit mask U. The expansion is parenthesized as a whole so it
   composes safely with postfix operators. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006681
/* Zero-masked form of _mm512_shuffle_i32x4: the shuffle of A and B
   (selector imm) and an all-zero vector are combined by
   __builtin_ia32_selectd_512 under the 16-bit mask U. The expansion is
   parenthesized as a whole so it composes safely with postfix operators. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006686
/* Forwards A and B (as __v8di) and the immediate selector imm to
   __builtin_ia32_shuf_i64x2 and yields the result as __m512i. The
   expansion is parenthesized as a whole so that a postfix operator at the
   call site cannot bind to the builtin call ahead of the result cast. */
#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006690
/* Masked form of _mm512_shuffle_i64x2: the shuffle of A and B (selector
   imm) and the vector W are combined by __builtin_ia32_selectq_512 under
   the 8-bit mask U. The expansion is parenthesized as a whole so it
   composes safely with postfix operators. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                (__v8di)(__m512i)(W)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006695
/* Zero-masked form of _mm512_shuffle_i64x2: the shuffle of A and B
   (selector imm) and an all-zero vector are combined by
   __builtin_ia32_selectq_512 under the 8-bit mask U. The expansion is
   parenthesized as a whole so it composes safely with postfix operators. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                (__v8di)_mm512_setzero_si512()))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006700
/* Forwards A and B (as __v8df) and the immediate selector M to
   __builtin_ia32_shufpd512 and yields the result as __m512d. The
   expansion is parenthesized as a whole so that a postfix operator at the
   call site cannot bind to the builtin call ahead of the result cast. */
#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006704
/* Masked form of _mm512_shuffle_pd: the shuffle of A and B (selector M)
   and the vector W are combined by __builtin_ia32_selectpd_512 under the
   8-bit mask U. The expansion is parenthesized as a whole so it composes
   safely with postfix operators. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                    (__v8df)(__m512d)(W)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006709
/* Zero-masked form of _mm512_shuffle_pd: the shuffle of A and B (selector
   M) and an all-zero vector are combined by __builtin_ia32_selectpd_512
   under the 8-bit mask U. The expansion is parenthesized as a whole so it
   composes safely with postfix operators. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                    (__v8df)_mm512_setzero_pd()))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006714
/* Forwards A and B (as __v16sf) and the immediate selector M to
   __builtin_ia32_shufps512 and yields the result as __m512. The expansion
   is parenthesized as a whole so that a postfix operator at the call site
   cannot bind to the builtin call ahead of the result cast. */
#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006718
/* Masked form of _mm512_shuffle_ps: the shuffle of A and B (selector M)
   and the vector W are combined by __builtin_ia32_selectps_512 under the
   16-bit mask U. The expansion is parenthesized as a whole so it composes
   safely with postfix operators. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                   (__v16sf)(__m512)(W)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006723
/* Zero-masked form of _mm512_shuffle_ps: the shuffle of A and B (selector
   M) and an all-zero vector are combined by __builtin_ia32_selectps_512
   under the 16-bit mask U. The expansion is parenthesized as a whole so it
   composes safely with postfix operators. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                   (__v16sf)_mm512_setzero_ps()))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006728
/* Unmasked call of __builtin_ia32_sqrtsd_round_mask on A and B with the
   rounding operand R: the pass-through operand is a zero vector and the
   mask is all ones. The expansion is parenthesized as a whole so that a
   postfix operator at the call site cannot bind to the builtin call ahead
   of the result cast. */
#define _mm_sqrt_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006734
6735static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006736_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006737{
Asaf Badouhf9cdb8d2016-07-05 11:36:21 +00006738 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6739 (__v2df) __B,
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006740 (__v2df) __W,
6741 (__mmask8) __U,
6742 _MM_FROUND_CUR_DIRECTION);
6743}
6744
/* Masked call of __builtin_ia32_sqrtsd_round_mask on A and B with the
   rounding operand R: W is the pass-through operand and U the mask. The
   expansion is parenthesized as a whole so it composes safely with
   postfix operators. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006750
6751static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006752_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006753{
Asaf Badouhf9cdb8d2016-07-05 11:36:21 +00006754 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6755 (__v2df) __B,
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006756 (__v2df) _mm_setzero_pd (),
6757 (__mmask8) __U,
6758 _MM_FROUND_CUR_DIRECTION);
6759}
6760
/* Zero-masked call of __builtin_ia32_sqrtsd_round_mask on A and B with the
   rounding operand R: a zero vector is the pass-through operand and U the
   mask. The expansion is parenthesized as a whole so it composes safely
   with postfix operators. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006766
/* Unmasked call of __builtin_ia32_sqrtss_round_mask on A and B with the
   rounding operand R: the pass-through operand is a zero vector and the
   mask is all ones. The expansion is parenthesized as a whole so that a
   postfix operator at the call site cannot bind to the builtin call ahead
   of the result cast. */
#define _mm_sqrt_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006772
6773static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006774_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006775{
Asaf Badouhf9cdb8d2016-07-05 11:36:21 +00006776 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6777 (__v4sf) __B,
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006778 (__v4sf) __W,
6779 (__mmask8) __U,
6780 _MM_FROUND_CUR_DIRECTION);
6781}
6782
/* Masked call of __builtin_ia32_sqrtss_round_mask on A and B with the
   rounding operand R: W is the pass-through operand and U the mask. The
   expansion is parenthesized as a whole so it composes safely with
   postfix operators. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)(__m128)(W), (__mmask8)(U), \
                                            (int)(R)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006788
6789static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006790_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006791{
6792 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6793 (__v4sf) __B,
6794 (__v4sf) _mm_setzero_ps (),
6795 (__mmask8) __U,
6796 _MM_FROUND_CUR_DIRECTION);
6797}
6798
/* Zero-masked call of __builtin_ia32_sqrtss_round_mask on A and B with the
   rounding operand R: a zero vector is the pass-through operand and U the
   mask. The expansion is parenthesized as a whole so it composes safely
   with postfix operators. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006804
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006805static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00006806_mm512_broadcast_f32x4(__m128 __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006807{
Craig Topper367c86d2017-01-18 02:17:10 +00006808 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6809 0, 1, 2, 3, 0, 1, 2, 3,
6810 0, 1, 2, 3, 0, 1, 2, 3);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006811}
6812
6813static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00006814_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006815{
Craig Topper367c86d2017-01-18 02:17:10 +00006816 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6817 (__v16sf)_mm512_broadcast_f32x4(__A),
6818 (__v16sf)__O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006819}
6820
6821static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00006822_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006823{
Craig Topper367c86d2017-01-18 02:17:10 +00006824 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6825 (__v16sf)_mm512_broadcast_f32x4(__A),
6826 (__v16sf)_mm512_setzero_ps());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006827}
6828
6829static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00006830_mm512_broadcast_f64x4(__m256d __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006831{
Craig Topper367c86d2017-01-18 02:17:10 +00006832 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6833 0, 1, 2, 3, 0, 1, 2, 3);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006834}
6835
6836static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00006837_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006838{
Craig Topper367c86d2017-01-18 02:17:10 +00006839 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6840 (__v8df)_mm512_broadcast_f64x4(__A),
6841 (__v8df)__O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006842}
6843
6844static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00006845_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006846{
Craig Topper367c86d2017-01-18 02:17:10 +00006847 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6848 (__v8df)_mm512_broadcast_f64x4(__A),
6849 (__v8df)_mm512_setzero_pd());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006850}
6851
6852static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00006853_mm512_broadcast_i32x4(__m128i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006854{
Craig Topper367c86d2017-01-18 02:17:10 +00006855 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6856 0, 1, 2, 3, 0, 1, 2, 3,
6857 0, 1, 2, 3, 0, 1, 2, 3);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006858}
6859
6860static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00006861_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006862{
Craig Topper367c86d2017-01-18 02:17:10 +00006863 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6864 (__v16si)_mm512_broadcast_i32x4(__A),
6865 (__v16si)__O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006866}
6867
6868static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00006869_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006870{
Craig Topper367c86d2017-01-18 02:17:10 +00006871 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6872 (__v16si)_mm512_broadcast_i32x4(__A),
6873 (__v16si)_mm512_setzero_si512());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006874}
6875
6876static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00006877_mm512_broadcast_i64x4(__m256i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006878{
Craig Topper367c86d2017-01-18 02:17:10 +00006879 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6880 0, 1, 2, 3, 0, 1, 2, 3);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006881}
6882
6883static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00006884_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006885{
Craig Topper367c86d2017-01-18 02:17:10 +00006886 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6887 (__v8di)_mm512_broadcast_i64x4(__A),
6888 (__v8di)__O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006889}
6890
6891static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00006892_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006893{
Craig Topper367c86d2017-01-18 02:17:10 +00006894 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6895 (__v8di)_mm512_broadcast_i64x4(__A),
6896 (__v8di)_mm512_setzero_si512());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006897}
6898
6899static __inline__ __m512d __DEFAULT_FN_ATTRS
6900_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
6901{
Simon Pilgrimf5a88372016-07-05 12:59:33 +00006902 return (__m512d)__builtin_ia32_selectpd_512(__M,
6903 (__v8df) _mm512_broadcastsd_pd(__A),
6904 (__v8df) __O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006905}
6906
6907static __inline__ __m512d __DEFAULT_FN_ATTRS
6908_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6909{
Simon Pilgrimf5a88372016-07-05 12:59:33 +00006910 return (__m512d)__builtin_ia32_selectpd_512(__M,
6911 (__v8df) _mm512_broadcastsd_pd(__A),
6912 (__v8df) _mm512_setzero_pd());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006913}
6914
6915static __inline__ __m512 __DEFAULT_FN_ATTRS
6916_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
6917{
Simon Pilgrimf5a88372016-07-05 12:59:33 +00006918 return (__m512)__builtin_ia32_selectps_512(__M,
6919 (__v16sf) _mm512_broadcastss_ps(__A),
6920 (__v16sf) __O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006921}
6922
6923static __inline__ __m512 __DEFAULT_FN_ATTRS
6924_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
6925{
Simon Pilgrimf5a88372016-07-05 12:59:33 +00006926 return (__m512)__builtin_ia32_selectps_512(__M,
6927 (__v16sf) _mm512_broadcastss_ps(__A),
6928 (__v16sf) _mm512_setzero_ps());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00006929}
6930
Michael Zuckermane1680612016-04-13 15:02:04 +00006931static __inline__ __m128i __DEFAULT_FN_ATTRS
6932_mm512_cvtsepi32_epi8 (__m512i __A)
6933{
6934 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6935 (__v16qi) _mm_undefined_si128 (),
6936 (__mmask16) -1);
6937}
6938
6939static __inline__ __m128i __DEFAULT_FN_ATTRS
6940_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6941{
6942 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6943 (__v16qi) __O, __M);
6944}
6945
6946static __inline__ __m128i __DEFAULT_FN_ATTRS
6947_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
6948{
6949 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6950 (__v16qi) _mm_setzero_si128 (),
6951 __M);
6952}
6953
6954static __inline__ void __DEFAULT_FN_ATTRS
6955_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6956{
6957 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6958}
6959
6960static __inline__ __m256i __DEFAULT_FN_ATTRS
6961_mm512_cvtsepi32_epi16 (__m512i __A)
6962{
6963 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6964 (__v16hi) _mm256_undefined_si256 (),
6965 (__mmask16) -1);
6966}
6967
6968static __inline__ __m256i __DEFAULT_FN_ATTRS
6969_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6970{
6971 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6972 (__v16hi) __O, __M);
6973}
6974
6975static __inline__ __m256i __DEFAULT_FN_ATTRS
6976_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
6977{
6978 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6979 (__v16hi) _mm256_setzero_si256 (),
6980 __M);
6981}
6982
6983static __inline__ void __DEFAULT_FN_ATTRS
6984_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
6985{
6986 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6987}
6988
6989static __inline__ __m128i __DEFAULT_FN_ATTRS
6990_mm512_cvtsepi64_epi8 (__m512i __A)
6991{
6992 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6993 (__v16qi) _mm_undefined_si128 (),
6994 (__mmask8) -1);
6995}
6996
6997static __inline__ __m128i __DEFAULT_FN_ATTRS
6998_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6999{
7000 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7001 (__v16qi) __O, __M);
7002}
7003
7004static __inline__ __m128i __DEFAULT_FN_ATTRS
7005_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7006{
7007 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7008 (__v16qi) _mm_setzero_si128 (),
7009 __M);
7010}
7011
7012static __inline__ void __DEFAULT_FN_ATTRS
7013_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7014{
7015 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7016}
7017
7018static __inline__ __m256i __DEFAULT_FN_ATTRS
7019_mm512_cvtsepi64_epi32 (__m512i __A)
7020{
Michael Zuckermane1680612016-04-13 15:02:04 +00007021 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7022 (__v8si) _mm256_undefined_si256 (),
7023 (__mmask8) -1);
7024}
7025
7026static __inline__ __m256i __DEFAULT_FN_ATTRS
7027_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7028{
7029 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7030 (__v8si) __O, __M);
7031}
7032
7033static __inline__ __m256i __DEFAULT_FN_ATTRS
7034_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7035{
7036 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7037 (__v8si) _mm256_setzero_si256 (),
7038 __M);
7039}
7040
7041static __inline__ void __DEFAULT_FN_ATTRS
7042_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7043{
7044 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7045}
7046
7047static __inline__ __m128i __DEFAULT_FN_ATTRS
7048_mm512_cvtsepi64_epi16 (__m512i __A)
7049{
7050 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7051 (__v8hi) _mm_undefined_si128 (),
7052 (__mmask8) -1);
7053}
7054
7055static __inline__ __m128i __DEFAULT_FN_ATTRS
7056_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7057{
7058 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7059 (__v8hi) __O, __M);
7060}
7061
7062static __inline__ __m128i __DEFAULT_FN_ATTRS
7063_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7064{
7065 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7066 (__v8hi) _mm_setzero_si128 (),
7067 __M);
7068}
7069
7070static __inline__ void __DEFAULT_FN_ATTRS
7071_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7072{
7073 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7074}
7075
Michael Zuckermand8715312016-04-14 06:48:09 +00007076static __inline__ __m128i __DEFAULT_FN_ATTRS
7077_mm512_cvtusepi32_epi8 (__m512i __A)
7078{
7079 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7080 (__v16qi) _mm_undefined_si128 (),
7081 (__mmask16) -1);
7082}
7083
7084static __inline__ __m128i __DEFAULT_FN_ATTRS
7085_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7086{
7087 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7088 (__v16qi) __O,
7089 __M);
7090}
7091
7092static __inline__ __m128i __DEFAULT_FN_ATTRS
7093_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7094{
7095 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7096 (__v16qi) _mm_setzero_si128 (),
7097 __M);
7098}
7099
7100static __inline__ void __DEFAULT_FN_ATTRS
7101_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7102{
7103 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7104}
7105
7106static __inline__ __m256i __DEFAULT_FN_ATTRS
7107_mm512_cvtusepi32_epi16 (__m512i __A)
7108{
7109 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7110 (__v16hi) _mm256_undefined_si256 (),
7111 (__mmask16) -1);
7112}
7113
7114static __inline__ __m256i __DEFAULT_FN_ATTRS
7115_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7116{
7117 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7118 (__v16hi) __O,
7119 __M);
7120}
7121
7122static __inline__ __m256i __DEFAULT_FN_ATTRS
7123_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7124{
7125 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7126 (__v16hi) _mm256_setzero_si256 (),
7127 __M);
7128}
7129
7130static __inline__ void __DEFAULT_FN_ATTRS
7131_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7132{
7133 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7134}
7135
7136static __inline__ __m128i __DEFAULT_FN_ATTRS
7137_mm512_cvtusepi64_epi8 (__m512i __A)
7138{
7139 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7140 (__v16qi) _mm_undefined_si128 (),
7141 (__mmask8) -1);
7142}
7143
7144static __inline__ __m128i __DEFAULT_FN_ATTRS
7145_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7146{
7147 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7148 (__v16qi) __O,
7149 __M);
7150}
7151
7152static __inline__ __m128i __DEFAULT_FN_ATTRS
7153_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7154{
7155 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7156 (__v16qi) _mm_setzero_si128 (),
7157 __M);
7158}
7159
7160static __inline__ void __DEFAULT_FN_ATTRS
7161_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7162{
7163 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7164}
7165
7166static __inline__ __m256i __DEFAULT_FN_ATTRS
7167_mm512_cvtusepi64_epi32 (__m512i __A)
7168{
7169 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7170 (__v8si) _mm256_undefined_si256 (),
7171 (__mmask8) -1);
7172}
7173
7174static __inline__ __m256i __DEFAULT_FN_ATTRS
7175_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7176{
7177 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7178 (__v8si) __O, __M);
7179}
7180
7181static __inline__ __m256i __DEFAULT_FN_ATTRS
7182_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7183{
7184 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7185 (__v8si) _mm256_setzero_si256 (),
7186 __M);
7187}
7188
7189static __inline__ void __DEFAULT_FN_ATTRS
7190_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7191{
7192 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7193}
7194
7195static __inline__ __m128i __DEFAULT_FN_ATTRS
7196_mm512_cvtusepi64_epi16 (__m512i __A)
7197{
7198 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7199 (__v8hi) _mm_undefined_si128 (),
7200 (__mmask8) -1);
7201}
7202
7203static __inline__ __m128i __DEFAULT_FN_ATTRS
7204_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7205{
7206 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7207 (__v8hi) __O, __M);
7208}
7209
7210static __inline__ __m128i __DEFAULT_FN_ATTRS
7211_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7212{
7213 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7214 (__v8hi) _mm_setzero_si128 (),
7215 __M);
7216}
7217
7218static __inline__ void __DEFAULT_FN_ATTRS
7219_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7220{
7221 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7222}
7223
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007224static __inline__ __m128i __DEFAULT_FN_ATTRS
7225_mm512_cvtepi32_epi8 (__m512i __A)
7226{
Craig Topper88097d92018-06-08 21:50:08 +00007227 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7228 (__v16qi) _mm_undefined_si128 (),
7229 (__mmask16) -1);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007230}
7231
7232static __inline__ __m128i __DEFAULT_FN_ATTRS
7233_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7234{
Craig Topper9d146bb2018-05-15 03:17:52 +00007235 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7236 (__v16qi) __O, __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007237}
7238
7239static __inline__ __m128i __DEFAULT_FN_ATTRS
7240_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7241{
Craig Topper9d146bb2018-05-15 03:17:52 +00007242 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7243 (__v16qi) _mm_setzero_si128 (),
7244 __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007245}
7246
7247static __inline__ void __DEFAULT_FN_ATTRS
7248_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7249{
7250 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7251}
7252
7253static __inline__ __m256i __DEFAULT_FN_ATTRS
7254_mm512_cvtepi32_epi16 (__m512i __A)
7255{
Craig Topper88097d92018-06-08 21:50:08 +00007256 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7257 (__v16hi) _mm256_undefined_si256 (),
7258 (__mmask16) -1);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007259}
7260
7261static __inline__ __m256i __DEFAULT_FN_ATTRS
7262_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7263{
Craig Topper9d146bb2018-05-15 03:17:52 +00007264 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7265 (__v16hi) __O, __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007266}
7267
7268static __inline__ __m256i __DEFAULT_FN_ATTRS
7269_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7270{
Craig Topper9d146bb2018-05-15 03:17:52 +00007271 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7272 (__v16hi) _mm256_setzero_si256 (),
7273 __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007274}
7275
7276static __inline__ void __DEFAULT_FN_ATTRS
7277_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7278{
7279 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7280}
7281
7282static __inline__ __m128i __DEFAULT_FN_ATTRS
7283_mm512_cvtepi64_epi8 (__m512i __A)
7284{
7285 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7286 (__v16qi) _mm_undefined_si128 (),
7287 (__mmask8) -1);
7288}
7289
7290static __inline__ __m128i __DEFAULT_FN_ATTRS
7291_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7292{
7293 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7294 (__v16qi) __O, __M);
7295}
7296
7297static __inline__ __m128i __DEFAULT_FN_ATTRS
7298_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7299{
7300 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7301 (__v16qi) _mm_setzero_si128 (),
7302 __M);
7303}
7304
7305static __inline__ void __DEFAULT_FN_ATTRS
7306_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7307{
7308 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7309}
7310
7311static __inline__ __m256i __DEFAULT_FN_ATTRS
7312_mm512_cvtepi64_epi32 (__m512i __A)
7313{
Craig Topper88097d92018-06-08 21:50:08 +00007314 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7315 (__v8si) _mm256_undefined_si256 (),
7316 (__mmask8) -1);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007317}
7318
7319static __inline__ __m256i __DEFAULT_FN_ATTRS
7320_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7321{
Craig Topper88097d92018-06-08 21:50:08 +00007322 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7323 (__v8si) __O, __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007324}
7325
7326static __inline__ __m256i __DEFAULT_FN_ATTRS
7327_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7328{
Craig Topper88097d92018-06-08 21:50:08 +00007329 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7330 (__v8si) _mm256_setzero_si256 (),
7331 __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007332}
7333
7334static __inline__ void __DEFAULT_FN_ATTRS
7335_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7336{
7337 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7338}
7339
7340static __inline__ __m128i __DEFAULT_FN_ATTRS
7341_mm512_cvtepi64_epi16 (__m512i __A)
7342{
Craig Topper88097d92018-06-08 21:50:08 +00007343 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7344 (__v8hi) _mm_undefined_si128 (),
7345 (__mmask8) -1);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007346}
7347
7348static __inline__ __m128i __DEFAULT_FN_ATTRS
7349_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7350{
Craig Topper9d146bb2018-05-15 03:17:52 +00007351 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7352 (__v8hi) __O, __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007353}
7354
7355static __inline__ __m128i __DEFAULT_FN_ATTRS
7356_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7357{
Craig Topper9d146bb2018-05-15 03:17:52 +00007358 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7359 (__v8hi) _mm_setzero_si128 (),
7360 __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007361}
7362
7363static __inline__ void __DEFAULT_FN_ATTRS
7364_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7365{
7366 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7367}
7368
/* Unmasked call of __builtin_ia32_extracti32x4_mask on A with the
   immediate selector imm: the pass-through operand is an undefined vector
   and the mask is all ones. The expansion is parenthesized as a whole so
   that a postfix operator at the call site cannot bind to the builtin call
   ahead of the result cast. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_undefined_si128(), \
                                             (__mmask8)-1))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007373
/* Masked call of __builtin_ia32_extracti32x4_mask on A with the immediate
   selector imm: W is the pass-through operand and U the mask. The
   expansion is parenthesized as a whole so it composes safely with
   postfix operators. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007378
/* Zero-masked call of __builtin_ia32_extracti32x4_mask on A with the
   immediate selector imm: a zero vector is the pass-through operand and U
   the mask. The expansion is parenthesized as a whole so it composes
   safely with postfix operators. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007383
/* Unmasked call of __builtin_ia32_extracti64x4_mask on A with the
   immediate selector imm: the pass-through operand is an undefined vector
   and the mask is all ones. The expansion is parenthesized as a whole so
   that a postfix operator at the call site cannot bind to the builtin call
   ahead of the result cast. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_undefined_si256(), \
                                             (__mmask8)-1))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007388
/* Masked call of __builtin_ia32_extracti64x4_mask on A with the immediate
   selector imm: W is the pass-through operand and U the mask. The
   expansion is parenthesized as a whole so it composes safely with
   postfix operators. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007393
Craig Topperc6338672018-05-31 00:51:20 +00007394#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
Craig Topper5f50f3382018-06-08 21:50:07 +00007395 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7396 (__v4di)_mm256_setzero_si256(), \
7397 (__mmask8)(U))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007398
/* Insert macros built on __builtin_ia32_insertf64x4, which receives A as
 * __v8df, B as __v4df and imm as int.  The mask/maskz forms hand the
 * unmasked insert result to __builtin_ia32_selectpd_512 together with the
 * mask U and, as the alternative operand, W (mask) or a zero vector (maskz).
 * Every expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007412
/* Insert macros built on __builtin_ia32_inserti64x4, which receives A as
 * __v8di, B as __v4di and imm as int.  The mask/maskz forms hand the
 * unmasked insert result to __builtin_ia32_selectq_512 together with the
 * mask U and, as the alternative operand, W (mask) or a zero vector (maskz).
 * Every expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007426
/* Insert macros built on __builtin_ia32_insertf32x4, which receives A as
 * __v16sf, B as __v4sf and imm as int.  The mask/maskz forms hand the
 * unmasked insert result to __builtin_ia32_selectps_512 together with the
 * 16-bit mask U and, as the alternative operand, W (mask) or a zero vector
 * (maskz).
 * Every expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()))
Craig Topperdca1f232016-05-15 21:26:20 +00007440
/* Insert macros built on __builtin_ia32_inserti32x4, which receives A as
 * __v16si, B as __v4si and imm as int.  The mask/maskz forms hand the
 * unmasked insert result to __builtin_ia32_selectd_512 together with the
 * 16-bit mask U and, as the alternative operand, W (mask) or a zero vector
 * (maskz).
 * Every expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))
Craig Topperdca1f232016-05-15 21:26:20 +00007454
/* getmant (double) macros with an explicit rounding argument R, built on
 * __builtin_ia32_getmantpd512_mask.  B and C are packed into one int
 * immediate as ((C) << 2) | (B).  Passthrough/mask operands:
 *   - unmasked: undefined passthrough, all-ones mask;
 *   - mask:     W as passthrough, U as mask;
 *   - maskz:    zero passthrough, U as mask.
 * Every expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007472
/* getmant (double) macros that pass _MM_FROUND_CUR_DIRECTION as the rounding
 * argument of __builtin_ia32_getmantpd512_mask.  B and C are packed into one
 * int immediate as ((C) << 2) | (B).  Passthrough/mask operands:
 *   - unmasked: zero passthrough, all-ones mask;
 *   - mask:     W as passthrough, U as mask;
 *   - maskz:    zero passthrough, U as mask.
 * Every expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007493
/* getmant (float) macros with an explicit rounding argument R, built on
 * __builtin_ia32_getmantps512_mask.  B and C are packed into one int
 * immediate as ((C) << 2) | (B).  Passthrough/mask operands:
 *   - unmasked: undefined passthrough, all-ones mask;
 *   - mask:     W as passthrough, U as mask;
 *   - maskz:    zero passthrough, U as mask.
 * Every expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007511
/* getmant (float) macros that pass _MM_FROUND_CUR_DIRECTION as the rounding
 * argument of __builtin_ia32_getmantps512_mask.  B and C are packed into one
 * int immediate as ((C) << 2) | (B).  Passthrough/mask operands:
 *   - unmasked: undefined passthrough, all-ones mask;
 *   - mask:     W as passthrough, U as mask;
 *   - maskz:    zero passthrough, U as mask.
 * Every expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007532
/* getexp (double) macros with an explicit rounding argument R, built on
 * __builtin_ia32_getexppd512_mask.  Passthrough/mask operands:
 *   - unmasked: undefined passthrough, all-ones mask;
 *   - mask:     W as passthrough, U as mask;
 *   - maskz:    zero passthrough, U as mask.
 * Every expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007547
7548static __inline__ __m512d __DEFAULT_FN_ATTRS
7549_mm512_getexp_pd (__m512d __A)
7550{
7551 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7552 (__v8df) _mm512_undefined_pd (),
7553 (__mmask8) -1,
7554 _MM_FROUND_CUR_DIRECTION);
7555}
7556
7557static __inline__ __m512d __DEFAULT_FN_ATTRS
7558_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7559{
7560 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7561 (__v8df) __W,
7562 (__mmask8) __U,
7563 _MM_FROUND_CUR_DIRECTION);
7564}
7565
7566static __inline__ __m512d __DEFAULT_FN_ATTRS
7567_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
7568{
7569 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7570 (__v8df) _mm512_setzero_pd (),
7571 (__mmask8) __U,
7572 _MM_FROUND_CUR_DIRECTION);
7573}
7574
/* getexp (float) macros with an explicit rounding argument R, built on
 * __builtin_ia32_getexpps512_mask.  Passthrough/mask operands:
 *   - unmasked: undefined passthrough, all-ones mask;
 *   - mask:     W as passthrough, U as mask;
 *   - maskz:    zero passthrough, U as mask.
 * Every expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007589
7590static __inline__ __m512 __DEFAULT_FN_ATTRS
7591_mm512_getexp_ps (__m512 __A)
7592{
7593 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7594 (__v16sf) _mm512_undefined_ps (),
7595 (__mmask16) -1,
7596 _MM_FROUND_CUR_DIRECTION);
7597}
7598
7599static __inline__ __m512 __DEFAULT_FN_ATTRS
7600_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7601{
7602 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7603 (__v16sf) __W,
7604 (__mmask16) __U,
7605 _MM_FROUND_CUR_DIRECTION);
7606}
7607
7608static __inline__ __m512 __DEFAULT_FN_ATTRS
7609_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
7610{
7611 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7612 (__v16sf) _mm512_setzero_ps (),
7613 (__mmask16) __U,
7614 _MM_FROUND_CUR_DIRECTION);
7615}
7616
/* Gather macros built on __builtin_ia32_gatherdiv16sf.  Operands, in order:
 * a __v8sf source vector (undefined for the unmasked form, v1_old for the
 * masked form), addr as float const *, index as __v8di, an __mmask8
 * (all-ones or mask) and scale as int.
 * Both expansions are wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (float const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (float const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00007628
/* Gather macros built on __builtin_ia32_gatherdiv16si.  Operands, in order:
 * a __v8si source vector (undefined for the unmasked form, v1_old for the
 * masked form), addr as int const *, index as __v8di, an __mmask8
 * (all-ones or mask) and scale as int.
 * Both expansions are wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (int const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (int const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00007640
/* Gather macros built on __builtin_ia32_gatherdiv8df.  Operands, in order:
 * a __v8df source vector (undefined for the unmasked form, v1_old for the
 * masked form), addr as double const *, index as __v8di, an __mmask8
 * (all-ones or mask) and scale as int.
 * Both expansions are wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (double const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (double const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00007652
/* Gather macros built on __builtin_ia32_gatherdiv8di.  Operands, in order:
 * a __v8di source vector (undefined for the unmasked form, v1_old for the
 * masked form), addr as long long const *, index as __v8di, an __mmask8
 * (all-ones or mask) and scale as int.
 * Both expansions are wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (long long const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (long long const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00007664
/* Gather macros built on __builtin_ia32_gathersiv16sf.  Operands, in order:
 * a __v16sf source vector (undefined for the unmasked form, v1_old for the
 * masked form), addr as float const *, index, an __mmask16 (all-ones or
 * mask) and scale as int.
 * NOTE(review): index is cast through __m512/__v16sf here, unlike the
 * integer index casts used by the other gather macros -- presumably this
 * matches the builtin's declared parameter type; confirm against the
 * builtin definition before changing it.
 * Both expansions are wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (float const *)(addr), \
                                        (__v16sf)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (float const *)(addr), \
                                        (__v16sf)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00007676
/* Gather macros built on __builtin_ia32_gathersiv16si.  Operands, in order:
 * a __v16si source vector (undefined for the unmasked form, v1_old for the
 * masked form), addr as int const *, index as __v16si, an __mmask16
 * (all-ones or mask) and scale as int.
 * Both expansions are wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (int const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (int const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00007688
/* Gather macros built on __builtin_ia32_gathersiv8df.  Operands, in order:
 * a __v8df source vector (undefined for the unmasked form, v1_old for the
 * masked form), addr as double const *, index as __v8si (from __m256i), an
 * __mmask8 (all-ones or mask) and scale as int.
 * Both expansions are wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (double const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (double const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00007700
/* Gather macros built on __builtin_ia32_gathersiv8di.  Operands, in order:
 * a __v8di source vector (undefined for the unmasked form, v1_old for the
 * masked form), addr as long long const *, index as __v8si (from __m256i),
 * an __mmask8 (all-ones or mask) and scale as int.
 * Both expansions are wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (long long const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (long long const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00007712
/* Scatter macros forwarding to __builtin_ia32_scatterdiv16sf.  Operands, in
 * order: addr as float *, an __mmask8 (all-ones for the unmasked form, mask
 * otherwise), index as __v8di, v1 as __v8sf and scale as int. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((float *)(addr), \
                                (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), \
                                (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((float *)(addr), \
                                (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), \
                                (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00007722
/* Scatter macros forwarding to __builtin_ia32_scatterdiv16si.  Operands, in
 * order: addr as int *, an __mmask8 (all-ones for the unmasked form, mask
 * otherwise), index as __v8di, v1 as __v8si and scale as int. */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((int *)(addr), \
                                (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), \
                                (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((int *)(addr), \
                                (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), \
                                (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00007732
/* Scatter macros forwarding to __builtin_ia32_scatterdiv8df.  Operands, in
 * order: addr as double *, an __mmask8 (all-ones for the unmasked form, mask
 * otherwise), index as __v8di, v1 as __v8df and scale as int. */
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((double *)(addr), \
                               (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), \
                               (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((double *)(addr), \
                               (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), \
                               (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00007742
/* Scatter macros forwarding to __builtin_ia32_scatterdiv8di.  Operands, in
 * order: addr as long long *, an __mmask8 (all-ones for the unmasked form,
 * mask otherwise), index as __v8di, v1 as __v8di and scale as int. */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((long long *)(addr), \
                               (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), \
                               (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((long long *)(addr), \
                               (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), \
                               (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00007752
/* Scatter macros forwarding to __builtin_ia32_scattersiv16sf.  Operands, in
 * order: addr as float *, an __mmask16 (all-ones for the unmasked form, mask
 * otherwise), index as __v16si, v1 as __v16sf and scale as int. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((float *)(addr), \
                                (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), \
                                (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((float *)(addr), \
                                (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), \
                                (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00007762
/* Scatter macros forwarding to __builtin_ia32_scattersiv16si.  Operands, in
 * order: addr as int *, an __mmask16 (all-ones for the unmasked form, mask
 * otherwise), index as __v16si, v1 as __v16si and scale as int. */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((int *)(addr), \
                                (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), \
                                (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((int *)(addr), \
                                (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), \
                                (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00007772
/* Scatter macros forwarding to __builtin_ia32_scattersiv8df.  Operands, in
 * order: addr as double *, an __mmask8 (all-ones for the unmasked form, mask
 * otherwise), index as __v8si (from __m256i), v1 as __v8df and scale as
 * int. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((double *)(addr), \
                               (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), \
                               (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((double *)(addr), \
                               (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), \
                               (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00007782
/* Scatter macros forwarding to __builtin_ia32_scattersiv8di.  Operands, in
 * order: addr as long long *, an __mmask8 (all-ones for the unmasked form,
 * mask otherwise), index as __v8si (from __m256i), v1 as __v8di and scale
 * as int. */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((long long *)(addr), \
                               (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), \
                               (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((long long *)(addr), \
                               (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), \
                               (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00007792
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007793static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007794_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007795{
Gabor Buella70d8d512018-05-30 15:27:49 +00007796 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
7797 (__v4sf) __A,
7798 (__v4sf) __B);
7799 __W[0] = (__U & 1) ? __Z[0] : __W[0];
7800 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007801}
7802
/* Scalar-float fmadd macros with an explicit rounding argument R, built on
 * __builtin_ia32_vfmaddss3_mask.  The unmasked form passes (A, B, C) with an
 * all-ones mask; the masked form passes (W, A, B) with mask U.
 * Both expansions are wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007814
7815static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007816_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007817{
Gabor Buella70d8d512018-05-30 15:27:49 +00007818 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
7819 (__v4sf) __B,
7820 (__v4sf) __C);
7821 __A[0] = (__U & 1) ? __Z[0] : 0;
7822 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007823}
7824
/* Zero-masked scalar-float fmadd with an explicit rounding argument R:
 * passes (A, B, C), mask U and R to __builtin_ia32_vfmaddss3_maskz.
 * The expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007830
7831static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007832_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007833{
Gabor Buella70d8d512018-05-30 15:27:49 +00007834 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
7835 (__v4sf) __X,
7836 (__v4sf) __Y);
7837 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
7838 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007839}
7840
/* mask3 scalar-float fmadd with an explicit rounding argument R: passes
 * (W, X, Y), mask U and R to __builtin_ia32_vfmaddss3_mask3.
 * The expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                          (__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007846
7847static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007848_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007849{
Gabor Buella70d8d512018-05-30 15:27:49 +00007850 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
7851 (__v4sf) __A,
7852 -(__v4sf) __B);
7853 __W[0] = (__U & 1) ? __Z[0] : __W[0];
7854 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007855}
7856
/* Scalar-float fmsub macros with an explicit rounding argument R, built on
 * __builtin_ia32_vfmaddss3_mask with the third operand negated.  The
 * unmasked form passes (A, B, -C) with an all-ones mask; the masked form
 * passes (W, A, -B) with mask U.
 * Both expansions are wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007868
7869static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007870_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007871{
Gabor Buella70d8d512018-05-30 15:27:49 +00007872 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
7873 (__v4sf) __B,
7874 -(__v4sf) __C);
7875 __A[0] = (__U & 1) ? __Z[0] : 0;
7876 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007877}
7878
/* Zero-masked scalar-float fmsub with an explicit rounding argument R:
 * passes (A, B, -C), mask U and R to __builtin_ia32_vfmaddss3_maskz.
 * The expansion is wrapped in parentheses so the leading cast always
 * applies to the complete builtin call at the use site. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007884
7885static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007886_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007887{
Gabor Buella70d8d512018-05-30 15:27:49 +00007888 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
7889 (__v4sf) __X,
7890 -(__v4sf) __Y);
7891 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
7892 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007893}
7894
/* mask3 form of scalar float fmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmsubss3_mask3 on (W, X, Y) with mask U (the
 * subtraction is performed by the builtin, so Y is passed un-negated); R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128
 * cast applies to the complete call wherever the macro is used. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                          (__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007900
7901static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007902_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007903{
Gabor Buella70d8d512018-05-30 15:27:49 +00007904 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
7905 -(__v4sf) __A,
7906 (__v4sf) __B);
7907 __W[0] = (__U & 1) ? __Z[0] : __W[0];
7908 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007909}
7910
/* Scalar float fnmadd with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddss3_mask on (A, -B, C) with an all-ones
 * mask; R is forwarded as the trailing int operand.  Fully parenthesized so
 * the __m128 cast applies to the complete call wherever the macro is used. */
#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))
7916
/* Merge-masked scalar float fnmadd with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddss3_mask on (W, -A, B) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128
 * cast applies to the complete call wherever the macro is used. */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007922
7923static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007924_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007925{
Gabor Buella70d8d512018-05-30 15:27:49 +00007926 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
7927 -(__v4sf) __B,
7928 (__v4sf) __C);
7929 __A[0] = (__U & 1) ? __Z[0] : 0;
7930 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007931}
7932
/* Zero-masked scalar float fnmadd with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddss3_maskz on (A, -B, C) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128
 * cast applies to the complete call wherever the macro is used. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          -(__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007938
7939static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007940_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007941{
Gabor Buella70d8d512018-05-30 15:27:49 +00007942 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
7943 -(__v4sf) __X,
7944 (__v4sf) __Y);
7945 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
7946 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007947}
7948
/* mask3 form of scalar float fnmadd with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddss3_mask3 on (W, -X, Y) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128
 * cast applies to the complete call wherever the macro is used. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                          -(__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007954
7955static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007956_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007957{
Gabor Buella70d8d512018-05-30 15:27:49 +00007958 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
7959 -(__v4sf) __A,
7960 -(__v4sf) __B);
7961 __W[0] = (__U & 1) ? __Z[0] : __W[0];
7962 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007963}
7964
/* Scalar float fnmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddss3_mask on (A, -B, -C) with an all-ones
 * mask; R is forwarded as the trailing int operand.  Fully parenthesized so
 * the __m128 cast applies to the complete call wherever the macro is used. */
#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))
7970
/* Merge-masked scalar float fnmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddss3_mask on (W, -A, -B) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128
 * cast applies to the complete call wherever the macro is used. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007976
7977static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007978_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007979{
Gabor Buella70d8d512018-05-30 15:27:49 +00007980 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
7981 -(__v4sf) __B,
7982 -(__v4sf) __C);
7983 __A[0] = (__U & 1) ? __Z[0] : 0;
7984 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007985}
7986
/* Zero-masked scalar float fnmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddss3_maskz on (A, -B, -C) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128
 * cast applies to the complete call wherever the macro is used. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          -(__v4sf)(__m128)(B), \
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007992
7993static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007994_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00007995{
Gabor Buella70d8d512018-05-30 15:27:49 +00007996 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
7997 -(__v4sf) __X,
7998 -(__v4sf) __Y);
7999 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8000 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008001}
8002
/* mask3 form of scalar float fnmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmsubss3_mask3 on (W, -X, Y) with mask U (the
 * subtraction of Y is performed by the builtin); R is forwarded as the
 * trailing int operand.  Fully parenthesized so the __m128 cast applies to
 * the complete call wherever the macro is used. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                          -(__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008008
Craig Topper58187d32016-05-17 04:41:29 +00008009static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008010_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008011{
Gabor Buella70d8d512018-05-30 15:27:49 +00008012 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8013 (__v2df) __A,
8014 (__v2df) __B);
8015 __W[0] = (__U & 1) ? __Z[0] : __W[0];
8016 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008017}
8018
/* Scalar double fmadd with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_mask on (A, B, C) with an all-ones
 * mask; R is forwarded as the trailing int operand.  Fully parenthesized so
 * the __m128d cast applies to the complete call wherever the macro is used. */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))
8024
/* Merge-masked scalar double fmadd with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_mask on (W, A, B) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128d
 * cast applies to the complete call wherever the macro is used. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008030
Craig Topper58187d32016-05-17 04:41:29 +00008031static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008032_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008033{
Gabor Buella70d8d512018-05-30 15:27:49 +00008034 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
8035 (__v2df) __B,
8036 (__v2df) __C);
8037 __A[0] = (__U & 1) ? __Z[0] : 0;
8038 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008039}
8040
/* Zero-masked scalar double fmadd with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_maskz on (A, B, C) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128d
 * cast applies to the complete call wherever the macro is used. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008046
Craig Topper58187d32016-05-17 04:41:29 +00008047static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008048_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008049{
Gabor Buella70d8d512018-05-30 15:27:49 +00008050 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8051 (__v2df) __X,
8052 (__v2df) __Y);
8053 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8054 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008055}
8056
/* mask3 form of scalar double fmadd with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_mask3 on (W, X, Y) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128d
 * cast applies to the complete call wherever the macro is used. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008062
Craig Topper58187d32016-05-17 04:41:29 +00008063static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008064_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008065{
Gabor Buella70d8d512018-05-30 15:27:49 +00008066 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8067 (__v2df) __A,
8068 -(__v2df) __B);
8069 __W[0] = (__U & 1) ? __Z[0] : __W[0];
8070 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008071}
8072
/* Scalar double fmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_mask on (A, B, -C) with an all-ones
 * mask; R is forwarded as the trailing int operand.  Fully parenthesized so
 * the __m128d cast applies to the complete call wherever the macro is used. */
#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))
8078
/* Merge-masked scalar double fmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_mask on (W, A, -B) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128d
 * cast applies to the complete call wherever the macro is used. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008084
Craig Topper58187d32016-05-17 04:41:29 +00008085static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008086_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008087{
Gabor Buella70d8d512018-05-30 15:27:49 +00008088 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
8089 (__v2df) __B,
8090 -(__v2df) __C);
8091 __A[0] = (__U & 1) ? __Z[0] : 0;
8092 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008093}
8094
/* Zero-masked scalar double fmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_maskz on (A, B, -C) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128d
 * cast applies to the complete call wherever the macro is used. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008100
Craig Topper58187d32016-05-17 04:41:29 +00008101static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008102_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008103{
Gabor Buella70d8d512018-05-30 15:27:49 +00008104 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8105 (__v2df) __X,
8106 -(__v2df) __Y);
8107 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8108 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008109}
8110
/* mask3 form of scalar double fmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmsubsd3_mask3 on (W, X, Y) with mask U (the
 * subtraction is performed by the builtin, so Y is passed un-negated); R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128d
 * cast applies to the complete call wherever the macro is used. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008116
Craig Topper58187d32016-05-17 04:41:29 +00008117static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008118_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008119{
Gabor Buella70d8d512018-05-30 15:27:49 +00008120 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8121 -(__v2df) __A,
8122 (__v2df) __B);
8123 __W[0] = (__U & 1) ? __Z[0] : __W[0];
8124 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008125}
8126
/* Scalar double fnmadd with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_mask on (A, -B, C) with an all-ones
 * mask; R is forwarded as the trailing int operand.  Fully parenthesized so
 * the __m128d cast applies to the complete call wherever the macro is used. */
#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))
8132
/* Merge-masked scalar double fnmadd with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_mask on (W, -A, B) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128d
 * cast applies to the complete call wherever the macro is used. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008138
Craig Topper58187d32016-05-17 04:41:29 +00008139static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008140_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008141{
Gabor Buella70d8d512018-05-30 15:27:49 +00008142 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
8143 -(__v2df) __B,
8144 (__v2df) __C);
8145 __A[0] = (__U & 1) ? __Z[0] : 0;
8146 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008147}
8148
/* Zero-masked scalar double fnmadd with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_maskz on (A, -B, C) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128d
 * cast applies to the complete call wherever the macro is used. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008154
Craig Topper58187d32016-05-17 04:41:29 +00008155static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008156_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008157{
Gabor Buella70d8d512018-05-30 15:27:49 +00008158 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8159 -(__v2df) __X,
8160 (__v2df) __Y);
8161 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8162 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008163}
8164
/* mask3 form of scalar double fnmadd with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_mask3 on (W, -X, Y) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128d
 * cast applies to the complete call wherever the macro is used. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008170
Craig Topper58187d32016-05-17 04:41:29 +00008171static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008172_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008173{
Gabor Buella70d8d512018-05-30 15:27:49 +00008174 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8175 -(__v2df) __A,
8176 -(__v2df) __B);
8177 __W[0] = (__U & 1) ? __Z[0] : __W[0];
8178 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008179}
8180
/* Scalar double fnmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_mask on (A, -B, -C) with an all-ones
 * mask; R is forwarded as the trailing int operand.  Fully parenthesized so
 * the __m128d cast applies to the complete call wherever the macro is used. */
#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))
8186
/* Merge-masked scalar double fnmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_mask on (W, -A, -B) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128d
 * cast applies to the complete call wherever the macro is used. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008192
Craig Topper58187d32016-05-17 04:41:29 +00008193static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008194_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008195{
Gabor Buella70d8d512018-05-30 15:27:49 +00008196 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
8197 -(__v2df) __B,
8198 -(__v2df) __C);
8199 __A[0] = (__U & 1) ? __Z[0] : 0;
8200 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008201}
8202
/* Zero-masked scalar double fnmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmaddsd3_maskz on (A, -B, -C) with mask U; R is
 * forwarded as the trailing int operand.  Fully parenthesized so the __m128d
 * cast applies to the complete call wherever the macro is used. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), \
                                           (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008209
Craig Topper58187d32016-05-17 04:41:29 +00008210static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008211_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008212{
Gabor Buella70d8d512018-05-30 15:27:49 +00008213 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8214 -(__v2df) __X,
8215 -(__v2df) __Y);
8216 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8217 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008218}
8219
/* mask3 form of scalar double fnmsub with explicit rounding control.
 * Expands to __builtin_ia32_vfmsubsd3_mask3 on (W, -X, Y) with mask U (the
 * subtraction of Y is performed by the builtin); R is forwarded as the
 * trailing int operand.  Fully parenthesized so the __m128d cast applies to
 * the complete call wherever the macro is used. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008225
/* Permutes the eight doubles of X via __builtin_ia32_permdf512, with C
 * forwarded as the int control operand.  Fully parenthesized so the __m512d
 * cast covers the whole call even when the invocation is followed by a
 * postfix operator such as [i]. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))
Michael Zuckerman8938e832016-04-25 05:32:35 +00008228
/* Merge-masked _mm512_permutex_pd: feeds mask U, the permuted vector
 * _mm512_permutex_pd(X, C) and W (in that order) to
 * __builtin_ia32_selectpd_512.  Fully parenthesized so the __m512d cast
 * applies to the complete call wherever the macro is used. */
#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))
Michael Zuckerman8938e832016-04-25 05:32:35 +00008233
/* Zero-masked _mm512_permutex_pd: feeds mask U, the permuted vector
 * _mm512_permutex_pd(X, C) and _mm512_setzero_pd() (in that order) to
 * __builtin_ia32_selectpd_512.  Fully parenthesized so the __m512d cast
 * applies to the complete call wherever the macro is used. */
#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))
Michael Zuckerman8938e832016-04-25 05:32:35 +00008238
/* Permutes the eight 64-bit integers of X via __builtin_ia32_permdi512, with
 * C forwarded as the int control operand.  Fully parenthesized so the
 * __m512i cast covers the whole call even when the invocation is followed by
 * a postfix operator such as [i]. */
#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))
Michael Zuckerman8938e832016-04-25 05:32:35 +00008241
/* Merge-masked _mm512_permutex_epi64: feeds mask U, the permuted vector
 * _mm512_permutex_epi64(X, C) and W (in that order) to
 * __builtin_ia32_selectq_512.  Fully parenthesized so the __m512i cast
 * applies to the complete call wherever the macro is used. */
#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))
Michael Zuckerman8938e832016-04-25 05:32:35 +00008246
/* Zero-masked _mm512_permutex_epi64: feeds mask U, the permuted vector
 * _mm512_permutex_epi64(X, C) and _mm512_setzero_si512() (in that order) to
 * __builtin_ia32_selectq_512.  Fully parenthesized so the __m512i cast
 * applies to the complete call wherever the macro is used. */
#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
Michael Zuckerman8938e832016-04-25 05:32:35 +00008251
8252static __inline__ __m512d __DEFAULT_FN_ATTRS
8253_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8254{
Craig Topper55b40672018-05-20 23:34:10 +00008255 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008256}
8257
8258static __inline__ __m512d __DEFAULT_FN_ATTRS
8259_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8260{
Craig Topper55b40672018-05-20 23:34:10 +00008261 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8262 (__v8df)_mm512_permutexvar_pd(__X, __Y),
8263 (__v8df)__W);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008264}
8265
8266static __inline__ __m512d __DEFAULT_FN_ATTRS
8267_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8268{
Craig Topper55b40672018-05-20 23:34:10 +00008269 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8270 (__v8df)_mm512_permutexvar_pd(__X, __Y),
8271 (__v8df)_mm512_setzero_pd());
Michael Zuckerman8938e832016-04-25 05:32:35 +00008272}
8273
8274static __inline__ __m512i __DEFAULT_FN_ATTRS
8275_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8276{
Craig Topper55b40672018-05-20 23:34:10 +00008277 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8278}
8279
8280static __inline__ __m512i __DEFAULT_FN_ATTRS
8281_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8282{
8283 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8284 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8285 (__v8di)_mm512_setzero_si512());
Michael Zuckerman8938e832016-04-25 05:32:35 +00008286}
8287
8288static __inline__ __m512i __DEFAULT_FN_ATTRS
8289_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8290 __m512i __Y)
8291{
Craig Topper55b40672018-05-20 23:34:10 +00008292 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8293 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8294 (__v8di)__W);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008295}
8296
8297static __inline__ __m512 __DEFAULT_FN_ATTRS
8298_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8299{
Craig Topper55b40672018-05-20 23:34:10 +00008300 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008301}
8302
8303static __inline__ __m512 __DEFAULT_FN_ATTRS
8304_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8305{
Craig Topper55b40672018-05-20 23:34:10 +00008306 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8307 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8308 (__v16sf)__W);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008309}
8310
8311static __inline__ __m512 __DEFAULT_FN_ATTRS
8312_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8313{
Craig Topper55b40672018-05-20 23:34:10 +00008314 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8315 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8316 (__v16sf)_mm512_setzero_ps());
Michael Zuckerman8938e832016-04-25 05:32:35 +00008317}
8318
8319static __inline__ __m512i __DEFAULT_FN_ATTRS
8320_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8321{
Craig Topper55b40672018-05-20 23:34:10 +00008322 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008323}
8324
Igor Bregerf050b792017-03-19 08:27:16 +00008325#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8326
Michael Zuckerman8938e832016-04-25 05:32:35 +00008327static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper55b40672018-05-20 23:34:10 +00008328_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8329{
8330 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8331 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8332 (__v16si)_mm512_setzero_si512());
8333}
8334
8335static __inline__ __m512i __DEFAULT_FN_ATTRS
Michael Zuckerman8938e832016-04-25 05:32:35 +00008336_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8337 __m512i __Y)
8338{
Craig Topper55b40672018-05-20 23:34:10 +00008339 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8340 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8341 (__v16si)__W);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008342}
8343
Igor Bregerf050b792017-03-19 08:27:16 +00008344#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8345
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008346static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8347_mm512_kand (__mmask16 __A, __mmask16 __B)
8348{
8349 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8350}
Michael Zuckerman8938e832016-04-25 05:32:35 +00008351
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008352static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8353_mm512_kandn (__mmask16 __A, __mmask16 __B)
8354{
8355 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8356}
8357
8358static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8359_mm512_kor (__mmask16 __A, __mmask16 __B)
8360{
8361 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8362}
8363
8364static __inline__ int __DEFAULT_FN_ATTRS
8365_mm512_kortestc (__mmask16 __A, __mmask16 __B)
8366{
Craig Topper91f23d92016-05-16 01:09:16 +00008367 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008368}
8369
8370static __inline__ int __DEFAULT_FN_ATTRS
8371_mm512_kortestz (__mmask16 __A, __mmask16 __B)
8372{
Craig Topper91f23d92016-05-16 01:09:16 +00008373 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008374}
8375
8376static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8377_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8378{
Craig Topperf517f1a2018-01-14 19:23:50 +00008379 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008380}
8381
8382static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8383_mm512_kxnor (__mmask16 __A, __mmask16 __B)
8384{
8385 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8386}
8387
8388static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8389_mm512_kxor (__mmask16 __A, __mmask16 __B)
8390{
8391 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8392}
Michael Zuckerman8938e832016-04-25 05:32:35 +00008393
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008394static __inline__ void __DEFAULT_FN_ATTRS
8395_mm512_stream_si512 (__m512i * __P, __m512i __A)
8396{
Reid Kleckner89fbd552018-06-04 21:39:20 +00008397 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8398 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008399}
8400
8401static __inline__ __m512i __DEFAULT_FN_ATTRS
Simon Pilgrim1ba2bf22017-09-05 10:06:41 +00008402_mm512_stream_load_si512 (void const *__P)
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008403{
Reid Kleckner89fbd552018-06-04 21:39:20 +00008404 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8405 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008406}
8407
8408static __inline__ void __DEFAULT_FN_ATTRS
8409_mm512_stream_pd (double *__P, __m512d __A)
8410{
Reid Kleckner89fbd552018-06-04 21:39:20 +00008411 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8412 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008413}
8414
8415static __inline__ void __DEFAULT_FN_ATTRS
8416_mm512_stream_ps (float *__P, __m512 __A)
8417{
Reid Kleckner89fbd552018-06-04 21:39:20 +00008418 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8419 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008420}
8421
Michael Zuckerman41f5a372016-04-29 08:52:02 +00008422static __inline__ __m512d __DEFAULT_FN_ATTRS
8423_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8424{
8425 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8426 (__v8df) __W,
8427 (__mmask8) __U);
8428}
8429
8430static __inline__ __m512d __DEFAULT_FN_ATTRS
8431_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
8432{
8433 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8434 (__v8df)
8435 _mm512_setzero_pd (),
8436 (__mmask8) __U);
8437}
8438
8439static __inline__ __m512i __DEFAULT_FN_ATTRS
8440_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8441{
8442 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8443 (__v8di) __W,
8444 (__mmask8) __U);
8445}
8446
8447static __inline__ __m512i __DEFAULT_FN_ATTRS
8448_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
8449{
8450 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8451 (__v8di)
8452 _mm512_setzero_si512 (),
8453 (__mmask8) __U);
8454}
8455
8456static __inline__ __m512 __DEFAULT_FN_ATTRS
8457_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8458{
8459 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8460 (__v16sf) __W,
8461 (__mmask16) __U);
8462}
8463
8464static __inline__ __m512 __DEFAULT_FN_ATTRS
8465_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
8466{
8467 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8468 (__v16sf)
8469 _mm512_setzero_ps (),
8470 (__mmask16) __U);
8471}
8472
8473static __inline__ __m512i __DEFAULT_FN_ATTRS
8474_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8475{
8476 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8477 (__v16si) __W,
8478 (__mmask16) __U);
8479}
8480
8481static __inline__ __m512i __DEFAULT_FN_ATTRS
8482_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
8483{
8484 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8485 (__v16si)
8486 _mm512_setzero_si512 (),
8487 (__mmask16) __U);
8488}
8489
/* Scalar float compare yielding a mask.  Expands to
 * __builtin_ia32_cmpss_mask on (X, Y) with predicate P, an all-ones input
 * mask and R as the trailing int operand.  Fully parenthesized so the
 * expansion is a single primary expression (e.g. usable as a sizeof
 * operand without extra parentheses). */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008494
/* Masked scalar float compare yielding a mask.  Expands to
 * __builtin_ia32_cmpss_mask on (X, Y) with predicate P, input mask M and R
 * as the trailing int operand.  Fully parenthesized so the expansion is a
 * single primary expression (e.g. usable as a sizeof operand without extra
 * parentheses). */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008499
Craig Topperc6338672018-05-31 00:51:20 +00008500#define _mm_cmp_ss_mask(X, Y, P) \
Craig Topper8c18e112016-05-17 04:41:50 +00008501 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8502 (__v4sf)(__m128)(Y), (int)(P), \
8503 (__mmask8)-1, \
Craig Topperc6338672018-05-31 00:51:20 +00008504 _MM_FROUND_CUR_DIRECTION)
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008505
Craig Topperc6338672018-05-31 00:51:20 +00008506#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
Craig Topper8c18e112016-05-17 04:41:50 +00008507 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8508 (__v4sf)(__m128)(Y), (int)(P), \
8509 (__mmask8)(M), \
Craig Topperc6338672018-05-31 00:51:20 +00008510 _MM_FROUND_CUR_DIRECTION)
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008511
/* Scalar double-precision compare-to-mask macros.  Each one expands to a
   single __builtin_ia32_cmpsd_mask call on X and Y with predicate P.
   Forms without M pass an all-ones mask ((__mmask8)-1); forms without R pass
   _MM_FROUND_CUR_DIRECTION.  The entire expansion is parenthesized so the
   leading cast always applies to the complete call at the use site. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008533
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008534/* Bit Test */
8535
8536static __inline __mmask16 __DEFAULT_FN_ATTRS
8537_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8538{
8539 return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008540 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008541}
8542
8543static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8544_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8545{
8546 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008547 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008548}
8549
8550static __inline __mmask8 __DEFAULT_FN_ATTRS
8551_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8552{
8553 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008554 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008555}
8556
8557static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8558_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8559{
8560 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008561 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008562}
8563
8564static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8565_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8566{
8567 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008568 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008569}
8570
8571static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8572_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8573{
8574 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008575 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008576}
8577
8578static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8579_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8580{
8581 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008582 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008583}
8584
8585static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8586_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8587{
8588 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008589 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008590}
8591
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008592static __inline__ __m512 __DEFAULT_FN_ATTRS
8593_mm512_movehdup_ps (__m512 __A)
8594{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008595 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8596 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008597}
8598
8599static __inline__ __m512 __DEFAULT_FN_ATTRS
8600_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8601{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008602 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8603 (__v16sf)_mm512_movehdup_ps(__A),
8604 (__v16sf)__W);
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008605}
8606
8607static __inline__ __m512 __DEFAULT_FN_ATTRS
8608_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
8609{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008610 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8611 (__v16sf)_mm512_movehdup_ps(__A),
8612 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008613}
8614
8615static __inline__ __m512 __DEFAULT_FN_ATTRS
8616_mm512_moveldup_ps (__m512 __A)
8617{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008618 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8619 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008620}
8621
8622static __inline__ __m512 __DEFAULT_FN_ATTRS
8623_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8624{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008625 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8626 (__v16sf)_mm512_moveldup_ps(__A),
8627 (__v16sf)__W);
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008628}
8629
8630static __inline__ __m512 __DEFAULT_FN_ATTRS
8631_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
8632{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008633 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8634 (__v16sf)_mm512_moveldup_ps(__A),
8635 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008636}
8637
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00008638static __inline__ __m128 __DEFAULT_FN_ATTRS
8639_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8640{
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00008641 __m128 res = __A;
Ayman Musae60a41c2016-11-08 12:00:30 +00008642 res[0] = (__U & 1) ? __B[0] : __W[0];
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00008643 return res;
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00008644}
8645
8646static __inline__ __m128 __DEFAULT_FN_ATTRS
8647_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
8648{
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00008649 __m128 res = __A;
8650 res[0] = (__U & 1) ? __B[0] : 0;
8651 return res;
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00008652}
8653
8654static __inline__ __m128d __DEFAULT_FN_ATTRS
8655_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8656{
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00008657 __m128d res = __A;
Ayman Musae60a41c2016-11-08 12:00:30 +00008658 res[0] = (__U & 1) ? __B[0] : __W[0];
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00008659 return res;
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00008660}
8661
8662static __inline__ __m128d __DEFAULT_FN_ATTRS
8663_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
8664{
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00008665 __m128d res = __A;
8666 res[0] = (__U & 1) ? __B[0] : 0;
8667 return res;
Ayman Musae60a41c2016-11-08 12:00:30 +00008668}
8669
8670static __inline__ void __DEFAULT_FN_ATTRS
8671_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8672{
Craig Topper74ac0ed2018-05-10 05:43:43 +00008673 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00008674}
8675
8676static __inline__ void __DEFAULT_FN_ATTRS
8677_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8678{
Craig Topper74ac0ed2018-05-10 05:43:43 +00008679 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00008680}
8681
8682static __inline__ __m128 __DEFAULT_FN_ATTRS
8683_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8684{
8685 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
Craig Topper63ec0ea2018-05-30 21:08:27 +00008686 (__v4sf)_mm_setzero_ps(),
Ayman Musae60a41c2016-11-08 12:00:30 +00008687 0, 4, 4, 4);
8688
Craig Topper74ac0ed2018-05-10 05:43:43 +00008689 return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00008690}
8691
8692static __inline__ __m128 __DEFAULT_FN_ATTRS
8693_mm_maskz_load_ss (__mmask8 __U, const float* __A)
8694{
Craig Topper74ac0ed2018-05-10 05:43:43 +00008695 return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A,
8696 (__v4sf) _mm_setzero_ps(),
8697 __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00008698}
8699
8700static __inline__ __m128d __DEFAULT_FN_ATTRS
8701_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8702{
8703 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
Craig Topper63ec0ea2018-05-30 21:08:27 +00008704 (__v2df)_mm_setzero_pd(),
8705 0, 2);
Ayman Musae60a41c2016-11-08 12:00:30 +00008706
Craig Topper74ac0ed2018-05-10 05:43:43 +00008707 return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00008708}
8709
8710static __inline__ __m128d __DEFAULT_FN_ATTRS
8711_mm_maskz_load_sd (__mmask8 __U, const double* __A)
8712{
Craig Topper74ac0ed2018-05-10 05:43:43 +00008713 return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A,
8714 (__v2df) _mm_setzero_pd(),
8715 __U & 1);
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00008716}
8717
/* 32-bit element shuffle macros.  _mm512_shuffle_epi32 expands to
   __builtin_ia32_pshufd512 on A with control I; the mask/maskz forms feed
   that result, together with W or an all-zero vector, to
   __builtin_ia32_selectd_512 under mask U.  The entire expansion is
   parenthesized so the leading cast always applies to the complete call,
   even when the use site adds a postfix operator such as a subscript. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
Michael Zuckermanc62f27e2016-05-02 07:35:27 +00008730
Michael Zuckerman6a0e0872016-05-02 08:36:41 +00008731static __inline__ __m512d __DEFAULT_FN_ATTRS
8732_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8733{
8734 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8735 (__v8df) __W,
8736 (__mmask8) __U);
8737}
8738
8739static __inline__ __m512d __DEFAULT_FN_ATTRS
8740_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
8741{
8742 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8743 (__v8df) _mm512_setzero_pd (),
8744 (__mmask8) __U);
8745}
8746
8747static __inline__ __m512i __DEFAULT_FN_ATTRS
8748_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8749{
8750 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8751 (__v8di) __W,
8752 (__mmask8) __U);
8753}
8754
8755static __inline__ __m512i __DEFAULT_FN_ATTRS
8756_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
8757{
8758 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
Craig Topper5cbeeed2018-07-07 17:03:32 +00008759 (__v8di) _mm512_setzero_si512 (),
Michael Zuckerman6a0e0872016-05-02 08:36:41 +00008760 (__mmask8) __U);
8761}
8762
8763static __inline__ __m512d __DEFAULT_FN_ATTRS
8764_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8765{
8766 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8767 (__v8df) __W,
8768 (__mmask8) __U);
8769}
8770
8771static __inline__ __m512d __DEFAULT_FN_ATTRS
8772_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
8773{
8774 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8775 (__v8df) _mm512_setzero_pd(),
8776 (__mmask8) __U);
8777}
8778
8779static __inline__ __m512i __DEFAULT_FN_ATTRS
8780_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8781{
8782 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8783 (__v8di) __W,
8784 (__mmask8) __U);
8785}
8786
8787static __inline__ __m512i __DEFAULT_FN_ATTRS
8788_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
8789{
8790 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
Craig Topper5cbeeed2018-07-07 17:03:32 +00008791 (__v8di) _mm512_setzero_si512(),
Michael Zuckerman6a0e0872016-05-02 08:36:41 +00008792 (__mmask8) __U);
8793}
8794
8795static __inline__ __m512 __DEFAULT_FN_ATTRS
8796_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8797{
8798 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8799 (__v16sf) __W,
8800 (__mmask16) __U);
8801}
8802
8803static __inline__ __m512 __DEFAULT_FN_ATTRS
8804_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
8805{
8806 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8807 (__v16sf) _mm512_setzero_ps(),
8808 (__mmask16) __U);
8809}
8810
8811static __inline__ __m512i __DEFAULT_FN_ATTRS
8812_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8813{
8814 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8815 (__v16si) __W,
8816 (__mmask16) __U);
8817}
8818
8819static __inline__ __m512i __DEFAULT_FN_ATTRS
8820_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
8821{
8822 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
Craig Topper5cbeeed2018-07-07 17:03:32 +00008823 (__v16si) _mm512_setzero_si512(),
Michael Zuckerman6a0e0872016-05-02 08:36:41 +00008824 (__mmask16) __U);
8825}
8826
8827static __inline__ __m512 __DEFAULT_FN_ATTRS
8828_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8829{
8830 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8831 (__v16sf) __W,
8832 (__mmask16) __U);
8833}
8834
8835static __inline__ __m512 __DEFAULT_FN_ATTRS
8836_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
8837{
8838 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8839 (__v16sf) _mm512_setzero_ps(),
8840 (__mmask16) __U);
8841}
8842
8843static __inline__ __m512i __DEFAULT_FN_ATTRS
8844_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8845{
8846 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8847 (__v16si) __W,
8848 (__mmask16) __U);
8849}
8850
8851static __inline__ __m512i __DEFAULT_FN_ATTRS
8852_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
8853{
8854 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
Craig Topper5cbeeed2018-07-07 17:03:32 +00008855 (__v16si) _mm512_setzero_si512(),
Michael Zuckerman6a0e0872016-05-02 08:36:41 +00008856 (__mmask16) __U);
8857}
8858
/* Float-to-double conversion macros with an explicit rounding argument R.
   Each expands to one __builtin_ia32_cvtps2pd512_mask call on A; the second
   operand is an undefined vector (unmasked form, all-ones mask), W (mask
   form) or an all-zero vector (maskz form).  The entire expansion is
   parenthesized so the leading cast always applies to the complete call at
   the use site. */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00008873
8874static __inline__ __m512d __DEFAULT_FN_ATTRS
8875_mm512_cvtps_pd (__m256 __A)
8876{
Craig Topperdaaf1052018-05-14 04:05:06 +00008877 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00008878}
8879
8880static __inline__ __m512d __DEFAULT_FN_ATTRS
8881_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
8882{
Craig Topper8cb261e2018-05-14 04:57:46 +00008883 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8884 (__v8df)_mm512_cvtps_pd(__A),
8885 (__v8df)__W);
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00008886}
8887
8888static __inline__ __m512d __DEFAULT_FN_ATTRS
8889_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
8890{
Craig Topper8cb261e2018-05-14 04:57:46 +00008891 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8892 (__v8df)_mm512_cvtps_pd(__A),
8893 (__v8df)_mm512_setzero_pd());
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00008894}
8895
Craig Topper5cbeeed2018-07-07 17:03:32 +00008896static __inline__ __m512d __DEFAULT_FN_ATTRS
Ayman Musa17a28192016-09-27 15:37:31 +00008897_mm512_cvtpslo_pd (__m512 __A)
Ayman Musa2e250e82016-09-27 14:06:32 +00008898{
Craig Topper5cbeeed2018-07-07 17:03:32 +00008899 return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
Ayman Musa2e250e82016-09-27 14:06:32 +00008900}
8901
Craig Topper5cbeeed2018-07-07 17:03:32 +00008902static __inline__ __m512d __DEFAULT_FN_ATTRS
Ayman Musa17a28192016-09-27 15:37:31 +00008903_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
Ayman Musa2e250e82016-09-27 14:06:32 +00008904{
Craig Topper5cbeeed2018-07-07 17:03:32 +00008905 return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
Ayman Musa2e250e82016-09-27 14:06:32 +00008906}
8907
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00008908static __inline__ __m512d __DEFAULT_FN_ATTRS
8909_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
8910{
Igor Bregeraadb8762016-06-08 13:59:20 +00008911 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8912 (__v8df) __A,
8913 (__v8df) __W);
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00008914}
8915
8916static __inline__ __m512d __DEFAULT_FN_ATTRS
8917_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
8918{
Igor Bregeraadb8762016-06-08 13:59:20 +00008919 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8920 (__v8df) __A,
8921 (__v8df) _mm512_setzero_pd ());
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00008922}
8923
8924static __inline__ __m512 __DEFAULT_FN_ATTRS
8925_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
8926{
Igor Bregeraadb8762016-06-08 13:59:20 +00008927 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8928 (__v16sf) __A,
8929 (__v16sf) __W);
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00008930}
8931
8932static __inline__ __m512 __DEFAULT_FN_ATTRS
8933_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
8934{
Igor Bregeraadb8762016-06-08 13:59:20 +00008935 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8936 (__v16sf) __A,
8937 (__v16sf) _mm512_setzero_ps ());
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00008938}
8939
Michael Zuckerman708e7592016-05-03 10:42:46 +00008940static __inline__ void __DEFAULT_FN_ATTRS
8941_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
8942{
8943 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8944 (__mmask8) __U);
8945}
8946
8947static __inline__ void __DEFAULT_FN_ATTRS
8948_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
8949{
8950 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8951 (__mmask8) __U);
8952}
8953
8954static __inline__ void __DEFAULT_FN_ATTRS
8955_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
8956{
8957 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8958 (__mmask16) __U);
8959}
8960
8961static __inline__ void __DEFAULT_FN_ATTRS
8962_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
8963{
8964 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
8965 (__mmask16) __U);
8966}
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00008967
/* Scalar double-to-float conversion macros with an explicit rounding
   argument R.  Each expands to one __builtin_ia32_cvtsd2ss_round_mask call
   on A and B; the third operand is an undefined vector (unmasked form,
   all-ones mask), W (mask form) or an all-zero vector (maskz form).  The
   entire expansion is parenthesized so the leading cast always applies to
   the complete call at the use site. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_undefined_ps(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
Michael Zuckermane6f73892016-05-04 08:55:11 +00008985
Asaf Badouh89f65762016-06-02 08:11:35 +00008986static __inline__ __m128 __DEFAULT_FN_ATTRS
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00008987_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
Asaf Badouh89f65762016-06-02 08:11:35 +00008988{
Craig Toppercbf39292018-05-31 01:24:40 +00008989 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8990 (__v2df)__B,
8991 (__v4sf)__W,
8992 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
Asaf Badouh89f65762016-06-02 08:11:35 +00008993}
8994
8995static __inline__ __m128 __DEFAULT_FN_ATTRS
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00008996_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
Asaf Badouh89f65762016-06-02 08:11:35 +00008997{
Craig Toppercbf39292018-05-31 01:24:40 +00008998 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8999 (__v2df)__B,
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009000 (__v4sf)_mm_setzero_ps(),
Craig Toppercbf39292018-05-31 01:24:40 +00009001 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
Asaf Badouh89f65762016-06-02 08:11:35 +00009002}
9003
/* Alternate spellings: each "i32"/"i64" name is a plain alias for the
   corresponding "si32"/"si64" intrinsic.  The 64-bit aliases are only
   defined when __x86_64__ is defined. */

/* float/double -> integer */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#endif

/* integer -> float/double */
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
Asaf Badouh89f65762016-06-02 08:11:35 +00009014
#ifdef __x86_64__
/* Both spellings expand to the same __builtin_ia32_cvtsi2sd64 call on A,
   the 64-bit integer B and rounding argument R.  The entire expansion is
   parenthesized so the leading cast always applies to the complete call at
   the use site. */
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif
Michael Zuckermane6f73892016-05-04 08:55:11 +00009024
/* Both spellings expand to the same __builtin_ia32_cvtsi2ss32 call on A,
   the int B and rounding argument R.  The entire expansion is parenthesized
   so the leading cast always applies to the complete call at the use site. */
#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))
Michael Zuckermane6f73892016-05-04 08:55:11 +00009030
#ifdef __x86_64__
/* Both spellings expand to the same __builtin_ia32_cvtsi2ss64 call on A,
   the 64-bit integer B and rounding argument R.  The entire expansion is
   parenthesized so the leading cast always applies to the complete call at
   the use site. */
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif
Michael Zuckermane6f73892016-05-04 08:55:11 +00009040
/* Scalar float-to-double conversion macros with an explicit rounding
   argument R.  Each expands to one __builtin_ia32_cvtss2sd_round_mask call
   on A and B; the third operand is an undefined vector (unmasked form,
   all-ones mask), W (mask form) or an all-zero vector (maskz form).  The
   entire expansion is parenthesized so the leading cast always applies to
   the complete call at the use site. */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_undefined_pd(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
Michael Zuckermane6f73892016-05-04 08:55:11 +00009058
9059static __inline__ __m128d __DEFAULT_FN_ATTRS
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009060_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
Asaf Badouh89f65762016-06-02 08:11:35 +00009061{
Craig Toppercbf39292018-05-31 01:24:40 +00009062 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9063 (__v4sf)__B,
9064 (__v2df)__W,
9065 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
Asaf Badouh89f65762016-06-02 08:11:35 +00009066}
9067
9068static __inline__ __m128d __DEFAULT_FN_ATTRS
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009069_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
Asaf Badouh89f65762016-06-02 08:11:35 +00009070{
Craig Toppercbf39292018-05-31 01:24:40 +00009071 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9072 (__v4sf)__B,
9073 (__v2df)_mm_setzero_pd(),
9074 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
Asaf Badouh89f65762016-06-02 08:11:35 +00009075}
9076
9077static __inline__ __m128d __DEFAULT_FN_ATTRS
Michael Zuckermane6f73892016-05-04 08:55:11 +00009078_mm_cvtu32_sd (__m128d __A, unsigned __B)
9079{
Craig Topper6fa91252018-05-13 23:03:30 +00009080 __A[0] = __B;
9081 return __A;
Michael Zuckermane6f73892016-05-04 08:55:11 +00009082}
9083
Craig Topper45db56c2016-07-21 07:38:39 +00009084#ifdef __x86_64__
/* Expands to __builtin_ia32_cvtusi2sd64 on A, the unsigned 64-bit integer B
   and rounding argument R.  The entire expansion is parenthesized so the
   leading cast always applies to the complete call at the use site. */
#define _mm_cvt_roundu64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                       (unsigned long long)(B), (int)(R)))
Michael Zuckermane6f73892016-05-04 08:55:11 +00009088
9089static __inline__ __m128d __DEFAULT_FN_ATTRS
9090_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9091{
Craig Topper6fa91252018-05-13 23:03:30 +00009092 __A[0] = __B;
9093 return __A;
Michael Zuckermane6f73892016-05-04 08:55:11 +00009094}
Craig Topper45db56c2016-07-21 07:38:39 +00009095#endif
Michael Zuckermane6f73892016-05-04 08:55:11 +00009096
/* Expands to __builtin_ia32_cvtusi2ss32 on A, the unsigned int B and
   rounding argument R.  The entire expansion is parenthesized so the leading
   cast always applies to the complete call at the use site. */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))
Michael Zuckermane6f73892016-05-04 08:55:11 +00009100
9101static __inline__ __m128 __DEFAULT_FN_ATTRS
9102_mm_cvtu32_ss (__m128 __A, unsigned __B)
9103{
Craig Topper6fa91252018-05-13 23:03:30 +00009104 __A[0] = __B;
9105 return __A;
Michael Zuckermane6f73892016-05-04 08:55:11 +00009106}
9107
Craig Topper45db56c2016-07-21 07:38:39 +00009108#ifdef __x86_64__
/* Expands to __builtin_ia32_cvtusi2ss64 on A, the unsigned 64-bit integer B
   and rounding argument R.  The entire expansion is parenthesized so the
   leading cast always applies to the complete call at the use site. */
#define _mm_cvt_roundu64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                      (unsigned long long)(B), (int)(R)))
Michael Zuckermane6f73892016-05-04 08:55:11 +00009112
9113static __inline__ __m128 __DEFAULT_FN_ATTRS
9114_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9115{
Craig Topper6fa91252018-05-13 23:03:30 +00009116 __A[0] = __B;
9117 return __A;
Michael Zuckermane6f73892016-05-04 08:55:11 +00009118}
Craig Topper45db56c2016-07-21 07:38:39 +00009119#endif
Michael Zuckermane6f73892016-05-04 08:55:11 +00009120
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009121static __inline__ __m512i __DEFAULT_FN_ATTRS
9122_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9123{
Jina Nahias3ad702a2017-09-19 11:00:27 +00009124 return (__m512i) __builtin_ia32_selectd_512(__M,
9125 (__v16si) _mm512_set1_epi32(__A),
9126 (__v16si) __O);
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009127}
9128
9129static __inline__ __m512i __DEFAULT_FN_ATTRS
9130_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9131{
Jina Nahias3ad702a2017-09-19 11:00:27 +00009132 return (__m512i) __builtin_ia32_selectq_512(__M,
9133 (__v8di) _mm512_set1_epi64(__A),
9134 (__v8di) __O);
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009135}
9136
Igor Bregerf050b792017-03-19 08:27:16 +00009137static __inline __m512i __DEFAULT_FN_ATTRS
9138_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9139 char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9140 char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9141 char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9142 char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9143 char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9144 char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9145 char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9146 char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9147 char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9148 char __e4, char __e3, char __e2, char __e1, char __e0) {
9149
9150 return __extension__ (__m512i)(__v64qi)
9151 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9152 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9153 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9154 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9155 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9156 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9157 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9158 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9159}
9160
9161static __inline __m512i __DEFAULT_FN_ATTRS
9162_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9163 short __e27, short __e26, short __e25, short __e24, short __e23,
9164 short __e22, short __e21, short __e20, short __e19, short __e18,
9165 short __e17, short __e16, short __e15, short __e14, short __e13,
9166 short __e12, short __e11, short __e10, short __e9, short __e8,
9167 short __e7, short __e6, short __e5, short __e4, short __e3,
9168 short __e2, short __e1, short __e0) {
9169 return __extension__ (__m512i)(__v32hi)
9170 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9171 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9172 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9173 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9174}
9175
Michael Zuckerman178113e2016-05-19 12:07:49 +00009176static __inline __m512i __DEFAULT_FN_ATTRS
9177_mm512_set_epi32 (int __A, int __B, int __C, int __D,
9178 int __E, int __F, int __G, int __H,
9179 int __I, int __J, int __K, int __L,
9180 int __M, int __N, int __O, int __P)
9181{
9182 return __extension__ (__m512i)(__v16si)
9183 { __P, __O, __N, __M, __L, __K, __J, __I,
9184 __H, __G, __F, __E, __D, __C, __B, __A };
9185}
9186
/* Reversed-argument form of _mm512_set_epi32: e0 is given first and e15 last;
 * the arguments are forwarded to _mm512_set_epi32 in the opposite order. */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,                 \
                          e8,e9,e10,e11,e12,e13,e14,e15)           \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),  \
                   (e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00009191
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009192static __inline__ __m512i __DEFAULT_FN_ATTRS
9193_mm512_set_epi64 (long long __A, long long __B, long long __C,
9194 long long __D, long long __E, long long __F,
9195 long long __G, long long __H)
9196{
9197 return __extension__ (__m512i) (__v8di)
9198 { __H, __G, __F, __E, __D, __C, __B, __A };
9199}
9200
/* Reversed-argument form of _mm512_set_epi64: e0 is given first and e7 last;
 * the arguments are forwarded to _mm512_set_epi64 in the opposite order. */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)  \
  _mm512_set_epi64((e7),(e6),(e5),(e4),             \
                   (e3),(e2),(e1),(e0))
Michael Zuckerman178113e2016-05-19 12:07:49 +00009203
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009204static __inline__ __m512d __DEFAULT_FN_ATTRS
9205_mm512_set_pd (double __A, double __B, double __C, double __D,
9206 double __E, double __F, double __G, double __H)
9207{
9208 return __extension__ (__m512d)
9209 { __H, __G, __F, __E, __D, __C, __B, __A };
9210}
9211
/* Reversed-argument form of _mm512_set_pd: e0 is given first and e7 last;
 * the arguments are forwarded to _mm512_set_pd in the opposite order. */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)  \
  _mm512_set_pd((e7),(e6),(e5),(e4),             \
                (e3),(e2),(e1),(e0))
Michael Zuckerman178113e2016-05-19 12:07:49 +00009214
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009215static __inline__ __m512 __DEFAULT_FN_ATTRS
9216_mm512_set_ps (float __A, float __B, float __C, float __D,
9217 float __E, float __F, float __G, float __H,
9218 float __I, float __J, float __K, float __L,
9219 float __M, float __N, float __O, float __P)
9220{
9221 return __extension__ (__m512)
9222 { __P, __O, __N, __M, __L, __K, __J, __I,
9223 __H, __G, __F, __E, __D, __C, __B, __A };
9224}
9225
/* Reversed-argument form of _mm512_set_ps: e0 is given first and e15 last;
 * the arguments are forwarded to _mm512_set_ps in the opposite order. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,                 \
                       e8,e9,e10,e11,e12,e13,e14,e15)           \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),  \
                (e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
Michael Zuckerman178113e2016-05-19 12:07:49 +00009229
Asaf Badouh13633282016-07-05 12:24:14 +00009230static __inline__ __m512 __DEFAULT_FN_ATTRS
Asaf Badouh356bb762016-08-21 07:56:47 +00009231_mm512_abs_ps(__m512 __A)
Asaf Badouh13633282016-07-05 12:24:14 +00009232{
Asaf Badouh356bb762016-08-21 07:56:47 +00009233 return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
Asaf Badouh13633282016-07-05 12:24:14 +00009234}
9235
9236static __inline__ __m512 __DEFAULT_FN_ATTRS
Asaf Badouh356bb762016-08-21 07:56:47 +00009237_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
Asaf Badouh13633282016-07-05 12:24:14 +00009238{
Asaf Badouh356bb762016-08-21 07:56:47 +00009239 return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
Asaf Badouh13633282016-07-05 12:24:14 +00009240}
9241
9242static __inline__ __m512d __DEFAULT_FN_ATTRS
Asaf Badouh356bb762016-08-21 07:56:47 +00009243_mm512_abs_pd(__m512d __A)
Asaf Badouh13633282016-07-05 12:24:14 +00009244{
Asaf Badouh356bb762016-08-21 07:56:47 +00009245 return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
Asaf Badouh13633282016-07-05 12:24:14 +00009246}
9247
9248static __inline__ __m512d __DEFAULT_FN_ATTRS
Asaf Badouh356bb762016-08-21 07:56:47 +00009249_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
Asaf Badouh13633282016-07-05 12:24:14 +00009250{
Asaf Badouh356bb762016-08-21 07:56:47 +00009251 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
Asaf Badouh13633282016-07-05 12:24:14 +00009252}
9253
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
 * outputs. This class of vector operation forms the basis of many scientific
 * computations. In vector-reduction arithmetic, the evaluation is
 * independent of the order of the input elements of V.
 *
 * The bisection method is used. At each step, the vector produced by the
 * previous step is split in half and the operation is performed on its two
 * halves. This takes log2(n) steps, where n is the number of elements in the
 * vector.
 */
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009263
Craig Topperddfe69c2018-06-21 16:41:28 +00009264#define _mm512_mask_reduce_operator(op) \
9265 __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \
9266 __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \
9267 __m256i __t3 = (__m256i)(__t1 op __t2); \
9268 __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \
9269 __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \
9270 __v2du __t6 = __t4 op __t5; \
9271 __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9272 __v2du __t8 = __t6 op __t7; \
9273 return __t8[0];
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009274
9275static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009276 _mm512_mask_reduce_operator(+);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009277}
9278
9279static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009280 _mm512_mask_reduce_operator(*);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009281}
9282
9283static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009284 _mm512_mask_reduce_operator(&);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009285}
9286
9287static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009288 _mm512_mask_reduce_operator(|);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009289}
9290
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009291static __inline__ long long __DEFAULT_FN_ATTRS
9292_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009293 __W = _mm512_maskz_mov_epi64(__M, __W);
9294 _mm512_mask_reduce_operator(+);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009295}
9296
9297static __inline__ long long __DEFAULT_FN_ATTRS
9298_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009299 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
9300 _mm512_mask_reduce_operator(*);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009301}
9302
9303static __inline__ long long __DEFAULT_FN_ATTRS
9304_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009305 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W);
9306 _mm512_mask_reduce_operator(&);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009307}
9308
9309static __inline__ long long __DEFAULT_FN_ATTRS
9310_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009311 __W = _mm512_maskz_mov_epi64(__M, __W);
9312 _mm512_mask_reduce_operator(|);
9313}
9314#undef _mm512_mask_reduce_operator
9315
9316#define _mm512_mask_reduce_operator(op) \
9317 __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \
9318 __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \
9319 __m256d __t3 = __t1 op __t2; \
9320 __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
9321 __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
9322 __m128d __t6 = __t4 op __t5; \
9323 __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9324 __m128d __t8 = __t6 op __t7; \
9325 return __t8[0];
9326
9327static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
9328 _mm512_mask_reduce_operator(+);
9329}
9330
9331static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
9332 _mm512_mask_reduce_operator(*);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009333}
9334
9335static __inline__ double __DEFAULT_FN_ATTRS
9336_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009337 __W = _mm512_maskz_mov_pd(__M, __W);
9338 _mm512_mask_reduce_operator(+);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009339}
9340
9341static __inline__ double __DEFAULT_FN_ATTRS
9342_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009343 __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
9344 _mm512_mask_reduce_operator(*);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009345}
Craig Topperddfe69c2018-06-21 16:41:28 +00009346#undef _mm512_mask_reduce_operator
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009347
Craig Topperddfe69c2018-06-21 16:41:28 +00009348#define _mm512_mask_reduce_operator(op) \
9349 __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \
9350 __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \
9351 __m256i __t3 = (__m256i)(__t1 op __t2); \
9352 __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \
9353 __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \
9354 __v4su __t6 = __t4 op __t5; \
9355 __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9356 __v4su __t8 = __t6 op __t7; \
9357 __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9358 __v4su __t10 = __t8 op __t9; \
9359 return __t10[0];
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009360
9361static __inline__ int __DEFAULT_FN_ATTRS
9362_mm512_reduce_add_epi32(__m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009363 _mm512_mask_reduce_operator(+);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009364}
9365
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009366static __inline__ int __DEFAULT_FN_ATTRS
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009367_mm512_reduce_mul_epi32(__m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009368 _mm512_mask_reduce_operator(*);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009369}
9370
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009371static __inline__ int __DEFAULT_FN_ATTRS
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009372_mm512_reduce_and_epi32(__m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009373 _mm512_mask_reduce_operator(&);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009374}
9375
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009376static __inline__ int __DEFAULT_FN_ATTRS
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009377_mm512_reduce_or_epi32(__m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009378 _mm512_mask_reduce_operator(|);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009379}
9380
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009381static __inline__ int __DEFAULT_FN_ATTRS
9382_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009383 __W = _mm512_maskz_mov_epi32(__M, __W);
9384 _mm512_mask_reduce_operator(+);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009385}
9386
9387static __inline__ int __DEFAULT_FN_ATTRS
9388_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009389 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9390 _mm512_mask_reduce_operator(*);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009391}
9392
9393static __inline__ int __DEFAULT_FN_ATTRS
9394_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009395 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W);
9396 _mm512_mask_reduce_operator(&);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009397}
9398
9399static __inline__ int __DEFAULT_FN_ATTRS
9400_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009401 __W = _mm512_maskz_mov_epi32(__M, __W);
9402 _mm512_mask_reduce_operator(|);
9403}
9404#undef _mm512_mask_reduce_operator
9405
9406#define _mm512_mask_reduce_operator(op) \
9407 __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \
9408 __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \
9409 __m256 __t3 = __t1 op __t2; \
9410 __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
9411 __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
9412 __m128 __t6 = __t4 op __t5; \
9413 __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9414 __m128 __t8 = __t6 op __t7; \
9415 __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9416 __m128 __t10 = __t8 op __t9; \
9417 return __t10[0];
9418
9419static __inline__ float __DEFAULT_FN_ATTRS
9420_mm512_reduce_add_ps(__m512 __W) {
9421 _mm512_mask_reduce_operator(+);
9422}
9423
9424static __inline__ float __DEFAULT_FN_ATTRS
9425_mm512_reduce_mul_ps(__m512 __W) {
9426 _mm512_mask_reduce_operator(*);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009427}
9428
9429static __inline__ float __DEFAULT_FN_ATTRS
9430_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009431 __W = _mm512_maskz_mov_ps(__M, __W);
9432 _mm512_mask_reduce_operator(+);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009433}
9434
9435static __inline__ float __DEFAULT_FN_ATTRS
9436_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
Craig Topperddfe69c2018-06-21 16:41:28 +00009437 __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9438 _mm512_mask_reduce_operator(*);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009439}
Craig Topperddfe69c2018-06-21 16:41:28 +00009440#undef _mm512_mask_reduce_operator
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009441
Craig Topper61495b32018-06-19 21:00:30 +00009442#define _mm512_mask_reduce_operator(op) \
9443 __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \
9444 __m512i __t2 = _mm512_##op(__V, __t1); \
9445 __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \
9446 __m512i __t4 = _mm512_##op(__t2, __t3); \
9447 __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \
9448 __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \
9449 return __t6[0];
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009450
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009451static __inline__ long long __DEFAULT_FN_ATTRS
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009452_mm512_reduce_max_epi64(__m512i __V) {
Craig Topper61495b32018-06-19 21:00:30 +00009453 _mm512_mask_reduce_operator(max_epi64);
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009454}
9455
9456static __inline__ unsigned long long __DEFAULT_FN_ATTRS
9457_mm512_reduce_max_epu64(__m512i __V) {
Craig Topper61495b32018-06-19 21:00:30 +00009458 _mm512_mask_reduce_operator(max_epu64);
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009459}
9460
Craig Topper61495b32018-06-19 21:00:30 +00009461static __inline__ long long __DEFAULT_FN_ATTRS
9462_mm512_reduce_min_epi64(__m512i __V) {
9463 _mm512_mask_reduce_operator(min_epi64);
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009464}
9465
9466static __inline__ unsigned long long __DEFAULT_FN_ATTRS
9467_mm512_reduce_min_epu64(__m512i __V) {
Craig Topper61495b32018-06-19 21:00:30 +00009468 _mm512_mask_reduce_operator(min_epu64);
Craig Topper873afd02018-06-19 19:13:54 +00009469}
9470
Craig Topper79b13a02018-06-19 19:37:07 +00009471static __inline__ long long __DEFAULT_FN_ATTRS
9472_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
Craig Topper61495b32018-06-19 21:00:30 +00009473 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9474 _mm512_mask_reduce_operator(max_epi64);
Craig Topper79b13a02018-06-19 19:37:07 +00009475}
9476
9477static __inline__ unsigned long long __DEFAULT_FN_ATTRS
9478_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
Craig Topper61495b32018-06-19 21:00:30 +00009479 __V = _mm512_maskz_mov_epi64(__M, __V);
9480 _mm512_mask_reduce_operator(max_epu64);
Craig Topper79b13a02018-06-19 19:37:07 +00009481}
9482
9483static __inline__ long long __DEFAULT_FN_ATTRS
9484_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
Craig Topper61495b32018-06-19 21:00:30 +00009485 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9486 _mm512_mask_reduce_operator(min_epi64);
Craig Topper79b13a02018-06-19 19:37:07 +00009487}
9488
9489static __inline__ unsigned long long __DEFAULT_FN_ATTRS
9490_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
Craig Topper61495b32018-06-19 21:00:30 +00009491 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V);
9492 _mm512_mask_reduce_operator(min_epu64);
Craig Topper873afd02018-06-19 19:13:54 +00009493}
Craig Topper61495b32018-06-19 21:00:30 +00009494#undef _mm512_mask_reduce_operator
Craig Topper873afd02018-06-19 19:13:54 +00009495
Craig Topper61495b32018-06-19 21:00:30 +00009496#define _mm512_mask_reduce_operator(op) \
9497 __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \
9498 __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \
9499 __m256i __t3 = _mm256_##op(__t1, __t2); \
9500 __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \
9501 __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \
9502 __m128i __t6 = _mm_##op(__t4, __t5); \
9503 __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \
9504 __m128i __t8 = _mm_##op(__t6, __t7); \
9505 __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \
9506 __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \
9507 return __t10[0];
Craig Topper873afd02018-06-19 19:13:54 +00009508
Craig Topper61495b32018-06-19 21:00:30 +00009509static __inline__ int __DEFAULT_FN_ATTRS
9510_mm512_reduce_max_epi32(__m512i __V) {
9511 _mm512_mask_reduce_operator(max_epi32);
Craig Topper873afd02018-06-19 19:13:54 +00009512}
9513
Craig Topper79b13a02018-06-19 19:37:07 +00009514static __inline__ unsigned int __DEFAULT_FN_ATTRS
Craig Topper61495b32018-06-19 21:00:30 +00009515_mm512_reduce_max_epu32(__m512i __V) {
9516 _mm512_mask_reduce_operator(max_epu32);
Craig Topper79b13a02018-06-19 19:37:07 +00009517}
9518
Craig Topper61495b32018-06-19 21:00:30 +00009519static __inline__ int __DEFAULT_FN_ATTRS
9520_mm512_reduce_min_epi32(__m512i __V) {
9521 _mm512_mask_reduce_operator(min_epi32);
Craig Topper79b13a02018-06-19 19:37:07 +00009522}
9523
9524static __inline__ unsigned int __DEFAULT_FN_ATTRS
Craig Topper61495b32018-06-19 21:00:30 +00009525_mm512_reduce_min_epu32(__m512i __V) {
9526 _mm512_mask_reduce_operator(min_epu32);
Craig Topper79b13a02018-06-19 19:37:07 +00009527}
9528
Craig Topper79b13a02018-06-19 19:37:07 +00009529static __inline__ int __DEFAULT_FN_ATTRS
9530_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
Craig Topper61495b32018-06-19 21:00:30 +00009531 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9532 _mm512_mask_reduce_operator(max_epi32);
Craig Topper79b13a02018-06-19 19:37:07 +00009533}
9534
9535static __inline__ unsigned int __DEFAULT_FN_ATTRS
9536_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
Craig Topper61495b32018-06-19 21:00:30 +00009537 __V = _mm512_maskz_mov_epi32(__M, __V);
9538 _mm512_mask_reduce_operator(max_epu32);
Craig Topper79b13a02018-06-19 19:37:07 +00009539}
9540
9541static __inline__ int __DEFAULT_FN_ATTRS
9542_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
Craig Topper61495b32018-06-19 21:00:30 +00009543 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9544 _mm512_mask_reduce_operator(min_epi32);
Craig Topper79b13a02018-06-19 19:37:07 +00009545}
9546
9547static __inline__ unsigned int __DEFAULT_FN_ATTRS
9548_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
Craig Topper61495b32018-06-19 21:00:30 +00009549 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V);
9550 _mm512_mask_reduce_operator(min_epu32);
9551}
Craig Topperd3346152018-06-20 00:31:39 +00009552#undef _mm512_mask_reduce_operator
Craig Topper61495b32018-06-19 21:00:30 +00009553
9554#define _mm512_mask_reduce_operator(op) \
9555 __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \
9556 __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \
9557 __m256d __t3 = _mm256_##op(__t1, __t2); \
9558 __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
9559 __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
9560 __m128d __t6 = _mm_##op(__t4, __t5); \
9561 __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9562 __m128d __t8 = _mm_##op(__t6, __t7); \
9563 return __t8[0];
9564
9565static __inline__ double __DEFAULT_FN_ATTRS
9566_mm512_reduce_max_pd(__m512d __V) {
9567 _mm512_mask_reduce_operator(max_pd);
9568}
9569
9570static __inline__ double __DEFAULT_FN_ATTRS
9571_mm512_reduce_min_pd(__m512d __V) {
9572 _mm512_mask_reduce_operator(min_pd);
9573}
9574
9575static __inline__ double __DEFAULT_FN_ATTRS
9576_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
9577 __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9578 _mm512_mask_reduce_operator(max_pd);
9579}
9580
9581static __inline__ double __DEFAULT_FN_ATTRS
9582_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
9583 __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9584 _mm512_mask_reduce_operator(min_pd);
9585}
9586#undef _mm512_mask_reduce_operator
9587
9588#define _mm512_mask_reduce_operator(op) \
9589 __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \
9590 __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \
9591 __m256 __t3 = _mm256_##op(__t1, __t2); \
9592 __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
9593 __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
9594 __m128 __t6 = _mm_##op(__t4, __t5); \
9595 __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9596 __m128 __t8 = _mm_##op(__t6, __t7); \
9597 __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9598 __m128 __t10 = _mm_##op(__t8, __t9); \
9599 return __t10[0];
9600
9601static __inline__ float __DEFAULT_FN_ATTRS
9602_mm512_reduce_max_ps(__m512 __V) {
9603 _mm512_mask_reduce_operator(max_ps);
9604}
9605
9606static __inline__ float __DEFAULT_FN_ATTRS
9607_mm512_reduce_min_ps(__m512 __V) {
9608 _mm512_mask_reduce_operator(min_ps);
9609}
9610
9611static __inline__ float __DEFAULT_FN_ATTRS
9612_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
9613 __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9614 _mm512_mask_reduce_operator(max_ps);
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009615}
9616
9617static __inline__ float __DEFAULT_FN_ATTRS
9618_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
Craig Topper61495b32018-06-19 21:00:30 +00009619 __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9620 _mm512_mask_reduce_operator(min_ps);
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009621}
Craig Topper61495b32018-06-19 21:00:30 +00009622#undef _mm512_mask_reduce_operator
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009623
Michael Kupersteine45af542015-06-30 13:36:19 +00009624#undef __DEFAULT_FN_ATTRS
Eric Christopher4d1851682015-06-17 07:09:20 +00009625
Craig Topper73d1d402018-05-30 22:33:21 +00009626#endif /* __AVX512FINTRIN_H */