blob: 906556a39e210dbf946679eb06ebb2f96b38ce0c [file] [log] [blame]
Craig Topper991d4992015-11-03 06:16:31 +00001/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00002 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Element-typed views of a 64-byte (512-bit) vector. */
typedef char      __v64qi __attribute__((__vector_size__(64)));
typedef short     __v32hi __attribute__((__vector_size__(64)));
typedef double    __v8df  __attribute__((__vector_size__(64)));
typedef float     __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di  __attribute__((__vector_size__(64)));
typedef int       __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char      __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short     __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du  __attribute__((__vector_size__(64)));
typedef unsigned int       __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types: float, double and integer flavours. */
typedef float     __m512  __attribute__((__vector_size__(64)));
typedef double    __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Write-mask types: one bit per element for 8- and 16-element vectors. */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
49
/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
56
/* Constants for integer comparison predicates */
typedef enum {
  _MM_CMPINT_EQ     = 0, /* Equal */
  _MM_CMPINT_LT     = 1, /* Less than */
  _MM_CMPINT_LE     = 2, /* Less than or Equal */
  _MM_CMPINT_UNUSED = 3,
  _MM_CMPINT_NE     = 4, /* Not Equal */
  _MM_CMPINT_NLT    = 5, /* Not Less than */
  _MM_CMPINT_NLE    = 6  /* Not Less than or Equal */
} _MM_CMPINT_ENUM;

/* Positive-form aliases for the two negated predicates. */
#define _MM_CMPINT_GE _MM_CMPINT_NLT /* Greater than or Equal */
#define _MM_CMPINT_GT _MM_CMPINT_NLE /* Greater than */
69
/* Permutation selectors. Each name is four letters over {A,B,C,D}; reading
 * A=0, B=1, C=2, D=3, the value is the four letters taken as base-4 digits
 * with the leftmost letter most significant (e.g. DCBA = 0xE4). */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
159
/* Mantissa normalization intervals. */
typedef enum
{
  _MM_MANT_NORM_1_2     = 0, /* interval [1, 2) */
  _MM_MANT_NORM_p5_2    = 1, /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1    = 2, /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3  /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
167
/* Mantissa sign-handling selectors. */
typedef enum
{
  _MM_MANT_SIGN_src  = 0, /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1, /* sign = 0 */
  _MM_MANT_SIGN_nan  = 2  /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
174
/* Define the default attributes for the functions in this file:
 * always inlined, no debug info, compiled for the "avx512f" target. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
Eric Christopher4d1851682015-06-17 07:09:20 +0000177
/* Create vectors with repeated elements */
179
Michael Kupersteine45af542015-06-30 13:36:19 +0000180static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000181_mm512_setzero_si512(void)
182{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000183 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
Adam Nemet0d5bb552014-07-28 17:14:40 +0000184}
185
Michael Zuckermanf36f6eb2016-06-05 15:12:52 +0000186#define _mm512_setzero_epi32 _mm512_setzero_si512
187
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000188static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +0000189_mm512_undefined_pd(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000190{
191 return (__m512d)__builtin_ia32_undef512();
192}
193
194static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +0000195_mm512_undefined(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000196{
197 return (__m512)__builtin_ia32_undef512();
198}
199
200static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +0000201_mm512_undefined_ps(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000202{
203 return (__m512)__builtin_ia32_undef512();
204}
205
206static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper3a0c7262016-06-09 05:14:28 +0000207_mm512_undefined_epi32(void)
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000208{
209 return (__m512i)__builtin_ia32_undef512();
210}
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000211
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000212static __inline__ __m512i __DEFAULT_FN_ATTRS
213_mm512_broadcastd_epi32 (__m128i __A)
214{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000215 return (__m512i)__builtin_shufflevector((__v4si) __A,
216 (__v4si)_mm_undefined_si128(),
217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000218}
219
220static __inline__ __m512i __DEFAULT_FN_ATTRS
221_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
222{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000223 return (__m512i)__builtin_ia32_selectd_512(__M,
224 (__v16si) _mm512_broadcastd_epi32(__A),
225 (__v16si) __O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000226}
227
228static __inline__ __m512i __DEFAULT_FN_ATTRS
229_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
230{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000231 return (__m512i)__builtin_ia32_selectd_512(__M,
232 (__v16si) _mm512_broadcastd_epi32(__A),
233 (__v16si) _mm512_setzero_si512());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000234}
235
236static __inline__ __m512i __DEFAULT_FN_ATTRS
237_mm512_broadcastq_epi64 (__m128i __A)
238{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000239 return (__m512i)__builtin_shufflevector((__v2di) __A,
240 (__v2di) _mm_undefined_si128(),
241 0, 0, 0, 0, 0, 0, 0, 0);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000242}
243
244static __inline__ __m512i __DEFAULT_FN_ATTRS
245_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
246{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000247 return (__m512i)__builtin_ia32_selectq_512(__M,
248 (__v8di) _mm512_broadcastq_epi64(__A),
249 (__v8di) __O);
250
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000251}
252
253static __inline__ __m512i __DEFAULT_FN_ATTRS
254_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
255{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000256 return (__m512i)__builtin_ia32_selectq_512(__M,
257 (__v8di) _mm512_broadcastq_epi64(__A),
258 (__v8di) _mm512_setzero_si512());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +0000259}
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000260
Adam Nemet0d5bb552014-07-28 17:14:40 +0000261
Michael Kupersteine45af542015-06-30 13:36:19 +0000262static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +0000263_mm512_setzero_ps(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000264{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000265 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
266 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000267}
Michael Zuckermanf36f6eb2016-06-05 15:12:52 +0000268
269#define _mm512_setzero _mm512_setzero_ps
270
Michael Kupersteine45af542015-06-30 13:36:19 +0000271static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +0000272_mm512_setzero_pd(void)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000273{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000274 return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000275}
Adam Nemet0d5bb552014-07-28 17:14:40 +0000276
Michael Kupersteine45af542015-06-30 13:36:19 +0000277static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000278_mm512_set1_ps(float __w)
279{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000280 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
281 __w, __w, __w, __w, __w, __w, __w, __w };
Adam Nemetf42e7a22014-07-30 16:51:22 +0000282}
283
Michael Kupersteine45af542015-06-30 13:36:19 +0000284static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000285_mm512_set1_pd(double __w)
286{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000287 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
Adam Nemetf42e7a22014-07-30 16:51:22 +0000288}
289
Michael Kupersteine45af542015-06-30 13:36:19 +0000290static __inline __m512i __DEFAULT_FN_ATTRS
Michael Zuckerman6f08ceb2016-05-26 06:54:52 +0000291_mm512_set1_epi8(char __w)
292{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000293 return __extension__ (__m512i)(__v64qi){
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w,
297 __w, __w, __w, __w, __w, __w, __w, __w,
298 __w, __w, __w, __w, __w, __w, __w, __w,
299 __w, __w, __w, __w, __w, __w, __w, __w,
300 __w, __w, __w, __w, __w, __w, __w, __w,
301 __w, __w, __w, __w, __w, __w, __w, __w };
Michael Zuckerman6f08ceb2016-05-26 06:54:52 +0000302}
303
304static __inline __m512i __DEFAULT_FN_ATTRS
305_mm512_set1_epi16(short __w)
306{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000307 return __extension__ (__m512i)(__v32hi){
308 __w, __w, __w, __w, __w, __w, __w, __w,
309 __w, __w, __w, __w, __w, __w, __w, __w,
310 __w, __w, __w, __w, __w, __w, __w, __w,
311 __w, __w, __w, __w, __w, __w, __w, __w };
Michael Zuckerman6f08ceb2016-05-26 06:54:52 +0000312}
313
314static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000315_mm512_set1_epi32(int __s)
316{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000317 return __extension__ (__m512i)(__v16si){
318 __s, __s, __s, __s, __s, __s, __s, __s,
319 __s, __s, __s, __s, __s, __s, __s, __s };
Adam Nemetf42e7a22014-07-30 16:51:22 +0000320}
321
Michael Kupersteine45af542015-06-30 13:36:19 +0000322static __inline __m512i __DEFAULT_FN_ATTRS
Jina Nahias3ad702a2017-09-19 11:00:27 +0000323_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
324{
325 return (__m512i)__builtin_ia32_selectd_512(__M,
326 (__v16si)_mm512_set1_epi32(__A),
327 (__v16si)_mm512_setzero_si512());
328}
329
330static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemetf42e7a22014-07-30 16:51:22 +0000331_mm512_set1_epi64(long long __d)
332{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000333 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
Adam Nemetf42e7a22014-07-30 16:51:22 +0000334}
335
Jina Nahias3ad702a2017-09-19 11:00:27 +0000336static __inline __m512i __DEFAULT_FN_ATTRS
337_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
338{
339 return (__m512i)__builtin_ia32_selectq_512(__M,
340 (__v8di)_mm512_set1_epi64(__A),
341 (__v8di)_mm512_setzero_si512());
342}
Jina Nahias3ad702a2017-09-19 11:00:27 +0000343
Michael Kupersteine45af542015-06-30 13:36:19 +0000344static __inline__ __m512 __DEFAULT_FN_ATTRS
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000345_mm512_broadcastss_ps(__m128 __A)
Adam Nemet4abc07c2014-08-13 00:29:01 +0000346{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000347 return (__m512)__builtin_shufflevector((__v4sf) __A,
348 (__v4sf)_mm_undefined_ps(),
349 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
Adam Nemet4abc07c2014-08-13 00:29:01 +0000350}
351
Michael Zuckerman95721ac2016-06-05 15:43:30 +0000352static __inline __m512i __DEFAULT_FN_ATTRS
353_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
354{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000355 return __extension__ (__m512i)(__v16si)
Michael Zuckerman95721ac2016-06-05 15:43:30 +0000356 { __D, __C, __B, __A, __D, __C, __B, __A,
357 __D, __C, __B, __A, __D, __C, __B, __A };
358}
359
360static __inline __m512i __DEFAULT_FN_ATTRS
361_mm512_set4_epi64 (long long __A, long long __B, long long __C,
362 long long __D)
363{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000364 return __extension__ (__m512i) (__v8di)
Michael Zuckerman95721ac2016-06-05 15:43:30 +0000365 { __D, __C, __B, __A, __D, __C, __B, __A };
366}
367
368static __inline __m512d __DEFAULT_FN_ATTRS
369_mm512_set4_pd (double __A, double __B, double __C, double __D)
370{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000371 return __extension__ (__m512d)
Michael Zuckerman95721ac2016-06-05 15:43:30 +0000372 { __D, __C, __B, __A, __D, __C, __B, __A };
373}
374
375static __inline __m512 __DEFAULT_FN_ATTRS
376_mm512_set4_ps (float __A, float __B, float __C, float __D)
377{
Craig Topper63ec0ea2018-05-30 21:08:27 +0000378 return __extension__ (__m512)
Michael Zuckerman95721ac2016-06-05 15:43:30 +0000379 { __D, __C, __B, __A, __D, __C, __B, __A,
380 __D, __C, __B, __A, __D, __C, __B, __A };
381}
382
/* "Reversed" set4 variants: forward to the matching _mm512_set4_* function
 * with the four arguments in the opposite order. */
#define _mm512_setr4_epi32(e0, e1, e2, e3) \
  _mm512_set4_epi32((e3), (e2), (e1), (e0))

#define _mm512_setr4_epi64(e0, e1, e2, e3) \
  _mm512_set4_epi64((e3), (e2), (e1), (e0))

#define _mm512_setr4_pd(e0, e1, e2, e3) \
  _mm512_set4_pd((e3), (e2), (e1), (e0))

#define _mm512_setr4_ps(e0, e1, e2, e3) \
  _mm512_set4_ps((e3), (e2), (e1), (e0))
Michael Zuckerman95721ac2016-06-05 15:43:30 +0000394
Michael Kupersteine45af542015-06-30 13:36:19 +0000395static __inline__ __m512d __DEFAULT_FN_ATTRS
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000396_mm512_broadcastsd_pd(__m128d __A)
Adam Nemet4abc07c2014-08-13 00:29:01 +0000397{
Simon Pilgrimf5a88372016-07-05 12:59:33 +0000398 return (__m512d)__builtin_shufflevector((__v2df) __A,
399 (__v2df) _mm_undefined_pd(),
400 0, 0, 0, 0, 0, 0, 0, 0);
Adam Nemet4abc07c2014-08-13 00:29:01 +0000401}
402
/* Cast between vector types */
404
Michael Kupersteine45af542015-06-30 13:36:19 +0000405static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000406_mm512_castpd256_pd512(__m256d __a)
407{
408 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
409}
410
Michael Kupersteine45af542015-06-30 13:36:19 +0000411static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000412_mm512_castps256_ps512(__m256 __a)
413{
414 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
415 -1, -1, -1, -1, -1, -1, -1, -1);
416}
417
Michael Kupersteine45af542015-06-30 13:36:19 +0000418static __inline __m128d __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000419_mm512_castpd512_pd128(__m512d __a)
420{
421 return __builtin_shufflevector(__a, __a, 0, 1);
422}
423
Michael Zuckermand5cc6cd2016-05-25 14:04:21 +0000424static __inline __m256d __DEFAULT_FN_ATTRS
425_mm512_castpd512_pd256 (__m512d __A)
426{
427 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
428}
429
Michael Kupersteine45af542015-06-30 13:36:19 +0000430static __inline __m128 __DEFAULT_FN_ATTRS
Adam Nemetc871ff92014-07-30 16:51:24 +0000431_mm512_castps512_ps128(__m512 __a)
432{
433 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
434}
435
Michael Zuckermand5cc6cd2016-05-25 14:04:21 +0000436static __inline __m256 __DEFAULT_FN_ATTRS
437_mm512_castps512_ps256 (__m512 __A)
438{
439 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
440}
441
442static __inline __m512 __DEFAULT_FN_ATTRS
443_mm512_castpd_ps (__m512d __A)
444{
445 return (__m512) (__A);
446}
447
448static __inline __m512i __DEFAULT_FN_ATTRS
449_mm512_castpd_si512 (__m512d __A)
450{
451 return (__m512i) (__A);
452}
Michael Zuckermanc6677032016-05-03 14:26:52 +0000453
454static __inline__ __m512d __DEFAULT_FN_ATTRS
455_mm512_castpd128_pd512 (__m128d __A)
456{
457 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
458}
459
Michael Zuckermand5cc6cd2016-05-25 14:04:21 +0000460static __inline __m512d __DEFAULT_FN_ATTRS
461_mm512_castps_pd (__m512 __A)
462{
463 return (__m512d) (__A);
464}
465
466static __inline __m512i __DEFAULT_FN_ATTRS
467_mm512_castps_si512 (__m512 __A)
468{
469 return (__m512i) (__A);
470}
471
Michael Zuckermanc6677032016-05-03 14:26:52 +0000472static __inline__ __m512 __DEFAULT_FN_ATTRS
473_mm512_castps128_ps512 (__m128 __A)
474{
475 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
476}
477
478static __inline__ __m512i __DEFAULT_FN_ATTRS
479_mm512_castsi128_si512 (__m128i __A)
480{
481 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
482}
483
484static __inline__ __m512i __DEFAULT_FN_ATTRS
485_mm512_castsi256_si512 (__m256i __A)
486{
487 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
488}
489
Michael Zuckermand5cc6cd2016-05-25 14:04:21 +0000490static __inline __m512 __DEFAULT_FN_ATTRS
491_mm512_castsi512_ps (__m512i __A)
492{
493 return (__m512) (__A);
494}
495
496static __inline __m512d __DEFAULT_FN_ATTRS
497_mm512_castsi512_pd (__m512i __A)
498{
499 return (__m512d) (__A);
500}
501
502static __inline __m128i __DEFAULT_FN_ATTRS
503_mm512_castsi512_si128 (__m512i __A)
504{
505 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
506}
507
Michael Zuckerman22c47e62016-05-26 14:32:11 +0000508static __inline __m256i __DEFAULT_FN_ATTRS
509_mm512_castsi512_si256 (__m512i __A)
510{
511 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
512}
513
Ayman Musae60a41c2016-11-08 12:00:30 +0000514static __inline__ __mmask16 __DEFAULT_FN_ATTRS
515_mm512_int2mask(int __a)
516{
517 return (__mmask16)__a;
518}
519
520static __inline__ int __DEFAULT_FN_ATTRS
521_mm512_mask2int(__mmask16 __a)
522{
523 return (int)__a;
524}
525
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000526/// Constructs a 512-bit floating-point vector of [8 x double] from a
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000527/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
528/// contain the value of the source vector. The upper 384 bits are set
529/// to zero.
530///
531/// \headerfile <x86intrin.h>
532///
533/// This intrinsic has no corresponding instruction.
534///
535/// \param __a
536/// A 128-bit vector of [2 x double].
537/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
538/// contain the value of the parameter. The upper 384 bits are set to zero.
539static __inline __m512d __DEFAULT_FN_ATTRS
540_mm512_zextpd128_pd512(__m128d __a)
541{
542 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
543}
544
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000545/// Constructs a 512-bit floating-point vector of [8 x double] from a
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000546/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
547/// contain the value of the source vector. The upper 256 bits are set
548/// to zero.
549///
550/// \headerfile <x86intrin.h>
551///
552/// This intrinsic has no corresponding instruction.
553///
554/// \param __a
555/// A 256-bit vector of [4 x double].
556/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
557/// contain the value of the parameter. The upper 256 bits are set to zero.
558static __inline __m512d __DEFAULT_FN_ATTRS
559_mm512_zextpd256_pd512(__m256d __a)
560{
561 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
562}
563
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000564/// Constructs a 512-bit floating-point vector of [16 x float] from a
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000565/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
566/// the value of the source vector. The upper 384 bits are set to zero.
567///
568/// \headerfile <x86intrin.h>
569///
570/// This intrinsic has no corresponding instruction.
571///
572/// \param __a
573/// A 128-bit vector of [4 x float].
574/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
575/// contain the value of the parameter. The upper 384 bits are set to zero.
576static __inline __m512 __DEFAULT_FN_ATTRS
577_mm512_zextps128_ps512(__m128 __a)
578{
579 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
580}
581
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000582/// Constructs a 512-bit floating-point vector of [16 x float] from a
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000583/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
584/// the value of the source vector. The upper 256 bits are set to zero.
585///
586/// \headerfile <x86intrin.h>
587///
588/// This intrinsic has no corresponding instruction.
589///
590/// \param __a
591/// A 256-bit vector of [8 x float].
592/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
593/// contain the value of the parameter. The upper 256 bits are set to zero.
594static __inline __m512 __DEFAULT_FN_ATTRS
595_mm512_zextps256_ps512(__m256 __a)
596{
597 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
598}
599
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000600/// Constructs a 512-bit integer vector from a 128-bit integer vector.
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000601/// The lower 128 bits contain the value of the source vector. The upper
602/// 384 bits are set to zero.
603///
604/// \headerfile <x86intrin.h>
605///
606/// This intrinsic has no corresponding instruction.
607///
608/// \param __a
609/// A 128-bit integer vector.
610/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
611/// the parameter. The upper 384 bits are set to zero.
612static __inline __m512i __DEFAULT_FN_ATTRS
613_mm512_zextsi128_si512(__m128i __a)
614{
615 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
616}
617
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000618/// Constructs a 512-bit integer vector from a 256-bit integer vector.
Simon Pilgrim96d02f52017-04-29 17:17:06 +0000619/// The lower 256 bits contain the value of the source vector. The upper
620/// 256 bits are set to zero.
621///
622/// \headerfile <x86intrin.h>
623///
624/// This intrinsic has no corresponding instruction.
625///
626/// \param __a
627/// A 256-bit integer vector.
628/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
629/// the parameter. The upper 256 bits are set to zero.
630static __inline __m512i __DEFAULT_FN_ATTRS
631_mm512_zextsi256_si512(__m256i __a)
632{
633 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
634}
635
/* Bitwise operators */
Michael Kupersteine45af542015-06-30 13:36:19 +0000637static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000638_mm512_and_epi32(__m512i __a, __m512i __b)
639{
Craig Topper6a77b622016-06-04 05:43:41 +0000640 return (__m512i)((__v16su)__a & (__v16su)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000641}
642
Michael Kupersteine45af542015-06-30 13:36:19 +0000643static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000644_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
645{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000646 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
Asaf Badouh13633282016-07-05 12:24:14 +0000647 (__v16si) _mm512_and_epi32(__a, __b),
648 (__v16si) __src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000649}
Asaf Badouh13633282016-07-05 12:24:14 +0000650
Michael Kupersteine45af542015-06-30 13:36:19 +0000651static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000652_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
653{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000654 return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
Asaf Badouh13633282016-07-05 12:24:14 +0000655 __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000656}
657
Michael Kupersteine45af542015-06-30 13:36:19 +0000658static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000659_mm512_and_epi64(__m512i __a, __m512i __b)
660{
Craig Topper6a77b622016-06-04 05:43:41 +0000661 return (__m512i)((__v8du)__a & (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000662}
663
Michael Kupersteine45af542015-06-30 13:36:19 +0000664static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000665_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
666{
Asaf Badouh13633282016-07-05 12:24:14 +0000667 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
668 (__v8di) _mm512_and_epi64(__a, __b),
669 (__v8di) __src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000670}
Asaf Badouh13633282016-07-05 12:24:14 +0000671
Michael Kupersteine45af542015-06-30 13:36:19 +0000672static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000673_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
674{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000675 return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
Asaf Badouh13633282016-07-05 12:24:14 +0000676 __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000677}
678
Michael Kupersteine45af542015-06-30 13:36:19 +0000679static __inline__ __m512i __DEFAULT_FN_ATTRS
Michael Zuckerman2cacc352016-05-18 15:25:53 +0000680_mm512_andnot_si512 (__m512i __A, __m512i __B)
681{
Craig Toppercbf39292018-05-31 01:24:40 +0000682 return (__m512i)(~(__v8du)__A & (__v8du)__B);
Michael Zuckerman2cacc352016-05-18 15:25:53 +0000683}
684
685static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000686_mm512_andnot_epi32 (__m512i __A, __m512i __B)
687{
Craig Toppercbf39292018-05-31 01:24:40 +0000688 return (__m512i)(~(__v16su)__A & (__v16su)__B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000689}
690
Michael Kupersteine45af542015-06-30 13:36:19 +0000691static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000692_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000693{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000694 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
695 (__v16si)_mm512_andnot_epi32(__A, __B),
696 (__v16si)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000697}
698
Michael Kupersteine45af542015-06-30 13:36:19 +0000699static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000700_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000701{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000702 return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
703 __U, __A, __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000704}
705
Michael Kupersteine45af542015-06-30 13:36:19 +0000706static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000707_mm512_andnot_epi64(__m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000708{
Craig Toppercbf39292018-05-31 01:24:40 +0000709 return (__m512i)(~(__v8du)__A & (__v8du)__B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000710}
711
Michael Kupersteine45af542015-06-30 13:36:19 +0000712static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000713_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000714{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000715 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
716 (__v8di)_mm512_andnot_epi64(__A, __B),
717 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000718}
719
Michael Kupersteine45af542015-06-30 13:36:19 +0000720static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper4d61a3c2016-07-11 06:14:18 +0000721_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000722{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000723 return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
724 __U, __A, __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000725}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000726
Michael Kupersteine45af542015-06-30 13:36:19 +0000727static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000728_mm512_or_epi32(__m512i __a, __m512i __b)
729{
Craig Topper6a77b622016-06-04 05:43:41 +0000730 return (__m512i)((__v16su)__a | (__v16su)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000731}
732
Michael Kupersteine45af542015-06-30 13:36:19 +0000733static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000734_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
735{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000736 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
737 (__v16si)_mm512_or_epi32(__a, __b),
738 (__v16si)__src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000739}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000740
Michael Kupersteine45af542015-06-30 13:36:19 +0000741static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000742_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
743{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000744 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000745}
746
Michael Kupersteine45af542015-06-30 13:36:19 +0000747static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000748_mm512_or_epi64(__m512i __a, __m512i __b)
749{
Craig Topper6a77b622016-06-04 05:43:41 +0000750 return (__m512i)((__v8du)__a | (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000751}
752
Michael Kupersteine45af542015-06-30 13:36:19 +0000753static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000754_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
755{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000756 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
757 (__v8di)_mm512_or_epi64(__a, __b),
758 (__v8di)__src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000759}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000760
Michael Kupersteine45af542015-06-30 13:36:19 +0000761static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000762_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
763{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000764 return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000765}
766
Michael Kupersteine45af542015-06-30 13:36:19 +0000767static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000768_mm512_xor_epi32(__m512i __a, __m512i __b)
769{
Craig Topper6a77b622016-06-04 05:43:41 +0000770 return (__m512i)((__v16su)__a ^ (__v16su)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000771}
772
Michael Kupersteine45af542015-06-30 13:36:19 +0000773static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000774_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
775{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000776 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
777 (__v16si)_mm512_xor_epi32(__a, __b),
778 (__v16si)__src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000779}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000780
Michael Kupersteine45af542015-06-30 13:36:19 +0000781static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000782_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
783{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000784 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000785}
786
Michael Kupersteine45af542015-06-30 13:36:19 +0000787static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000788_mm512_xor_epi64(__m512i __a, __m512i __b)
789{
Craig Topper6a77b622016-06-04 05:43:41 +0000790 return (__m512i)((__v8du)__a ^ (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000791}
792
Michael Kupersteine45af542015-06-30 13:36:19 +0000793static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000794_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
795{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000796 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
797 (__v8di)_mm512_xor_epi64(__a, __b),
798 (__v8di)__src);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000799}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000800
Michael Kupersteine45af542015-06-30 13:36:19 +0000801static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000802_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
803{
Craig Topper4d61a3c2016-07-11 06:14:18 +0000804 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000805}
806
Michael Kupersteine45af542015-06-30 13:36:19 +0000807static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000808_mm512_and_si512(__m512i __a, __m512i __b)
809{
Craig Topper6a77b622016-06-04 05:43:41 +0000810 return (__m512i)((__v8du)__a & (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000811}
812
Michael Kupersteine45af542015-06-30 13:36:19 +0000813static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000814_mm512_or_si512(__m512i __a, __m512i __b)
815{
Craig Topper6a77b622016-06-04 05:43:41 +0000816 return (__m512i)((__v8du)__a | (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000817}
818
Michael Kupersteine45af542015-06-30 13:36:19 +0000819static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000820_mm512_xor_si512(__m512i __a, __m512i __b)
821{
Craig Topper6a77b622016-06-04 05:43:41 +0000822 return (__m512i)((__v8du)__a ^ (__v8du)__b);
Elena Demikhovsky29da2fb2015-04-01 06:54:16 +0000823}
Craig Topper4d61a3c2016-07-11 06:14:18 +0000824
Adam Nemet0d5bb552014-07-28 17:14:40 +0000825/* Arithmetic */
826
Michael Kupersteine45af542015-06-30 13:36:19 +0000827static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000828_mm512_add_pd(__m512d __a, __m512d __b)
829{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000830 return (__m512d)((__v8df)__a + (__v8df)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000831}
832
Michael Kupersteine45af542015-06-30 13:36:19 +0000833static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000834_mm512_add_ps(__m512 __a, __m512 __b)
835{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000836 return (__m512)((__v16sf)__a + (__v16sf)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000837}
838
Michael Kupersteine45af542015-06-30 13:36:19 +0000839static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000840_mm512_mul_pd(__m512d __a, __m512d __b)
841{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000842 return (__m512d)((__v8df)__a * (__v8df)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000843}
844
Michael Kupersteine45af542015-06-30 13:36:19 +0000845static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000846_mm512_mul_ps(__m512 __a, __m512 __b)
847{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000848 return (__m512)((__v16sf)__a * (__v16sf)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000849}
850
Michael Kupersteine45af542015-06-30 13:36:19 +0000851static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000852_mm512_sub_pd(__m512d __a, __m512d __b)
853{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000854 return (__m512d)((__v8df)__a - (__v8df)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000855}
856
Michael Kupersteine45af542015-06-30 13:36:19 +0000857static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemeta3ebe622014-07-28 17:14:42 +0000858_mm512_sub_ps(__m512 __a, __m512 __b)
859{
Craig Topper9c6c85f2016-05-16 06:38:36 +0000860 return (__m512)((__v16sf)__a - (__v16sf)__b);
Adam Nemeta3ebe622014-07-28 17:14:42 +0000861}
862
Michael Kupersteine45af542015-06-30 13:36:19 +0000863static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000864_mm512_add_epi64 (__m512i __A, __m512i __B)
865{
Craig Topper6a77b622016-06-04 05:43:41 +0000866 return (__m512i) ((__v8du) __A + (__v8du) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000867}
868
Michael Kupersteine45af542015-06-30 13:36:19 +0000869static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000870_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000871{
Craig Topper0e189762016-09-03 18:29:35 +0000872 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
873 (__v8di)_mm512_add_epi64(__A, __B),
874 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000875}
876
Michael Kupersteine45af542015-06-30 13:36:19 +0000877static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000878_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000879{
Craig Topper0e189762016-09-03 18:29:35 +0000880 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
881 (__v8di)_mm512_add_epi64(__A, __B),
882 (__v8di)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000883}
884
Michael Kupersteine45af542015-06-30 13:36:19 +0000885static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000886_mm512_sub_epi64 (__m512i __A, __m512i __B)
887{
Craig Topper6a77b622016-06-04 05:43:41 +0000888 return (__m512i) ((__v8du) __A - (__v8du) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000889}
890
Michael Kupersteine45af542015-06-30 13:36:19 +0000891static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000892_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000893{
Craig Topper0e189762016-09-03 18:29:35 +0000894 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
895 (__v8di)_mm512_sub_epi64(__A, __B),
896 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000897}
898
Michael Kupersteine45af542015-06-30 13:36:19 +0000899static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000900_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000901{
Craig Topper0e189762016-09-03 18:29:35 +0000902 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
903 (__v8di)_mm512_sub_epi64(__A, __B),
904 (__v8di)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000905}
906
Michael Kupersteine45af542015-06-30 13:36:19 +0000907static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000908_mm512_add_epi32 (__m512i __A, __m512i __B)
909{
Craig Topper6a77b622016-06-04 05:43:41 +0000910 return (__m512i) ((__v16su) __A + (__v16su) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000911}
912
Michael Kupersteine45af542015-06-30 13:36:19 +0000913static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000914_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000915{
Craig Topper0e189762016-09-03 18:29:35 +0000916 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
917 (__v16si)_mm512_add_epi32(__A, __B),
918 (__v16si)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000919}
920
Michael Kupersteine45af542015-06-30 13:36:19 +0000921static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000922_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
923{
Craig Topper0e189762016-09-03 18:29:35 +0000924 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
925 (__v16si)_mm512_add_epi32(__A, __B),
926 (__v16si)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000927}
928
Michael Kupersteine45af542015-06-30 13:36:19 +0000929static __inline__ __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000930_mm512_sub_epi32 (__m512i __A, __m512i __B)
931{
Craig Topper6a77b622016-06-04 05:43:41 +0000932 return (__m512i) ((__v16su) __A - (__v16su) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000933}
934
Michael Kupersteine45af542015-06-30 13:36:19 +0000935static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000936_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000937{
Craig Topper0e189762016-09-03 18:29:35 +0000938 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
939 (__v16si)_mm512_sub_epi32(__A, __B),
940 (__v16si)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000941}
942
Michael Kupersteine45af542015-06-30 13:36:19 +0000943static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0e189762016-09-03 18:29:35 +0000944_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000945{
Craig Topper0e189762016-09-03 18:29:35 +0000946 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
947 (__v16si)_mm512_sub_epi32(__A, __B),
948 (__v16si)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +0000949}
950
/* Masked packed-double max with an explicit rounding argument R.
 * A and B are the operands; W and U are forwarded as the builtin's third
 * (source vector) and fourth (mask) arguments. The whole expansion is
 * parenthesized so the (__m512d) cast applies to the complete call even when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)(__m512d)(W), \
                                         (__mmask8)(U), (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +0000956
/* Zero-masking packed-double max with an explicit rounding argument R.
 * The source vector passed to the builtin is _mm512_setzero_pd(); U is the
 * mask. The whole expansion is parenthesized so the (__m512d) cast applies
 * to the complete call even when the macro is followed by a postfix
 * operator such as a subscript. */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +0000962
/* Unmasked packed-double max with an explicit rounding argument R.
 * The mask is (__mmask8)-1 (all bits set) and the source vector is
 * _mm512_undefined_pd(). The whole expansion is parenthesized so the
 * (__m512d) cast applies to the complete call even when the macro is
 * followed by a postfix operator such as a subscript. */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)_mm512_undefined_pd(), \
                                         (__mmask8)-1, (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +0000968
Michael Kupersteine45af542015-06-30 13:36:19 +0000969static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +0000970_mm512_max_pd(__m512d __A, __m512d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000971{
Adam Nemet0d5bb552014-07-28 17:14:40 +0000972 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
973 (__v8df) __B,
974 (__v8df)
975 _mm512_setzero_pd (),
976 (__mmask8) -1,
977 _MM_FROUND_CUR_DIRECTION);
978}
979
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +0000980static __inline__ __m512d __DEFAULT_FN_ATTRS
981_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
982{
983 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
984 (__v8df) __B,
985 (__v8df) __W,
986 (__mmask8) __U,
987 _MM_FROUND_CUR_DIRECTION);
988}
989
990static __inline__ __m512d __DEFAULT_FN_ATTRS
991_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
992{
993 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
994 (__v8df) __B,
995 (__v8df)
996 _mm512_setzero_pd (),
997 (__mmask8) __U,
998 _MM_FROUND_CUR_DIRECTION);
999}
1000
/* Masked packed-float max with an explicit rounding argument R.
 * A and B are the operands; W and U are forwarded as the builtin's third
 * (source vector) and fourth (mask) arguments. The whole expansion is
 * parenthesized so the (__m512) cast applies to the complete call even when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)(__m512)(W), \
                                        (__mmask16)(U), (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001006
/* Zero-masking packed-float max with an explicit rounding argument R.
 * The source vector passed to the builtin is _mm512_setzero_ps(); U is the
 * mask. The whole expansion is parenthesized so the (__m512) cast applies to
 * the complete call even when the macro is followed by a postfix operator
 * such as a subscript. */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001012
/* Unmasked packed-float max with an explicit rounding argument R.
 * The mask is (__mmask16)-1 (all bits set) and the source vector is
 * _mm512_undefined_ps(). The whole expansion is parenthesized so the
 * (__m512) cast applies to the complete call even when the macro is followed
 * by a postfix operator such as a subscript. */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)_mm512_undefined_ps(), \
                                        (__mmask16)-1, (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001018
Michael Kupersteine45af542015-06-30 13:36:19 +00001019static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001020_mm512_max_ps(__m512 __A, __m512 __B)
1021{
1022 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1023 (__v16sf) __B,
1024 (__v16sf)
1025 _mm512_setzero_ps (),
1026 (__mmask16) -1,
1027 _MM_FROUND_CUR_DIRECTION);
1028}
1029
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001030static __inline__ __m512 __DEFAULT_FN_ATTRS
1031_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1032{
1033 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1034 (__v16sf) __B,
1035 (__v16sf) __W,
1036 (__mmask16) __U,
1037 _MM_FROUND_CUR_DIRECTION);
1038}
1039
1040static __inline__ __m512 __DEFAULT_FN_ATTRS
1041_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1042{
1043 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1044 (__v16sf) __B,
1045 (__v16sf)
1046 _mm512_setzero_ps (),
1047 (__mmask16) __U,
1048 _MM_FROUND_CUR_DIRECTION);
1049}
1050
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001051static __inline__ __m128 __DEFAULT_FN_ATTRS
1052_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001053 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001054 (__v4sf) __B,
1055 (__v4sf) __W,
1056 (__mmask8) __U,
1057 _MM_FROUND_CUR_DIRECTION);
1058}
1059
1060static __inline__ __m128 __DEFAULT_FN_ATTRS
1061_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001062 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001063 (__v4sf) __B,
1064 (__v4sf) _mm_setzero_ps (),
1065 (__mmask8) __U,
1066 _MM_FROUND_CUR_DIRECTION);
1067}
1068
/* Unmasked scalar-float max with an explicit rounding argument R.
 * The mask is (__mmask8)-1 (all bits set) and the source vector is
 * _mm_setzero_ps(). The whole expansion is parenthesized so the (__m128)
 * cast applies to the complete call even when the macro is followed by a
 * postfix operator such as a subscript. */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001074
/* Masked scalar-float max with an explicit rounding argument R.
 * W and U are forwarded as the builtin's third (source vector) and fourth
 * (mask) arguments. The whole expansion is parenthesized so the (__m128)
 * cast applies to the complete call even when the macro is followed by a
 * postfix operator such as a subscript. */
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001080
/* Zero-masking scalar-float max with an explicit rounding argument R.
 * The source vector passed to the builtin is _mm_setzero_ps(); U is the
 * mask. The whole expansion is parenthesized so the (__m128) cast applies to
 * the complete call even when the macro is followed by a postfix operator
 * such as a subscript. */
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001086
1087static __inline__ __m128d __DEFAULT_FN_ATTRS
1088_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001089 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001090 (__v2df) __B,
1091 (__v2df) __W,
1092 (__mmask8) __U,
1093 _MM_FROUND_CUR_DIRECTION);
1094}
1095
1096static __inline__ __m128d __DEFAULT_FN_ATTRS
1097_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001098 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001099 (__v2df) __B,
1100 (__v2df) _mm_setzero_pd (),
1101 (__mmask8) __U,
1102 _MM_FROUND_CUR_DIRECTION);
1103}
1104
/* Unmasked scalar-double max with an explicit rounding argument R.
 * The mask is (__mmask8)-1 (all bits set) and the source vector is
 * _mm_setzero_pd(). The whole expansion is parenthesized so the (__m128d)
 * cast applies to the complete call even when the macro is followed by a
 * postfix operator such as a subscript. */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001110
/* Masked scalar-double max with an explicit rounding argument R.
 * W and U are forwarded as the builtin's third (source vector) and fourth
 * (mask) arguments. The whole expansion is parenthesized so the (__m128d)
 * cast applies to the complete call even when the macro is followed by a
 * postfix operator such as a subscript. */
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001116
/* Zero-masking scalar-double max with an explicit rounding argument R.
 * The source vector passed to the builtin is _mm_setzero_pd(); U is the
 * mask. The whole expansion is parenthesized so the (__m128d) cast applies
 * to the complete call even when the macro is followed by a postfix
 * operator such as a subscript. */
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001122
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001123static __inline __m512i
Michael Kupersteine45af542015-06-30 13:36:19 +00001124__DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001125_mm512_max_epi32(__m512i __A, __m512i __B)
1126{
Craig Topperf2043b02018-05-23 04:51:54 +00001127 return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001128}
1129
Michael Zuckermande860e52016-05-10 11:34:19 +00001130static __inline__ __m512i __DEFAULT_FN_ATTRS
1131_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1132{
Craig Topperf2043b02018-05-23 04:51:54 +00001133 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1134 (__v16si)_mm512_max_epi32(__A, __B),
1135 (__v16si)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001136}
1137
1138static __inline__ __m512i __DEFAULT_FN_ATTRS
1139_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1140{
Craig Topperf2043b02018-05-23 04:51:54 +00001141 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1142 (__v16si)_mm512_max_epi32(__A, __B),
1143 (__v16si)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001144}
1145
Michael Kupersteine45af542015-06-30 13:36:19 +00001146static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001147_mm512_max_epu32(__m512i __A, __m512i __B)
1148{
Craig Topperf2043b02018-05-23 04:51:54 +00001149 return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001150}
1151
Michael Zuckermande860e52016-05-10 11:34:19 +00001152static __inline__ __m512i __DEFAULT_FN_ATTRS
1153_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1154{
Craig Topperf2043b02018-05-23 04:51:54 +00001155 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1156 (__v16si)_mm512_max_epu32(__A, __B),
1157 (__v16si)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001158}
1159
1160static __inline__ __m512i __DEFAULT_FN_ATTRS
1161_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1162{
Craig Topperf2043b02018-05-23 04:51:54 +00001163 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1164 (__v16si)_mm512_max_epu32(__A, __B),
1165 (__v16si)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001166}
1167
Michael Kupersteine45af542015-06-30 13:36:19 +00001168static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001169_mm512_max_epi64(__m512i __A, __m512i __B)
1170{
Craig Topperf2043b02018-05-23 04:51:54 +00001171 return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001172}
1173
Michael Zuckermande860e52016-05-10 11:34:19 +00001174static __inline__ __m512i __DEFAULT_FN_ATTRS
1175_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1176{
Craig Topperf2043b02018-05-23 04:51:54 +00001177 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1178 (__v8di)_mm512_max_epi64(__A, __B),
1179 (__v8di)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001180}
1181
1182static __inline__ __m512i __DEFAULT_FN_ATTRS
1183_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1184{
Craig Topperf2043b02018-05-23 04:51:54 +00001185 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1186 (__v8di)_mm512_max_epi64(__A, __B),
1187 (__v8di)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001188}
1189
Michael Kupersteine45af542015-06-30 13:36:19 +00001190static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001191_mm512_max_epu64(__m512i __A, __m512i __B)
1192{
Craig Topperf2043b02018-05-23 04:51:54 +00001193 return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001194}
1195
Michael Zuckermande860e52016-05-10 11:34:19 +00001196static __inline__ __m512i __DEFAULT_FN_ATTRS
1197_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1198{
Craig Topperf2043b02018-05-23 04:51:54 +00001199 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1200 (__v8di)_mm512_max_epu64(__A, __B),
1201 (__v8di)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001202}
1203
1204static __inline__ __m512i __DEFAULT_FN_ATTRS
1205_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1206{
Craig Topperf2043b02018-05-23 04:51:54 +00001207 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1208 (__v8di)_mm512_max_epu64(__A, __B),
1209 (__v8di)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001210}
1211
/* Masked packed-double min with an explicit rounding argument R.
 * A and B are the operands; W and U are forwarded as the builtin's third
 * (source vector) and fourth (mask) arguments. The whole expansion is
 * parenthesized so the (__m512d) cast applies to the complete call even when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)(__m512d)(W), \
                                         (__mmask8)(U), (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001217
/* Zero-masking packed-double min with an explicit rounding argument R.
 * The source vector passed to the builtin is _mm512_setzero_pd(); U is the
 * mask. The whole expansion is parenthesized so the (__m512d) cast applies
 * to the complete call even when the macro is followed by a postfix
 * operator such as a subscript. */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001223
/* Unmasked packed-double min with an explicit rounding argument R.
 * The mask is (__mmask8)-1 (all bits set) and the source vector is
 * _mm512_undefined_pd(). The whole expansion is parenthesized so the
 * (__m512d) cast applies to the complete call even when the macro is
 * followed by a postfix operator such as a subscript. */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)_mm512_undefined_pd(), \
                                         (__mmask8)-1, (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001229
Michael Kupersteine45af542015-06-30 13:36:19 +00001230static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001231_mm512_min_pd(__m512d __A, __m512d __B)
1232{
1233 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1234 (__v8df) __B,
1235 (__v8df)
1236 _mm512_setzero_pd (),
1237 (__mmask8) -1,
1238 _MM_FROUND_CUR_DIRECTION);
1239}
1240
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001241static __inline__ __m512d __DEFAULT_FN_ATTRS
1242_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1243{
1244 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1245 (__v8df) __B,
1246 (__v8df) __W,
1247 (__mmask8) __U,
1248 _MM_FROUND_CUR_DIRECTION);
1249}
1250
/* Forwards A, B, the vector W, mask U and rounding argument R to
 * __builtin_ia32_minps512_mask. The entire expansion is parenthesized so the
 * (__m512) cast applies to the call result even when the macro invocation is
 * followed by a postfix operator such as a subscript. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)(__m512)(W), \
                                        (__mmask16)(U), (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001256
/* Forwards A, B, an all-zero vector, mask U and rounding argument R to
 * __builtin_ia32_minps512_mask. The entire expansion is parenthesized so the
 * (__m512) cast applies to the call result even when the macro invocation is
 * followed by a postfix operator such as a subscript. */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001262
/* Forwards A, B, an undefined vector, an all-ones mask and rounding argument
 * R to __builtin_ia32_minps512_mask. The entire expansion is parenthesized so
 * the (__m512) cast applies to the call result even when the macro invocation
 * is followed by a postfix operator such as a subscript. */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)_mm512_undefined_ps(), \
                                        (__mmask16)-1, (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001268
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001269static __inline__ __m512d __DEFAULT_FN_ATTRS
1270_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1271{
1272 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1273 (__v8df) __B,
1274 (__v8df)
1275 _mm512_setzero_pd (),
1276 (__mmask8) __U,
1277 _MM_FROUND_CUR_DIRECTION);
1278}
1279
Michael Kupersteine45af542015-06-30 13:36:19 +00001280static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001281_mm512_min_ps(__m512 __A, __m512 __B)
1282{
1283 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1284 (__v16sf) __B,
1285 (__v16sf)
1286 _mm512_setzero_ps (),
1287 (__mmask16) -1,
1288 _MM_FROUND_CUR_DIRECTION);
1289}
1290
Michael Zuckermanf9be3bb2016-05-09 12:38:49 +00001291static __inline__ __m512 __DEFAULT_FN_ATTRS
1292_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1293{
1294 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1295 (__v16sf) __B,
1296 (__v16sf) __W,
1297 (__mmask16) __U,
1298 _MM_FROUND_CUR_DIRECTION);
1299}
1300
1301static __inline__ __m512 __DEFAULT_FN_ATTRS
1302_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1303{
1304 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1305 (__v16sf) __B,
1306 (__v16sf)
1307 _mm512_setzero_ps (),
1308 (__mmask16) __U,
1309 _MM_FROUND_CUR_DIRECTION);
1310}
1311
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001312static __inline__ __m128 __DEFAULT_FN_ATTRS
1313_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001314 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001315 (__v4sf) __B,
1316 (__v4sf) __W,
1317 (__mmask8) __U,
1318 _MM_FROUND_CUR_DIRECTION);
1319}
1320
1321static __inline__ __m128 __DEFAULT_FN_ATTRS
1322_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001323 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001324 (__v4sf) __B,
1325 (__v4sf) _mm_setzero_ps (),
1326 (__mmask8) __U,
1327 _MM_FROUND_CUR_DIRECTION);
1328}
1329
/* Forwards A, B, an all-zero vector, an all-ones mask and rounding argument R
 * to __builtin_ia32_minss_round_mask. The entire expansion is parenthesized so
 * the (__m128) cast applies to the call result even when the macro invocation
 * is followed by a postfix operator such as a subscript. */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001335
/* Forwards A, B, the vector W, mask U and rounding argument R to
 * __builtin_ia32_minss_round_mask. The entire expansion is parenthesized so
 * the (__m128) cast applies to the call result even when the macro invocation
 * is followed by a postfix operator such as a subscript. */
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001341
/* Forwards A, B, an all-zero vector, mask U and rounding argument R to
 * __builtin_ia32_minss_round_mask. The entire expansion is parenthesized so
 * the (__m128) cast applies to the call result even when the macro invocation
 * is followed by a postfix operator such as a subscript. */
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001347
1348static __inline__ __m128d __DEFAULT_FN_ATTRS
1349_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001350 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001351 (__v2df) __B,
1352 (__v2df) __W,
1353 (__mmask8) __U,
1354 _MM_FROUND_CUR_DIRECTION);
1355}
1356
1357static __inline__ __m128d __DEFAULT_FN_ATTRS
1358_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001359 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001360 (__v2df) __B,
1361 (__v2df) _mm_setzero_pd (),
1362 (__mmask8) __U,
1363 _MM_FROUND_CUR_DIRECTION);
1364}
1365
/* Forwards A, B, an all-zero vector, an all-ones mask and rounding argument R
 * to __builtin_ia32_minsd_round_mask. The entire expansion is parenthesized so
 * the (__m128d) cast applies to the call result even when the macro
 * invocation is followed by a postfix operator such as a subscript. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001371
/* Forwards A, B, the vector W, mask U and rounding argument R to
 * __builtin_ia32_minsd_round_mask. The entire expansion is parenthesized so
 * the (__m128d) cast applies to the call result even when the macro
 * invocation is followed by a postfix operator such as a subscript. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001377
/* Forwards A, B, an all-zero vector, mask U and rounding argument R to
 * __builtin_ia32_minsd_round_mask. The entire expansion is parenthesized so
 * the (__m128d) cast applies to the call result even when the macro
 * invocation is followed by a postfix operator such as a subscript. */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001383
Adam Nemet0d5bb552014-07-28 17:14:40 +00001384static __inline __m512i
Michael Kupersteine45af542015-06-30 13:36:19 +00001385__DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001386_mm512_min_epi32(__m512i __A, __m512i __B)
1387{
Craig Topperf2043b02018-05-23 04:51:54 +00001388 return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001389}
1390
Michael Zuckermande860e52016-05-10 11:34:19 +00001391static __inline__ __m512i __DEFAULT_FN_ATTRS
1392_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1393{
Craig Topperf2043b02018-05-23 04:51:54 +00001394 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1395 (__v16si)_mm512_min_epi32(__A, __B),
1396 (__v16si)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001397}
1398
1399static __inline__ __m512i __DEFAULT_FN_ATTRS
1400_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1401{
Craig Topperf2043b02018-05-23 04:51:54 +00001402 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1403 (__v16si)_mm512_min_epi32(__A, __B),
1404 (__v16si)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001405}
1406
Michael Kupersteine45af542015-06-30 13:36:19 +00001407static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001408_mm512_min_epu32(__m512i __A, __m512i __B)
1409{
Craig Topperf2043b02018-05-23 04:51:54 +00001410 return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001411}
1412
Michael Zuckermande860e52016-05-10 11:34:19 +00001413static __inline__ __m512i __DEFAULT_FN_ATTRS
1414_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1415{
Craig Topperf2043b02018-05-23 04:51:54 +00001416 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1417 (__v16si)_mm512_min_epu32(__A, __B),
1418 (__v16si)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001419}
1420
1421static __inline__ __m512i __DEFAULT_FN_ATTRS
1422_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1423{
Craig Topperf2043b02018-05-23 04:51:54 +00001424 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1425 (__v16si)_mm512_min_epu32(__A, __B),
1426 (__v16si)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001427}
1428
Michael Kupersteine45af542015-06-30 13:36:19 +00001429static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001430_mm512_min_epi64(__m512i __A, __m512i __B)
1431{
Craig Topperf2043b02018-05-23 04:51:54 +00001432 return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001433}
1434
Michael Zuckermande860e52016-05-10 11:34:19 +00001435static __inline__ __m512i __DEFAULT_FN_ATTRS
1436_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1437{
Craig Topperf2043b02018-05-23 04:51:54 +00001438 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1439 (__v8di)_mm512_min_epi64(__A, __B),
1440 (__v8di)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001441}
1442
1443static __inline__ __m512i __DEFAULT_FN_ATTRS
1444_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1445{
Craig Topperf2043b02018-05-23 04:51:54 +00001446 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1447 (__v8di)_mm512_min_epi64(__A, __B),
1448 (__v8di)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001449}
1450
Michael Kupersteine45af542015-06-30 13:36:19 +00001451static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001452_mm512_min_epu64(__m512i __A, __m512i __B)
1453{
Craig Topperf2043b02018-05-23 04:51:54 +00001454 return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001455}
1456
Michael Zuckermande860e52016-05-10 11:34:19 +00001457static __inline__ __m512i __DEFAULT_FN_ATTRS
1458_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1459{
Craig Topperf2043b02018-05-23 04:51:54 +00001460 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1461 (__v8di)_mm512_min_epu64(__A, __B),
1462 (__v8di)__W);
Michael Zuckermande860e52016-05-10 11:34:19 +00001463}
1464
1465static __inline__ __m512i __DEFAULT_FN_ATTRS
1466_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1467{
Craig Topperf2043b02018-05-23 04:51:54 +00001468 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1469 (__v8di)_mm512_min_epu64(__A, __B),
1470 (__v8di)_mm512_setzero_si512());
Michael Zuckermande860e52016-05-10 11:34:19 +00001471}
1472
Michael Kupersteine45af542015-06-30 13:36:19 +00001473static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001474_mm512_mul_epi32(__m512i __X, __m512i __Y)
1475{
Craig Topper70536f42016-12-27 04:04:57 +00001476 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001477}
1478
Michael Kupersteine45af542015-06-30 13:36:19 +00001479static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topper70536f42016-12-27 04:04:57 +00001480_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001481{
Craig Topper70536f42016-12-27 04:04:57 +00001482 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1483 (__v8di)_mm512_mul_epi32(__X, __Y),
1484 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001485}
1486
Michael Kupersteine45af542015-06-30 13:36:19 +00001487static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topper70536f42016-12-27 04:04:57 +00001488_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001489{
Craig Topper70536f42016-12-27 04:04:57 +00001490 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1491 (__v8di)_mm512_mul_epi32(__X, __Y),
1492 (__v8di)_mm512_setzero_si512 ());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001493}
1494
Michael Kupersteine45af542015-06-30 13:36:19 +00001495static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001496_mm512_mul_epu32(__m512i __X, __m512i __Y)
1497{
Craig Topper70536f42016-12-27 04:04:57 +00001498 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001499}
1500
Michael Kupersteine45af542015-06-30 13:36:19 +00001501static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topper70536f42016-12-27 04:04:57 +00001502_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001503{
Craig Topper70536f42016-12-27 04:04:57 +00001504 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1505 (__v8di)_mm512_mul_epu32(__X, __Y),
1506 (__v8di)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001507}
1508
Michael Kupersteine45af542015-06-30 13:36:19 +00001509static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topper70536f42016-12-27 04:04:57 +00001510_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001511{
Craig Topper70536f42016-12-27 04:04:57 +00001512 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1513 (__v8di)_mm512_mul_epu32(__X, __Y),
1514 (__v8di)_mm512_setzero_si512 ());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001515}
1516
Michael Kupersteine45af542015-06-30 13:36:19 +00001517static __inline __m512i __DEFAULT_FN_ATTRS
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001518_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1519{
Craig Topper6a77b622016-06-04 05:43:41 +00001520 return (__m512i) ((__v16su) __A * (__v16su) __B);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001521}
1522
Michael Kupersteine45af542015-06-30 13:36:19 +00001523static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topperf43e4a12016-09-03 19:19:49 +00001524_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001525{
Craig Topperf43e4a12016-09-03 19:19:49 +00001526 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1527 (__v16si)_mm512_mullo_epi32(__A, __B),
1528 (__v16si)_mm512_setzero_si512());
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001529}
1530
Michael Kupersteine45af542015-06-30 13:36:19 +00001531static __inline __m512i __DEFAULT_FN_ATTRS
Craig Topperf43e4a12016-09-03 19:19:49 +00001532_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001533{
Craig Topperf43e4a12016-09-03 19:19:49 +00001534 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1535 (__v16si)_mm512_mullo_epi32(__A, __B),
1536 (__v16si)__W);
Elena Demikhovsky35dc8c02015-04-28 13:28:01 +00001537}
1538
Craig Toppere95bde32018-04-26 05:38:39 +00001539static __inline__ __m512i __DEFAULT_FN_ATTRS
1540_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1541 return (__m512i) ((__v8du) __A * (__v8du) __B);
1542}
1543
1544static __inline__ __m512i __DEFAULT_FN_ATTRS
1545_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1546 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1547 (__v8di)_mm512_mullox_epi64(__A, __B),
1548 (__v8di)__W);
1549}
1550
/* Forwards A, the vector W, mask U and rounding argument R to
 * __builtin_ia32_sqrtpd512_mask. The entire expansion is parenthesized so the
 * (__m512d) cast applies to the call result even when the macro invocation is
 * followed by a postfix operator such as a subscript. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(W), \
                                          (__mmask8)(U), (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001555
/* Forwards A, an all-zero vector, mask U and rounding argument R to
 * __builtin_ia32_sqrtpd512_mask. The entire expansion is parenthesized so the
 * (__m512d) cast applies to the call result even when the macro invocation is
 * followed by a postfix operator such as a subscript. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001560
/* Forwards A, an undefined vector, an all-ones mask and rounding argument R
 * to __builtin_ia32_sqrtpd512_mask. The entire expansion is parenthesized so
 * the (__m512d) cast applies to the call result even when the macro
 * invocation is followed by a postfix operator such as a subscript. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001565
Michael Kupersteine45af542015-06-30 13:36:19 +00001566static __inline__ __m512d __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00001567_mm512_sqrt_pd(__m512d __a)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001568{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00001569 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001570 (__v8df) _mm512_setzero_pd (),
1571 (__mmask8) -1,
1572 _MM_FROUND_CUR_DIRECTION);
1573}
1574
Michael Zuckermancb856772016-05-16 11:42:01 +00001575static __inline__ __m512d __DEFAULT_FN_ATTRS
1576_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1577{
1578 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1579 (__v8df) __W,
1580 (__mmask8) __U,
1581 _MM_FROUND_CUR_DIRECTION);
1582}
1583
1584static __inline__ __m512d __DEFAULT_FN_ATTRS
1585_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1586{
1587 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1588 (__v8df)
1589 _mm512_setzero_pd (),
1590 (__mmask8) __U,
1591 _MM_FROUND_CUR_DIRECTION);
1592}
1593
/* Forwards A, the vector W, mask U and rounding argument R to
 * __builtin_ia32_sqrtps512_mask. The entire expansion is parenthesized so the
 * (__m512) cast applies to the call result even when the macro invocation is
 * followed by a postfix operator such as a subscript. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(W), \
                                         (__mmask16)(U), (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001598
/* Forwards A, an all-zero vector, mask U and rounding argument R to
 * __builtin_ia32_sqrtps512_mask. The entire expansion is parenthesized so the
 * (__m512) cast applies to the call result even when the macro invocation is
 * followed by a postfix operator such as a subscript. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001603
/* Forwards A, an undefined vector, an all-ones mask and rounding argument R
 * to __builtin_ia32_sqrtps512_mask. The entire expansion is parenthesized so
 * the (__m512) cast applies to the call result even when the macro invocation
 * is followed by a postfix operator such as a subscript. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R)))
Michael Zuckermane6aa66a2016-06-01 08:34:03 +00001608
Michael Kupersteine45af542015-06-30 13:36:19 +00001609static __inline__ __m512 __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00001610_mm512_sqrt_ps(__m512 __a)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001611{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00001612 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001613 (__v16sf) _mm512_setzero_ps (),
1614 (__mmask16) -1,
1615 _MM_FROUND_CUR_DIRECTION);
1616}
1617
Michael Zuckermancb856772016-05-16 11:42:01 +00001618static __inline__ __m512 __DEFAULT_FN_ATTRS
Asaf Badouh880f0c22016-06-13 15:15:57 +00001619_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
Michael Zuckermancb856772016-05-16 11:42:01 +00001620{
1621 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1622 (__v16sf) __W,
1623 (__mmask16) __U,
1624 _MM_FROUND_CUR_DIRECTION);
1625}
1626
1627static __inline__ __m512 __DEFAULT_FN_ATTRS
Asaf Badouh880f0c22016-06-13 15:15:57 +00001628_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
Michael Zuckermancb856772016-05-16 11:42:01 +00001629{
1630 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1631 (__v16sf) _mm512_setzero_ps (),
1632 (__mmask16) __U,
1633 _MM_FROUND_CUR_DIRECTION);
1634}
1635
Michael Kupersteine45af542015-06-30 13:36:19 +00001636static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001637_mm512_rsqrt14_pd(__m512d __A)
1638{
1639 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1640 (__v8df)
1641 _mm512_setzero_pd (),
1642 (__mmask8) -1);}
1643
Michael Zuckermancb856772016-05-16 11:42:01 +00001644static __inline__ __m512d __DEFAULT_FN_ATTRS
1645_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1646{
1647 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1648 (__v8df) __W,
1649 (__mmask8) __U);
1650}
1651
1652static __inline__ __m512d __DEFAULT_FN_ATTRS
1653_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1654{
1655 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1656 (__v8df)
1657 _mm512_setzero_pd (),
1658 (__mmask8) __U);
1659}
1660
Michael Kupersteine45af542015-06-30 13:36:19 +00001661static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001662_mm512_rsqrt14_ps(__m512 __A)
1663{
1664 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1665 (__v16sf)
1666 _mm512_setzero_ps (),
1667 (__mmask16) -1);
1668}
1669
Michael Zuckermancb856772016-05-16 11:42:01 +00001670static __inline__ __m512 __DEFAULT_FN_ATTRS
1671_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1672{
1673 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1674 (__v16sf) __W,
1675 (__mmask16) __U);
1676}
1677
1678static __inline__ __m512 __DEFAULT_FN_ATTRS
1679_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1680{
1681 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1682 (__v16sf)
1683 _mm512_setzero_ps (),
1684 (__mmask16) __U);
1685}
1686
Michael Kupersteine45af542015-06-30 13:36:19 +00001687static __inline__ __m128 __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001688_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1689{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001690 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001691 (__v4sf) __B,
1692 (__v4sf)
1693 _mm_setzero_ps (),
1694 (__mmask8) -1);
1695}
1696
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001697static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001698_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001699{
1700 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1701 (__v4sf) __B,
1702 (__v4sf) __W,
1703 (__mmask8) __U);
1704}
1705
1706static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001707_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001708{
1709 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1710 (__v4sf) __B,
1711 (__v4sf) _mm_setzero_ps (),
1712 (__mmask8) __U);
1713}
1714
Michael Kupersteine45af542015-06-30 13:36:19 +00001715static __inline__ __m128d __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001716_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1717{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001718 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001719 (__v2df) __B,
1720 (__v2df)
1721 _mm_setzero_pd (),
1722 (__mmask8) -1);
1723}
1724
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001725static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001726_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001727{
1728 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1729 (__v2df) __B,
1730 (__v2df) __W,
1731 (__mmask8) __U);
1732}
1733
1734static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001735_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001736{
1737 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1738 (__v2df) __B,
1739 (__v2df) _mm_setzero_pd (),
1740 (__mmask8) __U);
1741}
1742
Michael Kupersteine45af542015-06-30 13:36:19 +00001743static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001744_mm512_rcp14_pd(__m512d __A)
1745{
1746 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1747 (__v8df)
1748 _mm512_setzero_pd (),
1749 (__mmask8) -1);
1750}
1751
Michael Zuckermancb856772016-05-16 11:42:01 +00001752static __inline__ __m512d __DEFAULT_FN_ATTRS
1753_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1754{
1755 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1756 (__v8df) __W,
1757 (__mmask8) __U);
1758}
1759
1760static __inline__ __m512d __DEFAULT_FN_ATTRS
1761_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1762{
1763 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1764 (__v8df)
1765 _mm512_setzero_pd (),
1766 (__mmask8) __U);
1767}
1768
Michael Kupersteine45af542015-06-30 13:36:19 +00001769static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001770_mm512_rcp14_ps(__m512 __A)
1771{
1772 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1773 (__v16sf)
1774 _mm512_setzero_ps (),
1775 (__mmask16) -1);
1776}
Michael Zuckermancb856772016-05-16 11:42:01 +00001777
1778static __inline__ __m512 __DEFAULT_FN_ATTRS
1779_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1780{
1781 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1782 (__v16sf) __W,
1783 (__mmask16) __U);
1784}
1785
1786static __inline__ __m512 __DEFAULT_FN_ATTRS
1787_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1788{
1789 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1790 (__v16sf)
1791 _mm512_setzero_ps (),
1792 (__mmask16) __U);
1793}
1794
Michael Kupersteine45af542015-06-30 13:36:19 +00001795static __inline__ __m128 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00001796_mm_rcp14_ss(__m128 __A, __m128 __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001797{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001798 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001799 (__v4sf) __B,
1800 (__v4sf)
1801 _mm_setzero_ps (),
1802 (__mmask8) -1);
1803}
1804
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001805static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001806_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001807{
1808 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1809 (__v4sf) __B,
1810 (__v4sf) __W,
1811 (__mmask8) __U);
1812}
1813
1814static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001815_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001816{
1817 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1818 (__v4sf) __B,
1819 (__v4sf) _mm_setzero_ps (),
1820 (__mmask8) __U);
1821}
1822
Michael Kupersteine45af542015-06-30 13:36:19 +00001823static __inline__ __m128d __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00001824_mm_rcp14_sd(__m128d __A, __m128d __B)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001825{
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001826 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001827 (__v2df) __B,
1828 (__v2df)
1829 _mm_setzero_pd (),
1830 (__mmask8) -1);
1831}
1832
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001833static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001834_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001835{
1836 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1837 (__v2df) __B,
1838 (__v2df) __W,
1839 (__mmask8) __U);
1840}
1841
1842static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00001843_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00001844{
1845 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1846 (__v2df) __B,
1847 (__v2df) _mm_setzero_pd (),
1848 (__mmask8) __U);
1849}
1850
Michael Kupersteine45af542015-06-30 13:36:19 +00001851static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001852_mm512_floor_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001853{
Adam Nemet0d5bb552014-07-28 17:14:40 +00001854 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1855 _MM_FROUND_FLOOR,
1856 (__v16sf) __A, -1,
1857 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001858}
1859
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00001860static __inline__ __m512 __DEFAULT_FN_ATTRS
1861_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1862{
1863 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1864 _MM_FROUND_FLOOR,
1865 (__v16sf) __W, __U,
1866 _MM_FROUND_CUR_DIRECTION);
1867}
1868
Michael Kupersteine45af542015-06-30 13:36:19 +00001869static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001870_mm512_floor_pd(__m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001871{
Adam Nemet0d5bb552014-07-28 17:14:40 +00001872 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1873 _MM_FROUND_FLOOR,
1874 (__v8df) __A, -1,
1875 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001876}
1877
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00001878static __inline__ __m512d __DEFAULT_FN_ATTRS
1879_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1880{
1881 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1882 _MM_FROUND_FLOOR,
1883 (__v8df) __W, __U,
1884 _MM_FROUND_CUR_DIRECTION);
1885}
1886
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00001887static __inline__ __m512 __DEFAULT_FN_ATTRS
1888_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1889{
1890 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1891 _MM_FROUND_CEIL,
1892 (__v16sf) __W, __U,
1893 _MM_FROUND_CUR_DIRECTION);
1894}
1895
Michael Kupersteine45af542015-06-30 13:36:19 +00001896static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001897_mm512_ceil_ps(__m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00001898{
Adam Nemet0d5bb552014-07-28 17:14:40 +00001899 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1900 _MM_FROUND_CEIL,
1901 (__v16sf) __A, -1,
1902 _MM_FROUND_CUR_DIRECTION);
1903}
1904
Michael Kupersteine45af542015-06-30 13:36:19 +00001905static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001906_mm512_ceil_pd(__m512d __A)
1907{
1908 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1909 _MM_FROUND_CEIL,
1910 (__v8df) __A, -1,
1911 _MM_FROUND_CUR_DIRECTION);
1912}
1913
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00001914static __inline__ __m512d __DEFAULT_FN_ATTRS
1915_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1916{
1917 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1918 _MM_FROUND_CEIL,
1919 (__v8df) __W, __U,
1920 _MM_FROUND_CUR_DIRECTION);
1921}
1922
Michael Kupersteine45af542015-06-30 13:36:19 +00001923static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001924_mm512_abs_epi64(__m512i __A)
1925{
Craig Topperf2043b02018-05-23 04:51:54 +00001926 return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001927}
1928
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001929static __inline__ __m512i __DEFAULT_FN_ATTRS
1930_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1931{
Craig Topperf2043b02018-05-23 04:51:54 +00001932 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1933 (__v8di)_mm512_abs_epi64(__A),
1934 (__v8di)__W);
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001935}
1936
1937static __inline__ __m512i __DEFAULT_FN_ATTRS
1938_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1939{
Craig Topperf2043b02018-05-23 04:51:54 +00001940 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1941 (__v8di)_mm512_abs_epi64(__A),
1942 (__v8di)_mm512_setzero_si512());
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001943}
1944
Michael Kupersteine45af542015-06-30 13:36:19 +00001945static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00001946_mm512_abs_epi32(__m512i __A)
1947{
Craig Topperf2043b02018-05-23 04:51:54 +00001948 return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00001949}
1950
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001951static __inline__ __m512i __DEFAULT_FN_ATTRS
1952_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1953{
Craig Topper26df8c42018-05-24 17:32:49 +00001954 return (__m512i)__builtin_ia32_selectd_512(__U,
Craig Topperf2043b02018-05-23 04:51:54 +00001955 (__v16si)_mm512_abs_epi32(__A),
1956 (__v16si)__W);
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001957}
1958
1959static __inline__ __m512i __DEFAULT_FN_ATTRS
1960_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1961{
Craig Topper26df8c42018-05-24 17:32:49 +00001962 return (__m512i)__builtin_ia32_selectd_512(__U,
Craig Topperf2043b02018-05-23 04:51:54 +00001963 (__v16si)_mm512_abs_epi32(__A),
1964 (__v16si)_mm512_setzero_si512());
Michael Zuckermanbf05a452016-05-16 18:57:24 +00001965}
1966
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001967static __inline__ __m128 __DEFAULT_FN_ATTRS
1968_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001969 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001970 (__v4sf) __B,
1971 (__v4sf) __W,
1972 (__mmask8) __U,
1973 _MM_FROUND_CUR_DIRECTION);
1974}
1975
1976static __inline__ __m128 __DEFAULT_FN_ATTRS
1977_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00001978 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00001979 (__v4sf) __B,
1980 (__v4sf) _mm_setzero_ps (),
1981 (__mmask8) __U,
1982 _MM_FROUND_CUR_DIRECTION);
1983}
1984
/* Scalar single-precision add with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002002
2003static __inline__ __m128d __DEFAULT_FN_ATTRS
2004_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002005 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002006 (__v2df) __B,
2007 (__v2df) __W,
2008 (__mmask8) __U,
2009 _MM_FROUND_CUR_DIRECTION);
2010}
2011
2012static __inline__ __m128d __DEFAULT_FN_ATTRS
2013_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002014 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002015 (__v2df) __B,
2016 (__v2df) _mm_setzero_pd (),
2017 (__mmask8) __U,
2018 _MM_FROUND_CUR_DIRECTION);
2019}
/* Scalar double-precision add with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002037
Asaf Badouhffeb6242015-07-21 15:27:28 +00002038static __inline__ __m512d __DEFAULT_FN_ATTRS
2039_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002040 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2041 (__v8df)_mm512_add_pd(__A, __B),
2042 (__v8df)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002043}
2044
2045static __inline__ __m512d __DEFAULT_FN_ATTRS
2046_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002047 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2048 (__v8df)_mm512_add_pd(__A, __B),
2049 (__v8df)_mm512_setzero_pd());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002050}
2051
2052static __inline__ __m512 __DEFAULT_FN_ATTRS
2053_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002054 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2055 (__v16sf)_mm512_add_ps(__A, __B),
2056 (__v16sf)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002057}
2058
2059static __inline__ __m512 __DEFAULT_FN_ATTRS
2060_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002061 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2062 (__v16sf)_mm512_add_ps(__A, __B),
2063 (__v16sf)_mm512_setzero_ps());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002064}
2065
/* Packed double-precision add with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)-1, (int)(R)))

#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
                                         (int)(R)))

#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002083
/* Packed single-precision add with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)-1, (int)(R)))

#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
                                        (int)(R)))

#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002101
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002102static __inline__ __m128 __DEFAULT_FN_ATTRS
2103_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002104 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002105 (__v4sf) __B,
2106 (__v4sf) __W,
2107 (__mmask8) __U,
2108 _MM_FROUND_CUR_DIRECTION);
2109}
2110
2111static __inline__ __m128 __DEFAULT_FN_ATTRS
2112_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002113 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002114 (__v4sf) __B,
2115 (__v4sf) _mm_setzero_ps (),
2116 (__mmask8) __U,
2117 _MM_FROUND_CUR_DIRECTION);
2118}
/* Scalar single-precision subtract with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002136
2137static __inline__ __m128d __DEFAULT_FN_ATTRS
2138_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002139 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002140 (__v2df) __B,
2141 (__v2df) __W,
2142 (__mmask8) __U,
2143 _MM_FROUND_CUR_DIRECTION);
2144}
2145
2146static __inline__ __m128d __DEFAULT_FN_ATTRS
2147_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002148 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002149 (__v2df) __B,
2150 (__v2df) _mm_setzero_pd (),
2151 (__mmask8) __U,
2152 _MM_FROUND_CUR_DIRECTION);
2153}
2154
/* Scalar double-precision subtract with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002172
Asaf Badouhffeb6242015-07-21 15:27:28 +00002173static __inline__ __m512d __DEFAULT_FN_ATTRS
2174_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002175 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2176 (__v8df)_mm512_sub_pd(__A, __B),
2177 (__v8df)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002178}
2179
2180static __inline__ __m512d __DEFAULT_FN_ATTRS
2181_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002182 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2183 (__v8df)_mm512_sub_pd(__A, __B),
2184 (__v8df)_mm512_setzero_pd());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002185}
2186
2187static __inline__ __m512 __DEFAULT_FN_ATTRS
2188_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002189 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2190 (__v16sf)_mm512_sub_ps(__A, __B),
2191 (__v16sf)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002192}
2193
2194static __inline__ __m512 __DEFAULT_FN_ATTRS
2195_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002196 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2197 (__v16sf)_mm512_sub_ps(__A, __B),
2198 (__v16sf)_mm512_setzero_ps());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002199}
2200
/* Packed double-precision subtract with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)-1, (int)(R)))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
                                         (int)(R)))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002218
/* Packed single-precision subtract with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)-1, (int)(R)))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
                                        (int)(R)))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002236
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002237static __inline__ __m128 __DEFAULT_FN_ATTRS
2238_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002239 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002240 (__v4sf) __B,
2241 (__v4sf) __W,
2242 (__mmask8) __U,
2243 _MM_FROUND_CUR_DIRECTION);
2244}
2245
2246static __inline__ __m128 __DEFAULT_FN_ATTRS
2247_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002248 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002249 (__v4sf) __B,
2250 (__v4sf) _mm_setzero_ps (),
2251 (__mmask8) __U,
2252 _MM_FROUND_CUR_DIRECTION);
2253}
/* Scalar single-precision multiply with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002271
2272static __inline__ __m128d __DEFAULT_FN_ATTRS
2273_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002274 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002275 (__v2df) __B,
2276 (__v2df) __W,
2277 (__mmask8) __U,
2278 _MM_FROUND_CUR_DIRECTION);
2279}
2280
2281static __inline__ __m128d __DEFAULT_FN_ATTRS
2282_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002283 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002284 (__v2df) __B,
2285 (__v2df) _mm_setzero_pd (),
2286 (__mmask8) __U,
2287 _MM_FROUND_CUR_DIRECTION);
2288}
2289
/* Scalar double-precision multiply with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002307
Asaf Badouhffeb6242015-07-21 15:27:28 +00002308static __inline__ __m512d __DEFAULT_FN_ATTRS
2309_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002310 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2311 (__v8df)_mm512_mul_pd(__A, __B),
2312 (__v8df)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002313}
2314
2315static __inline__ __m512d __DEFAULT_FN_ATTRS
2316_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002317 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2318 (__v8df)_mm512_mul_pd(__A, __B),
2319 (__v8df)_mm512_setzero_pd());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002320}
2321
2322static __inline__ __m512 __DEFAULT_FN_ATTRS
2323_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002324 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2325 (__v16sf)_mm512_mul_ps(__A, __B),
2326 (__v16sf)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002327}
2328
2329static __inline__ __m512 __DEFAULT_FN_ATTRS
2330_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002331 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2332 (__v16sf)_mm512_mul_ps(__A, __B),
2333 (__v16sf)_mm512_setzero_ps());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002334}
2335
/* Packed double-precision multiply with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)-1, (int)(R)))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
                                         (int)(R)))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002353
/* Packed single-precision multiply with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)-1, (int)(R)))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
                                        (int)(R)))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002371
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002372static __inline__ __m128 __DEFAULT_FN_ATTRS
2373_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002374 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002375 (__v4sf) __B,
2376 (__v4sf) __W,
2377 (__mmask8) __U,
2378 _MM_FROUND_CUR_DIRECTION);
2379}
2380
2381static __inline__ __m128 __DEFAULT_FN_ATTRS
2382_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002383 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002384 (__v4sf) __B,
2385 (__v4sf) _mm_setzero_ps (),
2386 (__mmask8) __U,
2387 _MM_FROUND_CUR_DIRECTION);
2388}
2389
/* Scalar single-precision divide with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002407
2408static __inline__ __m128d __DEFAULT_FN_ATTRS
2409_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002410 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002411 (__v2df) __B,
2412 (__v2df) __W,
2413 (__mmask8) __U,
2414 _MM_FROUND_CUR_DIRECTION);
2415}
2416
2417static __inline__ __m128d __DEFAULT_FN_ATTRS
2418_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Igor Breger9c2a0bf2016-02-08 12:36:48 +00002419 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002420 (__v2df) __B,
2421 (__v2df) _mm_setzero_pd (),
2422 (__mmask8) __U,
2423 _MM_FROUND_CUR_DIRECTION);
2424}
2425
/* Scalar double-precision divide with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Asaf Badouhf6a58b62015-07-23 12:13:32 +00002443
Michael Zuckerman223676d2016-06-14 12:38:58 +00002444static __inline __m512d __DEFAULT_FN_ATTRS
2445_mm512_div_pd(__m512d __a, __m512d __b)
2446{
2447 return (__m512d)((__v8df)__a/(__v8df)__b);
2448}
2449
Asaf Badouhffeb6242015-07-21 15:27:28 +00002450static __inline__ __m512d __DEFAULT_FN_ATTRS
2451_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002452 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2453 (__v8df)_mm512_div_pd(__A, __B),
2454 (__v8df)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002455}
2456
2457static __inline__ __m512d __DEFAULT_FN_ATTRS
2458_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002459 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2460 (__v8df)_mm512_div_pd(__A, __B),
2461 (__v8df)_mm512_setzero_pd());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002462}
2463
Michael Zuckerman223676d2016-06-14 12:38:58 +00002464static __inline __m512 __DEFAULT_FN_ATTRS
2465_mm512_div_ps(__m512 __a, __m512 __b)
2466{
2467 return (__m512)((__v16sf)__a/(__v16sf)__b);
2468}
2469
Asaf Badouhffeb6242015-07-21 15:27:28 +00002470static __inline__ __m512 __DEFAULT_FN_ATTRS
2471_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002472 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2473 (__v16sf)_mm512_div_ps(__A, __B),
2474 (__v16sf)__W);
Asaf Badouhffeb6242015-07-21 15:27:28 +00002475}
2476
2477static __inline__ __m512 __DEFAULT_FN_ATTRS
2478_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Craig Topperc4a82282016-10-02 17:43:00 +00002479 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2480 (__v16sf)_mm512_div_ps(__A, __B),
2481 (__v16sf)_mm512_setzero_ps());
Asaf Badouhffeb6242015-07-21 15:27:28 +00002482}
2483
/* Packed double-precision divide with an explicit rounding argument R, in
 * unmasked, merge-masked (W, U) and zero-masked (U) forms.  Each expansion is
 * wrapped in parentheses so the cast result behaves as a single operand when
 * the macro is followed by a postfix operator such as a subscript. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)-1, (int)(R)))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
                                         (int)(R)))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002501
/* Packed float divide A / B with rounding control R (unmasked: all-ones
 * write mask, zero passthrough). Fully parenthesized expansion. */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)-1, (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002507
/* Merge-masked packed float divide A / B with rounding control R: W is the
 * passthrough vector and U the write mask. Fully parenthesized expansion. */
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
                                        (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002513
/* Zero-masked packed float divide A / B with rounding control R: U is the
 * write mask and the passthrough is a zero vector. Fully parenthesized. */
#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(B), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)))
Asaf Badouhffeb6242015-07-21 15:27:28 +00002519
/* Round-scale of the sixteen floats in A with control immediate B, using the
 * current rounding direction. The write mask is all-ones, so the passthrough
 * operand is never observed; an undefined vector is passed (as in
 * _mm512_roundscale_round_ps) so that A is expanded only once. The expansion
 * is fully parenthesized. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))
Craig Topper72c7d512015-02-01 07:35:35 +00002524
/* Merge-masked float round-scale: C is the source, imm the control
 * immediate, A the passthrough vector and B the write mask. Uses the current
 * rounding direction. Fully parenthesized expansion. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00002529
/* Zero-masked float round-scale: B is the source, imm the control immediate
 * and A the write mask; the passthrough is a zero vector. Uses the current
 * rounding direction. Fully parenthesized expansion. */
#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))
Craig Topper8c18e112016-05-17 04:41:50 +00002535
/* Merge-masked float round-scale with explicit rounding/SAE argument R:
 * C is the source, imm the control immediate, A the passthrough vector and
 * B the write mask. Fully parenthesized expansion. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002540
/* Zero-masked float round-scale with explicit rounding/SAE argument R:
 * B is the source, imm the control immediate and A the write mask; the
 * passthrough is a zero vector. Fully parenthesized expansion. */
#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002545
/* Unmasked float round-scale of A with control immediate imm and explicit
 * rounding/SAE argument R. All-ones write mask with an undefined
 * passthrough. Fully parenthesized expansion. */
#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002550
/* Round-scale of the eight doubles in A with control immediate B, using the
 * current rounding direction. The write mask is all-ones, so the passthrough
 * operand is never observed; an undefined vector is passed (as in
 * _mm512_roundscale_round_pd) so that A is expanded only once. The expansion
 * is fully parenthesized. */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))
Adam Nemet0d5bb552014-07-28 17:14:40 +00002555
/* Merge-masked double round-scale: C is the source, imm the control
 * immediate, A the passthrough vector and B the write mask. Uses the current
 * rounding direction. Fully parenthesized expansion. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman7360d8a2016-05-10 07:30:58 +00002560
/* Zero-masked double round-scale: B is the source, imm the control immediate
 * and A the write mask; the passthrough is a zero vector. Uses the current
 * rounding direction. Fully parenthesized expansion. */
#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))
Craig Topper8c18e112016-05-17 04:41:50 +00002566
/* Merge-masked double round-scale with explicit rounding/SAE argument R:
 * C is the source, imm the control immediate, A the passthrough vector and
 * B the write mask. Fully parenthesized expansion. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002571
/* Zero-masked double round-scale with explicit rounding/SAE argument R:
 * B is the source, imm the control immediate and A the write mask; the
 * passthrough is a zero vector. Fully parenthesized expansion. */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002576
/* Unmasked double round-scale of A with control immediate imm and explicit
 * rounding/SAE argument R. All-ones write mask with an undefined
 * passthrough. Fully parenthesized expansion. */
#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
Michael Zuckermanc301c192016-06-01 07:35:44 +00002581
/* Fused multiply-add on eight doubles, (A * B) + C, with rounding control R.
 * Fully parenthesized so the result can be used in any expression. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(B), \
                                       (__v8df)(__m512d)(C), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002586
2587
/* Merge-masked (A * B) + C on doubles with rounding control R: lanes selected
 * by U take the FMA result, the others keep A. Note that A is expanded
 * twice, so it should not have side effects. Fully parenthesized. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(B), \
                                       (__v8df)(__m512d)(C), \
                                       (int)(R)), \
            (__v8df)(__m512d)(A)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002595
2596
/* Merge-masked (A * B) + C on doubles with rounding control R: lanes selected
 * by U take the FMA result, the others keep C. Note that C is expanded
 * twice, so it should not have side effects. Fully parenthesized. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(B), \
                                       (__v8df)(__m512d)(C), \
                                       (int)(R)), \
            (__v8df)(__m512d)(C)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002604
2605
/* Zero-masked (A * B) + C on doubles with rounding control R: lanes selected
 * by U take the FMA result, the others are zero. Fully parenthesized. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(B), \
                                       (__v8df)(__m512d)(C), \
                                       (int)(R)), \
            (__v8df)_mm512_setzero_pd()))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002613
2614
/* Fused multiply-subtract on eight doubles, (A * B) - C, with rounding
 * control R; implemented as an FMA with C negated. Fully parenthesized. */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(B), \
                                       -(__v8df)(__m512d)(C), \
                                       (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002620
2621
/* Merge-masked (A * B) - C on doubles with rounding control R: lanes selected
 * by U take the result, the others keep A. Note that A is expanded twice.
 * Fully parenthesized. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(B), \
                                       -(__v8df)(__m512d)(C), \
                                       (int)(R)), \
            (__v8df)(__m512d)(A)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002629
2630
/* Zero-masked (A * B) - C on doubles with rounding control R: lanes selected
 * by U take the result, the others are zero. Fully parenthesized. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(B), \
                                       -(__v8df)(__m512d)(C), \
                                       (int)(R)), \
            (__v8df)_mm512_setzero_pd()))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002638
2639
/* Negated fused multiply-add on eight doubles, -(A * B) + C, with rounding
 * control R; implemented as an FMA with A negated. Fully parenthesized. */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(B), \
                                       (__v8df)(__m512d)(C), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002644
2645
/* Merge-masked -(A * B) + C on doubles with rounding control R: lanes
 * selected by U take the result, the others keep C. Note that C is expanded
 * twice. Fully parenthesized. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(B), \
                                       (__v8df)(__m512d)(C), \
                                       (int)(R)), \
            (__v8df)(__m512d)(C)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002653
2654
/* Zero-masked -(A * B) + C on doubles with rounding control R: lanes
 * selected by U take the result, the others are zero. Fully parenthesized. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(B), \
                                       (__v8df)(__m512d)(C), \
                                       (int)(R)), \
            (__v8df)_mm512_setzero_pd()))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002662
2663
/* Negated fused multiply-subtract on eight doubles, -(A * B) - C, with
 * rounding control R; implemented as an FMA with both A and C negated.
 * Fully parenthesized. */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(B), \
                                       -(__v8df)(__m512d)(C), \
                                       (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002669
2670
/* Zero-masked -(A * B) - C on doubles with rounding control R: lanes
 * selected by U take the result, the others are zero. Fully parenthesized. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(B), \
                                       -(__v8df)(__m512d)(C), \
                                       (int)(R)), \
            (__v8df)_mm512_setzero_pd()))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002678
2679
Michael Kupersteine45af542015-06-30 13:36:19 +00002680static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002681_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2682{
Gabor Buella70d8d512018-05-30 15:27:49 +00002683 return (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
2684 (__v8df) __B,
2685 (__v8df) __C,
2686 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002687}
2688
Michael Kupersteine45af542015-06-30 13:36:19 +00002689static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002690_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2691{
Gabor Buella70d8d512018-05-30 15:27:49 +00002692 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
2693 __builtin_ia32_vfmaddpd512 ((__v8df) __A,
2694 (__v8df) __B,
2695 (__v8df) __C,
2696 _MM_FROUND_CUR_DIRECTION),
2697 (__v8df) __A);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002698}
2699
Michael Kupersteine45af542015-06-30 13:36:19 +00002700static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002701_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2702{
Gabor Buella70d8d512018-05-30 15:27:49 +00002703 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
2704 __builtin_ia32_vfmaddpd512 ((__v8df) __A,
2705 (__v8df) __B,
2706 (__v8df) __C,
2707 _MM_FROUND_CUR_DIRECTION),
2708 (__v8df) __C);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002709}
2710
Michael Kupersteine45af542015-06-30 13:36:19 +00002711static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002712_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2713{
Gabor Buella70d8d512018-05-30 15:27:49 +00002714 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
2715 __builtin_ia32_vfmaddpd512 ((__v8df) __A,
2716 (__v8df) __B,
2717 (__v8df) __C,
2718 _MM_FROUND_CUR_DIRECTION),
2719 (__v8df) _mm512_setzero_pd());
Adam Nemet2278fcb2014-08-14 17:17:57 +00002720}
2721
Michael Kupersteine45af542015-06-30 13:36:19 +00002722static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002723_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2724{
Gabor Buella70d8d512018-05-30 15:27:49 +00002725 return (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
2726 (__v8df) __B,
2727 -(__v8df) __C,
2728 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002729}
2730
Michael Kupersteine45af542015-06-30 13:36:19 +00002731static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002732_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2733{
Gabor Buella70d8d512018-05-30 15:27:49 +00002734 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
2735 __builtin_ia32_vfmaddpd512 ((__v8df) __A,
2736 (__v8df) __B,
2737 -(__v8df) __C,
2738 _MM_FROUND_CUR_DIRECTION),
2739 (__v8df) __A);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002740}
2741
Michael Kupersteine45af542015-06-30 13:36:19 +00002742static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002743_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2744{
Gabor Buella70d8d512018-05-30 15:27:49 +00002745 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
2746 __builtin_ia32_vfmaddpd512 ((__v8df) __A,
2747 (__v8df) __B,
2748 -(__v8df) __C,
2749 _MM_FROUND_CUR_DIRECTION),
2750 (__v8df) _mm512_setzero_pd());
Adam Nemet2278fcb2014-08-14 17:17:57 +00002751}
2752
Michael Kupersteine45af542015-06-30 13:36:19 +00002753static __inline__ __m512d __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002754_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2755{
Gabor Buella70d8d512018-05-30 15:27:49 +00002756 return (__m512d) __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
2757 (__v8df) __B,
2758 (__v8df) __C,
2759 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002760}
2761
Michael Kupersteine45af542015-06-30 13:36:19 +00002762static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002763_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2764{
Gabor Buella70d8d512018-05-30 15:27:49 +00002765 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
2766 __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
2767 (__v8df) __B,
2768 (__v8df) __C,
2769 _MM_FROUND_CUR_DIRECTION),
2770 (__v8df) __C);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002771}
2772
Michael Kupersteine45af542015-06-30 13:36:19 +00002773static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002774_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2775{
Gabor Buella70d8d512018-05-30 15:27:49 +00002776 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
2777 __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
2778 (__v8df) __B,
2779 (__v8df) __C,
2780 _MM_FROUND_CUR_DIRECTION),
2781 (__v8df) _mm512_setzero_pd());
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002782}
2783
Michael Kupersteine45af542015-06-30 13:36:19 +00002784static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002785_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2786{
Gabor Buella70d8d512018-05-30 15:27:49 +00002787 return (__m512d) __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
2788 (__v8df) __B,
2789 -(__v8df) __C,
2790 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002791}
2792
Michael Kupersteine45af542015-06-30 13:36:19 +00002793static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002794_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2795{
Gabor Buella70d8d512018-05-30 15:27:49 +00002796 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
2797 __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
2798 (__v8df) __B,
2799 -(__v8df) __C,
2800 _MM_FROUND_CUR_DIRECTION),
2801 (__v8df) _mm512_setzero_pd());
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002802}
2803
/* Fused multiply-add on sixteen floats, (A * B) + C, with rounding control R.
 * Fully parenthesized so the result can be used in any expression. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(B), \
                                      (__v16sf)(__m512)(C), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002808
2809
/* Merge-masked (A * B) + C on floats with rounding control R: lanes selected
 * by U take the FMA result, the others keep A. Note that A is expanded
 * twice, so it should not have side effects. Fully parenthesized. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(B), \
                                      (__v16sf)(__m512)(C), \
                                      (int)(R)), \
           (__v16sf)(__m512)(A)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002817
2818
/* Merge-masked (A * B) + C on floats with rounding control R: lanes selected
 * by U take the FMA result, the others keep C. Note that C is expanded
 * twice, so it should not have side effects. Fully parenthesized. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(B), \
                                      (__v16sf)(__m512)(C), \
                                      (int)(R)), \
           (__v16sf)(__m512)(C)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002826
2827
/* Zero-masked (A * B) + C on floats with rounding control R: lanes selected
 * by U take the FMA result, the others are zero. Fully parenthesized. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(B), \
                                      (__v16sf)(__m512)(C), \
                                      (int)(R)), \
           (__v16sf)_mm512_setzero_ps()))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002835
2836
/* Fused multiply-subtract on sixteen floats, (A * B) - C, with rounding
 * control R; implemented as an FMA with C negated. Fully parenthesized. */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(B), \
                                      -(__v16sf)(__m512)(C), \
                                      (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002842
2843
/* Merge-masked (A * B) - C on floats with rounding control R: lanes selected
 * by U take the result, the others keep A. Note that A is expanded twice.
 * Fully parenthesized. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(B), \
                                      -(__v16sf)(__m512)(C), \
                                      (int)(R)), \
           (__v16sf)(__m512)(A)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002851
2852
/* Zero-masked (A * B) - C on floats with rounding control R: lanes selected
 * by U take the result, the others are zero. Fully parenthesized. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(B), \
                                      -(__v16sf)(__m512)(C), \
                                      (int)(R)), \
           (__v16sf)_mm512_setzero_ps()))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002860
2861
/* Negated fused multiply-add on sixteen floats, -(A * B) + C, with rounding
 * control R; implemented as an FMA with A negated. Fully parenthesized. */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(B), \
                                      (__v16sf)(__m512)(C), (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002866
2867
/* Merge-masked -(A * B) + C on floats with rounding control R: lanes
 * selected by U take the result, the others keep C. Note that C is expanded
 * twice. Fully parenthesized. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(B), \
                                      (__v16sf)(__m512)(C), \
                                      (int)(R)), \
           (__v16sf)(__m512)(C)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002875
2876
/* Zero-masked -(A * B) + C on floats with rounding control R: lanes selected
 * by U take the result, the others are zero. Fully parenthesized. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(B), \
                                      (__v16sf)(__m512)(C), \
                                      (int)(R)), \
           (__v16sf)_mm512_setzero_ps()))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002884
2885
/* Negated fused multiply-subtract on sixteen floats, -(A * B) - C, with
 * rounding control R; implemented as an FMA with both A and C negated.
 * Fully parenthesized. */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(B), \
                                      -(__v16sf)(__m512)(C), \
                                      (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002891
2892
/* Zero-masked -(A * B) - C on floats with rounding control R: lanes selected
 * by U take the result, the others are zero. Fully parenthesized. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(B), \
                                      -(__v16sf)(__m512)(C), \
                                      (int)(R)), \
           (__v16sf)_mm512_setzero_ps()))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002900
2901
Michael Kupersteine45af542015-06-30 13:36:19 +00002902static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002903_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2904{
Gabor Buella70d8d512018-05-30 15:27:49 +00002905 return (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
2906 (__v16sf) __B,
2907 (__v16sf) __C,
2908 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002909}
2910
Michael Kupersteine45af542015-06-30 13:36:19 +00002911static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002912_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2913{
Gabor Buella70d8d512018-05-30 15:27:49 +00002914 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
2915 __builtin_ia32_vfmaddps512 ((__v16sf) __A,
2916 (__v16sf) __B,
2917 (__v16sf) __C,
2918 _MM_FROUND_CUR_DIRECTION), \
2919 (__v16sf) __A);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002920}
2921
Michael Kupersteine45af542015-06-30 13:36:19 +00002922static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002923_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2924{
Gabor Buella70d8d512018-05-30 15:27:49 +00002925 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
2926 __builtin_ia32_vfmaddps512 ((__v16sf) __A,
2927 (__v16sf) __B,
2928 (__v16sf) __C,
2929 _MM_FROUND_CUR_DIRECTION), \
2930 (__v16sf) __C);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002931}
2932
Michael Kupersteine45af542015-06-30 13:36:19 +00002933static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002934_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2935{
Gabor Buella70d8d512018-05-30 15:27:49 +00002936 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
2937 __builtin_ia32_vfmaddps512 ((__v16sf) __A,
2938 (__v16sf) __B,
2939 (__v16sf) __C,
2940 _MM_FROUND_CUR_DIRECTION),
2941 (__v16sf) _mm512_setzero_ps());
Adam Nemet2278fcb2014-08-14 17:17:57 +00002942}
2943
Michael Kupersteine45af542015-06-30 13:36:19 +00002944static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002945_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2946{
Gabor Buella70d8d512018-05-30 15:27:49 +00002947 return (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
2948 (__v16sf) __B,
2949 -(__v16sf) __C,
2950 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002951}
2952
Michael Kupersteine45af542015-06-30 13:36:19 +00002953static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002954_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2955{
Gabor Buella70d8d512018-05-30 15:27:49 +00002956 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
2957 __builtin_ia32_vfmaddps512 ((__v16sf) __A,
2958 (__v16sf) __B,
2959 -(__v16sf) __C,
2960 _MM_FROUND_CUR_DIRECTION),
2961 (__v16sf) __A);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002962}
2963
Michael Kupersteine45af542015-06-30 13:36:19 +00002964static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002965_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2966{
Gabor Buella70d8d512018-05-30 15:27:49 +00002967 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
2968 __builtin_ia32_vfmaddps512 ((__v16sf) __A,
2969 (__v16sf) __B,
2970 -(__v16sf) __C,
2971 _MM_FROUND_CUR_DIRECTION),
2972 (__v16sf) _mm512_setzero_ps());
Adam Nemet2278fcb2014-08-14 17:17:57 +00002973}
2974
Michael Kupersteine45af542015-06-30 13:36:19 +00002975static __inline__ __m512 __DEFAULT_FN_ATTRS
Adam Nemet2278fcb2014-08-14 17:17:57 +00002976_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2977{
Gabor Buella70d8d512018-05-30 15:27:49 +00002978 return (__m512) __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
2979 (__v16sf) __B,
2980 (__v16sf) __C,
2981 _MM_FROUND_CUR_DIRECTION);
Adam Nemet2278fcb2014-08-14 17:17:57 +00002982}
2983
Michael Kupersteine45af542015-06-30 13:36:19 +00002984static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002985_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2986{
Gabor Buella70d8d512018-05-30 15:27:49 +00002987 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
2988 __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
2989 (__v16sf) __B,
2990 (__v16sf) __C,
2991 _MM_FROUND_CUR_DIRECTION),
2992 (__v16sf) __C);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002993}
2994
Michael Kupersteine45af542015-06-30 13:36:19 +00002995static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00002996_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2997{
Gabor Buella70d8d512018-05-30 15:27:49 +00002998 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
2999 __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
3000 (__v16sf) __B,
3001 (__v16sf) __C,
3002 _MM_FROUND_CUR_DIRECTION),
3003 (__v16sf) _mm512_setzero_ps());
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003004}
3005
Michael Kupersteine45af542015-06-30 13:36:19 +00003006static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003007_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
3008{
Gabor Buella70d8d512018-05-30 15:27:49 +00003009 return (__m512) __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
3010 (__v16sf) __B,
3011 -(__v16sf) __C,
3012 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003013}
3014
Michael Kupersteine45af542015-06-30 13:36:19 +00003015static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003016_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3017{
Gabor Buella70d8d512018-05-30 15:27:49 +00003018 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
3019 __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
3020 (__v16sf) __B,
3021 -(__v16sf) __C,
3022 _MM_FROUND_CUR_DIRECTION),
3023 (__v16sf) _mm512_setzero_ps());
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003024}
3025
/* Fused multiply with alternating add/subtract of C (the fmaddsub builtin)
 * on eight doubles, with rounding control R. Fully parenthesized so the
 * result can be used in any expression. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), \
                                          (__v8df)(__m512d)(C), \
                                          (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003031
3032
/* Merge-masked fmaddsub on doubles with rounding control R: lanes selected
 * by U take the result, the others keep A. Note that A is expanded twice.
 * Fully parenthesized. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), \
                                          (__v8df)(__m512d)(C), \
                                          (int)(R)), \
            (__v8df)(__m512d)(A)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003040
3041
/* Merge-masked fmaddsub on doubles with rounding control R: lanes selected
 * by U take the result, the others keep C. Note that C is expanded twice.
 * Fully parenthesized. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), \
                                          (__v8df)(__m512d)(C), \
                                          (int)(R)), \
            (__v8df)(__m512d)(C)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003049
3050
/* Zero-masked fmaddsub on doubles with rounding control R: lanes selected
 * by U take the result, the others are zero. Fully parenthesized. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), \
                                          (__v8df)(__m512d)(C), \
                                          (int)(R)), \
            (__v8df)_mm512_setzero_pd()))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003058
3059
/* fmsubadd on eight doubles with rounding control R, implemented as the
 * fmaddsub builtin with C negated. Fully parenthesized. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), \
                                          -(__v8df)(__m512d)(C), \
                                          (int)(R)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003065
3066
/* Merge-masked fmsubadd on doubles with rounding control R (fmaddsub with C
 * negated): lanes selected by U take the result, the others keep A. Note
 * that A is expanded twice. Fully parenthesized. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), \
                                          -(__v8df)(__m512d)(C), \
                                          (int)(R)), \
            (__v8df)(__m512d)(A)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003074
3075
/* Zero-masked fmsubadd on doubles with rounding control R (fmaddsub with C
 * negated): lanes selected by U take the result, the others are zero.
 * Fully parenthesized. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), \
                                          -(__v8df)(__m512d)(C), \
                                          (int)(R)), \
            (__v8df)_mm512_setzero_pd()))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003083
3084
Michael Kupersteine45af542015-06-30 13:36:19 +00003085static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003086_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
3087{
Gabor Buella70d8d512018-05-30 15:27:49 +00003088 return (__m512d) __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
3089 (__v8df) __B,
3090 (__v8df) __C,
3091 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003092}
3093
Michael Kupersteine45af542015-06-30 13:36:19 +00003094static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003095_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3096{
Gabor Buella70d8d512018-05-30 15:27:49 +00003097 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
3098 __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
3099 (__v8df) __B,
3100 (__v8df) __C,
3101 _MM_FROUND_CUR_DIRECTION),
3102 (__v8df) __A);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003103}
3104
Michael Kupersteine45af542015-06-30 13:36:19 +00003105static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003106_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3107{
Gabor Buella70d8d512018-05-30 15:27:49 +00003108 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
3109 __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
3110 (__v8df) __B,
3111 (__v8df) __C,
3112 _MM_FROUND_CUR_DIRECTION),
3113 (__v8df) __C);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003114}
3115
Michael Kupersteine45af542015-06-30 13:36:19 +00003116static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003117_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3118{
Gabor Buella70d8d512018-05-30 15:27:49 +00003119 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
3120 __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
3121 (__v8df) __B,
3122 (__v8df) __C,
3123 _MM_FROUND_CUR_DIRECTION),
3124 (__v8df) _mm512_setzero_pd());
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003125}
3126
Michael Kupersteine45af542015-06-30 13:36:19 +00003127static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003128_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
3129{
Gabor Buella70d8d512018-05-30 15:27:49 +00003130 return (__m512d) __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
3131 (__v8df) __B,
3132 -(__v8df) __C,
3133 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003134}
3135
Michael Kupersteine45af542015-06-30 13:36:19 +00003136static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003137_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3138{
Gabor Buella70d8d512018-05-30 15:27:49 +00003139 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
3140 __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
3141 (__v8df) __B,
3142 -(__v8df) __C,
3143 _MM_FROUND_CUR_DIRECTION),
3144 (__v8df) __A);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003145}
3146
Michael Kupersteine45af542015-06-30 13:36:19 +00003147static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003148_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3149{
Gabor Buella70d8d512018-05-30 15:27:49 +00003150 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
3151 __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
3152 (__v8df) __B,
3153 -(__v8df) __C,
3154 _MM_FROUND_CUR_DIRECTION),
3155 (__v8df) _mm512_setzero_pd());
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003156}
3157
/* fmaddsub on 16 floats with explicit rounding control R.  Every expansion
 * in this family is fully parenthesized so the leading cast cannot be split
 * off by surrounding operators. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), \
                                         (__v16sf)(__m512)(C), \
                                         (int)(R)))

/* Merge-masked form: elements not selected by U are taken from A
 * (A is expanded twice). */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), \
                                         (__v16sf)(__m512)(C), \
                                         (int)(R)), \
           (__v16sf)(__m512)(A)))

/* mask3 form: elements not selected by U are taken from C
 * (C is expanded twice). */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), \
                                         (__v16sf)(__m512)(C), \
                                         (int)(R)), \
           (__v16sf)(__m512)(C)))

/* Zero-masked form: elements not selected by U come from
 * _mm512_setzero_ps(). */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), \
                                         (__v16sf)(__m512)(C), \
                                         (int)(R)), \
           (__v16sf)_mm512_setzero_ps()))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003190
3191
/* fmsubadd on 16 floats with explicit rounding control R, expressed as the
 * fmaddsub builtin applied to A, B and -C.  Expansions are fully
 * parenthesized. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), \
                                         -(__v16sf)(__m512)(C), \
                                         (int)(R)))

/* Merge-masked form: elements not selected by U are taken from A
 * (A is expanded twice). */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), \
                                         -(__v16sf)(__m512)(C), \
                                         (int)(R)), \
           (__v16sf)(__m512)(A)))

/* Zero-masked form: elements not selected by U come from
 * _mm512_setzero_ps(). */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), \
                                         -(__v16sf)(__m512)(C), \
                                         (int)(R)), \
           (__v16sf)_mm512_setzero_ps()))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003215
3216
Michael Kupersteine45af542015-06-30 13:36:19 +00003217static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003218_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3219{
Gabor Buella70d8d512018-05-30 15:27:49 +00003220 return (__m512) __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
3221 (__v16sf) __B,
3222 (__v16sf) __C,
3223 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003224}
3225
Michael Kupersteine45af542015-06-30 13:36:19 +00003226static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003227_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3228{
Gabor Buella70d8d512018-05-30 15:27:49 +00003229 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
3230 __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
3231 (__v16sf) __B,
3232 (__v16sf) __C,
3233 _MM_FROUND_CUR_DIRECTION), \
3234 (__v16sf) __A);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003235}
3236
Michael Kupersteine45af542015-06-30 13:36:19 +00003237static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003238_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3239{
Gabor Buella70d8d512018-05-30 15:27:49 +00003240 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
3241 __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
3242 (__v16sf) __B,
3243 (__v16sf) __C,
3244 _MM_FROUND_CUR_DIRECTION), \
3245 (__v16sf) __C);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003246}
3247
Michael Kupersteine45af542015-06-30 13:36:19 +00003248static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003249_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3250{
Gabor Buella70d8d512018-05-30 15:27:49 +00003251 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
3252 __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
3253 (__v16sf) __B,
3254 (__v16sf) __C,
3255 _MM_FROUND_CUR_DIRECTION),
3256 (__v16sf) _mm512_setzero_ps());
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003257}
3258
Michael Kupersteine45af542015-06-30 13:36:19 +00003259static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003260_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3261{
Gabor Buella70d8d512018-05-30 15:27:49 +00003262 return (__m512) __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
3263 (__v16sf) __B,
3264 -(__v16sf) __C,
3265 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003266}
3267
Michael Kupersteine45af542015-06-30 13:36:19 +00003268static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003269_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3270{
Gabor Buella70d8d512018-05-30 15:27:49 +00003271 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
3272 __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
3273 (__v16sf) __B,
3274 -(__v16sf) __C,
3275 _MM_FROUND_CUR_DIRECTION),
3276 (__v16sf) __A);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003277}
3278
Michael Kupersteine45af542015-06-30 13:36:19 +00003279static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003280_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3281{
Gabor Buella70d8d512018-05-30 15:27:49 +00003282 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
3283 __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
3284 (__v16sf) __B,
3285 -(__v16sf) __C,
3286 _MM_FROUND_CUR_DIRECTION),
3287 (__v16sf) _mm512_setzero_ps());
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003288}
3289
Craig Topperc6338672018-05-31 00:51:20 +00003290#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
Gabor Buella70d8d512018-05-30 15:27:49 +00003291 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
3292 (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
3293 (__v8df)(__m512d)(B), \
3294 -(__v8df)(__m512d)(C), \
3295 (int)(R)), \
Craig Topperc6338672018-05-31 00:51:20 +00003296 (__v8df)(__m512d)(C))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003297
3298
Michael Kupersteine45af542015-06-30 13:36:19 +00003299static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003300_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3301{
Gabor Buella70d8d512018-05-30 15:27:49 +00003302 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
3303 (__m512d)__builtin_ia32_vfmaddpd512 ((__v8df) __A,
3304 (__v8df) __B,
3305 -(__v8df) __C,
3306 _MM_FROUND_CUR_DIRECTION),
3307 (__v8df) __C);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003308}
3309
Craig Topperc6338672018-05-31 00:51:20 +00003310#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
Gabor Buella70d8d512018-05-30 15:27:49 +00003311 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
3312 (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
3313 (__v16sf)(__m512)(B), \
3314 -(__v16sf)(__m512)(C), \
3315 (int)(R)), \
Craig Topperc6338672018-05-31 00:51:20 +00003316 (__v16sf)(__m512)(C))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003317
Michael Kupersteine45af542015-06-30 13:36:19 +00003318static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003319_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3320{
Gabor Buella70d8d512018-05-30 15:27:49 +00003321 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
3322 (__m512)__builtin_ia32_vfmaddps512 ((__v16sf) __A,
3323 (__v16sf) __B,
3324 -(__v16sf) __C,
3325 _MM_FROUND_CUR_DIRECTION),
3326 (__v16sf) __C);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003327}
3328
Craig Topperc6338672018-05-31 00:51:20 +00003329#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
Gabor Buella70d8d512018-05-30 15:27:49 +00003330 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
3331 (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
3332 (__v8df)(__m512d)(B), \
3333 -(__v8df)(__m512d)(C), \
3334 (int)(R)), \
Craig Topperc6338672018-05-31 00:51:20 +00003335 (__v8df)(__m512d)(C))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003336
3337
Michael Kupersteine45af542015-06-30 13:36:19 +00003338static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003339_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3340{
Gabor Buella70d8d512018-05-30 15:27:49 +00003341 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
3342 (__m512d)__builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
3343 (__v8df) __B,
3344 -(__v8df) __C,
3345 _MM_FROUND_CUR_DIRECTION),
3346 (__v8df) __C);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003347}
3348
Craig Topperc6338672018-05-31 00:51:20 +00003349#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
Gabor Buella70d8d512018-05-30 15:27:49 +00003350 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
3351 (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
3352 (__v16sf)(__m512)(B), \
3353 -(__v16sf)(__m512)(C), \
3354 (int)(R)), \
Craig Topperc6338672018-05-31 00:51:20 +00003355 (__v16sf)(__m512)(C))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003356
3357
Michael Kupersteine45af542015-06-30 13:36:19 +00003358static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003359_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3360{
Gabor Buella70d8d512018-05-30 15:27:49 +00003361 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
3362 (__m512)__builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
3363 (__v16sf) __B,
3364 -(__v16sf) __C,
3365 _MM_FROUND_CUR_DIRECTION),
3366 (__v16sf) __C);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003367}
3368
Craig Topperc6338672018-05-31 00:51:20 +00003369#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
Gabor Buella70d8d512018-05-30 15:27:49 +00003370 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
3371 __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
3372 (__v8df)(__m512d)(B), \
3373 (__v8df)(__m512d)(C), \
3374 (int)(R)), \
Craig Topperc6338672018-05-31 00:51:20 +00003375 (__v8df)(__m512d)(A))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003376
3377
Michael Kupersteine45af542015-06-30 13:36:19 +00003378static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003379_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3380{
Gabor Buella70d8d512018-05-30 15:27:49 +00003381 return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
3382 __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
3383 (__v8df) __B,
3384 (__v8df) __C,
3385 _MM_FROUND_CUR_DIRECTION),
3386 (__v8df) __A);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003387}
3388
Craig Topperc6338672018-05-31 00:51:20 +00003389#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
Gabor Buella70d8d512018-05-30 15:27:49 +00003390 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
3391 __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
3392 (__v16sf)(__m512)(B), \
3393 (__v16sf)(__m512)(C), \
3394 (int)(R)), \
Craig Topperc6338672018-05-31 00:51:20 +00003395 (__v16sf)(__m512)(A))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003396
3397
Michael Kupersteine45af542015-06-30 13:36:19 +00003398static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003399_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3400{
Gabor Buella70d8d512018-05-30 15:27:49 +00003401 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
3402 __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
3403 (__v16sf) __B,
3404 (__v16sf) __C,
3405 _MM_FROUND_CUR_DIRECTION),
3406 (__v16sf) __A);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003407}
3408
/* Masked fnmsub on 8 doubles with explicit rounding control R: the fmadd
 * builtin is applied to A, -B and -C.  Both expansions are fully
 * parenthesized. */

/* Merge form: elements not selected by U are taken from A
 * (A is expanded twice). */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            (__v8df)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
                                               -(__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (int)(R)), \
            (__v8df)(__m512d)(A)))

/* mask3 form: elements not selected by U are taken from C
 * (C is expanded twice). */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
            (__v8df)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
                                               -(__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (int)(R)), \
            (__v8df)(__m512d)(C)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003425
3426
Michael Kupersteine45af542015-06-30 13:36:19 +00003427static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003428_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3429{
Gabor Buella70d8d512018-05-30 15:27:49 +00003430 return (__m512d) __builtin_ia32_selectpd_512((__mmask16) __U,
3431 (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
3432 -(__v8df) __B,
3433 -(__v8df) __C,
3434 _MM_FROUND_CUR_DIRECTION),
3435 (__v8df) __A);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003436}
3437
Michael Kupersteine45af542015-06-30 13:36:19 +00003438static __inline__ __m512d __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003439_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3440{
Gabor Buella70d8d512018-05-30 15:27:49 +00003441 return (__m512d) __builtin_ia32_selectpd_512((__mmask16) __U,
3442 (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
3443 -(__v8df) __B,
3444 -(__v8df) __C,
3445 _MM_FROUND_CUR_DIRECTION),
3446 (__v8df) __C);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003447}
3448
/* Masked fnmsub on 16 floats with explicit rounding control R: the fmadd
 * builtin is applied to A, -B and -C.  Both expansions are fully
 * parenthesized. */

/* Merge form: elements not selected by U are taken from A
 * (A is expanded twice). */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           (__v16sf)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
                                               -(__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (int)(R)), \
           (__v16sf)(__m512)(A)))

/* mask3 form: elements not selected by U are taken from C
 * (C is expanded twice). */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
           (__v16sf)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
                                               -(__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (int)(R)), \
           (__v16sf)(__m512)(C)))
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003465
3466
Michael Kupersteine45af542015-06-30 13:36:19 +00003467static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003468_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3469{
Gabor Buella70d8d512018-05-30 15:27:49 +00003470 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
3471 (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
3472 -(__v16sf) __B,
3473 -(__v16sf) __C,
3474 _MM_FROUND_CUR_DIRECTION),
3475 (__v16sf) __A);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003476}
3477
Michael Kupersteine45af542015-06-30 13:36:19 +00003478static __inline__ __m512 __DEFAULT_FN_ATTRS
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003479_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3480{
Gabor Buella70d8d512018-05-30 15:27:49 +00003481 return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
3482 (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
3483 -(__v16sf) __B,
3484 -(__v16sf) __C,
3485 _MM_FROUND_CUR_DIRECTION),
3486 (__v16sf) __C);
Elena Demikhovskyc563c2c2015-06-29 09:20:57 +00003487}
3488
3489
3490
/* Vector permutations */
3492
Michael Kupersteine45af542015-06-30 13:36:19 +00003493static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003494_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3495{
Craig Topper68a272d2018-05-29 03:26:38 +00003496 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3497 (__v16si) __B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003498}
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003499
3500static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00003501_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3502 __m512i __B)
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003503{
Craig Topper68a272d2018-05-29 03:26:38 +00003504 return (__m512i)__builtin_ia32_selectd_512(__U,
3505 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3506 (__v16si)__A);
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003507}
3508
3509static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00003510_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3511 __m512i __B)
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003512{
Craig Topper68a272d2018-05-29 03:26:38 +00003513 return (__m512i)__builtin_ia32_selectd_512(__U,
3514 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3515 (__v16si)__I);
3516}
3517
3518static __inline__ __m512i __DEFAULT_FN_ATTRS
3519_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3520 __m512i __B)
3521{
3522 return (__m512i)__builtin_ia32_selectd_512(__U,
3523 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3524 (__v16si)_mm512_setzero_si512());
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003525}
3526
Michael Kupersteine45af542015-06-30 13:36:19 +00003527static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003528_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3529{
Craig Topper68a272d2018-05-29 03:26:38 +00003530 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3531 (__v8di) __B);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003532}
3533
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003534static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00003535_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3536 __m512i __B)
3537{
3538 return (__m512i)__builtin_ia32_selectq_512(__U,
3539 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3540 (__v8di)__A);
3541}
3542
3543static __inline__ __m512i __DEFAULT_FN_ATTRS
3544_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003545 __m512i __B)
Adam Nemet0d5bb552014-07-28 17:14:40 +00003546{
Craig Topper68a272d2018-05-29 03:26:38 +00003547 return (__m512i)__builtin_ia32_selectq_512(__U,
3548 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3549 (__v8di)__I);
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003550}
3551
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003552static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00003553_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3554 __m512i __B)
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00003555{
Craig Topper68a272d2018-05-29 03:26:38 +00003556 return (__m512i)__builtin_ia32_selectq_512(__U,
3557 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3558 (__v8di)_mm512_setzero_si512());
Adam Nemet0d5bb552014-07-28 17:14:40 +00003559}
3560
Craig Topperc6338672018-05-31 00:51:20 +00003561#define _mm512_alignr_epi64(A, B, I) \
Craig Topper6aefe002016-11-23 01:47:12 +00003562 (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
3563 (__v8di)(__m512i)(A), \
3564 ((int)(I) & 0x7) + 0, \
3565 ((int)(I) & 0x7) + 1, \
3566 ((int)(I) & 0x7) + 2, \
3567 ((int)(I) & 0x7) + 3, \
3568 ((int)(I) & 0x7) + 4, \
3569 ((int)(I) & 0x7) + 5, \
3570 ((int)(I) & 0x7) + 6, \
Craig Topperc6338672018-05-31 00:51:20 +00003571 ((int)(I) & 0x7) + 7)
Adam Nemet5bf7baa2014-08-05 17:28:23 +00003572
/* Merge-masked alignr (64-bit elements): elements not selected by U are
 * taken from W.  The expansion is fully parenthesized. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

/* Zero-masked alignr (64-bit elements): elements not selected by U come
 * from _mm512_setzero_si512(). */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))
Michael Zuckerman533e0652016-04-28 12:47:30 +00003582
Craig Topperc6338672018-05-31 00:51:20 +00003583#define _mm512_alignr_epi32(A, B, I) \
Craig Topper6aefe002016-11-23 01:47:12 +00003584 (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
3585 (__v16si)(__m512i)(A), \
3586 ((int)(I) & 0xf) + 0, \
3587 ((int)(I) & 0xf) + 1, \
3588 ((int)(I) & 0xf) + 2, \
3589 ((int)(I) & 0xf) + 3, \
3590 ((int)(I) & 0xf) + 4, \
3591 ((int)(I) & 0xf) + 5, \
3592 ((int)(I) & 0xf) + 6, \
3593 ((int)(I) & 0xf) + 7, \
3594 ((int)(I) & 0xf) + 8, \
3595 ((int)(I) & 0xf) + 9, \
3596 ((int)(I) & 0xf) + 10, \
3597 ((int)(I) & 0xf) + 11, \
3598 ((int)(I) & 0xf) + 12, \
3599 ((int)(I) & 0xf) + 13, \
3600 ((int)(I) & 0xf) + 14, \
Craig Topperc6338672018-05-31 00:51:20 +00003601 ((int)(I) & 0xf) + 15)
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00003602
/* Merge-masked alignr (32-bit elements): elements not selected by U are
 * taken from W.  The expansion is fully parenthesized. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

/* Zero-masked alignr (32-bit elements): elements not selected by U come
 * from _mm512_setzero_si512(). */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
/* Vector Extract */
3613
Craig Topperc6338672018-05-31 00:51:20 +00003614#define _mm512_extractf64x4_pd(A, I) \
Craig Topper93ffabd2016-10-31 04:30:56 +00003615 (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \
3616 (__v8df)_mm512_undefined_pd(), \
3617 ((I) & 1) ? 4 : 0, \
3618 ((I) & 1) ? 5 : 1, \
3619 ((I) & 1) ? 6 : 2, \
Craig Topperc6338672018-05-31 00:51:20 +00003620 ((I) & 1) ? 7 : 3)
Adam Nemetf893ede2015-01-19 20:12:05 +00003621
/* Merge-masked 4-double extract: elements not selected by U are taken from
 * W.  The expansion is fully parenthesized. */
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                 (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                 (__v4df)(__m256d)(W)))

/* Zero-masked 4-double extract: elements not selected by U come from
 * _mm256_setzero_pd(). */
#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                 (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                 (__v4df)_mm256_setzero_pd()))
Michael Zuckerman2564d2f2016-05-10 10:14:50 +00003631
Craig Topperc6338672018-05-31 00:51:20 +00003632#define _mm512_extractf32x4_ps(A, I) \
Craig Topper93ffabd2016-10-31 04:30:56 +00003633 (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \
3634 (__v16sf)_mm512_undefined_ps(), \
3635 0 + ((I) & 0x3) * 4, \
3636 1 + ((I) & 0x3) * 4, \
3637 2 + ((I) & 0x3) * 4, \
Craig Topperc6338672018-05-31 00:51:20 +00003638 3 + ((I) & 0x3) * 4)
Adam Nemetf893ede2015-01-19 20:12:05 +00003639
/* Merge-masked 4-float extract: elements not selected by U are taken from
 * W.  The expansion is fully parenthesized. */
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                 (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                 (__v4sf)(__m128)(W)))

/* Zero-masked 4-float extract: elements not selected by U come from
 * _mm_setzero_ps(). */
#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                 (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                 (__v4sf)_mm_setzero_ps()))
Craig Topper93ffabd2016-10-31 04:30:56 +00003649
/* Vector Blend */
3651
Michael Kupersteine45af542015-06-30 13:36:19 +00003652static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003653_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3654{
Igor Bregeraadb8762016-06-08 13:59:20 +00003655 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
Adam Nemet0d5bb552014-07-28 17:14:40 +00003656 (__v8df) __W,
Igor Bregeraadb8762016-06-08 13:59:20 +00003657 (__v8df) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003658}
3659
Michael Kupersteine45af542015-06-30 13:36:19 +00003660static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003661_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3662{
Igor Bregeraadb8762016-06-08 13:59:20 +00003663 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
Adam Nemet0d5bb552014-07-28 17:14:40 +00003664 (__v16sf) __W,
Igor Bregeraadb8762016-06-08 13:59:20 +00003665 (__v16sf) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003666}
3667
Michael Kupersteine45af542015-06-30 13:36:19 +00003668static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003669_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3670{
Igor Bregeraadb8762016-06-08 13:59:20 +00003671 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
Adam Nemet0d5bb552014-07-28 17:14:40 +00003672 (__v8di) __W,
Igor Bregeraadb8762016-06-08 13:59:20 +00003673 (__v8di) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003674}
3675
Michael Kupersteine45af542015-06-30 13:36:19 +00003676static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003677_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3678{
Igor Bregeraadb8762016-06-08 13:59:20 +00003679 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
Adam Nemet0d5bb552014-07-28 17:14:40 +00003680 (__v16si) __W,
Igor Bregeraadb8762016-06-08 13:59:20 +00003681 (__v16si) __A);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003682}
3683
/* Compare */
3685
/* Compares the 16 floats of A and B with predicate P under rounding/SAE
 * control R and yields a __mmask16.  The unmasked form passes an all-ones
 * mask to the builtin.  The expansion is fully parenthesized. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

/* Masked form: U is forwarded to the builtin as the incoming mask. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))
Craig Topper53565c62015-02-01 22:27:40 +00003695
3696#define _mm512_cmp_ps_mask(A, B, P) \
3697 _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
Craig Topper53565c62015-02-01 22:27:40 +00003698#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
3699 _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3700
Ayman Musa2e250e82016-09-27 14:06:32 +00003701#define _mm512_cmpeq_ps_mask(A, B) \
3702 _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
3703#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
3704 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
3705
3706#define _mm512_cmplt_ps_mask(A, B) \
3707 _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
3708#define _mm512_mask_cmplt_ps_mask(k, A, B) \
3709 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
3710
3711#define _mm512_cmple_ps_mask(A, B) \
3712 _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
3713#define _mm512_mask_cmple_ps_mask(k, A, B) \
3714 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
3715
3716#define _mm512_cmpunord_ps_mask(A, B) \
3717 _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
3718#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
3719 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
3720
3721#define _mm512_cmpneq_ps_mask(A, B) \
3722 _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
3723#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
3724 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
3725
3726#define _mm512_cmpnlt_ps_mask(A, B) \
3727 _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
3728#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
3729 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
3730
3731#define _mm512_cmpnle_ps_mask(A, B) \
3732 _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
3733#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
3734 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
3735
3736#define _mm512_cmpord_ps_mask(A, B) \
3737 _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
3738#define _mm512_mask_cmpord_ps_mask(k, A, B) \
3739 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3740
/* Packed double-precision compares that yield an 8-bit lane mask.
 *   A, B - __m512d operands
 *   P    - comparison predicate (one of the _CMP_* constants)
 *   R    - rounding/exception control passed through to the builtin
 *   U/k  - mask handed to the builtin in the "mask_" forms
 * The two base macros are fully parenthesized so that the leading cast cannot
 * re-associate with an operator at the use site (postfix operators and sizeof
 * bind tighter than a cast). */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

/* Same compares using the current rounding direction. */
#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Fixed-predicate shorthands; each forwards one _CMP_* constant. */
#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3795
Adam Nemet0d5bb552014-07-28 17:14:40 +00003796/* Conversion */
3797
/* Truncating float -> unsigned 32-bit conversion with explicit control R.
 *   A - __m512 source, W - pass-through vector, U - 16-bit lane mask.
 * Plain form: all-ones mask, undefined pass-through.
 * mask form: pass-through is W.  maskz form: pass-through is zero.
 * Expansions are fully parenthesized so the result cast cannot re-associate
 * with an operator at the use site. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_undefined_epi32(), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)(__m512i)(W), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_setzero_si512(), \
                                              (__mmask16)(U), (int)(R)))
Michael Zuckerman6170c152016-06-01 14:41:41 +00003812
3813
Michael Kupersteine45af542015-06-30 13:36:19 +00003814static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003815_mm512_cvttps_epu32(__m512 __A)
3816{
3817 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3818 (__v16si)
3819 _mm512_setzero_si512 (),
3820 (__mmask16) -1,
3821 _MM_FROUND_CUR_DIRECTION);
3822}
3823
Michael Zuckermanf1544752016-05-09 10:32:51 +00003824static __inline__ __m512i __DEFAULT_FN_ATTRS
3825_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3826{
3827 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3828 (__v16si) __W,
3829 (__mmask16) __U,
3830 _MM_FROUND_CUR_DIRECTION);
3831}
3832
3833static __inline__ __m512i __DEFAULT_FN_ATTRS
3834_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3835{
3836 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3837 (__v16si) _mm512_setzero_si512 (),
3838 (__mmask16) __U,
3839 _MM_FROUND_CUR_DIRECTION);
3840}
3841
/* Signed 32-bit integer -> float conversion with explicit rounding R.
 *   A - __m512i source, W - pass-through vector, U - 16-bit lane mask.
 * Plain form: all-ones mask.  mask form: pass-through is W.
 * maskz form: pass-through is zero.
 * Expansions are fully parenthesized so the result cast cannot re-associate
 * with an operator at the use site. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003856
/* Unsigned 32-bit integer -> float conversion with explicit rounding R.
 *   A - __m512i source, W - pass-through vector, U - 16-bit lane mask.
 * Plain form: all-ones mask.  mask form: pass-through is W.
 * maskz form: pass-through is zero.
 * Expansions are fully parenthesized so the result cast cannot re-associate
 * with an operator at the use site. */
#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003871
Michael Zuckermanf1544752016-05-09 10:32:51 +00003872static __inline__ __m512 __DEFAULT_FN_ATTRS
3873_mm512_cvtepu32_ps (__m512i __A)
3874{
Craig Topper842171d2018-05-21 20:19:17 +00003875 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003876}
3877
3878static __inline__ __m512 __DEFAULT_FN_ATTRS
3879_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3880{
Craig Topper842171d2018-05-21 20:19:17 +00003881 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3882 (__v16sf)_mm512_cvtepu32_ps(__A),
3883 (__v16sf)__W);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003884}
3885
3886static __inline__ __m512 __DEFAULT_FN_ATTRS
3887_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3888{
Craig Topper842171d2018-05-21 20:19:17 +00003889 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3890 (__v16sf)_mm512_cvtepu32_ps(__A),
3891 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanf1544752016-05-09 10:32:51 +00003892}
3893
Michael Kupersteine45af542015-06-30 13:36:19 +00003894static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003895_mm512_cvtepi32_pd(__m256i __A)
3896{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003897 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003898}
3899
Michael Zuckermanf1544752016-05-09 10:32:51 +00003900static __inline__ __m512d __DEFAULT_FN_ATTRS
3901_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3902{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003903 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3904 (__v8df)_mm512_cvtepi32_pd(__A),
3905 (__v8df)__W);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003906}
3907
3908static __inline__ __m512d __DEFAULT_FN_ATTRS
3909_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3910{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003911 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3912 (__v8df)_mm512_cvtepi32_pd(__A),
3913 (__v8df)_mm512_setzero_pd());
Michael Zuckermanf1544752016-05-09 10:32:51 +00003914}
3915
Ayman Musa2e250e82016-09-27 14:06:32 +00003916static __inline__ __m512d __DEFAULT_FN_ATTRS
3917_mm512_cvtepi32lo_pd(__m512i __A)
3918{
3919 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3920}
3921
3922static __inline__ __m512d __DEFAULT_FN_ATTRS
3923_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3924{
3925 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3926}
3927
Michael Zuckermanf1544752016-05-09 10:32:51 +00003928static __inline__ __m512 __DEFAULT_FN_ATTRS
3929_mm512_cvtepi32_ps (__m512i __A)
3930{
Craig Topper842171d2018-05-21 20:19:17 +00003931 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003932}
3933
3934static __inline__ __m512 __DEFAULT_FN_ATTRS
3935_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3936{
Craig Topper842171d2018-05-21 20:19:17 +00003937 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3938 (__v16sf)_mm512_cvtepi32_ps(__A),
3939 (__v16sf)__W);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003940}
3941
3942static __inline__ __m512 __DEFAULT_FN_ATTRS
3943_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3944{
Craig Topper842171d2018-05-21 20:19:17 +00003945 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3946 (__v16sf)_mm512_cvtepi32_ps(__A),
3947 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanf1544752016-05-09 10:32:51 +00003948}
3949
Michael Kupersteine45af542015-06-30 13:36:19 +00003950static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet0d5bb552014-07-28 17:14:40 +00003951_mm512_cvtepu32_pd(__m256i __A)
3952{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003953 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
Adam Nemet0d5bb552014-07-28 17:14:40 +00003954}
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00003955
Michael Zuckermanf1544752016-05-09 10:32:51 +00003956static __inline__ __m512d __DEFAULT_FN_ATTRS
3957_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3958{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003959 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3960 (__v8df)_mm512_cvtepu32_pd(__A),
3961 (__v8df)__W);
Michael Zuckermanf1544752016-05-09 10:32:51 +00003962}
3963
3964static __inline__ __m512d __DEFAULT_FN_ATTRS
3965_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3966{
Simon Pilgrim698528d2016-11-16 09:27:40 +00003967 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3968 (__v8df)_mm512_cvtepu32_pd(__A),
3969 (__v8df)_mm512_setzero_pd());
Michael Zuckermanf1544752016-05-09 10:32:51 +00003970}
3971
Ayman Musa2e250e82016-09-27 14:06:32 +00003972static __inline__ __m512d __DEFAULT_FN_ATTRS
3973_mm512_cvtepu32lo_pd(__m512i __A)
3974{
3975 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3976}
3977
3978static __inline__ __m512d __DEFAULT_FN_ATTRS
3979_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3980{
3981 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3982}
3983
/* Double -> float narrowing conversion with explicit rounding R; the result
 * is a 256-bit vector.
 *   A - __m512d source, W - __m256 pass-through, U - 8-bit lane mask.
 * Plain form: all-ones mask.  mask form: pass-through is W.
 * maskz form: pass-through is zero.
 * Expansions are fully parenthesized so the result cast cannot re-associate
 * with an operator at the use site. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00003998
Michael Zuckermanf1544752016-05-09 10:32:51 +00003999static __inline__ __m256 __DEFAULT_FN_ATTRS
4000_mm512_cvtpd_ps (__m512d __A)
4001{
4002 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
4003 (__v8sf) _mm256_undefined_ps (),
4004 (__mmask8) -1,
4005 _MM_FROUND_CUR_DIRECTION);
4006}
4007
4008static __inline__ __m256 __DEFAULT_FN_ATTRS
4009_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
4010{
4011 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
4012 (__v8sf) __W,
4013 (__mmask8) __U,
4014 _MM_FROUND_CUR_DIRECTION);
4015}
4016
4017static __inline__ __m256 __DEFAULT_FN_ATTRS
4018_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
4019{
4020 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
4021 (__v8sf) _mm256_setzero_ps (),
4022 (__mmask8) __U,
4023 _MM_FROUND_CUR_DIRECTION);
4024}
Michael Zuckerman2564d2f2016-05-10 10:14:50 +00004025
Ayman Musa2e250e82016-09-27 14:06:32 +00004026static __inline__ __m512 __DEFAULT_FN_ATTRS
4027_mm512_cvtpd_pslo (__m512d __A)
4028{
4029 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
4030 (__v8sf) _mm256_setzero_ps (),
4031 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4032}
4033
4034static __inline__ __m512 __DEFAULT_FN_ATTRS
4035_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
4036{
4037 return (__m512) __builtin_shufflevector (
4038 (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
4039 __U, __A),
4040 (__v8sf) _mm256_setzero_ps (),
4041 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4042}
4043
/* Float -> half-precision conversion; the 16 half values are returned in a
 * 256-bit integer vector.
 *   A - __m512 source
 *   I - immediate control passed through to the builtin
 *   W - __m256i pass-through vector (mask forms)
 *   U - 16-bit lane mask (mask/maskz forms)
 * Parameter names follow the rest of this header (W = pass-through,
 * U = mask); argument positions are unchanged.  Expansions are fully
 * parenthesized so the result cast cannot re-associate with an operator at
 * the use site. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

#define _mm512_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(W), \
                                             (__mmask16)(U)))

#define _mm512_maskz_cvt_roundps_ph(U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(U)))

#define _mm512_cvtps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)-1))

#define _mm512_mask_cvtps_ph(W, U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(W), \
                                             (__mmask16)(U)))

#define _mm512_maskz_cvtps_ph(U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(U)))
Michael Zuckerman6170c152016-06-01 14:41:41 +00004073
/* Half-precision -> float conversion with explicit control R.
 *   A - __m256i holding 16 half values, W - pass-through vector,
 *   U - 16-bit lane mask.
 * Plain form: all-ones mask, undefined pass-through.
 * mask form: pass-through is W.  maskz form: pass-through is zero.
 * Expansions are fully parenthesized so the result cast cannot re-associate
 * with an operator at the use site. */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
Craig Topperf3efec62016-06-08 06:08:07 +00004088
4089
Michael Kupersteine45af542015-06-30 13:36:19 +00004090static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004091_mm512_cvtph_ps(__m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004092{
4093 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00004094 (__v16sf)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004095 _mm512_setzero_ps (),
4096 (__mmask16) -1,
4097 _MM_FROUND_CUR_DIRECTION);
4098}
4099
Michael Zuckermanf1544752016-05-09 10:32:51 +00004100static __inline__ __m512 __DEFAULT_FN_ATTRS
4101_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004102{
Michael Zuckermanf1544752016-05-09 10:32:51 +00004103 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4104 (__v16sf) __W,
4105 (__mmask16) __U,
4106 _MM_FROUND_CUR_DIRECTION);
4107}
4108
4109static __inline__ __m512 __DEFAULT_FN_ATTRS
4110_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
4111{
4112 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4113 (__v16sf) _mm512_setzero_ps (),
4114 (__mmask16) __U,
4115 _MM_FROUND_CUR_DIRECTION);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004116}
4117
/* Truncating double -> signed 32-bit conversion with explicit control R; the
 * result is a 256-bit integer vector.
 *   A - __m512d source, W - __m256i pass-through, U - 8-bit lane mask.
 * Plain form: all-ones mask.  mask form: pass-through is W.
 * maskz form: pass-through is zero.
 * Expansions are fully parenthesized so the result cast cannot re-associate
 * with an operator at the use site. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004132
Michael Kupersteine45af542015-06-30 13:36:19 +00004133static __inline __m256i __DEFAULT_FN_ATTRS
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00004134_mm512_cvttpd_epi32(__m512d __a)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004135{
Michael Kuperstein5c2cb0e2015-09-21 11:45:27 +00004136 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004137 (__v8si)_mm256_setzero_si256(),
4138 (__mmask8) -1,
4139 _MM_FROUND_CUR_DIRECTION);
4140}
4141
Michael Zuckermanf1544752016-05-09 10:32:51 +00004142static __inline__ __m256i __DEFAULT_FN_ATTRS
4143_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4144{
4145 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4146 (__v8si) __W,
4147 (__mmask8) __U,
4148 _MM_FROUND_CUR_DIRECTION);
4149}
4150
4151static __inline__ __m256i __DEFAULT_FN_ATTRS
4152_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
4153{
4154 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4155 (__v8si) _mm256_setzero_si256 (),
4156 (__mmask8) __U,
4157 _MM_FROUND_CUR_DIRECTION);
4158}
4159
/* Truncating float -> signed 32-bit conversion with explicit control R.
 *   A - __m512 source, W - pass-through vector, U - 16-bit lane mask.
 * Plain form: all-ones mask.  mask form: pass-through is W.
 * maskz form: pass-through is zero.
 * Expansions are fully parenthesized so the result cast cannot re-associate
 * with an operator at the use site. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004174
Michael Zuckermanf1544752016-05-09 10:32:51 +00004175static __inline __m512i __DEFAULT_FN_ATTRS
4176_mm512_cvttps_epi32(__m512 __a)
4177{
4178 return (__m512i)
4179 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
4180 (__v16si) _mm512_setzero_si512 (),
4181 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
4182}
4183
4184static __inline__ __m512i __DEFAULT_FN_ATTRS
4185_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4186{
4187 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4188 (__v16si) __W,
4189 (__mmask16) __U,
4190 _MM_FROUND_CUR_DIRECTION);
4191}
4192
4193static __inline__ __m512i __DEFAULT_FN_ATTRS
4194_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
4195{
4196 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4197 (__v16si) _mm512_setzero_si512 (),
4198 (__mmask16) __U,
4199 _MM_FROUND_CUR_DIRECTION);
4200}
Craig Topper72c7d512015-02-01 07:35:35 +00004201
/* Float -> signed 32-bit conversion with explicit rounding R.
 *   A - __m512 source, W - pass-through vector, U - 16-bit lane mask.
 * Plain form: all-ones mask.  mask form: pass-through is W.
 * maskz form: pass-through is zero.
 * Expansions are fully parenthesized so the result cast cannot re-associate
 * with an operator at the use site. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004216
Michael Zuckermanf1544752016-05-09 10:32:51 +00004217static __inline__ __m512i __DEFAULT_FN_ATTRS
4218_mm512_cvtps_epi32 (__m512 __A)
4219{
4220 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4221 (__v16si) _mm512_undefined_epi32 (),
4222 (__mmask16) -1,
4223 _MM_FROUND_CUR_DIRECTION);
4224}
4225
4226static __inline__ __m512i __DEFAULT_FN_ATTRS
4227_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4228{
4229 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4230 (__v16si) __W,
4231 (__mmask16) __U,
4232 _MM_FROUND_CUR_DIRECTION);
4233}
4234
4235static __inline__ __m512i __DEFAULT_FN_ATTRS
4236_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
4237{
4238 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4239 (__v16si)
4240 _mm512_setzero_si512 (),
4241 (__mmask16) __U,
4242 _MM_FROUND_CUR_DIRECTION);
4243}
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00004244
/* Double -> signed 32-bit conversion with explicit rounding R; the result is
 * a 256-bit integer vector.
 *   A - __m512d source, W - __m256i pass-through, U - 8-bit lane mask.
 * Plain form: all-ones mask.  mask form: pass-through is W.
 * maskz form: pass-through is zero.
 * Expansions are fully parenthesized so the result cast cannot re-associate
 * with an operator at the use site. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004259
Michael Zuckermanf1544752016-05-09 10:32:51 +00004260static __inline__ __m256i __DEFAULT_FN_ATTRS
4261_mm512_cvtpd_epi32 (__m512d __A)
4262{
4263 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4264 (__v8si)
4265 _mm256_undefined_si256 (),
4266 (__mmask8) -1,
4267 _MM_FROUND_CUR_DIRECTION);
4268}
4269
4270static __inline__ __m256i __DEFAULT_FN_ATTRS
4271_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4272{
4273 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4274 (__v8si) __W,
4275 (__mmask8) __U,
4276 _MM_FROUND_CUR_DIRECTION);
4277}
4278
4279static __inline__ __m256i __DEFAULT_FN_ATTRS
4280_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4281{
4282 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4283 (__v8si)
4284 _mm256_setzero_si256 (),
4285 (__mmask8) __U,
4286 _MM_FROUND_CUR_DIRECTION);
4287}
4288
/* Float -> unsigned 32-bit conversion with explicit rounding R.
 *   A - __m512 source, W - pass-through vector, U - 16-bit lane mask.
 * Plain form: all-ones mask.  mask form: pass-through is W.
 * maskz form: pass-through is zero.
 * Expansions are fully parenthesized so the result cast cannot re-associate
 * with an operator at the use site. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004303
Michael Zuckermanf1544752016-05-09 10:32:51 +00004304static __inline__ __m512i __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00004305_mm512_cvtps_epu32 ( __m512 __A)
Michael Zuckermanf1544752016-05-09 10:32:51 +00004306{
4307 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4308 (__v16si)\
4309 _mm512_undefined_epi32 (),\
4310 (__mmask16) -1,\
4311 _MM_FROUND_CUR_DIRECTION);\
4312}
4313
4314static __inline__ __m512i __DEFAULT_FN_ATTRS
4315_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4316{
4317 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4318 (__v16si) __W,
4319 (__mmask16) __U,
4320 _MM_FROUND_CUR_DIRECTION);
4321}
4322
Michael Zuckerman9fcf3552016-05-30 13:22:12 +00004323static __inline__ __m512i __DEFAULT_FN_ATTRS
4324_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4325{
4326 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00004327 (__v16si)
Michael Zuckerman9fcf3552016-05-30 13:22:12 +00004328 _mm512_setzero_si512 (),
4329 (__mmask16) __U ,
4330 _MM_FROUND_CUR_DIRECTION);
4331}
4332
/* Converts the eight doubles in A through __builtin_ia32_cvtpd2udq512_mask
   with rounding control R, an all-ones mask and a zeroed vector operand.
   The whole expansion is parenthesized so that the leading cast cannot be
   split from the call by a postfix operator written after the macro. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004337
/* Masked form: forwards A, vector operand W, mask U and rounding control R
   to __builtin_ia32_cvtpd2udq512_mask.  The whole expansion is parenthesized
   so that the leading cast cannot be split from the call by a postfix
   operator written after the macro. */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004342
/* Zero-masked form: forwards A, a zeroed vector operand, mask U and rounding
   control R to __builtin_ia32_cvtpd2udq512_mask.  The whole expansion is
   parenthesized so that the leading cast cannot be split from the call by a
   postfix operator written after the macro. */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman186d8672016-05-31 11:27:34 +00004347
Michael Zuckermanf1544752016-05-09 10:32:51 +00004348static __inline__ __m256i __DEFAULT_FN_ATTRS
4349_mm512_cvtpd_epu32 (__m512d __A)
4350{
4351 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4352 (__v8si)
4353 _mm256_undefined_si256 (),
4354 (__mmask8) -1,
4355 _MM_FROUND_CUR_DIRECTION);
4356}
4357
4358static __inline__ __m256i __DEFAULT_FN_ATTRS
4359_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4360{
4361 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4362 (__v8si) __W,
4363 (__mmask8) __U,
4364 _MM_FROUND_CUR_DIRECTION);
4365}
4366
4367static __inline__ __m256i __DEFAULT_FN_ATTRS
4368_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4369{
4370 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4371 (__v8si)
4372 _mm256_setzero_si256 (),
4373 (__mmask8) __U,
4374 _MM_FROUND_CUR_DIRECTION);
4375}
Craig Topper79f53ca2016-06-23 06:36:42 +00004376
Simon Pilgrim60e92492017-03-21 12:46:13 +00004377static __inline__ double __DEFAULT_FN_ATTRS
4378_mm512_cvtsd_f64(__m512d __a)
4379{
4380 return __a[0];
4381}
4382
4383static __inline__ float __DEFAULT_FN_ATTRS
4384_mm512_cvtss_f32(__m512 __a)
4385{
4386 return __a[0];
4387}
4388
Adam Nemet63a951e2015-01-14 01:31:17 +00004389/* Unpack and Interleave */
Craig Topper79f53ca2016-06-23 06:36:42 +00004390
Michael Kupersteine45af542015-06-30 13:36:19 +00004391static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00004392_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4393{
Craig Topper79f53ca2016-06-23 06:36:42 +00004394 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4395 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4396}
4397
4398static __inline__ __m512d __DEFAULT_FN_ATTRS
4399_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4400{
4401 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4402 (__v8df)_mm512_unpackhi_pd(__A, __B),
4403 (__v8df)__W);
4404}
4405
4406static __inline__ __m512d __DEFAULT_FN_ATTRS
4407_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4408{
4409 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4410 (__v8df)_mm512_unpackhi_pd(__A, __B),
4411 (__v8df)_mm512_setzero_pd());
Adam Nemet63a951e2015-01-14 01:31:17 +00004412}
4413
Michael Kupersteine45af542015-06-30 13:36:19 +00004414static __inline __m512d __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00004415_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4416{
Craig Topper79f53ca2016-06-23 06:36:42 +00004417 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4418 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4419}
4420
4421static __inline__ __m512d __DEFAULT_FN_ATTRS
4422_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4423{
4424 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4425 (__v8df)_mm512_unpacklo_pd(__A, __B),
4426 (__v8df)__W);
4427}
4428
4429static __inline__ __m512d __DEFAULT_FN_ATTRS
4430_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4431{
4432 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4433 (__v8df)_mm512_unpacklo_pd(__A, __B),
4434 (__v8df)_mm512_setzero_pd());
Adam Nemet63a951e2015-01-14 01:31:17 +00004435}
4436
Michael Kupersteine45af542015-06-30 13:36:19 +00004437static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00004438_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4439{
Craig Topper79f53ca2016-06-23 06:36:42 +00004440 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4441 2, 18, 3, 19,
4442 2+4, 18+4, 3+4, 19+4,
4443 2+8, 18+8, 3+8, 19+8,
4444 2+12, 18+12, 3+12, 19+12);
4445}
4446
4447static __inline__ __m512 __DEFAULT_FN_ATTRS
4448_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4449{
4450 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4451 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4452 (__v16sf)__W);
4453}
4454
4455static __inline__ __m512 __DEFAULT_FN_ATTRS
4456_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4457{
4458 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4459 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4460 (__v16sf)_mm512_setzero_ps());
Adam Nemet63a951e2015-01-14 01:31:17 +00004461}
4462
Michael Kupersteine45af542015-06-30 13:36:19 +00004463static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet63a951e2015-01-14 01:31:17 +00004464_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4465{
Craig Topper79f53ca2016-06-23 06:36:42 +00004466 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4467 0, 16, 1, 17,
4468 0+4, 16+4, 1+4, 17+4,
4469 0+8, 16+8, 1+8, 17+8,
4470 0+12, 16+12, 1+12, 17+12);
4471}
4472
4473static __inline__ __m512 __DEFAULT_FN_ATTRS
4474_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4475{
4476 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4477 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4478 (__v16sf)__W);
4479}
4480
4481static __inline__ __m512 __DEFAULT_FN_ATTRS
4482_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4483{
4484 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4485 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4486 (__v16sf)_mm512_setzero_ps());
4487}
4488
4489static __inline__ __m512i __DEFAULT_FN_ATTRS
4490_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4491{
4492 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4493 2, 18, 3, 19,
4494 2+4, 18+4, 3+4, 19+4,
4495 2+8, 18+8, 3+8, 19+8,
4496 2+12, 18+12, 3+12, 19+12);
4497}
4498
4499static __inline__ __m512i __DEFAULT_FN_ATTRS
4500_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4501{
4502 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4503 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4504 (__v16si)__W);
4505}
4506
4507static __inline__ __m512i __DEFAULT_FN_ATTRS
4508_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4509{
4510 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4511 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4512 (__v16si)_mm512_setzero_si512());
4513}
4514
4515static __inline__ __m512i __DEFAULT_FN_ATTRS
4516_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4517{
4518 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4519 0, 16, 1, 17,
4520 0+4, 16+4, 1+4, 17+4,
4521 0+8, 16+8, 1+8, 17+8,
4522 0+12, 16+12, 1+12, 17+12);
4523}
4524
4525static __inline__ __m512i __DEFAULT_FN_ATTRS
4526_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4527{
4528 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4529 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4530 (__v16si)__W);
4531}
4532
4533static __inline__ __m512i __DEFAULT_FN_ATTRS
4534_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4535{
4536 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4537 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4538 (__v16si)_mm512_setzero_si512());
4539}
4540
4541static __inline__ __m512i __DEFAULT_FN_ATTRS
4542_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4543{
4544 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4545 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4546}
4547
4548static __inline__ __m512i __DEFAULT_FN_ATTRS
4549_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4550{
4551 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4552 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4553 (__v8di)__W);
4554}
4555
4556static __inline__ __m512i __DEFAULT_FN_ATTRS
4557_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4558{
4559 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4560 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4561 (__v8di)_mm512_setzero_si512());
4562}
4563
4564static __inline__ __m512i __DEFAULT_FN_ATTRS
4565_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4566{
4567 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4568 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4569}
4570
4571static __inline__ __m512i __DEFAULT_FN_ATTRS
4572_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4573{
4574 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4575 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4576 (__v8di)__W);
4577}
4578
4579static __inline__ __m512i __DEFAULT_FN_ATTRS
4580_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4581{
4582 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4583 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4584 (__v8di)_mm512_setzero_si512());
Adam Nemet63a951e2015-01-14 01:31:17 +00004585}
4586
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00004587
Adam Nemet0d5bb552014-07-28 17:14:40 +00004588/* SIMD load ops */
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004589
Michael Kupersteine45af542015-06-30 13:36:19 +00004590static __inline __m512i __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004591_mm512_loadu_si512 (void const *__P)
4592{
Craig Toppera6dd2fa2018-05-31 05:02:08 +00004593 struct __loadu_si512 {
4594 __m512i __v;
4595 } __attribute__((__packed__, __may_alias__));
4596 return ((struct __loadu_si512*)__P)->__v;
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004597}
4598
4599static __inline __m512i __DEFAULT_FN_ATTRS
4600_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4601{
Craig Topper4537ea72016-05-14 06:03:13 +00004602 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004603 (__v16si) __W,
4604 (__mmask16) __U);
4605}
4606
4607
4608static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004609_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004610{
Craig Topper4537ea72016-05-14 06:03:13 +00004611 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004612 (__v16si)
4613 _mm512_setzero_si512 (),
4614 (__mmask16) __U);
4615}
4616
Michael Kupersteine45af542015-06-30 13:36:19 +00004617static __inline __m512i __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004618_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4619{
Craig Topper4537ea72016-05-14 06:03:13 +00004620 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004621 (__v8di) __W,
4622 (__mmask8) __U);
4623}
4624
4625static __inline __m512i __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004626_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004627{
Craig Topper4537ea72016-05-14 06:03:13 +00004628 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004629 (__v8di)
4630 _mm512_setzero_si512 (),
4631 (__mmask8) __U);
4632}
4633
Michael Kupersteine45af542015-06-30 13:36:19 +00004634static __inline __m512 __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004635_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4636{
Craig Topper4537ea72016-05-14 06:03:13 +00004637 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004638 (__v16sf) __W,
4639 (__mmask16) __U);
4640}
4641
4642static __inline __m512 __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004643_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004644{
Craig Topper4537ea72016-05-14 06:03:13 +00004645 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004646 (__v16sf)
4647 _mm512_setzero_ps (),
4648 (__mmask16) __U);
4649}
4650
Michael Kupersteine45af542015-06-30 13:36:19 +00004651static __inline __m512d __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004652_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004653{
Craig Topper4537ea72016-05-14 06:03:13 +00004654 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004655 (__v8df) __W,
4656 (__mmask8) __U);
Adam Nemetc0cff242015-01-16 18:51:50 +00004657}
4658
Michael Kupersteine45af542015-06-30 13:36:19 +00004659static __inline __m512d __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004660_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
Adam Nemetc0cff242015-01-16 18:51:50 +00004661{
Craig Topper4537ea72016-05-14 06:03:13 +00004662 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
Adam Nemetc0cff242015-01-16 18:51:50 +00004663 (__v8df)
4664 _mm512_setzero_pd (),
4665 (__mmask8) __U);
4666}
4667
Michael Kupersteine45af542015-06-30 13:36:19 +00004668static __inline __m512d __DEFAULT_FN_ATTRS
Craig Topper6afc4362017-03-17 05:59:25 +00004669_mm512_loadu_pd(void const *__p)
Adam Nemetda82bcc2014-07-31 04:00:39 +00004670{
4671 struct __loadu_pd {
4672 __m512d __v;
David Majnemer1cf22e62015-02-04 00:26:10 +00004673 } __attribute__((__packed__, __may_alias__));
Adam Nemetda82bcc2014-07-31 04:00:39 +00004674 return ((struct __loadu_pd*)__p)->__v;
4675}
4676
Michael Kupersteine45af542015-06-30 13:36:19 +00004677static __inline __m512 __DEFAULT_FN_ATTRS
Craig Topper6afc4362017-03-17 05:59:25 +00004678_mm512_loadu_ps(void const *__p)
Adam Nemetda82bcc2014-07-31 04:00:39 +00004679{
4680 struct __loadu_ps {
4681 __m512 __v;
David Majnemer1cf22e62015-02-04 00:26:10 +00004682 } __attribute__((__packed__, __may_alias__));
Adam Nemetda82bcc2014-07-31 04:00:39 +00004683 return ((struct __loadu_ps*)__p)->__v;
4684}
4685
Michael Kupersteine45af542015-06-30 13:36:19 +00004686static __inline __m512 __DEFAULT_FN_ATTRS
Craig Topper6afc4362017-03-17 05:59:25 +00004687_mm512_load_ps(void const *__p)
Adam Nemetc0cff242015-01-16 18:51:50 +00004688{
Craig Toppera6dd2fa2018-05-31 05:02:08 +00004689 return *(__m512*)__p;
Adam Nemetc0cff242015-01-16 18:51:50 +00004690}
4691
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004692static __inline __m512 __DEFAULT_FN_ATTRS
4693_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4694{
4695 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4696 (__v16sf) __W,
4697 (__mmask16) __U);
4698}
4699
4700static __inline __m512 __DEFAULT_FN_ATTRS
4701_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4702{
4703 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4704 (__v16sf)
4705 _mm512_setzero_ps (),
4706 (__mmask16) __U);
4707}
4708
Michael Kupersteine45af542015-06-30 13:36:19 +00004709static __inline __m512d __DEFAULT_FN_ATTRS
Craig Topper6afc4362017-03-17 05:59:25 +00004710_mm512_load_pd(void const *__p)
Adam Nemetc0cff242015-01-16 18:51:50 +00004711{
Craig Toppera6dd2fa2018-05-31 05:02:08 +00004712 return *(__m512d*)__p;
Adam Nemetc0cff242015-01-16 18:51:50 +00004713}
4714
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004715static __inline __m512d __DEFAULT_FN_ATTRS
4716_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4717{
4718 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4719 (__v8df) __W,
4720 (__mmask8) __U);
4721}
4722
4723static __inline __m512d __DEFAULT_FN_ATTRS
4724_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4725{
4726 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4727 (__v8df)
4728 _mm512_setzero_pd (),
4729 (__mmask8) __U);
4730}
4731
4732static __inline __m512i __DEFAULT_FN_ATTRS
4733_mm512_load_si512 (void const *__P)
4734{
4735 return *(__m512i *) __P;
4736}
4737
4738static __inline __m512i __DEFAULT_FN_ATTRS
4739_mm512_load_epi32 (void const *__P)
4740{
4741 return *(__m512i *) __P;
4742}
4743
4744static __inline __m512i __DEFAULT_FN_ATTRS
4745_mm512_load_epi64 (void const *__P)
4746{
4747 return *(__m512i *) __P;
4748}
4749
Adam Nemet0d5bb552014-07-28 17:14:40 +00004750/* SIMD store ops */
4751
Michael Kupersteine45af542015-06-30 13:36:19 +00004752static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004753_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004754{
Craig Topper4537ea72016-05-14 06:03:13 +00004755 __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004756 (__mmask8) __U);
4757}
4758
Michael Kupersteine45af542015-06-30 13:36:19 +00004759static __inline void __DEFAULT_FN_ATTRS
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004760_mm512_storeu_si512 (void *__P, __m512i __A)
4761{
Craig Toppera6dd2fa2018-05-31 05:02:08 +00004762 struct __storeu_si512 {
4763 __m512i __v;
4764 } __attribute__((__packed__, __may_alias__));
4765 ((struct __storeu_si512*)__P)->__v = __A;
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004766}
4767
4768static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004769_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004770{
Craig Topper4537ea72016-05-14 06:03:13 +00004771 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004772 (__mmask16) __U);
4773}
4774
Michael Kupersteine45af542015-06-30 13:36:19 +00004775static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004776_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004777{
Craig Topper4537ea72016-05-14 06:03:13 +00004778 __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004779}
4780
Michael Kupersteine45af542015-06-30 13:36:19 +00004781static __inline void __DEFAULT_FN_ATTRS
Adam Nemetfce1ad02014-07-28 17:14:45 +00004782_mm512_storeu_pd(void *__P, __m512d __A)
4783{
Craig Toppera6dd2fa2018-05-31 05:02:08 +00004784 struct __storeu_pd {
4785 __m512d __v;
4786 } __attribute__((__packed__, __may_alias__));
4787 ((struct __storeu_pd*)__P)->__v = __A;
Adam Nemetfce1ad02014-07-28 17:14:45 +00004788}
4789
Michael Kupersteine45af542015-06-30 13:36:19 +00004790static __inline void __DEFAULT_FN_ATTRS
Adam Nemet9a3ea602014-07-28 17:14:38 +00004791_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004792{
Craig Topper4537ea72016-05-14 06:03:13 +00004793 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +00004794 (__mmask16) __U);
4795}
4796
Michael Kupersteine45af542015-06-30 13:36:19 +00004797static __inline void __DEFAULT_FN_ATTRS
Adam Nemetfce1ad02014-07-28 17:14:45 +00004798_mm512_storeu_ps(void *__P, __m512 __A)
4799{
Craig Toppera6dd2fa2018-05-31 05:02:08 +00004800 struct __storeu_ps {
4801 __m512 __v;
4802 } __attribute__((__packed__, __may_alias__));
4803 ((struct __storeu_ps*)__P)->__v = __A;
Adam Nemetfce1ad02014-07-28 17:14:45 +00004804}
4805
Michael Kupersteine45af542015-06-30 13:36:19 +00004806static __inline void __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00004807_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
Adam Nemetfce1ad02014-07-28 17:14:45 +00004808{
Adam Nemetc0cff242015-01-16 18:51:50 +00004809 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
Adam Nemetfce1ad02014-07-28 17:14:45 +00004810}
4811
Michael Kupersteine45af542015-06-30 13:36:19 +00004812static __inline void __DEFAULT_FN_ATTRS
Adam Nemetfce1ad02014-07-28 17:14:45 +00004813_mm512_store_pd(void *__P, __m512d __A)
4814{
4815 *(__m512d*)__P = __A;
4816}
4817
Michael Kupersteine45af542015-06-30 13:36:19 +00004818static __inline void __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00004819_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4820{
4821 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4822 (__mmask16) __U);
4823}
4824
Michael Kupersteine45af542015-06-30 13:36:19 +00004825static __inline void __DEFAULT_FN_ATTRS
Adam Nemetc0cff242015-01-16 18:51:50 +00004826_mm512_store_ps(void *__P, __m512 __A)
4827{
4828 *(__m512*)__P = __A;
4829}
4830
Michael Zuckermane9e8e572016-05-10 13:13:54 +00004831static __inline void __DEFAULT_FN_ATTRS
4832_mm512_store_si512 (void *__P, __m512i __A)
4833{
4834 *(__m512i *) __P = __A;
4835}
4836
4837static __inline void __DEFAULT_FN_ATTRS
4838_mm512_store_epi32 (void *__P, __m512i __A)
4839{
4840 *(__m512i *) __P = __A;
4841}
4842
4843static __inline void __DEFAULT_FN_ATTRS
4844_mm512_store_epi64 (void *__P, __m512i __A)
4845{
4846 *(__m512i *) __P = __A;
4847}
4848
Adam Nemet2db1d2f2014-07-30 16:51:27 +00004849/* Mask ops */
4850
Michael Kupersteine45af542015-06-30 13:36:19 +00004851static __inline __mmask16 __DEFAULT_FN_ATTRS
Adam Nemet2db1d2f2014-07-30 16:51:27 +00004852_mm512_knot(__mmask16 __M)
4853{
4854 return __builtin_ia32_knothi(__M);
4855}
4856
Robert Khasanovb9f3a912014-10-08 17:18:13 +00004857/* Integer compare */
4858
/* epi32 comparison shorthands.  Every macro forwards its operands to
   _mm512_cmp_epi32_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)

/* Same predicates, with the extra mask argument k forwarded to
   _mm512_mask_cmp_epi32_mask. */
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
Robert Khasanovb9f3a912014-10-08 17:18:13 +00004883
/* epu32 comparison shorthands.  Every macro forwards its operands to
   _mm512_cmp_epu32_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)

/* Same predicates, with the extra mask argument k forwarded to
   _mm512_mask_cmp_epu32_mask. */
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
Robert Khasanovb9f3a912014-10-08 17:18:13 +00004908
/* epi64 comparison shorthands.  Every macro forwards its operands to
   _mm512_cmp_epi64_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)

/* Same predicates, with the extra mask argument k forwarded to
   _mm512_mask_cmp_epi64_mask. */
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
Craig Topper4cac1c22015-01-25 23:30:07 +00004933
/* epu64 comparison shorthands.  Every macro forwards its operands to
   _mm512_cmp_epu64_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)

/* Same predicates, with the extra mask argument k forwarded to
   _mm512_mask_cmp_epu64_mask. */
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
Craig Topper4cac1c22015-01-25 23:30:07 +00004958
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004959static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004960_mm512_cvtepi8_epi32(__m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004961{
Craig Topper0c5da262016-10-23 07:35:47 +00004962 /* This function always performs a signed extension, but __v16qi is a char
4963 which may be signed or unsigned, so use __v16qs. */
4964 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004965}
4966
4967static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004968_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004969{
Craig Topper0c5da262016-10-23 07:35:47 +00004970 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4971 (__v16si)_mm512_cvtepi8_epi32(__A),
4972 (__v16si)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004973}
4974
4975static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004976_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004977{
Craig Topper0c5da262016-10-23 07:35:47 +00004978 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4979 (__v16si)_mm512_cvtepi8_epi32(__A),
4980 (__v16si)_mm512_setzero_si512());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004981}
4982
4983static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004984_mm512_cvtepi8_epi64(__m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004985{
Craig Topper0c5da262016-10-23 07:35:47 +00004986 /* This function always performs a signed extension, but __v16qi is a char
4987 which may be signed or unsigned, so use __v16qs. */
4988 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004989}
4990
4991static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00004992_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004993{
Craig Topper0c5da262016-10-23 07:35:47 +00004994 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4995 (__v8di)_mm512_cvtepi8_epi64(__A),
4996 (__v8di)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00004997}
4998
4999static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005000_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005001{
Craig Topper0c5da262016-10-23 07:35:47 +00005002 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5003 (__v8di)_mm512_cvtepi8_epi64(__A),
5004 (__v8di)_mm512_setzero_si512 ());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005005}
5006
5007static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005008_mm512_cvtepi32_epi64(__m256i __X)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005009{
Craig Topper0c5da262016-10-23 07:35:47 +00005010 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005011}
5012
5013static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005014_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005015{
Craig Topper0c5da262016-10-23 07:35:47 +00005016 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5017 (__v8di)_mm512_cvtepi32_epi64(__X),
5018 (__v8di)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005019}
5020
5021static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005022_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005023{
Craig Topper0c5da262016-10-23 07:35:47 +00005024 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5025 (__v8di)_mm512_cvtepi32_epi64(__X),
5026 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005027}
5028
5029static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005030_mm512_cvtepi16_epi32(__m256i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005031{
Craig Topper0c5da262016-10-23 07:35:47 +00005032 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005033}
5034
5035static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005036_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005037{
Craig Topper0c5da262016-10-23 07:35:47 +00005038 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5039 (__v16si)_mm512_cvtepi16_epi32(__A),
5040 (__v16si)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005041}
5042
5043static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005044_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005045{
Craig Topper0c5da262016-10-23 07:35:47 +00005046 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5047 (__v16si)_mm512_cvtepi16_epi32(__A),
5048 (__v16si)_mm512_setzero_si512 ());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005049}
5050
5051static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005052_mm512_cvtepi16_epi64(__m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005053{
Craig Topper0c5da262016-10-23 07:35:47 +00005054 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005055}
5056
5057static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005058_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005059{
Craig Topper0c5da262016-10-23 07:35:47 +00005060 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5061 (__v8di)_mm512_cvtepi16_epi64(__A),
5062 (__v8di)__W);
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005063}
5064
5065static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005066_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005067{
Craig Topper0c5da262016-10-23 07:35:47 +00005068 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5069 (__v8di)_mm512_cvtepi16_epi64(__A),
5070 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7cdb72f2016-02-18 09:09:34 +00005071}
5072
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005073static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005074_mm512_cvtepu8_epi32(__m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005075{
Craig Topper0c5da262016-10-23 07:35:47 +00005076 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005077}
5078
5079static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005080_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005081{
Craig Topper0c5da262016-10-23 07:35:47 +00005082 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5083 (__v16si)_mm512_cvtepu8_epi32(__A),
5084 (__v16si)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005085}
5086
5087static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005088_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005089{
Craig Topper0c5da262016-10-23 07:35:47 +00005090 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5091 (__v16si)_mm512_cvtepu8_epi32(__A),
5092 (__v16si)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005093}
5094
5095static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005096_mm512_cvtepu8_epi64(__m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005097{
Craig Topper0c5da262016-10-23 07:35:47 +00005098 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005099}
5100
5101static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005102_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005103{
Craig Topper0c5da262016-10-23 07:35:47 +00005104 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5105 (__v8di)_mm512_cvtepu8_epi64(__A),
5106 (__v8di)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005107}
5108
5109static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005110_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005111{
Craig Topper0c5da262016-10-23 07:35:47 +00005112 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5113 (__v8di)_mm512_cvtepu8_epi64(__A),
5114 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005115}
5116
5117static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005118_mm512_cvtepu32_epi64(__m256i __X)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005119{
Craig Topper0c5da262016-10-23 07:35:47 +00005120 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005121}
5122
5123static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005124_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005125{
Craig Topper0c5da262016-10-23 07:35:47 +00005126 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5127 (__v8di)_mm512_cvtepu32_epi64(__X),
5128 (__v8di)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005129}
5130
5131static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005132_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005133{
Craig Topper0c5da262016-10-23 07:35:47 +00005134 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5135 (__v8di)_mm512_cvtepu32_epi64(__X),
5136 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005137}
5138
5139static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005140_mm512_cvtepu16_epi32(__m256i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005141{
Craig Topper0c5da262016-10-23 07:35:47 +00005142 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005143}
5144
5145static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005146_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005147{
Craig Topper0c5da262016-10-23 07:35:47 +00005148 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5149 (__v16si)_mm512_cvtepu16_epi32(__A),
5150 (__v16si)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005151}
5152
5153static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005154_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005155{
Craig Topper0c5da262016-10-23 07:35:47 +00005156 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5157 (__v16si)_mm512_cvtepu16_epi32(__A),
5158 (__v16si)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005159}
5160
5161static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005162_mm512_cvtepu16_epi64(__m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005163{
Craig Topper0c5da262016-10-23 07:35:47 +00005164 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005165}
5166
5167static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005168_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005169{
Craig Topper0c5da262016-10-23 07:35:47 +00005170 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5171 (__v8di)_mm512_cvtepu16_epi64(__A),
5172 (__v8di)__W);
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005173}
5174
5175static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper0c5da262016-10-23 07:35:47 +00005176_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005177{
Craig Topper0c5da262016-10-23 07:35:47 +00005178 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5179 (__v8di)_mm512_cvtepu16_epi64(__A),
5180 (__v8di)_mm512_setzero_si512());
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005181}
5182
Michael Zuckermane98cc742016-02-23 15:59:47 +00005183static __inline__ __m512i __DEFAULT_FN_ATTRS
5184_mm512_rorv_epi32 (__m512i __A, __m512i __B)
5185{
5186 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5187 (__v16si) __B,
5188 (__v16si)
5189 _mm512_setzero_si512 (),
5190 (__mmask16) -1);
5191}
5192
5193static __inline__ __m512i __DEFAULT_FN_ATTRS
5194_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5195{
5196 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5197 (__v16si) __B,
5198 (__v16si) __W,
5199 (__mmask16) __U);
5200}
5201
5202static __inline__ __m512i __DEFAULT_FN_ATTRS
5203_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5204{
5205 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5206 (__v16si) __B,
5207 (__v16si)
5208 _mm512_setzero_si512 (),
5209 (__mmask16) __U);
5210}
5211
5212static __inline__ __m512i __DEFAULT_FN_ATTRS
5213_mm512_rorv_epi64 (__m512i __A, __m512i __B)
5214{
5215 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5216 (__v8di) __B,
5217 (__v8di)
5218 _mm512_setzero_si512 (),
5219 (__mmask8) -1);
5220}
5221
5222static __inline__ __m512i __DEFAULT_FN_ATTRS
5223_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5224{
5225 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5226 (__v8di) __B,
5227 (__v8di) __W,
5228 (__mmask8) __U);
5229}
5230
5231static __inline__ __m512i __DEFAULT_FN_ATTRS
5232_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5233{
5234 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5235 (__v8di) __B,
5236 (__v8di)
5237 _mm512_setzero_si512 (),
5238 (__mmask8) __U);
5239}
5240
5241
Michael Zuckerman7a33dce2016-02-21 14:00:11 +00005242
/* Compare the 32-bit-element views of a and b with predicate p through the
   cmpd512 builtin, all-ones mask.  The whole expansion is parenthesized so
   the leading cast cannot be split from the call by a neighbouring operator. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))
Craig Topper4cac1c22015-01-25 23:30:07 +00005247
/* Compare the 32-bit-element views of a and b with predicate p through the
   ucmpd512 builtin, all-ones mask.  The whole expansion is parenthesized so
   the leading cast cannot be split from the call by a neighbouring operator. */
#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))
Craig Topper4cac1c22015-01-25 23:30:07 +00005252
/* Compare the 64-bit-element views of a and b with predicate p through the
   cmpq512 builtin, all-ones mask.  The whole expansion is parenthesized so
   the leading cast cannot be split from the call by a neighbouring operator. */
#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))
Craig Topper4cac1c22015-01-25 23:30:07 +00005257
/* Compare the 64-bit-element views of a and b with predicate p through the
   ucmpq512 builtin, all-ones mask.  The whole expansion is parenthesized so
   the leading cast cannot be split from the call by a neighbouring operator. */
#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))
Craig Topper4cac1c22015-01-25 23:30:07 +00005262
/* Masked compare: same cmpd512 builtin call as the unmasked form, with m
   passed as the mask operand.  The whole expansion is parenthesized so the
   leading cast cannot be split from the call by a neighbouring operator. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))
Craig Topper4cac1c22015-01-25 23:30:07 +00005267
/* Masked compare: same ucmpd512 builtin call as the unmasked form, with m
   passed as the mask operand.  The whole expansion is parenthesized so the
   leading cast cannot be split from the call by a neighbouring operator. */
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))
Craig Topper4cac1c22015-01-25 23:30:07 +00005272
/* Masked compare: same cmpq512 builtin call as the unmasked form, with m
   passed as the mask operand.  The whole expansion is parenthesized so the
   leading cast cannot be split from the call by a neighbouring operator. */
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))
Craig Topper4cac1c22015-01-25 23:30:07 +00005277
/* Masked compare: same ucmpq512 builtin call as the unmasked form, with m
   passed as the mask operand.  The whole expansion is parenthesized so the
   leading cast cannot be split from the call by a neighbouring operator. */
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))
Eric Christopher4d1851682015-06-17 07:09:20 +00005282
/* Unmasked form: prold512 builtin on the 32-bit-element view of a with count
   b, zero pass-through, all-ones mask.  The whole expansion is parenthesized
   so a following postfix operator (e.g. [i]) applies to the cast result. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1))
Michael Zuckerman38a27272016-02-22 09:05:41 +00005287
/* Masked form: prold512 builtin on a with count b, W as pass-through operand
   and U as mask.  The whole expansion is parenthesized so a following postfix
   operator (e.g. [i]) applies to the cast result. */
#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                         (__v16si)(__m512i)(W), \
                                         (__mmask16)(U)))
Michael Zuckerman38a27272016-02-22 09:05:41 +00005292
/* Zero-masked form: prold512 builtin on a with count b, zero pass-through
   operand and U as mask.  The whole expansion is parenthesized so a following
   postfix operator (e.g. [i]) applies to the cast result. */
#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)(U)))
Michael Zuckerman38a27272016-02-22 09:05:41 +00005297
/* Unmasked form: prolq512 builtin on the 64-bit-element view of a with count
   b, zero pass-through, all-ones mask.  The whole expansion is parenthesized
   so a following postfix operator (e.g. [i]) applies to the cast result. */
#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1))
Michael Zuckerman38a27272016-02-22 09:05:41 +00005302
/* Masked form: prolq512 builtin on a with count b, W as pass-through operand
   and U as mask.  The whole expansion is parenthesized so a following postfix
   operator (e.g. [i]) applies to the cast result. */
#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                         (__v8di)(__m512i)(W), (__mmask8)(U)))
Michael Zuckerman38a27272016-02-22 09:05:41 +00005306
/* Zero-masked form: prolq512 builtin on a with count b, zero pass-through
   operand and U as mask.  The whole expansion is parenthesized so a following
   postfix operator (e.g. [i]) applies to the cast result. */
#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)(U)))
Michael Zuckerman0231f162016-02-23 13:41:13 +00005311static __inline__ __m512i __DEFAULT_FN_ATTRS
5312_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5313{
5314 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5315 (__v16si) __B,
5316 (__v16si)
5317 _mm512_setzero_si512 (),
5318 (__mmask16) -1);
5319}
5320
5321static __inline__ __m512i __DEFAULT_FN_ATTRS
5322_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5323{
5324 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5325 (__v16si) __B,
5326 (__v16si) __W,
5327 (__mmask16) __U);
5328}
5329
5330static __inline__ __m512i __DEFAULT_FN_ATTRS
5331_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5332{
5333 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5334 (__v16si) __B,
5335 (__v16si)
5336 _mm512_setzero_si512 (),
5337 (__mmask16) __U);
5338}
5339
5340static __inline__ __m512i __DEFAULT_FN_ATTRS
5341_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5342{
5343 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5344 (__v8di) __B,
5345 (__v8di)
5346 _mm512_setzero_si512 (),
5347 (__mmask8) -1);
5348}
5349
5350static __inline__ __m512i __DEFAULT_FN_ATTRS
5351_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5352{
5353 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5354 (__v8di) __B,
5355 (__v8di) __W,
5356 (__mmask8) __U);
5357}
5358
5359static __inline__ __m512i __DEFAULT_FN_ATTRS
5360_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5361{
5362 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5363 (__v8di) __B,
5364 (__v8di)
5365 _mm512_setzero_si512 (),
5366 (__mmask8) __U);
5367}
5368
/* Unmasked form: prord512 builtin on the 32-bit-element view of A with count
   B, zero pass-through, all-ones mask.  The whole expansion is parenthesized
   so a following postfix operator (e.g. [i]) applies to the cast result. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1))
Michael Zuckerman0231f162016-02-23 13:41:13 +00005373
/* Masked form: prord512 builtin on A with count B, W as pass-through operand
   and U as mask.  The whole expansion is parenthesized so a following postfix
   operator (e.g. [i]) applies to the cast result. */
#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                         (__v16si)(__m512i)(W), \
                                         (__mmask16)(U)))
Michael Zuckerman0231f162016-02-23 13:41:13 +00005378
/* Zero-masked form: prord512 builtin on A with count B, zero pass-through
   operand and U as mask.  The whole expansion is parenthesized so a following
   postfix operator (e.g. [i]) applies to the cast result. */
#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)(U)))
Michael Zuckerman0231f162016-02-23 13:41:13 +00005383
/* Unmasked form: prorq512 builtin on the 64-bit-element view of A with count
   B, zero pass-through, all-ones mask.  The whole expansion is parenthesized
   so a following postfix operator (e.g. [i]) applies to the cast result. */
#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1))
Michael Zuckerman0231f162016-02-23 13:41:13 +00005388
/* Masked form: prorq512 builtin on A with count B, W as pass-through operand
   and U as mask.  The whole expansion is parenthesized so a following postfix
   operator (e.g. [i]) applies to the cast result. */
#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                         (__v8di)(__m512i)(W), (__mmask8)(U)))
Michael Zuckerman0231f162016-02-23 13:41:13 +00005392
/* Zero-masked form: prorq512 builtin on A with count B, zero pass-through
   operand and U as mask.  The whole expansion is parenthesized so a following
   postfix operator (e.g. [i]) applies to the cast result. */
#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)(U)))
Michael Zuckerman38a27272016-02-22 09:05:41 +00005397
Craig Topper1a441932016-11-12 07:16:59 +00005398static __inline__ __m512i __DEFAULT_FN_ATTRS
5399_mm512_slli_epi32(__m512i __A, int __B)
5400{
5401 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5402}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005403
Craig Topper1a441932016-11-12 07:16:59 +00005404static __inline__ __m512i __DEFAULT_FN_ATTRS
5405_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5406{
Craig Topperd7e5b212016-11-13 07:26:31 +00005407 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5408 (__v16si)_mm512_slli_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005409 (__v16si)__W);
5410}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005411
Craig Topper1a441932016-11-12 07:16:59 +00005412static __inline__ __m512i __DEFAULT_FN_ATTRS
5413_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
Craig Topperd7e5b212016-11-13 07:26:31 +00005414 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5415 (__v16si)_mm512_slli_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005416 (__v16si)_mm512_setzero_si512());
5417}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005418
Craig Topper1a441932016-11-12 07:16:59 +00005419static __inline__ __m512i __DEFAULT_FN_ATTRS
5420_mm512_slli_epi64(__m512i __A, int __B)
5421{
5422 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5423}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005424
Craig Topper1a441932016-11-12 07:16:59 +00005425static __inline__ __m512i __DEFAULT_FN_ATTRS
5426_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5427{
Craig Topperd7e5b212016-11-13 07:26:31 +00005428 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5429 (__v8di)_mm512_slli_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005430 (__v8di)__W);
5431}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005432
Craig Topper1a441932016-11-12 07:16:59 +00005433static __inline__ __m512i __DEFAULT_FN_ATTRS
5434_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5435{
Craig Topperd7e5b212016-11-13 07:26:31 +00005436 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5437 (__v8di)_mm512_slli_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005438 (__v8di)_mm512_setzero_si512());
5439}
Michael Zuckerman1ac360c2016-03-01 11:38:16 +00005440
Craig Topper1a441932016-11-12 07:16:59 +00005441static __inline__ __m512i __DEFAULT_FN_ATTRS
5442_mm512_srli_epi32(__m512i __A, int __B)
5443{
5444 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5445}
Michael Zuckerman38a27272016-02-22 09:05:41 +00005446
Craig Topper1a441932016-11-12 07:16:59 +00005447static __inline__ __m512i __DEFAULT_FN_ATTRS
5448_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5449{
Craig Topperd7e5b212016-11-13 07:26:31 +00005450 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5451 (__v16si)_mm512_srli_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005452 (__v16si)__W);
5453}
Michael Zuckermand176d742016-03-01 17:49:03 +00005454
Craig Topper1a441932016-11-12 07:16:59 +00005455static __inline__ __m512i __DEFAULT_FN_ATTRS
5456_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
Craig Topperd7e5b212016-11-13 07:26:31 +00005457 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5458 (__v16si)_mm512_srli_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005459 (__v16si)_mm512_setzero_si512());
5460}
Michael Zuckermand176d742016-03-01 17:49:03 +00005461
Craig Topper1a441932016-11-12 07:16:59 +00005462static __inline__ __m512i __DEFAULT_FN_ATTRS
5463_mm512_srli_epi64(__m512i __A, int __B)
5464{
5465 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5466}
Michael Zuckermand176d742016-03-01 17:49:03 +00005467
Craig Topper1a441932016-11-12 07:16:59 +00005468static __inline__ __m512i __DEFAULT_FN_ATTRS
5469_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5470{
Craig Topperd7e5b212016-11-13 07:26:31 +00005471 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5472 (__v8di)_mm512_srli_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005473 (__v8di)__W);
5474}
Michael Zuckermand176d742016-03-01 17:49:03 +00005475
Craig Topper1a441932016-11-12 07:16:59 +00005476static __inline__ __m512i __DEFAULT_FN_ATTRS
5477_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5478{
Craig Topperd7e5b212016-11-13 07:26:31 +00005479 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5480 (__v8di)_mm512_srli_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005481 (__v8di)_mm512_setzero_si512());
5482}
Michael Zuckermand176d742016-03-01 17:49:03 +00005483
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00005484static __inline__ __m512i __DEFAULT_FN_ATTRS
5485_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5486{
5487 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5488 (__v16si) __W,
5489 (__mmask16) __U);
5490}
5491
5492static __inline__ __m512i __DEFAULT_FN_ATTRS
5493_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5494{
5495 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5496 (__v16si)
5497 _mm512_setzero_si512 (),
5498 (__mmask16) __U);
5499}
5500
5501static __inline__ void __DEFAULT_FN_ATTRS
5502_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5503{
5504 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5505 (__mmask16) __U);
5506}
5507
5508static __inline__ __m512i __DEFAULT_FN_ATTRS
Michael Zuckermane6542002016-05-23 08:01:48 +00005509_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5510{
Igor Bregeraadb8762016-06-08 13:59:20 +00005511 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5512 (__v16si) __A,
5513 (__v16si) __W);
Michael Zuckermane6542002016-05-23 08:01:48 +00005514}
5515
5516static __inline__ __m512i __DEFAULT_FN_ATTRS
5517_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5518{
Igor Bregeraadb8762016-06-08 13:59:20 +00005519 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5520 (__v16si) __A,
5521 (__v16si) _mm512_setzero_si512 ());
Michael Zuckermane6542002016-05-23 08:01:48 +00005522}
5523
5524static __inline__ __m512i __DEFAULT_FN_ATTRS
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00005525_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5526{
Igor Bregeraadb8762016-06-08 13:59:20 +00005527 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5528 (__v8di) __A,
5529 (__v8di) __W);
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00005530}
5531
5532static __inline__ __m512i __DEFAULT_FN_ATTRS
5533_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5534{
Igor Bregeraadb8762016-06-08 13:59:20 +00005535 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5536 (__v8di) __A,
5537 (__v8di) _mm512_setzero_si512 ());
Michael Zuckermanffbb67a2016-03-03 09:26:01 +00005538}
5539
5540static __inline__ __m512i __DEFAULT_FN_ATTRS
5541_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5542{
5543 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5544 (__v8di) __W,
5545 (__mmask8) __U);
5546}
5547
5548static __inline__ __m512i __DEFAULT_FN_ATTRS
5549_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5550{
5551 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5552 (__v8di)
5553 _mm512_setzero_si512 (),
5554 (__mmask8) __U);
5555}
5556
5557static __inline__ void __DEFAULT_FN_ATTRS
5558_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5559{
5560 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5561 (__mmask8) __U);
5562}
5563
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005564static __inline__ __m512d __DEFAULT_FN_ATTRS
5565_mm512_movedup_pd (__m512d __A)
5566{
Simon Pilgrim275d7212016-07-02 17:16:25 +00005567 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5568 0, 0, 2, 2, 4, 4, 6, 6);
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005569}
5570
5571static __inline__ __m512d __DEFAULT_FN_ATTRS
5572_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5573{
Simon Pilgrim275d7212016-07-02 17:16:25 +00005574 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5575 (__v8df)_mm512_movedup_pd(__A),
5576 (__v8df)__W);
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005577}
5578
5579static __inline__ __m512d __DEFAULT_FN_ATTRS
5580_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5581{
Simon Pilgrim275d7212016-07-02 17:16:25 +00005582 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5583 (__v8df)_mm512_movedup_pd(__A),
5584 (__v8df)_mm512_setzero_pd());
Michael Zuckerman0d67e4b2016-03-03 13:43:05 +00005585}
5586
/* Unmasked fixupimmpd512 builtin call on A, B and table C with immediate imm
   and rounding argument R.  The whole expansion is parenthesized so a
   following postfix operator (e.g. [i]) applies to the cast result. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005592
/* Masked fixupimmpd512 builtin call on A, B and table C with immediate imm,
   mask U and rounding argument R.  The whole expansion is parenthesized so a
   following postfix operator (e.g. [i]) applies to the cast result. */
#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005598
/* Unmasked fixupimmpd512 builtin call on A, B and table C with immediate
   imm, using _MM_FROUND_CUR_DIRECTION.  The whole expansion is parenthesized
   so a following postfix operator (e.g. [i]) applies to the cast result. */
#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005605
/* Masked fixupimmpd512 builtin call on A, B and table C with immediate imm
   and mask U, using _MM_FROUND_CUR_DIRECTION.  The whole expansion is
   parenthesized so a following postfix operator applies to the cast result. */
#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005612
/* Zero-masked variant: fixupimmpd512_maskz builtin call on A, B and table C
   with immediate imm, mask U and rounding argument R.  The whole expansion is
   parenthesized so a following postfix operator applies to the cast result. */
#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005619
/* Zero-masked variant: fixupimmpd512_maskz builtin call on A, B and table C
   with immediate imm and mask U, using _MM_FROUND_CUR_DIRECTION.  The whole
   expansion is parenthesized so a postfix operator applies to the result. */
#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005626
/* Expands to __builtin_ia32_fixupimmps512_mask with an all-ones __mmask16 and
 * R as the builtin's last argument. The expansion is fully parenthesized so
 * it is safe to use as a sub-expression. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)-1, (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005632
/* Expands to __builtin_ia32_fixupimmps512_mask with mask U and R as the
 * builtin's last argument. The expansion is fully parenthesized so it is safe
 * to use as a sub-expression. */
#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005638
/* Expands to __builtin_ia32_fixupimmps512_mask with an all-ones __mmask16 and
 * _MM_FROUND_CUR_DIRECTION as the last argument. The expansion is fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005645
/* Expands to __builtin_ia32_fixupimmps512_mask with mask U and
 * _MM_FROUND_CUR_DIRECTION as the last argument. The expansion is fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005652
/* Expands to __builtin_ia32_fixupimmps512_maskz with mask U and R as the
 * builtin's last argument. The expansion is fully parenthesized so it is safe
 * to use as a sub-expression. */
#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005659
/* Expands to __builtin_ia32_fixupimmps512_maskz with mask U and
 * _MM_FROUND_CUR_DIRECTION as the last argument. The expansion is fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005666
/* Expands to __builtin_ia32_fixupimmsd_mask with an all-ones __mmask8 and R
 * as the builtin's last argument. The expansion is fully parenthesized so it
 * is safe to use as a sub-expression. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)-1, (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005672
/* Expands to __builtin_ia32_fixupimmsd_mask with mask U and R as the
 * builtin's last argument. The expansion is fully parenthesized so it is safe
 * to use as a sub-expression. */
#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005678
/* Expands to __builtin_ia32_fixupimmsd_mask with an all-ones __mmask8 and
 * _MM_FROUND_CUR_DIRECTION as the last argument. The expansion is fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005685
/* Expands to __builtin_ia32_fixupimmsd_mask with mask U and
 * _MM_FROUND_CUR_DIRECTION as the last argument. The expansion is fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005692
/* Expands to __builtin_ia32_fixupimmsd_maskz with mask U and R as the
 * builtin's last argument. The expansion is fully parenthesized so it is safe
 * to use as a sub-expression. */
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005698
/* Expands to __builtin_ia32_fixupimmsd_maskz with mask U and
 * _MM_FROUND_CUR_DIRECTION as the last argument. The expansion is fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005705
/* Expands to __builtin_ia32_fixupimmss_mask with an all-ones __mmask8 and R
 * as the builtin's last argument. The expansion is fully parenthesized so it
 * is safe to use as a sub-expression. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)-1, (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005711
/* Expands to __builtin_ia32_fixupimmss_mask with mask U and R as the
 * builtin's last argument. The expansion is fully parenthesized so it is safe
 * to use as a sub-expression. */
#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005717
/* Expands to __builtin_ia32_fixupimmss_mask with an all-ones __mmask8 and
 * _MM_FROUND_CUR_DIRECTION as the last argument. The expansion is fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005724
/* Expands to __builtin_ia32_fixupimmss_mask with mask U and
 * _MM_FROUND_CUR_DIRECTION as the last argument. The expansion is fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005731
/* Expands to __builtin_ia32_fixupimmss_maskz with mask U and R as the
 * builtin's last argument. The expansion is fully parenthesized so it is safe
 * to use as a sub-expression. */
#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005737
/* Expands to __builtin_ia32_fixupimmss_maskz with mask U and
 * _MM_FROUND_CUR_DIRECTION as the last argument. The expansion is fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005744
/* Expands to __builtin_ia32_getexpsd128_round_mask on A and B with a zero
 * vector as the third operand, an all-ones __mmask8 and R as the last
 * argument. Fully parenthesized so it is safe to use as a sub-expression. */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005750
5751
5752static __inline__ __m128d __DEFAULT_FN_ATTRS
5753_mm_getexp_sd (__m128d __A, __m128d __B)
5754{
5755 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5756 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5757}
5758
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005759static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00005760_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005761{
5762 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5763 (__v2df) __B,
5764 (__v2df) __W,
5765 (__mmask8) __U,
5766 _MM_FROUND_CUR_DIRECTION);
5767}
5768
/* Expands to __builtin_ia32_getexpsd128_round_mask on A and B with W as the
 * third operand, mask U and R as the last argument. Fully parenthesized so it
 * is safe to use as a sub-expression. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005774
5775static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00005776_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005777{
5778 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5779 (__v2df) __B,
5780 (__v2df) _mm_setzero_pd (),
5781 (__mmask8) __U,
5782 _MM_FROUND_CUR_DIRECTION);
5783}
5784
/* Expands to __builtin_ia32_getexpsd128_round_mask on A and B with a zero
 * vector as the third operand, mask U and R as the last argument. Fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005790
/* Expands to __builtin_ia32_getexpss128_round_mask on A and B with a zero
 * vector as the third operand, an all-ones __mmask8 and R as the last
 * argument. Fully parenthesized so it is safe to use as a sub-expression. */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005796
5797static __inline__ __m128 __DEFAULT_FN_ATTRS
5798_mm_getexp_ss (__m128 __A, __m128 __B)
5799{
5800 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5801 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5802}
5803
Craig Topper58187d32016-05-17 04:41:29 +00005804static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00005805_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005806{
Craig Topper58187d32016-05-17 04:41:29 +00005807 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005808 (__v4sf) __B,
5809 (__v4sf) __W,
5810 (__mmask8) __U,
5811 _MM_FROUND_CUR_DIRECTION);
5812}
5813
/* Expands to __builtin_ia32_getexpss128_round_mask on A and B with W as the
 * third operand, mask U and R as the last argument. Fully parenthesized so it
 * is safe to use as a sub-expression. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005819
Craig Topper58187d32016-05-17 04:41:29 +00005820static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00005821_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005822{
Craig Topper58187d32016-05-17 04:41:29 +00005823 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005824 (__v4sf) __B,
5825 (__v4sf) _mm_setzero_pd (),
5826 (__mmask8) __U,
5827 _MM_FROUND_CUR_DIRECTION);
5828}
5829
/* Expands to __builtin_ia32_getexpss128_round_mask on A and B with a zero
 * vector as the third operand, mask U and R as the last argument. Fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005835
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B. C and D are
 * packed into one int as ((D) << 2) | (C); the remaining operands are a zero
 * vector, an all-ones __mmask8 and R. Fully parenthesized so it is safe to
 * use as a sub-expression. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005842
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B. C and D are
 * packed into one int as ((D) << 2) | (C); the remaining operands are a zero
 * vector, an all-ones __mmask8 and _MM_FROUND_CUR_DIRECTION. Fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005850
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B. C and D are
 * packed into one int as ((D) << 2) | (C); the remaining operands are W,
 * mask U and _MM_FROUND_CUR_DIRECTION. Fully parenthesized so it is safe to
 * use as a sub-expression. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005858
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B. C and D are
 * packed into one int as ((D) << 2) | (C); the remaining operands are W,
 * mask U and R. Fully parenthesized so it is safe to use as a
 * sub-expression. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005865
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B. C and D are
 * packed into one int as ((D) << 2) | (C); the remaining operands are a zero
 * vector, mask U and _MM_FROUND_CUR_DIRECTION. Fully parenthesized so it is
 * safe to use as a sub-expression. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005873
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B. C and D are
 * packed into one int as ((D) << 2) | (C); the remaining operands are a zero
 * vector, mask U and R. Fully parenthesized so it is safe to use as a
 * sub-expression. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005880
/* Expands to __builtin_ia32_getmantss_round_mask on A and B. C and D are
 * packed into one int as ((D) << 2) | (C); the remaining operands are a zero
 * vector, an all-ones __mmask8 and R. Fully parenthesized so it is safe to
 * use as a sub-expression. */
#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005887
/* Expands to __builtin_ia32_getmantss_round_mask on A and B. C and D are
 * packed into one int as ((D) << 2) | (C); the remaining operands are a zero
 * vector, an all-ones __mmask8 and _MM_FROUND_CUR_DIRECTION. Fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
Michael Zuckermandef78752016-03-28 12:23:09 +00005895
/* Expands to __builtin_ia32_getmantss_round_mask on A and B. C and D are
 * packed into one int as ((D) << 2) | (C); the remaining operands are W,
 * mask U and _MM_FROUND_CUR_DIRECTION. Fully parenthesized so it is safe to
 * use as a sub-expression. */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005903
/* Expands to __builtin_ia32_getmantss_round_mask on A and B. C and D are
 * packed into one int as ((D) << 2) | (C); the remaining operands are W,
 * mask U and R. Fully parenthesized so it is safe to use as a
 * sub-expression. */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005910
/* Expands to __builtin_ia32_getmantss_round_mask on A and B. C and D are
 * packed into one int as ((D) << 2) | (C); the remaining operands are a zero
 * vector, mask U and _MM_FROUND_CUR_DIRECTION. The zero operand comes from
 * _mm_setzero_ps() so its source type is the single-precision vector that
 * the __v4sf cast expects, matching _mm_maskz_getmant_round_ss. Fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
Michael Zuckermana1ceca22016-04-22 10:06:10 +00005918
/* Expands to __builtin_ia32_getmantss_round_mask on A and B. C and D are
 * packed into one int as ((D) << 2) | (C); the remaining operands are a zero
 * vector, mask U and R. Fully parenthesized so it is safe to use as a
 * sub-expression. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
Michael Zuckermandef78752016-03-28 12:23:09 +00005925
5926static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5927_mm512_kmov (__mmask16 __A)
5928{
5929 return __A;
5930}
5931
/* Expands to __builtin_ia32_vcomisd(A, B, P, R) with the result cast to int.
 * Fully parenthesized so the cast covers the call result and the expansion is
 * safe to use as a sub-expression. */
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))
Michael Zuckermane71d59f2016-03-07 19:15:00 +00005935
/* Expands to __builtin_ia32_vcomiss(A, B, P, R) with the result cast to int.
 * Fully parenthesized so the cast covers the call result and the expansion is
 * safe to use as a sub-expression. */
#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))
Michael Zuckermane71d59f2016-03-07 19:15:00 +00005939
#ifdef __x86_64__
/* Only defined when __x86_64__ is defined. Expands to
 * __builtin_ia32_vcvtsd2si64(A, R) with the result cast to long long. Fully
 * parenthesized so it is safe to use as a sub-expression. */
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
Simon Pilgrim427154d2016-07-04 21:30:47 +00005944
Michael Zuckermand8d2f622016-04-11 07:15:34 +00005945static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005946_mm512_sll_epi32(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005947{
Craig Topper1a441932016-11-12 07:16:59 +00005948 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005949}
5950
5951static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005952_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005953{
Craig Topperd7e5b212016-11-13 07:26:31 +00005954 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5955 (__v16si)_mm512_sll_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005956 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005957}
5958
5959static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005960_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005961{
Craig Topperd7e5b212016-11-13 07:26:31 +00005962 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5963 (__v16si)_mm512_sll_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005964 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005965}
5966
5967static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005968_mm512_sll_epi64(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005969{
Craig Topper1a441932016-11-12 07:16:59 +00005970 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005971}
5972
5973static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005974_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005975{
Craig Topperd7e5b212016-11-13 07:26:31 +00005976 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5977 (__v8di)_mm512_sll_epi64(__A, __B),
5978 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005979}
5980
5981static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00005982_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005983{
Craig Topperd7e5b212016-11-13 07:26:31 +00005984 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5985 (__v8di)_mm512_sll_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00005986 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005987}
5988
5989static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005990_mm512_sllv_epi32(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005991{
Craig Topper5e0709d2016-11-13 07:26:34 +00005992 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005993}
5994
5995static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00005996_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00005997{
Craig Topper5e0709d2016-11-13 07:26:34 +00005998 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5999 (__v16si)_mm512_sllv_epi32(__X, __Y),
6000 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006001}
6002
6003static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006004_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006005{
Craig Topper5e0709d2016-11-13 07:26:34 +00006006 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6007 (__v16si)_mm512_sllv_epi32(__X, __Y),
6008 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006009}
6010
6011static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006012_mm512_sllv_epi64(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006013{
Craig Topper5e0709d2016-11-13 07:26:34 +00006014 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006015}
6016
6017static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006018_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006019{
Craig Topper5e0709d2016-11-13 07:26:34 +00006020 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6021 (__v8di)_mm512_sllv_epi64(__X, __Y),
6022 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006023}
6024
6025static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006026_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006027{
Craig Topper5e0709d2016-11-13 07:26:34 +00006028 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6029 (__v8di)_mm512_sllv_epi64(__X, __Y),
6030 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006031}
6032
6033static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006034_mm512_sra_epi32(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006035{
Craig Topper1a441932016-11-12 07:16:59 +00006036 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006037}
6038
6039static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006040_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006041{
Craig Topperd7e5b212016-11-13 07:26:31 +00006042 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6043 (__v16si)_mm512_sra_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006044 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006045}
6046
6047static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006048_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006049{
Craig Topperd7e5b212016-11-13 07:26:31 +00006050 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6051 (__v16si)_mm512_sra_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006052 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006053}
6054
6055static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006056_mm512_sra_epi64(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006057{
Craig Topper1a441932016-11-12 07:16:59 +00006058 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006059}
6060
6061static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006062_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006063{
Craig Topperd7e5b212016-11-13 07:26:31 +00006064 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6065 (__v8di)_mm512_sra_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006066 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006067}
6068
6069static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006070_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006071{
Craig Topperd7e5b212016-11-13 07:26:31 +00006072 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6073 (__v8di)_mm512_sra_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006074 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006075}
6076
6077static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006078_mm512_srav_epi32(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006079{
Craig Topper5e0709d2016-11-13 07:26:34 +00006080 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006081}
6082
6083static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006084_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006085{
Craig Topper5e0709d2016-11-13 07:26:34 +00006086 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6087 (__v16si)_mm512_srav_epi32(__X, __Y),
6088 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006089}
6090
6091static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006092_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006093{
Craig Topper5e0709d2016-11-13 07:26:34 +00006094 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6095 (__v16si)_mm512_srav_epi32(__X, __Y),
6096 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006097}
6098
6099static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006100_mm512_srav_epi64(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006101{
Craig Topper5e0709d2016-11-13 07:26:34 +00006102 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006103}
6104
6105static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006106_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006107{
Craig Topper5e0709d2016-11-13 07:26:34 +00006108 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6109 (__v8di)_mm512_srav_epi64(__X, __Y),
6110 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006111}
6112
6113static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006114_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006115{
Craig Topper5e0709d2016-11-13 07:26:34 +00006116 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6117 (__v8di)_mm512_srav_epi64(__X, __Y),
6118 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006119}
6120
6121static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006122_mm512_srl_epi32(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006123{
Craig Topper1a441932016-11-12 07:16:59 +00006124 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006125}
6126
6127static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006128_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006129{
Craig Topperd7e5b212016-11-13 07:26:31 +00006130 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6131 (__v16si)_mm512_srl_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006132 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006133}
6134
6135static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006136_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006137{
Craig Topperd7e5b212016-11-13 07:26:31 +00006138 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6139 (__v16si)_mm512_srl_epi32(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006140 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006141}
6142
6143static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006144_mm512_srl_epi64(__m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006145{
Craig Topper1a441932016-11-12 07:16:59 +00006146 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006147}
6148
6149static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006150_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006151{
Craig Topperd7e5b212016-11-13 07:26:31 +00006152 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6153 (__v8di)_mm512_srl_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006154 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006155}
6156
6157static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper1a441932016-11-12 07:16:59 +00006158_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006159{
Craig Topperd7e5b212016-11-13 07:26:31 +00006160 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6161 (__v8di)_mm512_srl_epi64(__A, __B),
Craig Topper1a441932016-11-12 07:16:59 +00006162 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006163}
6164
6165static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006166_mm512_srlv_epi32(__m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006167{
Craig Topper5e0709d2016-11-13 07:26:34 +00006168 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006169}
6170
6171static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006172_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006173{
Craig Topper5e0709d2016-11-13 07:26:34 +00006174 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6175 (__v16si)_mm512_srlv_epi32(__X, __Y),
6176 (__v16si)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006177}
6178
6179static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006180_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006181{
Craig Topper5e0709d2016-11-13 07:26:34 +00006182 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6183 (__v16si)_mm512_srlv_epi32(__X, __Y),
6184 (__v16si)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006185}
6186
6187static __inline__ __m512i __DEFAULT_FN_ATTRS
6188_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
6189{
Craig Topper5e0709d2016-11-13 07:26:34 +00006190 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006191}
6192
6193static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006194_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006195{
Craig Topper5e0709d2016-11-13 07:26:34 +00006196 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6197 (__v8di)_mm512_srlv_epi64(__X, __Y),
6198 (__v8di)__W);
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006199}
6200
6201static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper5e0709d2016-11-13 07:26:34 +00006202_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006203{
Craig Topper5e0709d2016-11-13 07:26:34 +00006204 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6205 (__v8di)_mm512_srlv_epi64(__X, __Y),
6206 (__v8di)_mm512_setzero_si512());
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006207}
6208
/* Unmasked form: forwards A, B, C and imm to
 * __builtin_ia32_pternlogd512_mask with an all-ones __mmask16.  The whole
 * expansion is parenthesized so that operators applied to the macro at the
 * use site (subscripting, sizeof, ...) act on the cast result. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)-1))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006214
/* Masked form: same builtin as the unmasked macro, with the caller's mask U
 * passed through as __mmask16.  Parenthesized as a whole so the expansion
 * behaves as a single primary expression at the use site. */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U)))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006220
/* Zero-masking form: forwards to the dedicated
 * __builtin_ia32_pternlogd512_maskz builtin with mask U.  Parenthesized as a
 * whole so the expansion behaves as a single primary expression. */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                              (__v16si)(__m512i)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U)))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006226
/* Unmasked 64-bit-lane form: forwards A, B, C and imm to
 * __builtin_ia32_pternlogq512_mask with an all-ones __mmask8.  Parenthesized
 * as a whole so the expansion behaves as a single primary expression. */
#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), \
                                             (int)(imm), (__mmask8)-1))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006232
/* Masked 64-bit-lane form: same builtin as the unmasked macro, with the
 * caller's mask U passed through as __mmask8.  Parenthesized as a whole so
 * the expansion behaves as a single primary expression. */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), \
                                             (int)(imm), (__mmask8)(U)))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006238
/* Zero-masking 64-bit-lane form: forwards to
 * __builtin_ia32_pternlogq512_maskz with mask U.  Parenthesized as a whole
 * so the expansion behaves as a single primary expression. */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                              (__v8di)(__m512i)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U)))
Michael Zuckerman81f468c2016-04-11 17:04:21 +00006244
#ifdef __x86_64__
/* Only defined for x86-64 targets.  Forwards A and the rounding argument R
 * to __builtin_ia32_vcvtsd2si64 and casts the result to long long.  The
 * expansion is parenthesized so the cast cannot be split from its operand by
 * surrounding operators (e.g. sizeof). */
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006249
/* Forwards A and the rounding argument R to __builtin_ia32_vcvtsd2si32 and
 * casts the result to int.  Parenthesized as a whole so the expansion behaves
 * as a single primary expression at the use site. */
#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006252
/* Same expansion as _mm_cvt_roundsd_si32 under the "_i32" spelling: calls
 * __builtin_ia32_vcvtsd2si32 with rounding argument R.  Parenthesized as a
 * whole so the expansion behaves as a single primary expression. */
#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006255
/* Forwards A and the rounding argument R to __builtin_ia32_vcvtsd2usi32 and
 * casts the result to unsigned int.  Parenthesized as a whole so the
 * expansion behaves as a single primary expression. */
#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006258
6259static __inline__ unsigned __DEFAULT_FN_ATTRS
6260_mm_cvtsd_u32 (__m128d __A)
6261{
6262 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6263 _MM_FROUND_CUR_DIRECTION);
6264}
6265
Craig Topper45db56c2016-07-21 07:38:39 +00006266#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00006267#define _mm_cvt_roundsd_u64(A, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006268 (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
Craig Topperc6338672018-05-31 00:51:20 +00006269 (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006270
6271static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6272_mm_cvtsd_u64 (__m128d __A)
6273{
6274 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6275 __A,
6276 _MM_FROUND_CUR_DIRECTION);
6277}
Craig Topper45db56c2016-07-21 07:38:39 +00006278#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006279
/* Forwards A and the rounding argument R to __builtin_ia32_vcvtss2si32 and
 * casts the result to int.  Parenthesized as a whole so the expansion behaves
 * as a single primary expression at the use site. */
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006282
/* Same expansion as _mm_cvt_roundss_si32 under the "_i32" spelling: calls
 * __builtin_ia32_vcvtss2si32 with rounding argument R.  Parenthesized as a
 * whole so the expansion behaves as a single primary expression. */
#define _mm_cvt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006285
#ifdef __x86_64__
/* Both spellings expand to the same __builtin_ia32_vcvtss2si64 call with
 * rounding argument R.  Each expansion is parenthesized as a whole so it
 * behaves as a single primary expression at the use site. */
#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006293
/* Forwards A and the rounding argument R to __builtin_ia32_vcvtss2usi32 and
 * casts the result to unsigned int.  Parenthesized as a whole so the
 * expansion behaves as a single primary expression. */
#define _mm_cvt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006296
6297static __inline__ unsigned __DEFAULT_FN_ATTRS
6298_mm_cvtss_u32 (__m128 __A)
6299{
6300 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6301 _MM_FROUND_CUR_DIRECTION);
6302}
6303
Craig Topper45db56c2016-07-21 07:38:39 +00006304#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00006305#define _mm_cvt_roundss_u64(A, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006306 (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
Craig Topperc6338672018-05-31 00:51:20 +00006307 (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006308
6309static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6310_mm_cvtss_u64 (__m128 __A)
6311{
6312 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6313 __A,
6314 _MM_FROUND_CUR_DIRECTION);
6315}
Craig Topper45db56c2016-07-21 07:38:39 +00006316#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006317
/* Forwards A and R to __builtin_ia32_vcvttsd2si32 and casts the result to
 * int.  Parenthesized as a whole so the expansion behaves as a single
 * primary expression at the use site. */
#define _mm_cvtt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006320
/* Same expansion as _mm_cvtt_roundsd_i32 under the "_si32" spelling: calls
 * __builtin_ia32_vcvttsd2si32 with argument R.  Parenthesized as a whole so
 * the expansion behaves as a single primary expression. */
#define _mm_cvtt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006323
6324static __inline__ int __DEFAULT_FN_ATTRS
6325_mm_cvttsd_i32 (__m128d __A)
6326{
6327 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6328 _MM_FROUND_CUR_DIRECTION);
6329}
6330
Craig Topper45db56c2016-07-21 07:38:39 +00006331#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00006332#define _mm_cvtt_roundsd_si64(A, R) \
6333 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006334
Craig Topperc6338672018-05-31 00:51:20 +00006335#define _mm_cvtt_roundsd_i64(A, R) \
6336 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006337
6338static __inline__ long long __DEFAULT_FN_ATTRS
6339_mm_cvttsd_i64 (__m128d __A)
6340{
6341 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6342 _MM_FROUND_CUR_DIRECTION);
6343}
Craig Topper45db56c2016-07-21 07:38:39 +00006344#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006345
/* Forwards A and R to __builtin_ia32_vcvttsd2usi32 and casts the result to
 * unsigned int.  Parenthesized as a whole so the expansion behaves as a
 * single primary expression at the use site. */
#define _mm_cvtt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006348
6349static __inline__ unsigned __DEFAULT_FN_ATTRS
6350_mm_cvttsd_u32 (__m128d __A)
6351{
6352 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6353 _MM_FROUND_CUR_DIRECTION);
6354}
6355
Craig Topper45db56c2016-07-21 07:38:39 +00006356#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00006357#define _mm_cvtt_roundsd_u64(A, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006358 (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
Craig Topperc6338672018-05-31 00:51:20 +00006359 (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006360
6361static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6362_mm_cvttsd_u64 (__m128d __A)
6363{
6364 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6365 __A,
6366 _MM_FROUND_CUR_DIRECTION);
6367}
Craig Topper45db56c2016-07-21 07:38:39 +00006368#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006369
/* Forwards A and R to __builtin_ia32_vcvttss2si32 and casts the result to
 * int.  Parenthesized as a whole so the expansion behaves as a single
 * primary expression at the use site. */
#define _mm_cvtt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006372
/* Same expansion as _mm_cvtt_roundss_i32 under the "_si32" spelling: calls
 * __builtin_ia32_vcvttss2si32 with argument R.  Parenthesized as a whole so
 * the expansion behaves as a single primary expression. */
#define _mm_cvtt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006375
6376static __inline__ int __DEFAULT_FN_ATTRS
6377_mm_cvttss_i32 (__m128 __A)
6378{
6379 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6380 _MM_FROUND_CUR_DIRECTION);
6381}
6382
Craig Topper45db56c2016-07-21 07:38:39 +00006383#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00006384#define _mm_cvtt_roundss_i64(A, R) \
6385 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006386
Craig Topperc6338672018-05-31 00:51:20 +00006387#define _mm_cvtt_roundss_si64(A, R) \
6388 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006389
6390static __inline__ long long __DEFAULT_FN_ATTRS
6391_mm_cvttss_i64 (__m128 __A)
6392{
6393 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6394 _MM_FROUND_CUR_DIRECTION);
6395}
Craig Topper45db56c2016-07-21 07:38:39 +00006396#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006397
/* Forwards A and R to __builtin_ia32_vcvttss2usi32 and casts the result to
 * unsigned int.  Parenthesized as a whole so the expansion behaves as a
 * single primary expression at the use site. */
#define _mm_cvtt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006400
6401static __inline__ unsigned __DEFAULT_FN_ATTRS
6402_mm_cvttss_u32 (__m128 __A)
6403{
6404 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6405 _MM_FROUND_CUR_DIRECTION);
6406}
6407
Craig Topper45db56c2016-07-21 07:38:39 +00006408#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00006409#define _mm_cvtt_roundss_u64(A, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00006410 (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
Craig Topperc6338672018-05-31 00:51:20 +00006411 (int)(R))
Michael Zuckerman8d161992016-04-10 17:24:03 +00006412
6413static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6414_mm_cvttss_u64 (__m128 __A)
6415{
6416 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6417 __A,
6418 _MM_FROUND_CUR_DIRECTION);
6419}
Craig Topper45db56c2016-07-21 07:38:39 +00006420#endif
Michael Zuckerman8d161992016-04-10 17:24:03 +00006421
/* Shuffle of X built with __builtin_shufflevector.  Result element i takes
 * its index from the even base of its own pair (0, 2, 4 or 6) plus bit i of
 * C, so each element is chosen from within its own two-element pair.  The
 * second shuffle operand is an undefined vector; none of the indices above
 * can reach it.  The expansion is parenthesized as a whole so that postfix
 * operators at the use site (e.g. subscripting) apply to the __m512d result
 * instead of binding to the builtin call ahead of the cast. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                    (__v8df)_mm512_undefined_pd(), \
                                    0 + (((C) >> 0) & 0x1), \
                                    0 + (((C) >> 1) & 0x1), \
                                    2 + (((C) >> 2) & 0x1), \
                                    2 + (((C) >> 3) & 0x1), \
                                    4 + (((C) >> 4) & 0x1), \
                                    4 + (((C) >> 5) & 0x1), \
                                    6 + (((C) >> 6) & 0x1), \
                                    6 + (((C) >> 7) & 0x1)))
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006433
/* Merge-masked form: __builtin_ia32_selectpd_512 chooses, under mask U,
 * between _mm512_permute_pd(X, C) and the pass-through vector W.
 * Parenthesized as a whole so the expansion behaves as a single primary
 * expression at the use site. */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006438
/* Zero-masked form: __builtin_ia32_selectpd_512 chooses, under mask U,
 * between _mm512_permute_pd(X, C) and an all-zero vector.  Parenthesized as
 * a whole so the expansion behaves as a single primary expression. */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006443
/* Shuffle of X built with __builtin_shufflevector.  The sixteen result
 * elements form four groups of four (bases 0, 4, 8, 12); within every group
 * the four source indices are base + the 2-bit fields of C at bit positions
 * 0, 2, 4 and 6, so the same selector byte is reused for each group.  The
 * second shuffle operand is an undefined vector; none of the indices can
 * reach it.  The expansion is parenthesized as a whole so that postfix
 * operators at the use site apply to the __m512 result instead of binding
 * to the builtin call ahead of the cast. */
#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
                                   (__v16sf)_mm512_undefined_ps(), \
                                    0  + (((C) >> 0) & 0x3), \
                                    0  + (((C) >> 2) & 0x3), \
                                    0  + (((C) >> 4) & 0x3), \
                                    0  + (((C) >> 6) & 0x3), \
                                    4  + (((C) >> 0) & 0x3), \
                                    4  + (((C) >> 2) & 0x3), \
                                    4  + (((C) >> 4) & 0x3), \
                                    4  + (((C) >> 6) & 0x3), \
                                    8  + (((C) >> 0) & 0x3), \
                                    8  + (((C) >> 2) & 0x3), \
                                    8  + (((C) >> 4) & 0x3), \
                                    8  + (((C) >> 6) & 0x3), \
                                    12 + (((C) >> 0) & 0x3), \
                                    12 + (((C) >> 2) & 0x3), \
                                    12 + (((C) >> 4) & 0x3), \
                                    12 + (((C) >> 6) & 0x3)))
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006463
/* Merge-masked form: __builtin_ia32_selectps_512 chooses, under mask U,
 * between _mm512_permute_ps(X, C) and the pass-through vector W.
 * Parenthesized as a whole so the expansion behaves as a single primary
 * expression at the use site. */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006468
/* Zero-masked form: __builtin_ia32_selectps_512 chooses, under mask U,
 * between _mm512_permute_ps(X, C) and an all-zero vector.  Parenthesized as
 * a whole so the expansion behaves as a single primary expression. */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006473
6474static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006475_mm512_permutevar_pd(__m512d __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006476{
Craig Topper678b07f2016-12-11 01:26:52 +00006477 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006478}
6479
6480static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006481_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006482{
Craig Topper678b07f2016-12-11 01:26:52 +00006483 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6484 (__v8df)_mm512_permutevar_pd(__A, __C),
6485 (__v8df)__W);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006486}
6487
6488static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006489_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006490{
Craig Topper678b07f2016-12-11 01:26:52 +00006491 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6492 (__v8df)_mm512_permutevar_pd(__A, __C),
6493 (__v8df)_mm512_setzero_pd());
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006494}
6495
6496static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006497_mm512_permutevar_ps(__m512 __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006498{
Craig Topper678b07f2016-12-11 01:26:52 +00006499 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006500}
6501
6502static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006503_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006504{
Craig Topper678b07f2016-12-11 01:26:52 +00006505 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6506 (__v16sf)_mm512_permutevar_ps(__A, __C),
6507 (__v16sf)__W);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006508}
6509
6510static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper678b07f2016-12-11 01:26:52 +00006511_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006512{
Craig Topper678b07f2016-12-11 01:26:52 +00006513 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6514 (__v16sf)_mm512_permutevar_ps(__A, __C),
6515 (__v16sf)_mm512_setzero_ps());
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006516}
6517
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006518static __inline __m512d __DEFAULT_FN_ATTRS
6519_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006520{
Craig Topper68a272d2018-05-29 03:26:38 +00006521 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6522 (__v8df)__B);
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006523}
6524
6525static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00006526_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006527{
Craig Topper68a272d2018-05-29 03:26:38 +00006528 return (__m512d)__builtin_ia32_selectpd_512(__U,
6529 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6530 (__v8df)__A);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006531}
6532
6533static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00006534_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
6535 __m512d __B)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006536{
Craig Topper68a272d2018-05-29 03:26:38 +00006537 return (__m512d)__builtin_ia32_selectpd_512(__U,
6538 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6539 (__v8df)(__m512d)__I);
6540}
6541
6542static __inline__ __m512d __DEFAULT_FN_ATTRS
6543_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
6544 __m512d __B)
6545{
6546 return (__m512d)__builtin_ia32_selectpd_512(__U,
6547 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6548 (__v8df)_mm512_setzero_pd());
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006549}
6550
6551static __inline __m512 __DEFAULT_FN_ATTRS
6552_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6553{
Craig Topper68a272d2018-05-29 03:26:38 +00006554 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6555 (__v16sf) __B);
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006556}
6557
6558static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00006559_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
Michael Zuckerman5e2c6b62016-05-11 11:21:18 +00006560{
Craig Topper68a272d2018-05-29 03:26:38 +00006561 return (__m512)__builtin_ia32_selectps_512(__U,
6562 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6563 (__v16sf)__A);
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006564}
6565
6566static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper68a272d2018-05-29 03:26:38 +00006567_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006568{
Craig Topper68a272d2018-05-29 03:26:38 +00006569 return (__m512)__builtin_ia32_selectps_512(__U,
6570 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6571 (__v16sf)(__m512)__I);
6572}
6573
6574static __inline__ __m512 __DEFAULT_FN_ATTRS
6575_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
6576{
6577 return (__m512)__builtin_ia32_selectps_512(__U,
6578 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6579 (__v16sf)_mm512_setzero_ps());
Michael Zuckermand8d2f622016-04-11 07:15:34 +00006580}
6581
Michael Zuckerman07525092016-04-11 10:22:07 +00006582
/* Unmasked form: forwards A and R to __builtin_ia32_cvttpd2udq512_mask with
 * an undefined pass-through vector and an all-ones __mmask8.  Parenthesized
 * as a whole so the expansion behaves as a single primary expression at the
 * use site. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))
Michael Zuckerman138fc5b2016-05-03 11:05:24 +00006587
/* Merge-masked form: passes W as the pass-through vector and U as the mask
 * to __builtin_ia32_cvttpd2udq512_mask.  Parenthesized as a whole so the
 * expansion behaves as a single primary expression. */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))
Michael Zuckerman138fc5b2016-05-03 11:05:24 +00006592
/* Zero-masked form: passes an all-zero pass-through vector and mask U to
 * __builtin_ia32_cvttpd2udq512_mask.  Parenthesized as a whole so the
 * expansion behaves as a single primary expression. */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))
Michael Zuckerman138fc5b2016-05-03 11:05:24 +00006597
6598static __inline__ __m256i __DEFAULT_FN_ATTRS
6599_mm512_cvttpd_epu32 (__m512d __A)
6600{
6601 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6602 (__v8si)
6603 _mm256_undefined_si256 (),
6604 (__mmask8) -1,
6605 _MM_FROUND_CUR_DIRECTION);
6606}
6607
6608static __inline__ __m256i __DEFAULT_FN_ATTRS
6609_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6610{
6611 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6612 (__v8si) __W,
6613 (__mmask8) __U,
6614 _MM_FROUND_CUR_DIRECTION);
6615}
6616
6617static __inline__ __m256i __DEFAULT_FN_ATTRS
6618_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6619{
6620 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6621 (__v8si)
6622 _mm256_setzero_si256 (),
6623 (__mmask8) __U,
6624 _MM_FROUND_CUR_DIRECTION);
6625}
Michael Zuckerman07525092016-04-11 10:22:07 +00006626
/* Unmasked form with explicit rounding argument R: forwards A, B, imm and R
 * to __builtin_ia32_rndscalesd_round_mask with an all-zero pass-through
 * vector and an all-ones __mmask8.  Parenthesized as a whole so the
 * expansion behaves as a single primary expression at the use site. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006633
/* Unmasked form using _MM_FROUND_CUR_DIRECTION: forwards A, B and imm to
 * __builtin_ia32_rndscalesd_round_mask with an all-zero pass-through vector
 * and an all-ones __mmask8.  Parenthesized as a whole so the expansion
 * behaves as a single primary expression. */
#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006640
/* Merge-masked form using _MM_FROUND_CUR_DIRECTION: W is the pass-through
 * vector and U the mask given to __builtin_ia32_rndscalesd_round_mask.
 * Parenthesized as a whole so the expansion behaves as a single primary
 * expression. */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006647
/* Merge-masked form with explicit rounding argument R: W is the pass-through
 * vector and U the mask given to __builtin_ia32_rndscalesd_round_mask.
 * Parenthesized as a whole so the expansion behaves as a single primary
 * expression. */
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006654
/* Zero-masked form using _MM_FROUND_CUR_DIRECTION: an all-zero pass-through
 * vector and mask U are given to __builtin_ia32_rndscalesd_round_mask.
 * Parenthesized as a whole so the expansion behaves as a single primary
 * expression. */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006661
/* Zero-masked form with explicit rounding argument R: an all-zero
 * pass-through vector and mask U are given to
 * __builtin_ia32_rndscalesd_round_mask.  Parenthesized as a whole so the
 * expansion behaves as a single primary expression. */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006668
/* Unmasked form with explicit rounding argument R: forwards A, B, imm and R
 * to __builtin_ia32_rndscaless_round_mask with an all-zero pass-through
 * vector and an all-ones __mmask8.  Parenthesized as a whole so the
 * expansion behaves as a single primary expression at the use site. */
#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006675
/* Unmasked form using _MM_FROUND_CUR_DIRECTION: forwards A, B and imm to
 * __builtin_ia32_rndscaless_round_mask with an all-zero pass-through vector
 * and an all-ones __mmask8.  Parenthesized as a whole so the expansion
 * behaves as a single primary expression. */
#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006682
/* Merge-masked form using _MM_FROUND_CUR_DIRECTION: W is the pass-through
 * vector and U the mask given to __builtin_ia32_rndscaless_round_mask.
 * Parenthesized as a whole so the expansion behaves as a single primary
 * expression. */
#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006689
/* Merge-masked form with explicit rounding argument R: W is the pass-through
 * vector and U the mask given to __builtin_ia32_rndscaless_round_mask.
 * Parenthesized as a whole so the expansion behaves as a single primary
 * expression. */
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006696
/* Zero-masked form using _MM_FROUND_CUR_DIRECTION: an all-zero pass-through
 * vector and mask U are given to __builtin_ia32_rndscaless_round_mask.
 * Parenthesized as a whole so the expansion behaves as a single primary
 * expression. */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006703
/* Zero-masked form with explicit rounding argument R: an all-zero
 * pass-through vector and mask U are given to
 * __builtin_ia32_rndscaless_round_mask.  Parenthesized as a whole so the
 * expansion behaves as a single primary expression. */
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006710
/* Unmasked form with explicit rounding argument R: forwards A, B and R to
 * __builtin_ia32_scalefpd512_mask with an undefined pass-through vector and
 * an all-ones __mmask8.  Parenthesized as a whole so the expansion behaves
 * as a single primary expression at the use site. */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006716
/* Merge-masked form with explicit rounding argument R: W is the pass-through
 * vector and U the mask given to __builtin_ia32_scalefpd512_mask.
 * Parenthesized as a whole so the expansion behaves as a single primary
 * expression. */
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006722
/* Zero-masked form with explicit rounding argument R: an all-zero
 * pass-through vector and mask U are given to
 * __builtin_ia32_scalefpd512_mask.  Parenthesized as a whole so the
 * expansion behaves as a single primary expression. */
#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006728
6729static __inline__ __m512d __DEFAULT_FN_ATTRS
6730_mm512_scalef_pd (__m512d __A, __m512d __B)
6731{
6732 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6733 (__v8df) __B,
6734 (__v8df)
6735 _mm512_undefined_pd (),
6736 (__mmask8) -1,
6737 _MM_FROUND_CUR_DIRECTION);
6738}
6739
6740static __inline__ __m512d __DEFAULT_FN_ATTRS
6741_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6742{
6743 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6744 (__v8df) __B,
6745 (__v8df) __W,
6746 (__mmask8) __U,
6747 _MM_FROUND_CUR_DIRECTION);
6748}
6749
6750static __inline__ __m512d __DEFAULT_FN_ATTRS
6751_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6752{
6753 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6754 (__v8df) __B,
6755 (__v8df)
6756 _mm512_setzero_pd (),
6757 (__mmask8) __U,
6758 _MM_FROUND_CUR_DIRECTION);
6759}
6760
/* Calls __builtin_ia32_scalefps512_mask on A and B with an undefined third
 * operand, an all-ones mask and rounding argument R.  The expansion is
 * parenthesized as a whole so postfix operators apply to the __m512 result. */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006766
/* Calls __builtin_ia32_scalefps512_mask on A and B with W as the third
 * operand, mask U and rounding argument R.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m512 result. */
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006772
/* Calls __builtin_ia32_scalefps512_mask on A and B with a zero vector as the
 * third operand, mask U and rounding argument R.  The expansion is
 * parenthesized as a whole so postfix operators apply to the __m512 result. */
#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006778
6779static __inline__ __m512 __DEFAULT_FN_ATTRS
6780_mm512_scalef_ps (__m512 __A, __m512 __B)
6781{
6782 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6783 (__v16sf) __B,
6784 (__v16sf)
6785 _mm512_undefined_ps (),
6786 (__mmask16) -1,
6787 _MM_FROUND_CUR_DIRECTION);
6788}
6789
6790static __inline__ __m512 __DEFAULT_FN_ATTRS
6791_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6792{
6793 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6794 (__v16sf) __B,
6795 (__v16sf) __W,
6796 (__mmask16) __U,
6797 _MM_FROUND_CUR_DIRECTION);
6798}
6799
6800static __inline__ __m512 __DEFAULT_FN_ATTRS
6801_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6802{
6803 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6804 (__v16sf) __B,
6805 (__v16sf)
6806 _mm512_setzero_ps (),
6807 (__mmask16) __U,
6808 _MM_FROUND_CUR_DIRECTION);
6809}
6810
/* Calls __builtin_ia32_scalefsd_round_mask on A and B with a zero third
 * operand, an all-ones mask and rounding argument R.  The expansion is
 * parenthesized as a whole so postfix operators apply to the __m128d result. */
#define _mm_scalef_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006816
6817static __inline__ __m128d __DEFAULT_FN_ATTRS
6818_mm_scalef_sd (__m128d __A, __m128d __B)
6819{
6820 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6821 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6822 (__mmask8) -1,
6823 _MM_FROUND_CUR_DIRECTION);
6824}
6825
6826static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006827_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006828{
6829 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6830 (__v2df) __B,
6831 (__v2df) __W,
6832 (__mmask8) __U,
6833 _MM_FROUND_CUR_DIRECTION);
6834}
6835
/* Calls __builtin_ia32_scalefsd_round_mask on A and B with W as the third
 * operand, mask U and rounding argument R.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m128d result. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006841
6842static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006843_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006844{
6845 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6846 (__v2df) __B,
6847 (__v2df) _mm_setzero_pd (),
6848 (__mmask8) __U,
6849 _MM_FROUND_CUR_DIRECTION);
6850}
6851
/* Calls __builtin_ia32_scalefsd_round_mask on A and B with a zero third
 * operand, mask U and rounding argument R.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m128d result. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006857
/* Calls __builtin_ia32_scalefss_round_mask on A and B with a zero third
 * operand, an all-ones mask and rounding argument R.  The expansion is
 * parenthesized as a whole so postfix operators apply to the __m128 result. */
#define _mm_scalef_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006863
6864static __inline__ __m128 __DEFAULT_FN_ATTRS
6865_mm_scalef_ss (__m128 __A, __m128 __B)
6866{
6867 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6868 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6869 (__mmask8) -1,
6870 _MM_FROUND_CUR_DIRECTION);
6871}
6872
6873static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006874_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006875{
6876 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6877 (__v4sf) __B,
6878 (__v4sf) __W,
6879 (__mmask8) __U,
6880 _MM_FROUND_CUR_DIRECTION);
6881}
6882
/* Calls __builtin_ia32_scalefss_round_mask on A and B with W as the third
 * operand, mask U and rounding argument R.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m128 result. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006888
6889static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00006890_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006891{
6892 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6893 (__v4sf) __B,
6894 (__v4sf) _mm_setzero_ps (),
6895 (__mmask8) __U,
6896 _MM_FROUND_CUR_DIRECTION);
6897}
6898
/* Calls __builtin_ia32_scalefss_round_mask on A and B with a zero third
 * operand, mask U and rounding argument R.
 *
 * The caller's R is forwarded to the builtin (previously it was dropped and
 * _MM_FROUND_CUR_DIRECTION was always used), matching the sibling
 * _mm_maskz_scalef_round_sd macro.  The expansion is parenthesized as a whole
 * so postfix operators apply to the __m128 result. */
#define _mm_maskz_scalef_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
Michael Zuckerman1af947a2016-04-11 12:32:31 +00006905
Craig Topper1a441932016-11-12 07:16:59 +00006906static __inline__ __m512i __DEFAULT_FN_ATTRS
6907_mm512_srai_epi32(__m512i __A, int __B)
6908{
6909 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6910}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006911
Craig Topper1a441932016-11-12 07:16:59 +00006912static __inline__ __m512i __DEFAULT_FN_ATTRS
6913_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
6914{
6915 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
6916 (__v16si)_mm512_srai_epi32(__A, __B), \
6917 (__v16si)__W);
6918}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006919
Craig Topper1a441932016-11-12 07:16:59 +00006920static __inline__ __m512i __DEFAULT_FN_ATTRS
6921_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
6922 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
6923 (__v16si)_mm512_srai_epi32(__A, __B), \
6924 (__v16si)_mm512_setzero_si512());
6925}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006926
Craig Topper1a441932016-11-12 07:16:59 +00006927static __inline__ __m512i __DEFAULT_FN_ATTRS
6928_mm512_srai_epi64(__m512i __A, int __B)
6929{
6930 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6931}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006932
Craig Topper1a441932016-11-12 07:16:59 +00006933static __inline__ __m512i __DEFAULT_FN_ATTRS
6934_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
6935{
6936 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
6937 (__v8di)_mm512_srai_epi64(__A, __B), \
6938 (__v8di)__W);
6939}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006940
Craig Topper1a441932016-11-12 07:16:59 +00006941static __inline__ __m512i __DEFAULT_FN_ATTRS
6942_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
6943{
6944 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
6945 (__v8di)_mm512_srai_epi64(__A, __B), \
6946 (__v8di)_mm512_setzero_si512());
6947}
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00006948
/* Builds a 16-float result with __builtin_shufflevector.  Each 2-bit field of
 * imm (bits 1:0, 3:2, 5:4, 7:6) is multiplied by 4 to pick a group of four
 * consecutive floats: the first two groups use base indices 0..3 (A), the
 * last two use base indices 16..19 (B).  The expansion is parenthesized as a
 * whole so postfix operators apply to the __m512 result. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 4), \
                                   1 + ((((imm) >> 0) & 0x3) * 4), \
                                   2 + ((((imm) >> 0) & 0x3) * 4), \
                                   3 + ((((imm) >> 0) & 0x3) * 4), \
                                   0 + ((((imm) >> 2) & 0x3) * 4), \
                                   1 + ((((imm) >> 2) & 0x3) * 4), \
                                   2 + ((((imm) >> 2) & 0x3) * 4), \
                                   3 + ((((imm) >> 2) & 0x3) * 4), \
                                   16 + ((((imm) >> 4) & 0x3) * 4), \
                                   17 + ((((imm) >> 4) & 0x3) * 4), \
                                   18 + ((((imm) >> 4) & 0x3) * 4), \
                                   19 + ((((imm) >> 4) & 0x3) * 4), \
                                   16 + ((((imm) >> 6) & 0x3) * 4), \
                                   17 + ((((imm) >> 6) & 0x3) * 4), \
                                   18 + ((((imm) >> 6) & 0x3) * 4), \
                                   19 + ((((imm) >> 6) & 0x3) * 4)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006968
/* Passes _mm512_shuffle_f32x4(A, B, imm) and W to
 * __builtin_ia32_selectps_512 under mask U.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m512 result. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                (__v16sf)(__m512)(W)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006973
/* Passes _mm512_shuffle_f32x4(A, B, imm) and a zero vector to
 * __builtin_ia32_selectps_512 under mask U.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m512 result. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                (__v16sf)_mm512_setzero_ps()))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006978
/* Builds an 8-double result with __builtin_shufflevector.  Each 2-bit field
 * of imm is multiplied by 2 to pick a pair of consecutive doubles: the first
 * two pairs use base indices 0..1 (A), the last two use base indices 8..9
 * (B).  The expansion is parenthesized as a whole so postfix operators apply
 * to the __m512d result. */
#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), \
                                    0 + ((((imm) >> 0) & 0x3) * 2), \
                                    1 + ((((imm) >> 0) & 0x3) * 2), \
                                    0 + ((((imm) >> 2) & 0x3) * 2), \
                                    1 + ((((imm) >> 2) & 0x3) * 2), \
                                    8 + ((((imm) >> 4) & 0x3) * 2), \
                                    9 + ((((imm) >> 4) & 0x3) * 2), \
                                    8 + ((((imm) >> 6) & 0x3) * 2), \
                                    9 + ((((imm) >> 6) & 0x3) * 2)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006990
/* Passes _mm512_shuffle_f64x2(A, B, imm) and W to
 * __builtin_ia32_selectpd_512 under mask U.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m512d result. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                 (__v8df)(__m512d)(W)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00006995
/* Passes _mm512_shuffle_f64x2(A, B, imm) and a zero vector to
 * __builtin_ia32_selectpd_512 under mask U.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m512d result. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                 (__v8df)_mm512_setzero_pd()))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007000
/* Builds the result with __builtin_shufflevector over the operands viewed as
 * __v8di; each 2-bit field of imm is multiplied by 2 to pick a pair of
 * consecutive 64-bit elements (base indices 0..1 from A for the first two
 * pairs, 8..9 from B for the last two).  The expansion is parenthesized as a
 * whole so postfix operators apply to the __m512i result. */
#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                    (__v8di)(__m512i)(B), \
                                    0 + ((((imm) >> 0) & 0x3) * 2), \
                                    1 + ((((imm) >> 0) & 0x3) * 2), \
                                    0 + ((((imm) >> 2) & 0x3) * 2), \
                                    1 + ((((imm) >> 2) & 0x3) * 2), \
                                    8 + ((((imm) >> 4) & 0x3) * 2), \
                                    9 + ((((imm) >> 4) & 0x3) * 2), \
                                    8 + ((((imm) >> 6) & 0x3) * 2), \
                                    9 + ((((imm) >> 6) & 0x3) * 2)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007012
/* Passes _mm512_shuffle_i32x4(A, B, imm) and W to
 * __builtin_ia32_selectd_512 under mask U.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m512i result. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007017
/* Passes _mm512_shuffle_i32x4(A, B, imm) and a zero vector to
 * __builtin_ia32_selectd_512 under mask U.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m512i result. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007022
/* Builds an 8 x 64-bit result with __builtin_shufflevector.  Each 2-bit field
 * of imm is multiplied by 2 to pick a pair of consecutive elements: the first
 * two pairs use base indices 0..1 (A), the last two use base indices 8..9
 * (B).  The expansion is parenthesized as a whole so postfix operators apply
 * to the __m512i result. */
#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                    (__v8di)(__m512i)(B), \
                                    0 + ((((imm) >> 0) & 0x3) * 2), \
                                    1 + ((((imm) >> 0) & 0x3) * 2), \
                                    0 + ((((imm) >> 2) & 0x3) * 2), \
                                    1 + ((((imm) >> 2) & 0x3) * 2), \
                                    8 + ((((imm) >> 4) & 0x3) * 2), \
                                    9 + ((((imm) >> 4) & 0x3) * 2), \
                                    8 + ((((imm) >> 6) & 0x3) * 2), \
                                    9 + ((((imm) >> 6) & 0x3) * 2)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007034
/* Passes _mm512_shuffle_i64x2(A, B, imm) and W to
 * __builtin_ia32_selectq_512 under mask U.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m512i result. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                (__v8di)(__m512i)(W)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007039
/* Passes _mm512_shuffle_i64x2(A, B, imm) and a zero vector to
 * __builtin_ia32_selectq_512 under mask U.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m512i result. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                (__v8di)_mm512_setzero_si512()))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007044
/* Builds an 8-double result with __builtin_shufflevector.  Bit k of M is
 * added to the base index of result element k; the bases alternate between
 * A (0, 2, 4, 6) and B (8, 10, 12, 14).  The expansion is parenthesized as a
 * whole so postfix operators apply to the __m512d result. */
#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), \
                                    0  + (((M) >> 0) & 0x1), \
                                    8  + (((M) >> 1) & 0x1), \
                                    2  + (((M) >> 2) & 0x1), \
                                    10 + (((M) >> 3) & 0x1), \
                                    4  + (((M) >> 4) & 0x1), \
                                    12 + (((M) >> 5) & 0x1), \
                                    6  + (((M) >> 6) & 0x1), \
                                    14 + (((M) >> 7) & 0x1)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007056
/* Passes _mm512_shuffle_pd(A, B, M) and W to __builtin_ia32_selectpd_512
 * under mask U.  The expansion is parenthesized as a whole so postfix
 * operators apply to the __m512d result. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)(__m512d)(W)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007061
/* Passes _mm512_shuffle_pd(A, B, M) and a zero vector to
 * __builtin_ia32_selectpd_512 under mask U.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m512d result. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007066
/* Builds a 16-float result with __builtin_shufflevector.  Within each group
 * of four result elements, the 2-bit fields of M at bits 1:0 and 3:2 are
 * added to a base index into A (0, 4, 8, 12) and the fields at bits 5:4 and
 * 7:6 to a base index into B (16, 20, 24, 28).
 *
 * The result is cast to __m512, the type matching the __v16sf operands
 * (it was previously cast to __m512d).  The expansion is parenthesized as a
 * whole so postfix operators apply to the result. */
#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), \
                                   0  + (((M) >> 0) & 0x3), \
                                   0  + (((M) >> 2) & 0x3), \
                                   16 + (((M) >> 4) & 0x3), \
                                   16 + (((M) >> 6) & 0x3), \
                                   4  + (((M) >> 0) & 0x3), \
                                   4  + (((M) >> 2) & 0x3), \
                                   20 + (((M) >> 4) & 0x3), \
                                   20 + (((M) >> 6) & 0x3), \
                                   8  + (((M) >> 0) & 0x3), \
                                   8  + (((M) >> 2) & 0x3), \
                                   24 + (((M) >> 4) & 0x3), \
                                   24 + (((M) >> 6) & 0x3), \
                                   12 + (((M) >> 0) & 0x3), \
                                   12 + (((M) >> 2) & 0x3), \
                                   28 + (((M) >> 4) & 0x3), \
                                   28 + (((M) >> 6) & 0x3)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007086
/* Passes _mm512_shuffle_ps(A, B, M) and W to __builtin_ia32_selectps_512
 * under mask U.  The expansion is parenthesized as a whole so postfix
 * operators apply to the __m512 result. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)(__m512)(W)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007091
/* Passes _mm512_shuffle_ps(A, B, M) and a zero vector to
 * __builtin_ia32_selectps_512 under mask U.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m512 result. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)_mm512_setzero_ps()))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007096
/* Calls __builtin_ia32_sqrtsd_round_mask on A and B with a zero third
 * operand, an all-ones mask and rounding argument R.  The expansion is
 * parenthesized as a whole so postfix operators apply to the __m128d result. */
#define _mm_sqrt_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007102
7103static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007104_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007105{
Asaf Badouhf9cdb8d2016-07-05 11:36:21 +00007106 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7107 (__v2df) __B,
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007108 (__v2df) __W,
7109 (__mmask8) __U,
7110 _MM_FROUND_CUR_DIRECTION);
7111}
7112
/* Calls __builtin_ia32_sqrtsd_round_mask on A and B with W as the third
 * operand, mask U and rounding argument R.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m128d result. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007118
7119static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007120_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007121{
Asaf Badouhf9cdb8d2016-07-05 11:36:21 +00007122 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7123 (__v2df) __B,
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007124 (__v2df) _mm_setzero_pd (),
7125 (__mmask8) __U,
7126 _MM_FROUND_CUR_DIRECTION);
7127}
7128
/* Calls __builtin_ia32_sqrtsd_round_mask on A and B with a zero third
 * operand, mask U and rounding argument R.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m128d result. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007134
/* Calls __builtin_ia32_sqrtss_round_mask on A and B with a zero third
 * operand, an all-ones mask and rounding argument R.  The expansion is
 * parenthesized as a whole so postfix operators apply to the __m128 result. */
#define _mm_sqrt_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007140
7141static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007142_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007143{
Asaf Badouhf9cdb8d2016-07-05 11:36:21 +00007144 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7145 (__v4sf) __B,
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007146 (__v4sf) __W,
7147 (__mmask8) __U,
7148 _MM_FROUND_CUR_DIRECTION);
7149}
7150
/* Calls __builtin_ia32_sqrtss_round_mask on A and B with W as the third
 * operand, mask U and rounding argument R.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m128 result. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)(__m128)(W), (__mmask8)(U), \
                                            (int)(R)))
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007156
7157static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00007158_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman04fb3bc2016-04-12 07:59:39 +00007159{
7160 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7161 (__v4sf) __B,
7162 (__v4sf) _mm_setzero_ps (),
7163 (__mmask8) __U,
7164 _MM_FROUND_CUR_DIRECTION);
7165}
7166
/* Calls __builtin_ia32_sqrtss_round_mask on A and B with a zero third
 * operand, mask U and rounding argument R.  The expansion is parenthesized
 * as a whole so postfix operators apply to the __m128 result. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckerman6b5f4d82016-04-11 15:46:39 +00007172
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007173static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007174_mm512_broadcast_f32x4(__m128 __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007175{
Craig Topper367c86d2017-01-18 02:17:10 +00007176 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
7177 0, 1, 2, 3, 0, 1, 2, 3,
7178 0, 1, 2, 3, 0, 1, 2, 3);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007179}
7180
7181static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007182_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007183{
Craig Topper367c86d2017-01-18 02:17:10 +00007184 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7185 (__v16sf)_mm512_broadcast_f32x4(__A),
7186 (__v16sf)__O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007187}
7188
7189static __inline__ __m512 __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007190_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007191{
Craig Topper367c86d2017-01-18 02:17:10 +00007192 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7193 (__v16sf)_mm512_broadcast_f32x4(__A),
7194 (__v16sf)_mm512_setzero_ps());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007195}
7196
7197static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007198_mm512_broadcast_f64x4(__m256d __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007199{
Craig Topper367c86d2017-01-18 02:17:10 +00007200 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
7201 0, 1, 2, 3, 0, 1, 2, 3);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007202}
7203
7204static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007205_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007206{
Craig Topper367c86d2017-01-18 02:17:10 +00007207 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7208 (__v8df)_mm512_broadcast_f64x4(__A),
7209 (__v8df)__O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007210}
7211
7212static __inline__ __m512d __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007213_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007214{
Craig Topper367c86d2017-01-18 02:17:10 +00007215 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7216 (__v8df)_mm512_broadcast_f64x4(__A),
7217 (__v8df)_mm512_setzero_pd());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007218}
7219
7220static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007221_mm512_broadcast_i32x4(__m128i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007222{
Craig Topper367c86d2017-01-18 02:17:10 +00007223 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
7224 0, 1, 2, 3, 0, 1, 2, 3,
7225 0, 1, 2, 3, 0, 1, 2, 3);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007226}
7227
7228static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007229_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007230{
Craig Topper367c86d2017-01-18 02:17:10 +00007231 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7232 (__v16si)_mm512_broadcast_i32x4(__A),
7233 (__v16si)__O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007234}
7235
7236static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007237_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007238{
Craig Topper367c86d2017-01-18 02:17:10 +00007239 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7240 (__v16si)_mm512_broadcast_i32x4(__A),
7241 (__v16si)_mm512_setzero_si512());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007242}
7243
7244static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007245_mm512_broadcast_i64x4(__m256i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007246{
Craig Topper367c86d2017-01-18 02:17:10 +00007247 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
7248 0, 1, 2, 3, 0, 1, 2, 3);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007249}
7250
7251static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007252_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007253{
Craig Topper367c86d2017-01-18 02:17:10 +00007254 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7255 (__v8di)_mm512_broadcast_i64x4(__A),
7256 (__v8di)__O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007257}
7258
7259static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper367c86d2017-01-18 02:17:10 +00007260_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007261{
Craig Topper367c86d2017-01-18 02:17:10 +00007262 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7263 (__v8di)_mm512_broadcast_i64x4(__A),
7264 (__v8di)_mm512_setzero_si512());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007265}
7266
7267static __inline__ __m512d __DEFAULT_FN_ATTRS
7268_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
7269{
Simon Pilgrimf5a88372016-07-05 12:59:33 +00007270 return (__m512d)__builtin_ia32_selectpd_512(__M,
7271 (__v8df) _mm512_broadcastsd_pd(__A),
7272 (__v8df) __O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007273}
7274
7275static __inline__ __m512d __DEFAULT_FN_ATTRS
7276_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7277{
Simon Pilgrimf5a88372016-07-05 12:59:33 +00007278 return (__m512d)__builtin_ia32_selectpd_512(__M,
7279 (__v8df) _mm512_broadcastsd_pd(__A),
7280 (__v8df) _mm512_setzero_pd());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007281}
7282
7283static __inline__ __m512 __DEFAULT_FN_ATTRS
7284_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
7285{
Simon Pilgrimf5a88372016-07-05 12:59:33 +00007286 return (__m512)__builtin_ia32_selectps_512(__M,
7287 (__v16sf) _mm512_broadcastss_ps(__A),
7288 (__v16sf) __O);
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007289}
7290
7291static __inline__ __m512 __DEFAULT_FN_ATTRS
7292_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
7293{
Simon Pilgrimf5a88372016-07-05 12:59:33 +00007294 return (__m512)__builtin_ia32_selectps_512(__M,
7295 (__v16sf) _mm512_broadcastss_ps(__A),
7296 (__v16sf) _mm512_setzero_ps());
Michael Zuckerman8c2900f2016-04-27 11:43:14 +00007297}
7298
Michael Zuckermane1680612016-04-13 15:02:04 +00007299static __inline__ __m128i __DEFAULT_FN_ATTRS
7300_mm512_cvtsepi32_epi8 (__m512i __A)
7301{
7302 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7303 (__v16qi) _mm_undefined_si128 (),
7304 (__mmask16) -1);
7305}
7306
7307static __inline__ __m128i __DEFAULT_FN_ATTRS
7308_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7309{
7310 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7311 (__v16qi) __O, __M);
7312}
7313
7314static __inline__ __m128i __DEFAULT_FN_ATTRS
7315_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
7316{
7317 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7318 (__v16qi) _mm_setzero_si128 (),
7319 __M);
7320}
7321
7322static __inline__ void __DEFAULT_FN_ATTRS
7323_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7324{
7325 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7326}
7327
7328static __inline__ __m256i __DEFAULT_FN_ATTRS
7329_mm512_cvtsepi32_epi16 (__m512i __A)
7330{
7331 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7332 (__v16hi) _mm256_undefined_si256 (),
7333 (__mmask16) -1);
7334}
7335
7336static __inline__ __m256i __DEFAULT_FN_ATTRS
7337_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7338{
7339 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7340 (__v16hi) __O, __M);
7341}
7342
7343static __inline__ __m256i __DEFAULT_FN_ATTRS
7344_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
7345{
7346 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7347 (__v16hi) _mm256_setzero_si256 (),
7348 __M);
7349}
7350
7351static __inline__ void __DEFAULT_FN_ATTRS
7352_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7353{
7354 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7355}
7356
7357static __inline__ __m128i __DEFAULT_FN_ATTRS
7358_mm512_cvtsepi64_epi8 (__m512i __A)
7359{
7360 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7361 (__v16qi) _mm_undefined_si128 (),
7362 (__mmask8) -1);
7363}
7364
7365static __inline__ __m128i __DEFAULT_FN_ATTRS
7366_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7367{
7368 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7369 (__v16qi) __O, __M);
7370}
7371
7372static __inline__ __m128i __DEFAULT_FN_ATTRS
7373_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7374{
7375 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7376 (__v16qi) _mm_setzero_si128 (),
7377 __M);
7378}
7379
7380static __inline__ void __DEFAULT_FN_ATTRS
7381_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7382{
7383 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7384}
7385
7386static __inline__ __m256i __DEFAULT_FN_ATTRS
7387_mm512_cvtsepi64_epi32 (__m512i __A)
7388{
Michael Zuckermane1680612016-04-13 15:02:04 +00007389 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7390 (__v8si) _mm256_undefined_si256 (),
7391 (__mmask8) -1);
7392}
7393
7394static __inline__ __m256i __DEFAULT_FN_ATTRS
7395_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7396{
7397 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7398 (__v8si) __O, __M);
7399}
7400
7401static __inline__ __m256i __DEFAULT_FN_ATTRS
7402_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7403{
7404 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7405 (__v8si) _mm256_setzero_si256 (),
7406 __M);
7407}
7408
7409static __inline__ void __DEFAULT_FN_ATTRS
7410_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7411{
7412 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7413}
7414
7415static __inline__ __m128i __DEFAULT_FN_ATTRS
7416_mm512_cvtsepi64_epi16 (__m512i __A)
7417{
7418 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7419 (__v8hi) _mm_undefined_si128 (),
7420 (__mmask8) -1);
7421}
7422
7423static __inline__ __m128i __DEFAULT_FN_ATTRS
7424_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7425{
7426 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7427 (__v8hi) __O, __M);
7428}
7429
7430static __inline__ __m128i __DEFAULT_FN_ATTRS
7431_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7432{
7433 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7434 (__v8hi) _mm_setzero_si128 (),
7435 __M);
7436}
7437
7438static __inline__ void __DEFAULT_FN_ATTRS
7439_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7440{
7441 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7442}
7443
Michael Zuckermand8715312016-04-14 06:48:09 +00007444static __inline__ __m128i __DEFAULT_FN_ATTRS
7445_mm512_cvtusepi32_epi8 (__m512i __A)
7446{
7447 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7448 (__v16qi) _mm_undefined_si128 (),
7449 (__mmask16) -1);
7450}
7451
7452static __inline__ __m128i __DEFAULT_FN_ATTRS
7453_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7454{
7455 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7456 (__v16qi) __O,
7457 __M);
7458}
7459
7460static __inline__ __m128i __DEFAULT_FN_ATTRS
7461_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7462{
7463 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7464 (__v16qi) _mm_setzero_si128 (),
7465 __M);
7466}
7467
7468static __inline__ void __DEFAULT_FN_ATTRS
7469_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7470{
7471 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7472}
7473
7474static __inline__ __m256i __DEFAULT_FN_ATTRS
7475_mm512_cvtusepi32_epi16 (__m512i __A)
7476{
7477 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7478 (__v16hi) _mm256_undefined_si256 (),
7479 (__mmask16) -1);
7480}
7481
7482static __inline__ __m256i __DEFAULT_FN_ATTRS
7483_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7484{
7485 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7486 (__v16hi) __O,
7487 __M);
7488}
7489
7490static __inline__ __m256i __DEFAULT_FN_ATTRS
7491_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7492{
7493 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7494 (__v16hi) _mm256_setzero_si256 (),
7495 __M);
7496}
7497
7498static __inline__ void __DEFAULT_FN_ATTRS
7499_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7500{
7501 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7502}
7503
7504static __inline__ __m128i __DEFAULT_FN_ATTRS
7505_mm512_cvtusepi64_epi8 (__m512i __A)
7506{
7507 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7508 (__v16qi) _mm_undefined_si128 (),
7509 (__mmask8) -1);
7510}
7511
7512static __inline__ __m128i __DEFAULT_FN_ATTRS
7513_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7514{
7515 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7516 (__v16qi) __O,
7517 __M);
7518}
7519
7520static __inline__ __m128i __DEFAULT_FN_ATTRS
7521_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7522{
7523 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7524 (__v16qi) _mm_setzero_si128 (),
7525 __M);
7526}
7527
7528static __inline__ void __DEFAULT_FN_ATTRS
7529_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7530{
7531 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7532}
7533
7534static __inline__ __m256i __DEFAULT_FN_ATTRS
7535_mm512_cvtusepi64_epi32 (__m512i __A)
7536{
7537 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7538 (__v8si) _mm256_undefined_si256 (),
7539 (__mmask8) -1);
7540}
7541
7542static __inline__ __m256i __DEFAULT_FN_ATTRS
7543_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7544{
7545 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7546 (__v8si) __O, __M);
7547}
7548
7549static __inline__ __m256i __DEFAULT_FN_ATTRS
7550_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7551{
7552 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7553 (__v8si) _mm256_setzero_si256 (),
7554 __M);
7555}
7556
7557static __inline__ void __DEFAULT_FN_ATTRS
7558_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7559{
7560 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7561}
7562
7563static __inline__ __m128i __DEFAULT_FN_ATTRS
7564_mm512_cvtusepi64_epi16 (__m512i __A)
7565{
7566 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7567 (__v8hi) _mm_undefined_si128 (),
7568 (__mmask8) -1);
7569}
7570
7571static __inline__ __m128i __DEFAULT_FN_ATTRS
7572_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7573{
7574 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7575 (__v8hi) __O, __M);
7576}
7577
7578static __inline__ __m128i __DEFAULT_FN_ATTRS
7579_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7580{
7581 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7582 (__v8hi) _mm_setzero_si128 (),
7583 __M);
7584}
7585
7586static __inline__ void __DEFAULT_FN_ATTRS
7587_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7588{
7589 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7590}
7591
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007592static __inline__ __m128i __DEFAULT_FN_ATTRS
7593_mm512_cvtepi32_epi8 (__m512i __A)
7594{
Craig Topper25de41c2018-05-14 17:50:40 +00007595 return (__m128i)__builtin_convertvector((__v16si)__A, __v16qi);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007596}
7597
7598static __inline__ __m128i __DEFAULT_FN_ATTRS
7599_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7600{
Craig Topper9d146bb2018-05-15 03:17:52 +00007601 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7602 (__v16qi) __O, __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007603}
7604
7605static __inline__ __m128i __DEFAULT_FN_ATTRS
7606_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7607{
Craig Topper9d146bb2018-05-15 03:17:52 +00007608 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7609 (__v16qi) _mm_setzero_si128 (),
7610 __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007611}
7612
7613static __inline__ void __DEFAULT_FN_ATTRS
7614_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7615{
7616 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7617}
7618
7619static __inline__ __m256i __DEFAULT_FN_ATTRS
7620_mm512_cvtepi32_epi16 (__m512i __A)
7621{
Craig Topper25de41c2018-05-14 17:50:40 +00007622 return (__m256i)__builtin_convertvector((__v16si)__A, __v16hi);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007623}
7624
7625static __inline__ __m256i __DEFAULT_FN_ATTRS
7626_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7627{
Craig Topper9d146bb2018-05-15 03:17:52 +00007628 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7629 (__v16hi) __O, __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007630}
7631
7632static __inline__ __m256i __DEFAULT_FN_ATTRS
7633_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7634{
Craig Topper9d146bb2018-05-15 03:17:52 +00007635 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7636 (__v16hi) _mm256_setzero_si256 (),
7637 __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007638}
7639
7640static __inline__ void __DEFAULT_FN_ATTRS
7641_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7642{
7643 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7644}
7645
7646static __inline__ __m128i __DEFAULT_FN_ATTRS
7647_mm512_cvtepi64_epi8 (__m512i __A)
7648{
7649 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7650 (__v16qi) _mm_undefined_si128 (),
7651 (__mmask8) -1);
7652}
7653
7654static __inline__ __m128i __DEFAULT_FN_ATTRS
7655_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7656{
7657 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7658 (__v16qi) __O, __M);
7659}
7660
7661static __inline__ __m128i __DEFAULT_FN_ATTRS
7662_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7663{
7664 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7665 (__v16qi) _mm_setzero_si128 (),
7666 __M);
7667}
7668
7669static __inline__ void __DEFAULT_FN_ATTRS
7670_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7671{
7672 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7673}
7674
7675static __inline__ __m256i __DEFAULT_FN_ATTRS
7676_mm512_cvtepi64_epi32 (__m512i __A)
7677{
Craig Topper25de41c2018-05-14 17:50:40 +00007678 return (__m256i)__builtin_convertvector((__v8di) __A, __v8si);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007679}
7680
7681static __inline__ __m256i __DEFAULT_FN_ATTRS
7682_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7683{
Craig Topper25de41c2018-05-14 17:50:40 +00007684 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
7685 (__v8si)_mm512_cvtepi64_epi32(__A),
7686 (__v8si)__O);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007687}
7688
7689static __inline__ __m256i __DEFAULT_FN_ATTRS
7690_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7691{
Craig Topper25de41c2018-05-14 17:50:40 +00007692 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
7693 (__v8si)_mm512_cvtepi64_epi32(__A),
7694 (__v8si)_mm256_setzero_si256());
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007695}
7696
7697static __inline__ void __DEFAULT_FN_ATTRS
7698_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7699{
7700 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7701}
7702
7703static __inline__ __m128i __DEFAULT_FN_ATTRS
7704_mm512_cvtepi64_epi16 (__m512i __A)
7705{
Craig Topper25de41c2018-05-14 17:50:40 +00007706 return (__m128i)__builtin_convertvector((__v8di)__A, __v8hi);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007707}
7708
7709static __inline__ __m128i __DEFAULT_FN_ATTRS
7710_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7711{
Craig Topper9d146bb2018-05-15 03:17:52 +00007712 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7713 (__v8hi) __O, __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007714}
7715
7716static __inline__ __m128i __DEFAULT_FN_ATTRS
7717_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7718{
Craig Topper9d146bb2018-05-15 03:17:52 +00007719 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7720 (__v8hi) _mm_setzero_si128 (),
7721 __M);
Michael Zuckerman0a3508a2016-04-14 07:56:51 +00007722}
7723
7724static __inline__ void __DEFAULT_FN_ATTRS
7725_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7726{
7727 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7728}
7729
/* Extract four consecutive 32-bit lanes of A as an __m128i.  Only the low
 * two bits of imm are used; the selected lanes are 4*(imm&3)+0 .. +3.
 * The whole expansion is parenthesized so that operators applied at the
 * call site bind to the cast result, not to the builtin call. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                    (__v16si)_mm512_undefined_epi32(), \
                                    0 + ((imm) & 0x3) * 4, \
                                    1 + ((imm) & 0x3) * 4, \
                                    2 + ((imm) & 0x3) * 4, \
                                    3 + ((imm) & 0x3) * 4))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007737
/* Masked form: the result of _mm512_extracti32x4_epi32(A, imm) is combined
 * with W through __builtin_ia32_selectd_128 under mask U.  The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                            (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                            (__v4si)(__m128i)(W)))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007742
/* Zeroing form: the result of _mm512_extracti32x4_epi32(A, imm) is combined
 * with an all-zero vector through __builtin_ia32_selectd_128 under mask U.
 * The expansion is fully parenthesized so it composes safely. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                            (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                            (__v4si)_mm_setzero_si128()))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007747
/* Extract four consecutive 64-bit lanes of A as an __m256i.  Bit 0 of imm
 * picks lanes 4..7 when set and lanes 0..3 when clear.  The whole expansion
 * is parenthesized so that operators applied at the call site bind to the
 * cast result, not to the builtin call. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                    (__v8di)_mm512_undefined_epi32(), \
                                    ((imm) & 1) ? 4 : 0, \
                                    ((imm) & 1) ? 5 : 1, \
                                    ((imm) & 1) ? 6 : 2, \
                                    ((imm) & 1) ? 7 : 3))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007755
/* Masked form: the result of _mm512_extracti64x4_epi64(A, imm) is combined
 * with W through __builtin_ia32_selectq_256 under mask U.  The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                            (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                            (__v4di)(__m256i)(W)))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007760
/* Zeroing form: the result of _mm512_extracti64x4_epi64(A, imm) is combined
 * with an all-zero vector through __builtin_ia32_selectq_256 under mask U.
 * The expansion is fully parenthesized so it composes safely. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                            (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                            (__v4di)_mm256_setzero_si256()))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007765
/* Build an __m512d from A with one group of four doubles replaced by B.
 * Shuffle indices 8..11 address B (widened with _mm512_castpd256_pd512):
 * when bit 0 of imm is set B lands in lanes 4..7, otherwise in lanes 0..3.
 * The whole expansion is parenthesized so that operators applied at the
 * call site bind to the cast result, not to the builtin call. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                               (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
                               ((imm) & 0x1) ?  0 :  8, \
                               ((imm) & 0x1) ?  1 :  9, \
                               ((imm) & 0x1) ?  2 : 10, \
                               ((imm) & 0x1) ?  3 : 11, \
                               ((imm) & 0x1) ?  8 :  4, \
                               ((imm) & 0x1) ?  9 :  5, \
                               ((imm) & 0x1) ? 10 :  6, \
                               ((imm) & 0x1) ? 11 :  7))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007777
/* Masked form: the result of _mm512_insertf64x4(A, B, imm) is combined with
 * W through __builtin_ia32_selectpd_512 under mask U.  The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)(__m512d)(W)))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007782
/* Zeroing form: the result of _mm512_insertf64x4(A, B, imm) is combined
 * with an all-zero vector through __builtin_ia32_selectpd_512 under mask U.
 * The expansion is fully parenthesized so it composes safely. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007787
/* Build an __m512i from A with one group of four 64-bit lanes replaced by
 * B.  Shuffle indices 8..11 address B (widened with
 * _mm512_castsi256_si512): when bit 0 of imm is set B lands in lanes 4..7,
 * otherwise in lanes 0..3.  The whole expansion is parenthesized so that
 * operators applied at the call site bind to the cast result. */
#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                               (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
                               ((imm) & 0x1) ?  0 :  8, \
                               ((imm) & 0x1) ?  1 :  9, \
                               ((imm) & 0x1) ?  2 : 10, \
                               ((imm) & 0x1) ?  3 : 11, \
                               ((imm) & 0x1) ?  8 :  4, \
                               ((imm) & 0x1) ?  9 :  5, \
                               ((imm) & 0x1) ? 10 :  6, \
                               ((imm) & 0x1) ? 11 :  7))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007799
/* Masked form: the result of _mm512_inserti64x4(A, B, imm) is combined with
 * W through __builtin_ia32_selectq_512 under mask U.  The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W)))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007804
/* Zeroing form: the result of _mm512_inserti64x4(A, B, imm) is combined
 * with an all-zero vector through __builtin_ia32_selectq_512 under mask U.
 * The expansion is fully parenthesized so it composes safely. */
#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()))
Michael Zuckermanef2979a2016-04-19 15:18:23 +00007809
/* Build an __m512 from A with one group of four floats replaced by B.
 * Shuffle indices 16..19 address B (widened with _mm512_castps128_ps512);
 * (imm & 3) chooses which group of four output lanes receives them, and all
 * other lanes keep the corresponding element of A.  The whole expansion is
 * parenthesized so that operators applied at the call site bind to the cast
 * result, not to the builtin call. */
#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
                                (((imm) & 0x3) == 0) ? 16 :  0, \
                                (((imm) & 0x3) == 0) ? 17 :  1, \
                                (((imm) & 0x3) == 0) ? 18 :  2, \
                                (((imm) & 0x3) == 0) ? 19 :  3, \
                                (((imm) & 0x3) == 1) ? 16 :  4, \
                                (((imm) & 0x3) == 1) ? 17 :  5, \
                                (((imm) & 0x3) == 1) ? 18 :  6, \
                                (((imm) & 0x3) == 1) ? 19 :  7, \
                                (((imm) & 0x3) == 2) ? 16 :  8, \
                                (((imm) & 0x3) == 2) ? 17 :  9, \
                                (((imm) & 0x3) == 2) ? 18 : 10, \
                                (((imm) & 0x3) == 2) ? 19 : 11, \
                                (((imm) & 0x3) == 3) ? 16 : 12, \
                                (((imm) & 0x3) == 3) ? 17 : 13, \
                                (((imm) & 0x3) == 3) ? 18 : 14, \
                                (((imm) & 0x3) == 3) ? 19 : 15))
Craig Topperdca1f232016-05-15 21:26:20 +00007829
/* Masked form: the result of _mm512_insertf32x4(A, B, imm) is combined with
 * W through __builtin_ia32_selectps_512 under mask U.  The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W)))
Craig Topperdca1f232016-05-15 21:26:20 +00007834
/* Zeroing form: the result of _mm512_insertf32x4(A, B, imm) is combined
 * with an all-zero vector through __builtin_ia32_selectps_512 under mask U.
 * The expansion is fully parenthesized so it composes safely. */
#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()))
Craig Topperdca1f232016-05-15 21:26:20 +00007839
/* Build an __m512i from A with one group of four 32-bit lanes replaced by
 * B.  Shuffle indices 16..19 address B (widened with
 * _mm512_castsi128_si512); (imm & 3) chooses which group of four output
 * lanes receives them, and all other lanes keep the corresponding element
 * of A.  The whole expansion is parenthesized so that operators applied at
 * the call site bind to the cast result, not to the builtin call. */
#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
                                (((imm) & 0x3) == 0) ? 16 :  0, \
                                (((imm) & 0x3) == 0) ? 17 :  1, \
                                (((imm) & 0x3) == 0) ? 18 :  2, \
                                (((imm) & 0x3) == 0) ? 19 :  3, \
                                (((imm) & 0x3) == 1) ? 16 :  4, \
                                (((imm) & 0x3) == 1) ? 17 :  5, \
                                (((imm) & 0x3) == 1) ? 18 :  6, \
                                (((imm) & 0x3) == 1) ? 19 :  7, \
                                (((imm) & 0x3) == 2) ? 16 :  8, \
                                (((imm) & 0x3) == 2) ? 17 :  9, \
                                (((imm) & 0x3) == 2) ? 18 : 10, \
                                (((imm) & 0x3) == 2) ? 19 : 11, \
                                (((imm) & 0x3) == 3) ? 16 : 12, \
                                (((imm) & 0x3) == 3) ? 17 : 13, \
                                (((imm) & 0x3) == 3) ? 18 : 14, \
                                (((imm) & 0x3) == 3) ? 19 : 15))
Craig Topperdca1f232016-05-15 21:26:20 +00007859
/* Masked form: the result of _mm512_inserti32x4(A, B, imm) is combined with
 * W through __builtin_ia32_selectd_512 under mask U.  The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))
Craig Topperdca1f232016-05-15 21:26:20 +00007864
/* Zeroing form: the result of _mm512_inserti32x4(A, B, imm) is combined
 * with an all-zero vector through __builtin_ia32_selectd_512 under mask U.
 * The expansion is fully parenthesized so it composes safely. */
#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))
Craig Topperdca1f232016-05-15 21:26:20 +00007869
/* Calls the getmantpd512 builtin on A with an explicit rounding argument R.
 * B and C are packed into one immediate (C shifted left by two, OR-ed with
 * B).  All mask bits are set and the passthrough operand is undefined.
 * The expansion is fully parenthesized so it composes safely. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007875
/* Masked getmantpd512 call with explicit rounding argument R: W is the
 * third (vector) operand and U the mask.  B and C are packed into one
 * immediate (C shifted left by two, OR-ed with B).  The expansion is fully
 * parenthesized so it composes safely. */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007881
/* Zeroing getmantpd512 call with explicit rounding argument R: the third
 * operand is an all-zero vector and U the mask.  B and C are packed into
 * one immediate (C shifted left by two, OR-ed with B).  The expansion is
 * fully parenthesized so it composes safely. */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007887
/* Calls the getmantpd512 builtin on A with _MM_FROUND_CUR_DIRECTION as the
 * rounding argument.  B and C are packed into one immediate (C shifted left
 * by two, OR-ed with B).  All mask bits are set; the third operand is an
 * all-zero vector.  The expansion is fully parenthesized so it composes
 * safely inside larger expressions. */
#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007894
/* Masked getmantpd512 call with _MM_FROUND_CUR_DIRECTION: W is the third
 * (vector) operand and U the mask.  B and C are packed into one immediate
 * (C shifted left by two, OR-ed with B).  The expansion is fully
 * parenthesized so it composes safely. */
#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007901
/* Zeroing getmantpd512 call with _MM_FROUND_CUR_DIRECTION: the third
 * operand is an all-zero vector and U the mask.  B and C are packed into
 * one immediate (C shifted left by two, OR-ed with B).  The expansion is
 * fully parenthesized so it composes safely. */
#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007908
/* Calls the getmantps512 builtin on A with an explicit rounding argument R.
 * B and C are packed into one immediate (C shifted left by two, OR-ed with
 * B).  All mask bits are set and the passthrough operand is undefined.
 * The expansion is fully parenthesized so it composes safely. */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007914
/* Masked getmantps512 call with explicit rounding argument R: W is the
 * third (vector) operand and U the mask.  B and C are packed into one
 * immediate (C shifted left by two, OR-ed with B).  The expansion is fully
 * parenthesized so it composes safely. */
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007920
/* Zeroing getmantps512 call with explicit rounding argument R: the third
 * operand is an all-zero vector and U the mask.  B and C are packed into
 * one immediate (C shifted left by two, OR-ed with B).  The expansion is
 * fully parenthesized so it composes safely. */
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007926
/* Calls the getmantps512 builtin on A with _MM_FROUND_CUR_DIRECTION as the
 * rounding argument.  B and C are packed into one immediate (C shifted left
 * by two, OR-ed with B).  All mask bits are set and the passthrough operand
 * is undefined.  The expansion is fully parenthesized so it composes
 * safely inside larger expressions. */
#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007933
/* Masked getmantps512 call with _MM_FROUND_CUR_DIRECTION: W is the third
 * (vector) operand and U the mask.  B and C are packed into one immediate
 * (C shifted left by two, OR-ed with B).  The expansion is fully
 * parenthesized so it composes safely. */
#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007940
/* Zeroing getmantps512 call with _MM_FROUND_CUR_DIRECTION: the third
 * operand is an all-zero vector and U the mask.  B and C are packed into
 * one immediate (C shifted left by two, OR-ed with B).  The expansion is
 * fully parenthesized so it composes safely. */
#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007947
/* Calls the getexppd512 builtin on A with an explicit rounding argument R.
 * All mask bits are set and the passthrough operand is undefined.  The
 * expansion is fully parenthesized so it composes safely. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007952
/* Masked getexppd512 call with explicit rounding argument R: W is the
 * second (vector) operand and U the mask.  The expansion is fully
 * parenthesized so it composes safely. */
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007957
/* Zeroing getexppd512 call with explicit rounding argument R: the second
 * operand is an all-zero vector and U the mask.  The expansion is fully
 * parenthesized so it composes safely. */
#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007962
7963static __inline__ __m512d __DEFAULT_FN_ATTRS
7964_mm512_getexp_pd (__m512d __A)
7965{
7966 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7967 (__v8df) _mm512_undefined_pd (),
7968 (__mmask8) -1,
7969 _MM_FROUND_CUR_DIRECTION);
7970}
7971
7972static __inline__ __m512d __DEFAULT_FN_ATTRS
7973_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7974{
7975 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7976 (__v8df) __W,
7977 (__mmask8) __U,
7978 _MM_FROUND_CUR_DIRECTION);
7979}
7980
7981static __inline__ __m512d __DEFAULT_FN_ATTRS
7982_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
7983{
7984 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7985 (__v8df) _mm512_setzero_pd (),
7986 (__mmask8) __U,
7987 _MM_FROUND_CUR_DIRECTION);
7988}
7989
/* Calls the getexpps512 builtin on A with an explicit rounding argument R.
 * All mask bits are set and the passthrough operand is undefined.  The
 * expansion is fully parenthesized so it composes safely. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007994
/* Masked getexpps512 call with explicit rounding argument R: W is the
 * second (vector) operand and U the mask.  The expansion is fully
 * parenthesized so it composes safely. */
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00007999
/* Zeroing getexpps512 call with explicit rounding argument R: the second
 * operand is an all-zero vector and U the mask.  The expansion is fully
 * parenthesized so it composes safely. */
#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
Michael Zuckerman6fa512c2016-04-19 17:10:29 +00008004
8005static __inline__ __m512 __DEFAULT_FN_ATTRS
8006_mm512_getexp_ps (__m512 __A)
8007{
8008 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8009 (__v16sf) _mm512_undefined_ps (),
8010 (__mmask16) -1,
8011 _MM_FROUND_CUR_DIRECTION);
8012}
8013
8014static __inline__ __m512 __DEFAULT_FN_ATTRS
8015_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
8016{
8017 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8018 (__v16sf) __W,
8019 (__mmask16) __U,
8020 _MM_FROUND_CUR_DIRECTION);
8021}
8022
8023static __inline__ __m512 __DEFAULT_FN_ATTRS
8024_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
8025{
8026 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8027 (__v16sf) _mm512_setzero_ps (),
8028 (__mmask16) __U,
8029 _MM_FROUND_CUR_DIRECTION);
8030}
8031
/* Calls the gatherdiv16sf builtin with an undefined __v8sf passthrough,
 * addr as a float pointer, index as __v8di, all mask bits set, and scale
 * as an int.  The result is cast to __m256.  The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (float const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008037
/* Masked gatherdiv16sf call: v1_old is the first (vector) operand, mask the
 * mask operand; addr, index and scale are forwarded as float pointer,
 * __v8di and int.  The expansion is fully parenthesized so it composes
 * safely inside larger expressions. */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (float const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008043
/* Calls the gatherdiv16si builtin with an undefined __v8si passthrough,
 * addr as an int pointer, index as __v8di, all mask bits set, and scale as
 * an int.  The passthrough is built from the integer helper
 * _mm256_undefined_si256() so the operand matches the integer vector type
 * it is cast to.  The expansion is fully parenthesized so it composes
 * safely inside larger expressions. */
#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (int const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008049
/* Masked gatherdiv16si call: v1_old is the first (vector) operand, mask the
 * mask operand; addr, index and scale are forwarded as int pointer, __v8di
 * and int.  The expansion is fully parenthesized so it composes safely
 * inside larger expressions. */
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (int const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008055
/* Calls the gatherdiv8df builtin with an undefined __v8df passthrough, addr
 * as a double pointer, index as __v8di, all mask bits set, and scale as an
 * int.  The expansion is fully parenthesized so it composes safely inside
 * larger expressions. */
#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (double const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008061
/* Masked form of the 64-bit-index double gather: v1_old is the builtin's
 * source operand, mask its mask operand.  The expansion is parenthesized so
 * the (__m512d) cast covers the whole call. */
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
      (double const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008067
/* Gather via 64-bit indices into a __m512i of 64-bit integers with an
 * all-ones mask.  The undefined pass-through uses the integer-typed
 * _mm512_undefined_epi32(), as _mm512_i32gather_epi64 does.  The expansion is
 * parenthesized so the (__m512i) cast covers the whole call. */
#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
      (long long const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008073
/* Masked form of the 64-bit-index 64-bit-integer gather: v1_old is the
 * builtin's source operand, mask its mask operand.  The expansion is
 * parenthesized so the (__m512i) cast covers the whole call. */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
      (long long const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008079
/* Gather via 32-bit indices into a __m512: undefined pass-through vector and
 * an all-ones 16-bit mask.  The expansion is parenthesized so the (__m512)
 * cast covers the whole builtin call.
 * NOTE(review): index is cast through the float vector types here, whereas
 * _mm512_i32scatter_ps uses __v16si; presumably this mirrors the builtin's
 * declared parameter type -- confirm against the builtin definition. */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
      (float const *)(addr), \
      (__v16sf)(__m512)(index), \
      (__mmask16)-1, (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008085
/* Masked form of the 32-bit-index float gather: v1_old is the builtin's
 * source operand, mask its 16-bit mask operand.  The expansion is
 * parenthesized so the (__m512) cast covers the whole call.
 * NOTE(review): index is cast through the float vector types, presumably to
 * match the builtin's declared parameter type -- confirm. */
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
      (float const *)(addr), \
      (__v16sf)(__m512)(index), \
      (__mmask16)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008091
/* Gather via 32-bit indices into a __m512i of 32-bit integers: undefined
 * pass-through vector and an all-ones 16-bit mask.  The expansion is
 * parenthesized so the (__m512i) cast covers the whole call. */
#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
      (int const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)-1, (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008097
/* Masked form of the 32-bit-index 32-bit-integer gather: v1_old is the
 * builtin's source operand, mask its 16-bit mask operand.  The expansion is
 * parenthesized so the (__m512i) cast covers the whole call. */
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
      (int const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008103
/* Gather via 32-bit indices (a __m256i) into a __m512d: undefined
 * pass-through vector and an all-ones mask.  The expansion is parenthesized
 * so the (__m512d) cast covers the whole call. */
#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
      (double const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)-1, (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008109
/* Masked form of the 32-bit-index double gather: v1_old is the builtin's
 * source operand, mask its mask operand.  The expansion is parenthesized so
 * the (__m512d) cast covers the whole call. */
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
      (double const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008115
/* Gather via 32-bit indices (a __m256i) into a __m512i of 64-bit integers:
 * undefined pass-through vector and an all-ones mask.  The expansion is
 * parenthesized so the (__m512i) cast covers the whole call. */
#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
      (long long const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)-1, (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008121
/* Masked form of the 32-bit-index 64-bit-integer gather: v1_old is the
 * builtin's source operand, mask its mask operand.  The expansion is
 * parenthesized so the (__m512i) cast covers the whole call. */
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
      (long long const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), (int)(scale)))
Michael Zuckerman4fa96af2016-04-21 12:47:27 +00008127
/* Scatter the __m256 v1 through 64-bit indices with an all-ones mask;
 * forwards to __builtin_ia32_scatterdiv16sf. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf( \
      (float *)(addr), \
      (__mmask8)(-1), \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008132
/* Masked scatter of the __m256 v1 through 64-bit indices; mask is passed as
 * the builtin's __mmask8 operand. */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf( \
      (float *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008137
/* Scatter the __m256i v1 (as __v8si) through 64-bit indices with an all-ones
 * mask; forwards to __builtin_ia32_scatterdiv16si. */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si( \
      (int *)(addr), \
      (__mmask8)(-1), \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008142
/* Masked scatter of the __m256i v1 (as __v8si) through 64-bit indices; mask
 * is passed as the builtin's __mmask8 operand. */
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si( \
      (int *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008147
/* Scatter the __m512d v1 through 64-bit indices with an all-ones mask;
 * forwards to __builtin_ia32_scatterdiv8df. */
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df( \
      (double *)(addr), \
      (__mmask8)(-1), \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008152
/* Masked scatter of the __m512d v1 through 64-bit indices; mask is passed as
 * the builtin's __mmask8 operand. */
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df( \
      (double *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008157
/* Scatter the __m512i v1 (as __v8di) through 64-bit indices with an all-ones
 * mask; forwards to __builtin_ia32_scatterdiv8di. */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di( \
      (long long *)(addr), \
      (__mmask8)(-1), \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008162
/* Masked scatter of the __m512i v1 (as __v8di) through 64-bit indices; mask
 * is passed as the builtin's __mmask8 operand. */
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di( \
      (long long *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008167
/* Scatter the __m512 v1 through 32-bit indices with an all-ones 16-bit mask;
 * forwards to __builtin_ia32_scattersiv16sf. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf( \
      (float *)(addr), \
      (__mmask16)(-1), \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008172
/* Masked scatter of the __m512 v1 through 32-bit indices; mask is passed as
 * the builtin's __mmask16 operand. */
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf( \
      (float *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008177
/* Scatter the __m512i v1 (as __v16si) through 32-bit indices with an
 * all-ones 16-bit mask; forwards to __builtin_ia32_scattersiv16si. */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si( \
      (int *)(addr), \
      (__mmask16)(-1), \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008182
/* Masked scatter of the __m512i v1 (as __v16si) through 32-bit indices; mask
 * is passed as the builtin's __mmask16 operand. */
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si( \
      (int *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008187
/* Scatter the __m512d v1 through 32-bit indices (a __m256i) with an all-ones
 * mask; forwards to __builtin_ia32_scattersiv8df. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df( \
      (double *)(addr), \
      (__mmask8)(-1), \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008192
/* Masked scatter of the __m512d v1 through 32-bit indices (a __m256i); mask
 * is passed as the builtin's __mmask8 operand. */
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df( \
      (double *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008197
/* Scatter the __m512i v1 (as __v8di) through 32-bit indices (a __m256i) with
 * an all-ones mask; forwards to __builtin_ia32_scattersiv8di. */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di( \
      (long long *)(addr), \
      (__mmask8)(-1), \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008202
/* Masked scatter of the __m512i v1 (as __v8di) through 32-bit indices (a
 * __m256i); mask is passed as the builtin's __mmask8 operand. */
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di( \
      (long long *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
Michael Zuckermanfcf32c22016-04-25 13:01:40 +00008207
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008208static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008209_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008210{
Gabor Buella70d8d512018-05-30 15:27:49 +00008211 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
8212 (__v4sf) __A,
8213 (__v4sf) __B);
8214 __W[0] = (__U & 1) ? __Z[0] : __W[0];
8215 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008216}
8217
/* Masked scalar single-precision fmadd with explicit rounding R.  Operands
 * and result use the single-precision types (__m128 / __v4sf) that the
 * sibling *_round_ss macros use.  The expansion is parenthesized so the
 * (__m128) cast covers the whole builtin call. */
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008223
8224static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008225_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008226{
Gabor Buella70d8d512018-05-30 15:27:49 +00008227 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
8228 (__v4sf) __B,
8229 (__v4sf) __C);
8230 __A[0] = (__U & 1) ? __Z[0] : 0;
8231 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008232}
8233
/* Zero-masked scalar single-precision fmadd with explicit rounding.  The
 * caller's R is forwarded to the builtin, as in the other *_round_ss macros.
 * The expansion is parenthesized so the (__m128) cast covers the whole
 * call. */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(C), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008239
8240static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008241_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008242{
Gabor Buella70d8d512018-05-30 15:27:49 +00008243 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
8244 (__v4sf) __X,
8245 (__v4sf) __Y);
8246 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8247 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008248}
8249
/* mask3 scalar single-precision fmadd with explicit rounding R; forwards to
 * __builtin_ia32_vfmaddss3_mask3.  The expansion is parenthesized so the
 * (__m128) cast covers the whole call. */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008255
8256static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008257_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008258{
Gabor Buella70d8d512018-05-30 15:27:49 +00008259 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
8260 (__v4sf) __A,
8261 -(__v4sf) __B);
8262 __W[0] = (__U & 1) ? __Z[0] : __W[0];
8263 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008264}
8265
/* Masked scalar single-precision fmsub with explicit rounding R: the masked
 * fmadd builtin with B negated.  The expansion is parenthesized so the
 * (__m128) cast covers the whole call. */
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008271
8272static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008273_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008274{
Gabor Buella70d8d512018-05-30 15:27:49 +00008275 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
8276 (__v4sf) __B,
8277 -(__v4sf) __C);
8278 __A[0] = (__U & 1) ? __Z[0] : 0;
8279 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008280}
8281
/* Zero-masked scalar single-precision fmsub with explicit rounding R: the
 * maskz fmadd builtin with C negated.  The expansion is parenthesized so the
 * (__m128) cast covers the whole call. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      -(__v4sf)(__m128)(C), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008287
8288static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008289_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008290{
Gabor Buella70d8d512018-05-30 15:27:49 +00008291 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
8292 (__v4sf) __X,
8293 -(__v4sf) __Y);
8294 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8295 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008296}
8297
/* mask3 scalar single-precision fmsub with explicit rounding R; forwards to
 * the dedicated __builtin_ia32_vfmsubss3_mask3.  The expansion is
 * parenthesized so the (__m128) cast covers the whole call. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008303
8304static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008305_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008306{
Gabor Buella70d8d512018-05-30 15:27:49 +00008307 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
8308 -(__v4sf) __A,
8309 (__v4sf) __B);
8310 __W[0] = (__U & 1) ? __Z[0] : __W[0];
8311 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008312}
8313
/* Masked scalar single-precision fnmadd with explicit rounding R: the masked
 * fmadd builtin with A negated.  The expansion is parenthesized so the
 * (__m128) cast covers the whole call. */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
      -(__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008319
8320static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008321_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008322{
Gabor Buella70d8d512018-05-30 15:27:49 +00008323 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
8324 -(__v4sf) __B,
8325 (__v4sf) __C);
8326 __A[0] = (__U & 1) ? __Z[0] : 0;
8327 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008328}
8329
/* Zero-masked scalar single-precision fnmadd with explicit rounding R: the
 * maskz fmadd builtin with B negated.  The expansion is parenthesized so the
 * (__m128) cast covers the whole call. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(C), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008335
8336static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008337_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008338{
Gabor Buella70d8d512018-05-30 15:27:49 +00008339 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
8340 -(__v4sf) __X,
8341 (__v4sf) __Y);
8342 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8343 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008344}
8345
/* mask3 scalar single-precision fnmadd with explicit rounding R: the mask3
 * fmadd builtin with X negated.  The expansion is parenthesized so the
 * (__m128) cast covers the whole call. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
      -(__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008351
8352static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008353_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008354{
Gabor Buella70d8d512018-05-30 15:27:49 +00008355 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
8356 -(__v4sf) __A,
8357 -(__v4sf) __B);
8358 __W[0] = (__U & 1) ? __Z[0] : __W[0];
8359 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008360}
8361
/* Masked scalar single-precision fnmsub with explicit rounding R: the masked
 * fmadd builtin with both A and B negated.  The expansion is parenthesized so
 * the (__m128) cast covers the whole call. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
      -(__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008367
8368static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008369_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008370{
Gabor Buella70d8d512018-05-30 15:27:49 +00008371 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
8372 -(__v4sf) __B,
8373 -(__v4sf) __C);
8374 __A[0] = (__U & 1) ? __Z[0] : 0;
8375 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008376}
8377
/* Zero-masked scalar single-precision fnmsub with explicit rounding: the
 * maskz fmadd builtin with B and C negated.  The caller's R is forwarded to
 * the builtin, as in the other *_round_ss macros.  The expansion is
 * parenthesized so the (__m128) cast covers the whole call. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      -(__v4sf)(__m128)(C), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008383
8384static __inline__ __m128 __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008385_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008386{
Gabor Buella70d8d512018-05-30 15:27:49 +00008387 __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
8388 -(__v4sf) __X,
8389 -(__v4sf) __Y);
8390 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8391 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008392}
8393
/* mask3 scalar single-precision fnmsub with explicit rounding R: the mask3
 * fmsub builtin with X negated.  The expansion is parenthesized so the
 * (__m128) cast covers the whole call. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
      -(__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008399
Craig Topper58187d32016-05-17 04:41:29 +00008400static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008401_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008402{
Gabor Buella70d8d512018-05-30 15:27:49 +00008403 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8404 (__v2df) __A,
8405 (__v2df) __B);
8406 __W[0] = (__U & 1) ? __Z[0] : __W[0];
8407 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008408}
8409
/* Masked scalar double-precision fmadd with explicit rounding R; forwards to
 * __builtin_ia32_vfmaddsd3_mask.  The expansion is parenthesized so the
 * (__m128d) cast covers the whole call. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008415
Craig Topper58187d32016-05-17 04:41:29 +00008416static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008417_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008418{
Gabor Buella70d8d512018-05-30 15:27:49 +00008419 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
8420 (__v2df) __B,
8421 (__v2df) __C);
8422 __A[0] = (__U & 1) ? __Z[0] : 0;
8423 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008424}
8425
/* Zero-masked scalar double-precision fmadd with explicit rounding.  The
 * caller's R is forwarded to the builtin, as in the other *_round_sd macros.
 * The expansion is parenthesized so the (__m128d) cast covers the whole
 * call. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008431
Craig Topper58187d32016-05-17 04:41:29 +00008432static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008433_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008434{
Gabor Buella70d8d512018-05-30 15:27:49 +00008435 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8436 (__v2df) __X,
8437 (__v2df) __Y);
8438 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8439 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008440}
8441
/* mask3 scalar double-precision fmadd with explicit rounding R; forwards to
 * __builtin_ia32_vfmaddsd3_mask3.  The expansion is parenthesized so the
 * (__m128d) cast covers the whole call. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008447
Craig Topper58187d32016-05-17 04:41:29 +00008448static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008449_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008450{
Gabor Buella70d8d512018-05-30 15:27:49 +00008451 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8452 (__v2df) __A,
8453 -(__v2df) __B);
8454 __W[0] = (__U & 1) ? __Z[0] : __W[0];
8455 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008456}
8457
/* Masked scalar double-precision fmsub with explicit rounding R: the masked
 * fmadd builtin with B negated.  The expansion is parenthesized so the
 * (__m128d) cast covers the whole call. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008463
Craig Topper58187d32016-05-17 04:41:29 +00008464static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008465_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008466{
Gabor Buella70d8d512018-05-30 15:27:49 +00008467 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
8468 (__v2df) __B,
8469 -(__v2df) __C);
8470 __A[0] = (__U & 1) ? __Z[0] : 0;
8471 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008472}
8473
/* Zero-masked scalar double-precision fmsub with explicit rounding R: the
 * maskz fmadd builtin with C negated.  The expansion is parenthesized so the
 * (__m128d) cast covers the whole call. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      -(__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008479
Craig Topper58187d32016-05-17 04:41:29 +00008480static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008481_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008482{
Gabor Buella70d8d512018-05-30 15:27:49 +00008483 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8484 (__v2df) __X,
8485 -(__v2df) __Y);
8486 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8487 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008488}
8489
/* mask3 scalar double-precision fmsub with explicit rounding R; forwards to
 * the dedicated __builtin_ia32_vfmsubsd3_mask3.  The expansion is
 * parenthesized so the (__m128d) cast covers the whole call. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008495
Craig Topper58187d32016-05-17 04:41:29 +00008496static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008497_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008498{
Gabor Buella70d8d512018-05-30 15:27:49 +00008499 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8500 -(__v2df) __A,
8501 (__v2df) __B);
8502 __W[0] = (__U & 1) ? __Z[0] : __W[0];
8503 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008504}
8505
/* Masked scalar double-precision fnmadd with explicit rounding R: the masked
 * fmadd builtin with A negated.  The expansion is parenthesized so the
 * (__m128d) cast covers the whole call. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
      -(__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008511
Craig Topper58187d32016-05-17 04:41:29 +00008512static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008513_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008514{
Gabor Buella70d8d512018-05-30 15:27:49 +00008515 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
8516 -(__v2df) __B,
8517 (__v2df) __C);
8518 __A[0] = (__U & 1) ? __Z[0] : 0;
8519 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008520}
8521
/* Zero-masked scalar double-precision fnmadd with explicit rounding R: the
 * maskz fmadd builtin with B negated.  The expansion is parenthesized so the
 * (__m128d) cast covers the whole call. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008527
Craig Topper58187d32016-05-17 04:41:29 +00008528static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008529_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008530{
Gabor Buella70d8d512018-05-30 15:27:49 +00008531 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8532 -(__v2df) __X,
8533 (__v2df) __Y);
8534 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8535 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008536}
8537
/* mask3 scalar double-precision fnmadd with explicit rounding R: the mask3
 * fmadd builtin with X negated.  The expansion is parenthesized so the
 * (__m128d) cast covers the whole call. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
      -(__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008543
Craig Topper58187d32016-05-17 04:41:29 +00008544static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008545_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008546{
Gabor Buella70d8d512018-05-30 15:27:49 +00008547 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8548 -(__v2df) __A,
8549 -(__v2df) __B);
8550 __W[0] = (__U & 1) ? __Z[0] : __W[0];
8551 return __W;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008552}
8553
/* Masked scalar double-precision fnmsub with explicit rounding R: the masked
 * fmadd builtin with both A and B negated.  The expansion is parenthesized so
 * the (__m128d) cast covers the whole call. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
      -(__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008559
Craig Topper58187d32016-05-17 04:41:29 +00008560static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008561_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008562{
Gabor Buella70d8d512018-05-30 15:27:49 +00008563 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
8564 -(__v2df) __B,
8565 -(__v2df) __C);
8566 __A[0] = (__U & 1) ? __Z[0] : 0;
8567 return __A;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008568}
8569
/* Scalar double negated multiply-subtract with rounding argument R, zero-mask
 * form: forwards A, -B, -C, the mask U and R to
 * __builtin_ia32_vfmaddsd3_maskz. R is passed through as the rounding
 * argument, exactly like the _mask and _mask3 round variants. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008576
Craig Topper58187d32016-05-17 04:41:29 +00008577static __inline__ __m128d __DEFAULT_FN_ATTRS
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00008578_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008579{
Gabor Buella70d8d512018-05-30 15:27:49 +00008580 __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
8581 -(__v2df) __X,
8582 -(__v2df) __Y);
8583 __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
8584 return __Y;
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008585}
8586
/* Scalar double negated multiply-subtract with rounding argument R, mask3
 * form: forwards W, -X, Y, the mask U and R to
 * __builtin_ia32_vfmsubsd3_mask3 (the subtraction comes from the builtin, so
 * only X is negated here). */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
Michael Zuckerman743d68c2016-04-22 10:56:24 +00008592
/* Permutes the doubles of X independently inside each group of four elements
 * (0-3 and 4-7): result element i of a group is the group's element
 * ((C >> 2*i) & 3). C is expanded several times and must be a constant. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                    (__v8df)_mm512_undefined_pd(), \
                                    0 + (((C) >> 0) & 0x3), \
                                    0 + (((C) >> 2) & 0x3), \
                                    0 + (((C) >> 4) & 0x3), \
                                    0 + (((C) >> 6) & 0x3), \
                                    4 + (((C) >> 0) & 0x3), \
                                    4 + (((C) >> 2) & 0x3), \
                                    4 + (((C) >> 4) & 0x3), \
                                    4 + (((C) >> 6) & 0x3)))
Michael Zuckerman8938e832016-04-25 05:32:35 +00008604
/* Merge-masked _mm512_permutex_pd: blends the permuted vector with W under
 * mask U via __builtin_ia32_selectpd_512. */
#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_permutex_pd((X), (C)), \
                                  (__v8df)(__m512d)(W)))
Michael Zuckerman8938e832016-04-25 05:32:35 +00008609
/* Zero-masked _mm512_permutex_pd: blends the permuted vector with an all-zero
 * vector under mask U via __builtin_ia32_selectpd_512. */
#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_permutex_pd((X), (C)), \
                                  (__v8df)_mm512_setzero_pd()))
Michael Zuckerman8938e832016-04-25 05:32:35 +00008614
/* Permutes the 64-bit integers of X independently inside each group of four
 * elements (0-3 and 4-7): result element i of a group is the group's element
 * ((C >> 2*i) & 3). C is expanded several times and must be a constant. */
#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
                                    (__v8di)_mm512_undefined_epi32(), \
                                    0 + (((C) >> 0) & 0x3), \
                                    0 + (((C) >> 2) & 0x3), \
                                    0 + (((C) >> 4) & 0x3), \
                                    0 + (((C) >> 6) & 0x3), \
                                    4 + (((C) >> 0) & 0x3), \
                                    4 + (((C) >> 2) & 0x3), \
                                    4 + (((C) >> 4) & 0x3), \
                                    4 + (((C) >> 6) & 0x3)))
Michael Zuckerman8938e832016-04-25 05:32:35 +00008626
/* Merge-masked _mm512_permutex_epi64: blends the permuted vector with W under
 * mask U via __builtin_ia32_selectq_512. */
#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_permutex_epi64((X), (C)), \
                                 (__v8di)(__m512i)(W)))
Michael Zuckerman8938e832016-04-25 05:32:35 +00008631
/* Zero-masked _mm512_permutex_epi64: blends the permuted vector with an
 * all-zero vector under mask U via __builtin_ia32_selectq_512. */
#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_permutex_epi64((X), (C)), \
                                 (__v8di)_mm512_setzero_si512()))
Michael Zuckerman8938e832016-04-25 05:32:35 +00008636
8637static __inline__ __m512d __DEFAULT_FN_ATTRS
8638_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8639{
Craig Topper55b40672018-05-20 23:34:10 +00008640 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008641}
8642
8643static __inline__ __m512d __DEFAULT_FN_ATTRS
8644_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8645{
Craig Topper55b40672018-05-20 23:34:10 +00008646 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8647 (__v8df)_mm512_permutexvar_pd(__X, __Y),
8648 (__v8df)__W);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008649}
8650
8651static __inline__ __m512d __DEFAULT_FN_ATTRS
8652_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8653{
Craig Topper55b40672018-05-20 23:34:10 +00008654 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8655 (__v8df)_mm512_permutexvar_pd(__X, __Y),
8656 (__v8df)_mm512_setzero_pd());
Michael Zuckerman8938e832016-04-25 05:32:35 +00008657}
8658
8659static __inline__ __m512i __DEFAULT_FN_ATTRS
8660_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8661{
Craig Topper55b40672018-05-20 23:34:10 +00008662 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8663}
8664
8665static __inline__ __m512i __DEFAULT_FN_ATTRS
8666_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8667{
8668 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8669 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8670 (__v8di)_mm512_setzero_si512());
Michael Zuckerman8938e832016-04-25 05:32:35 +00008671}
8672
8673static __inline__ __m512i __DEFAULT_FN_ATTRS
8674_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8675 __m512i __Y)
8676{
Craig Topper55b40672018-05-20 23:34:10 +00008677 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8678 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8679 (__v8di)__W);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008680}
8681
8682static __inline__ __m512 __DEFAULT_FN_ATTRS
8683_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8684{
Craig Topper55b40672018-05-20 23:34:10 +00008685 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008686}
8687
8688static __inline__ __m512 __DEFAULT_FN_ATTRS
8689_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8690{
Craig Topper55b40672018-05-20 23:34:10 +00008691 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8692 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8693 (__v16sf)__W);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008694}
8695
8696static __inline__ __m512 __DEFAULT_FN_ATTRS
8697_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8698{
Craig Topper55b40672018-05-20 23:34:10 +00008699 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8700 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8701 (__v16sf)_mm512_setzero_ps());
Michael Zuckerman8938e832016-04-25 05:32:35 +00008702}
8703
8704static __inline__ __m512i __DEFAULT_FN_ATTRS
8705_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8706{
Craig Topper55b40672018-05-20 23:34:10 +00008707 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008708}
8709
/* Alias: _mm512_permutevar_epi32 is the same operation as
 * _mm512_permutexvar_epi32. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8711
Michael Zuckerman8938e832016-04-25 05:32:35 +00008712static __inline__ __m512i __DEFAULT_FN_ATTRS
Craig Topper55b40672018-05-20 23:34:10 +00008713_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8714{
8715 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8716 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8717 (__v16si)_mm512_setzero_si512());
8718}
8719
8720static __inline__ __m512i __DEFAULT_FN_ATTRS
Michael Zuckerman8938e832016-04-25 05:32:35 +00008721_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8722 __m512i __Y)
8723{
Craig Topper55b40672018-05-20 23:34:10 +00008724 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8725 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8726 (__v16si)__W);
Michael Zuckerman8938e832016-04-25 05:32:35 +00008727}
8728
/* Alias: _mm512_mask_permutevar_epi32 is the same operation as
 * _mm512_mask_permutexvar_epi32. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8730
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008731static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8732_mm512_kand (__mmask16 __A, __mmask16 __B)
8733{
8734 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8735}
Michael Zuckerman8938e832016-04-25 05:32:35 +00008736
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008737static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8738_mm512_kandn (__mmask16 __A, __mmask16 __B)
8739{
8740 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8741}
8742
8743static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8744_mm512_kor (__mmask16 __A, __mmask16 __B)
8745{
8746 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8747}
8748
8749static __inline__ int __DEFAULT_FN_ATTRS
8750_mm512_kortestc (__mmask16 __A, __mmask16 __B)
8751{
Craig Topper91f23d92016-05-16 01:09:16 +00008752 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008753}
8754
8755static __inline__ int __DEFAULT_FN_ATTRS
8756_mm512_kortestz (__mmask16 __A, __mmask16 __B)
8757{
Craig Topper91f23d92016-05-16 01:09:16 +00008758 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008759}
8760
8761static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8762_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8763{
Craig Topperf517f1a2018-01-14 19:23:50 +00008764 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
Michael Zuckermanfa508e82016-04-25 16:42:29 +00008765}
8766
8767static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8768_mm512_kxnor (__mmask16 __A, __mmask16 __B)
8769{
8770 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8771}
8772
8773static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8774_mm512_kxor (__mmask16 __A, __mmask16 __B)
8775{
8776 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8777}
Michael Zuckerman8938e832016-04-25 05:32:35 +00008778
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008779static __inline__ void __DEFAULT_FN_ATTRS
8780_mm512_stream_si512 (__m512i * __P, __m512i __A)
8781{
Simon Pilgrimc14865c2017-07-29 15:33:34 +00008782 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8783 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008784}
8785
8786static __inline__ __m512i __DEFAULT_FN_ATTRS
Simon Pilgrim1ba2bf22017-09-05 10:06:41 +00008787_mm512_stream_load_si512 (void const *__P)
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008788{
Simon Pilgrimc14865c2017-07-29 15:33:34 +00008789 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8790 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008791}
8792
8793static __inline__ void __DEFAULT_FN_ATTRS
8794_mm512_stream_pd (double *__P, __m512d __A)
8795{
Simon Pilgrimc14865c2017-07-29 15:33:34 +00008796 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8797 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008798}
8799
8800static __inline__ void __DEFAULT_FN_ATTRS
8801_mm512_stream_ps (float *__P, __m512 __A)
8802{
Simon Pilgrimc14865c2017-07-29 15:33:34 +00008803 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8804 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
Michael Zuckerman7c85a8c2016-04-27 10:44:15 +00008805}
8806
Michael Zuckerman41f5a372016-04-29 08:52:02 +00008807static __inline__ __m512d __DEFAULT_FN_ATTRS
8808_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8809{
8810 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8811 (__v8df) __W,
8812 (__mmask8) __U);
8813}
8814
8815static __inline__ __m512d __DEFAULT_FN_ATTRS
8816_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
8817{
8818 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8819 (__v8df)
8820 _mm512_setzero_pd (),
8821 (__mmask8) __U);
8822}
8823
8824static __inline__ __m512i __DEFAULT_FN_ATTRS
8825_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8826{
8827 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8828 (__v8di) __W,
8829 (__mmask8) __U);
8830}
8831
8832static __inline__ __m512i __DEFAULT_FN_ATTRS
8833_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
8834{
8835 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8836 (__v8di)
8837 _mm512_setzero_si512 (),
8838 (__mmask8) __U);
8839}
8840
8841static __inline__ __m512 __DEFAULT_FN_ATTRS
8842_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8843{
8844 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8845 (__v16sf) __W,
8846 (__mmask16) __U);
8847}
8848
8849static __inline__ __m512 __DEFAULT_FN_ATTRS
8850_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
8851{
8852 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8853 (__v16sf)
8854 _mm512_setzero_ps (),
8855 (__mmask16) __U);
8856}
8857
8858static __inline__ __m512i __DEFAULT_FN_ATTRS
8859_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8860{
8861 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8862 (__v16si) __W,
8863 (__mmask16) __U);
8864}
8865
8866static __inline__ __m512i __DEFAULT_FN_ATTRS
8867_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
8868{
8869 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8870 (__v16si)
8871 _mm512_setzero_si512 (),
8872 (__mmask16) __U);
8873}
8874
/* Scalar float compare yielding a __mmask8: forwards X, Y, the predicate P,
 * an all-ones input mask and the rounding argument R to
 * __builtin_ia32_cmpss_mask. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008879
/* Scalar float compare yielding a __mmask8: forwards X, Y, the predicate P,
 * the input mask M and the rounding argument R to
 * __builtin_ia32_cmpss_mask. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008884
/* Scalar float compare yielding a __mmask8: like _mm_cmp_round_ss_mask, but
 * with _MM_FROUND_CUR_DIRECTION as the rounding argument. */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008890
/* Scalar float compare yielding a __mmask8: like _mm_mask_cmp_round_ss_mask,
 * but with _MM_FROUND_CUR_DIRECTION as the rounding argument. */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008896
/* Scalar double compare yielding a __mmask8: forwards X, Y, the predicate P,
 * an all-ones input mask and the rounding argument R to
 * __builtin_ia32_cmpsd_mask. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008901
/* Scalar double compare yielding a __mmask8: forwards X, Y, the predicate P,
 * the input mask M and the rounding argument R to
 * __builtin_ia32_cmpsd_mask. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008906
/* Scalar double compare yielding a __mmask8: like _mm_cmp_round_sd_mask, but
 * with _MM_FROUND_CUR_DIRECTION as the rounding argument. */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008912
/* Scalar double compare yielding a __mmask8: like _mm_mask_cmp_round_sd_mask,
 * but with _MM_FROUND_CUR_DIRECTION as the rounding argument. */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
Michael Zuckerman0b9d1052016-04-29 11:01:16 +00008918
/* Bit Test */
8920
8921static __inline __mmask16 __DEFAULT_FN_ATTRS
8922_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8923{
8924 return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008925 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008926}
8927
8928static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8929_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8930{
8931 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008932 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008933}
8934
8935static __inline __mmask8 __DEFAULT_FN_ATTRS
8936_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8937{
8938 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008939 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008940}
8941
8942static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8943_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8944{
8945 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008946 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008947}
8948
8949static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8950_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8951{
8952 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008953 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008954}
8955
8956static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8957_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8958{
8959 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008960 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008961}
8962
8963static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8964_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8965{
8966 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008967 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008968}
8969
8970static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8971_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8972{
8973 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
Craig Topperdff5b312018-05-30 18:02:11 +00008974 _mm512_setzero_si512());
Uriel Korach5b2b71d2017-11-13 12:50:52 +00008975}
8976
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008977static __inline__ __m512 __DEFAULT_FN_ATTRS
8978_mm512_movehdup_ps (__m512 __A)
8979{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008980 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8981 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008982}
8983
8984static __inline__ __m512 __DEFAULT_FN_ATTRS
8985_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8986{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008987 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8988 (__v16sf)_mm512_movehdup_ps(__A),
8989 (__v16sf)__W);
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008990}
8991
8992static __inline__ __m512 __DEFAULT_FN_ATTRS
8993_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
8994{
Simon Pilgrim275d7212016-07-02 17:16:25 +00008995 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8996 (__v16sf)_mm512_movehdup_ps(__A),
8997 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanac1e5192016-05-01 14:43:43 +00008998}
8999
9000static __inline__ __m512 __DEFAULT_FN_ATTRS
9001_mm512_moveldup_ps (__m512 __A)
9002{
Simon Pilgrim275d7212016-07-02 17:16:25 +00009003 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9004 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
Michael Zuckermanac1e5192016-05-01 14:43:43 +00009005}
9006
9007static __inline__ __m512 __DEFAULT_FN_ATTRS
9008_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9009{
Simon Pilgrim275d7212016-07-02 17:16:25 +00009010 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9011 (__v16sf)_mm512_moveldup_ps(__A),
9012 (__v16sf)__W);
Michael Zuckermanac1e5192016-05-01 14:43:43 +00009013}
9014
9015static __inline__ __m512 __DEFAULT_FN_ATTRS
9016_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
9017{
Simon Pilgrim275d7212016-07-02 17:16:25 +00009018 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9019 (__v16sf)_mm512_moveldup_ps(__A),
9020 (__v16sf)_mm512_setzero_ps());
Michael Zuckermanac1e5192016-05-01 14:43:43 +00009021}
9022
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00009023static __inline__ __m128 __DEFAULT_FN_ATTRS
9024_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
9025{
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009026 __m128 res = __A;
Ayman Musae60a41c2016-11-08 12:00:30 +00009027 res[0] = (__U & 1) ? __B[0] : __W[0];
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009028 return res;
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00009029}
9030
9031static __inline__ __m128 __DEFAULT_FN_ATTRS
9032_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
9033{
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009034 __m128 res = __A;
9035 res[0] = (__U & 1) ? __B[0] : 0;
9036 return res;
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00009037}
9038
9039static __inline__ __m128d __DEFAULT_FN_ATTRS
9040_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
9041{
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009042 __m128d res = __A;
Ayman Musae60a41c2016-11-08 12:00:30 +00009043 res[0] = (__U & 1) ? __B[0] : __W[0];
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009044 return res;
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00009045}
9046
9047static __inline__ __m128d __DEFAULT_FN_ATTRS
9048_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
9049{
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009050 __m128d res = __A;
9051 res[0] = (__U & 1) ? __B[0] : 0;
9052 return res;
Ayman Musae60a41c2016-11-08 12:00:30 +00009053}
9054
9055static __inline__ void __DEFAULT_FN_ATTRS
9056_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
9057{
Craig Topper74ac0ed2018-05-10 05:43:43 +00009058 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00009059}
9060
9061static __inline__ void __DEFAULT_FN_ATTRS
9062_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
9063{
Craig Topper74ac0ed2018-05-10 05:43:43 +00009064 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00009065}
9066
9067static __inline__ __m128 __DEFAULT_FN_ATTRS
9068_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
9069{
9070 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
Craig Topper63ec0ea2018-05-30 21:08:27 +00009071 (__v4sf)_mm_setzero_ps(),
Ayman Musae60a41c2016-11-08 12:00:30 +00009072 0, 4, 4, 4);
9073
Craig Topper74ac0ed2018-05-10 05:43:43 +00009074 return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00009075}
9076
9077static __inline__ __m128 __DEFAULT_FN_ATTRS
9078_mm_maskz_load_ss (__mmask8 __U, const float* __A)
9079{
Craig Topper74ac0ed2018-05-10 05:43:43 +00009080 return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A,
9081 (__v4sf) _mm_setzero_ps(),
9082 __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00009083}
9084
9085static __inline__ __m128d __DEFAULT_FN_ATTRS
9086_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
9087{
9088 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
Craig Topper63ec0ea2018-05-30 21:08:27 +00009089 (__v2df)_mm_setzero_pd(),
9090 0, 2);
Ayman Musae60a41c2016-11-08 12:00:30 +00009091
Craig Topper74ac0ed2018-05-10 05:43:43 +00009092 return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1);
Ayman Musae60a41c2016-11-08 12:00:30 +00009093}
9094
9095static __inline__ __m128d __DEFAULT_FN_ATTRS
9096_mm_maskz_load_sd (__mmask8 __U, const double* __A)
9097{
Craig Topper74ac0ed2018-05-10 05:43:43 +00009098 return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A,
9099 (__v2df) _mm_setzero_pd(),
9100 __U & 1);
Michael Zuckerman9e43ccf2016-10-05 12:56:06 +00009101}
9102
/* Shuffles the 32-bit integers of A independently inside each group of four
 * elements (0-3, 4-7, 8-11, 12-15): result element i of a group is the
 * group's element ((I >> 2*i) & 3). I is expanded several times and must be
 * a constant. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                    (__v16si)_mm512_undefined_epi32(), \
                                    0  + (((I) >> 0) & 0x3), \
                                    0  + (((I) >> 2) & 0x3), \
                                    0  + (((I) >> 4) & 0x3), \
                                    0  + (((I) >> 6) & 0x3), \
                                    4  + (((I) >> 0) & 0x3), \
                                    4  + (((I) >> 2) & 0x3), \
                                    4  + (((I) >> 4) & 0x3), \
                                    4  + (((I) >> 6) & 0x3), \
                                    8  + (((I) >> 0) & 0x3), \
                                    8  + (((I) >> 2) & 0x3), \
                                    8  + (((I) >> 4) & 0x3), \
                                    8  + (((I) >> 6) & 0x3), \
                                    12 + (((I) >> 0) & 0x3), \
                                    12 + (((I) >> 2) & 0x3), \
                                    12 + (((I) >> 4) & 0x3), \
                                    12 + (((I) >> 6) & 0x3)))
Michael Zuckermanc62f27e2016-05-02 07:35:27 +00009122
/* Merge-masked _mm512_shuffle_epi32: blends the shuffled vector with W under
 * mask U via __builtin_ia32_selectd_512. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                 (__v16si)(__m512i)(W)))
Michael Zuckermanc62f27e2016-05-02 07:35:27 +00009127
/* Zero-masked _mm512_shuffle_epi32: blends the shuffled vector with an
 * all-zero vector under mask U via __builtin_ia32_selectd_512. */
#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                 (__v16si)_mm512_setzero_si512()))
Michael Zuckermanc62f27e2016-05-02 07:35:27 +00009132
Michael Zuckerman6a0e0872016-05-02 08:36:41 +00009133static __inline__ __m512d __DEFAULT_FN_ATTRS
9134_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9135{
9136 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9137 (__v8df) __W,
9138 (__mmask8) __U);
9139}
9140
9141static __inline__ __m512d __DEFAULT_FN_ATTRS
9142_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9143{
9144 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9145 (__v8df) _mm512_setzero_pd (),
9146 (__mmask8) __U);
9147}
9148
9149static __inline__ __m512i __DEFAULT_FN_ATTRS
9150_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9151{
9152 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9153 (__v8di) __W,
9154 (__mmask8) __U);
9155}
9156
9157static __inline__ __m512i __DEFAULT_FN_ATTRS
9158_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
9159{
9160 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9161 (__v8di) _mm512_setzero_pd (),
9162 (__mmask8) __U);
9163}
9164
9165static __inline__ __m512d __DEFAULT_FN_ATTRS
9166_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
9167{
9168 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9169 (__v8df) __W,
9170 (__mmask8) __U);
9171}
9172
9173static __inline__ __m512d __DEFAULT_FN_ATTRS
9174_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
9175{
9176 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9177 (__v8df) _mm512_setzero_pd(),
9178 (__mmask8) __U);
9179}
9180
9181static __inline__ __m512i __DEFAULT_FN_ATTRS
9182_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
9183{
9184 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9185 (__v8di) __W,
9186 (__mmask8) __U);
9187}
9188
9189static __inline__ __m512i __DEFAULT_FN_ATTRS
9190_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
9191{
9192 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9193 (__v8di) _mm512_setzero_pd(),
9194 (__mmask8) __U);
9195}
9196
9197static __inline__ __m512 __DEFAULT_FN_ATTRS
9198_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
9199{
9200 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9201 (__v16sf) __W,
9202 (__mmask16) __U);
9203}
9204
9205static __inline__ __m512 __DEFAULT_FN_ATTRS
9206_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
9207{
9208 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9209 (__v16sf) _mm512_setzero_ps(),
9210 (__mmask16) __U);
9211}
9212
9213static __inline__ __m512i __DEFAULT_FN_ATTRS
9214_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
9215{
9216 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9217 (__v16si) __W,
9218 (__mmask16) __U);
9219}
9220
9221static __inline__ __m512i __DEFAULT_FN_ATTRS
9222_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
9223{
9224 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9225 (__v16si) _mm512_setzero_ps(),
9226 (__mmask16) __U);
9227}
9228
9229static __inline__ __m512 __DEFAULT_FN_ATTRS
9230_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9231{
9232 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9233 (__v16sf) __W,
9234 (__mmask16) __U);
9235}
9236
9237static __inline__ __m512 __DEFAULT_FN_ATTRS
9238_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9239{
9240 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9241 (__v16sf) _mm512_setzero_ps(),
9242 (__mmask16) __U);
9243}
9244
9245static __inline__ __m512i __DEFAULT_FN_ATTRS
9246_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9247{
9248 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9249 (__v16si) __W,
9250 (__mmask16) __U);
9251}
9252
9253static __inline__ __m512i __DEFAULT_FN_ATTRS
9254_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9255{
9256 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9257 (__v16si) _mm512_setzero_ps(),
9258 (__mmask16) __U);
9259}
9260
/* Converts the eight floats of A to doubles with rounding argument R:
 * forwards A, an undefined pass-through vector, an all-ones mask and R to
 * __builtin_ia32_cvtps2pd512_mask. */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00009265
/* Merge-masked float-to-double conversion with rounding argument R: forwards
 * A, W, the mask U and R to __builtin_ia32_cvtps2pd512_mask. */
#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00009270
Craig Topperc6338672018-05-31 00:51:20 +00009271#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00009272 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
9273 (__v8df)_mm512_setzero_pd(), \
Craig Topperc6338672018-05-31 00:51:20 +00009274 (__mmask8)(U), (int)(R))
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00009275
9276static __inline__ __m512d __DEFAULT_FN_ATTRS
9277_mm512_cvtps_pd (__m256 __A)
9278{
Craig Topperdaaf1052018-05-14 04:05:06 +00009279 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00009280}
9281
9282static __inline__ __m512d __DEFAULT_FN_ATTRS
9283_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
9284{
Craig Topper8cb261e2018-05-14 04:57:46 +00009285 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
9286 (__v8df)_mm512_cvtps_pd(__A),
9287 (__v8df)__W);
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00009288}
9289
9290static __inline__ __m512d __DEFAULT_FN_ATTRS
9291_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
9292{
Craig Topper8cb261e2018-05-14 04:57:46 +00009293 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
9294 (__v8df)_mm512_cvtps_pd(__A),
9295 (__v8df)_mm512_setzero_pd());
Michael Zuckermand6e68ce2016-05-02 09:42:31 +00009296}
9297
Ayman Musa2e250e82016-09-27 14:06:32 +00009298static __inline__ __m512 __DEFAULT_FN_ATTRS
Ayman Musa17a28192016-09-27 15:37:31 +00009299_mm512_cvtpslo_pd (__m512 __A)
Ayman Musa2e250e82016-09-27 14:06:32 +00009300{
9301 return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
9302}
9303
9304static __inline__ __m512 __DEFAULT_FN_ATTRS
Ayman Musa17a28192016-09-27 15:37:31 +00009305_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
Ayman Musa2e250e82016-09-27 14:06:32 +00009306{
9307 return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
9308}
9309
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00009310static __inline__ __m512d __DEFAULT_FN_ATTRS
9311_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
9312{
Igor Bregeraadb8762016-06-08 13:59:20 +00009313 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9314 (__v8df) __A,
9315 (__v8df) __W);
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00009316}
9317
9318static __inline__ __m512d __DEFAULT_FN_ATTRS
9319_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
9320{
Igor Bregeraadb8762016-06-08 13:59:20 +00009321 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9322 (__v8df) __A,
9323 (__v8df) _mm512_setzero_pd ());
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00009324}
9325
9326static __inline__ __m512 __DEFAULT_FN_ATTRS
9327_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
9328{
Igor Bregeraadb8762016-06-08 13:59:20 +00009329 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9330 (__v16sf) __A,
9331 (__v16sf) __W);
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00009332}
9333
9334static __inline__ __m512 __DEFAULT_FN_ATTRS
9335_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
9336{
Igor Bregeraadb8762016-06-08 13:59:20 +00009337 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9338 (__v16sf) __A,
9339 (__v16sf) _mm512_setzero_ps ());
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00009340}
9341
Michael Zuckerman708e7592016-05-03 10:42:46 +00009342static __inline__ void __DEFAULT_FN_ATTRS
9343_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9344{
9345 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9346 (__mmask8) __U);
9347}
9348
9349static __inline__ void __DEFAULT_FN_ATTRS
9350_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9351{
9352 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9353 (__mmask8) __U);
9354}
9355
9356static __inline__ void __DEFAULT_FN_ATTRS
9357_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9358{
9359 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9360 (__mmask16) __U);
9361}
9362
9363static __inline__ void __DEFAULT_FN_ATTRS
9364_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9365{
9366 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9367 (__mmask16) __U);
9368}
Michael Zuckerman5f0e96e2016-05-02 14:02:01 +00009369
/* Scalar double -> float conversion through
 * __builtin_ia32_cvtsd2ss_round_mask. A and B are the builtin's first two
 * vector operands and R its final int operand; the variants differ only in the
 * third vector operand (undefined / W / zero) and the mask (all ones / U / U).
 *
 * Each expansion is fully parenthesized so the leading cast cannot be
 * captured by a postfix operator that follows the macro invocation. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_undefined_ps(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
Michael Zuckermane6f73892016-05-04 08:55:11 +00009387
Asaf Badouh89f65762016-06-02 08:11:35 +00009388static __inline__ __m128 __DEFAULT_FN_ATTRS
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009389_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
Asaf Badouh89f65762016-06-02 08:11:35 +00009390{
Craig Toppercbf39292018-05-31 01:24:40 +00009391 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9392 (__v2df)__B,
9393 (__v4sf)__W,
9394 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
Asaf Badouh89f65762016-06-02 08:11:35 +00009395}
9396
9397static __inline__ __m128 __DEFAULT_FN_ATTRS
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009398_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
Asaf Badouh89f65762016-06-02 08:11:35 +00009399{
Craig Toppercbf39292018-05-31 01:24:40 +00009400 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9401 (__v2df)__B,
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009402 (__v4sf)_mm_setzero_ps(),
Craig Toppercbf39292018-05-31 01:24:40 +00009403 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
Asaf Badouh89f65762016-06-02 08:11:35 +00009404}
9405
/* Alternate spellings: each _i32/_i64 name is a plain alias for the
 * corresponding _si32/_si64 scalar conversion intrinsic. The 64-bit aliases
 * are only defined when __x86_64__ is defined. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
Asaf Badouh89f65762016-06-02 08:11:35 +00009416
#ifdef __x86_64__
/* 64-bit integer -> scalar double conversion through
 * __builtin_ia32_cvtsi2sd64; B is cast to long long and R is forwarded as the
 * builtin's final int operand. The two spellings expand identically.
 *
 * Expansions are fully parenthesized so the leading cast always applies to
 * the builtin call, whatever follows the macro invocation. */
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif
Michael Zuckermane6f73892016-05-04 08:55:11 +00009426
/* 32-bit integer -> scalar float conversion through
 * __builtin_ia32_cvtsi2ss32; B is cast to int and R is forwarded as the
 * builtin's final int operand. The two spellings expand identically.
 *
 * Expansions are fully parenthesized so the leading cast always applies to
 * the builtin call, whatever follows the macro invocation. */
#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))
Michael Zuckermane6f73892016-05-04 08:55:11 +00009432
#ifdef __x86_64__
/* 64-bit integer -> scalar float conversion through
 * __builtin_ia32_cvtsi2ss64; B is cast to long long and R is forwarded as the
 * builtin's final int operand. The two spellings expand identically.
 *
 * Expansions are fully parenthesized so the leading cast always applies to
 * the builtin call, whatever follows the macro invocation. */
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif
Michael Zuckermane6f73892016-05-04 08:55:11 +00009442
/* Scalar float -> double conversion through
 * __builtin_ia32_cvtss2sd_round_mask. A and B are the builtin's first two
 * vector operands and R its final int operand; the variants differ only in the
 * third vector operand (undefined / W / zero) and the mask (all ones / U / U).
 *
 * Each expansion is fully parenthesized so the leading cast cannot be
 * captured by a postfix operator that follows the macro invocation. */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_undefined_pd(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
Michael Zuckermane6f73892016-05-04 08:55:11 +00009460
9461static __inline__ __m128d __DEFAULT_FN_ATTRS
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009462_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
Asaf Badouh89f65762016-06-02 08:11:35 +00009463{
Craig Toppercbf39292018-05-31 01:24:40 +00009464 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9465 (__v4sf)__B,
9466 (__v2df)__W,
9467 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
Asaf Badouh89f65762016-06-02 08:11:35 +00009468}
9469
9470static __inline__ __m128d __DEFAULT_FN_ATTRS
Asaf Badouha0b6f8f2016-07-14 08:40:30 +00009471_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
Asaf Badouh89f65762016-06-02 08:11:35 +00009472{
Craig Toppercbf39292018-05-31 01:24:40 +00009473 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9474 (__v4sf)__B,
9475 (__v2df)_mm_setzero_pd(),
9476 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
Asaf Badouh89f65762016-06-02 08:11:35 +00009477}
9478
9479static __inline__ __m128d __DEFAULT_FN_ATTRS
Michael Zuckermane6f73892016-05-04 08:55:11 +00009480_mm_cvtu32_sd (__m128d __A, unsigned __B)
9481{
Craig Topper6fa91252018-05-13 23:03:30 +00009482 __A[0] = __B;
9483 return __A;
Michael Zuckermane6f73892016-05-04 08:55:11 +00009484}
9485
Craig Topper45db56c2016-07-21 07:38:39 +00009486#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00009487#define _mm_cvt_roundu64_sd(A, B, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00009488 (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
Craig Topperc6338672018-05-31 00:51:20 +00009489 (unsigned long long)(B), (int)(R))
Michael Zuckermane6f73892016-05-04 08:55:11 +00009490
9491static __inline__ __m128d __DEFAULT_FN_ATTRS
9492_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9493{
Craig Topper6fa91252018-05-13 23:03:30 +00009494 __A[0] = __B;
9495 return __A;
Michael Zuckermane6f73892016-05-04 08:55:11 +00009496}
Craig Topper45db56c2016-07-21 07:38:39 +00009497#endif
Michael Zuckermane6f73892016-05-04 08:55:11 +00009498
/* Unsigned 32-bit integer -> scalar float conversion through
 * __builtin_ia32_cvtusi2ss32; R is forwarded as the builtin's final int
 * operand. The expansion is fully parenthesized so the leading cast always
 * applies to the builtin call. */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))
Michael Zuckermane6f73892016-05-04 08:55:11 +00009502
9503static __inline__ __m128 __DEFAULT_FN_ATTRS
9504_mm_cvtu32_ss (__m128 __A, unsigned __B)
9505{
Craig Topper6fa91252018-05-13 23:03:30 +00009506 __A[0] = __B;
9507 return __A;
Michael Zuckermane6f73892016-05-04 08:55:11 +00009508}
9509
Craig Topper45db56c2016-07-21 07:38:39 +00009510#ifdef __x86_64__
Craig Topperc6338672018-05-31 00:51:20 +00009511#define _mm_cvt_roundu64_ss(A, B, R) \
Craig Topper8c18e112016-05-17 04:41:50 +00009512 (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
Craig Topperc6338672018-05-31 00:51:20 +00009513 (unsigned long long)(B), (int)(R))
Michael Zuckermane6f73892016-05-04 08:55:11 +00009514
9515static __inline__ __m128 __DEFAULT_FN_ATTRS
9516_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9517{
Craig Topper6fa91252018-05-13 23:03:30 +00009518 __A[0] = __B;
9519 return __A;
Michael Zuckermane6f73892016-05-04 08:55:11 +00009520}
Craig Topper45db56c2016-07-21 07:38:39 +00009521#endif
Michael Zuckermane6f73892016-05-04 08:55:11 +00009522
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009523static __inline__ __m512i __DEFAULT_FN_ATTRS
9524_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9525{
Jina Nahias3ad702a2017-09-19 11:00:27 +00009526 return (__m512i) __builtin_ia32_selectd_512(__M,
9527 (__v16si) _mm512_set1_epi32(__A),
9528 (__v16si) __O);
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009529}
9530
9531static __inline__ __m512i __DEFAULT_FN_ATTRS
9532_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9533{
Jina Nahias3ad702a2017-09-19 11:00:27 +00009534 return (__m512i) __builtin_ia32_selectq_512(__M,
9535 (__v8di) _mm512_set1_epi64(__A),
9536 (__v8di) __O);
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009537}
9538
Igor Bregerf050b792017-03-19 08:27:16 +00009539static __inline __m512i __DEFAULT_FN_ATTRS
9540_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9541 char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9542 char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9543 char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9544 char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9545 char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9546 char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9547 char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9548 char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9549 char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9550 char __e4, char __e3, char __e2, char __e1, char __e0) {
9551
9552 return __extension__ (__m512i)(__v64qi)
9553 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9554 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9555 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9556 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9557 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9558 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9559 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9560 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9561}
9562
9563static __inline __m512i __DEFAULT_FN_ATTRS
9564_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9565 short __e27, short __e26, short __e25, short __e24, short __e23,
9566 short __e22, short __e21, short __e20, short __e19, short __e18,
9567 short __e17, short __e16, short __e15, short __e14, short __e13,
9568 short __e12, short __e11, short __e10, short __e9, short __e8,
9569 short __e7, short __e6, short __e5, short __e4, short __e3,
9570 short __e2, short __e1, short __e0) {
9571 return __extension__ (__m512i)(__v32hi)
9572 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9573 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9574 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9575 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9576}
9577
Michael Zuckerman178113e2016-05-19 12:07:49 +00009578static __inline __m512i __DEFAULT_FN_ATTRS
9579_mm512_set_epi32 (int __A, int __B, int __C, int __D,
9580 int __E, int __F, int __G, int __H,
9581 int __I, int __J, int __K, int __L,
9582 int __M, int __N, int __O, int __P)
9583{
9584 return __extension__ (__m512i)(__v16si)
9585 { __P, __O, __N, __M, __L, __K, __J, __I,
9586 __H, __G, __F, __E, __D, __C, __B, __A };
9587}
9588
/* Reversed-order form of _mm512_set_epi32: the arguments are given as
 * e0..e15 and forwarded to _mm512_set_epi32 in the opposite order
 * (e15 first, e0 last). */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
                          e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))
Ekaterina Romanova5a7f09c2016-05-28 00:18:59 +00009593
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009594static __inline__ __m512i __DEFAULT_FN_ATTRS
9595_mm512_set_epi64 (long long __A, long long __B, long long __C,
9596 long long __D, long long __E, long long __F,
9597 long long __G, long long __H)
9598{
9599 return __extension__ (__m512i) (__v8di)
9600 { __H, __G, __F, __E, __D, __C, __B, __A };
9601}
9602
/* Reversed-order form of _mm512_set_epi64: the arguments are given as e0..e7
 * and forwarded to _mm512_set_epi64 in the opposite order. */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
Michael Zuckerman178113e2016-05-19 12:07:49 +00009605
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009606static __inline__ __m512d __DEFAULT_FN_ATTRS
9607_mm512_set_pd (double __A, double __B, double __C, double __D,
9608 double __E, double __F, double __G, double __H)
9609{
9610 return __extension__ (__m512d)
9611 { __H, __G, __F, __E, __D, __C, __B, __A };
9612}
9613
/* Reversed-order form of _mm512_set_pd: the arguments are given as e0..e7 and
 * forwarded to _mm512_set_pd in the opposite order. */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
Michael Zuckerman178113e2016-05-19 12:07:49 +00009616
Michael Zuckerman13d3c002016-05-11 11:41:29 +00009617static __inline__ __m512 __DEFAULT_FN_ATTRS
9618_mm512_set_ps (float __A, float __B, float __C, float __D,
9619 float __E, float __F, float __G, float __H,
9620 float __I, float __J, float __K, float __L,
9621 float __M, float __N, float __O, float __P)
9622{
9623 return __extension__ (__m512)
9624 { __P, __O, __N, __M, __L, __K, __J, __I,
9625 __H, __G, __F, __E, __D, __C, __B, __A };
9626}
9627
/* Reversed-order form of _mm512_set_ps: the arguments are given as e0..e15
 * and forwarded to _mm512_set_ps in the opposite order. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
Michael Zuckerman178113e2016-05-19 12:07:49 +00009631
Asaf Badouh13633282016-07-05 12:24:14 +00009632static __inline__ __m512 __DEFAULT_FN_ATTRS
Asaf Badouh356bb762016-08-21 07:56:47 +00009633_mm512_abs_ps(__m512 __A)
Asaf Badouh13633282016-07-05 12:24:14 +00009634{
Asaf Badouh356bb762016-08-21 07:56:47 +00009635 return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
Asaf Badouh13633282016-07-05 12:24:14 +00009636}
9637
9638static __inline__ __m512 __DEFAULT_FN_ATTRS
Asaf Badouh356bb762016-08-21 07:56:47 +00009639_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
Asaf Badouh13633282016-07-05 12:24:14 +00009640{
Asaf Badouh356bb762016-08-21 07:56:47 +00009641 return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
Asaf Badouh13633282016-07-05 12:24:14 +00009642}
9643
9644static __inline__ __m512d __DEFAULT_FN_ATTRS
Asaf Badouh356bb762016-08-21 07:56:47 +00009645_mm512_abs_pd(__m512d __A)
Asaf Badouh13633282016-07-05 12:24:14 +00009646{
Asaf Badouh356bb762016-08-21 07:56:47 +00009647 return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
Asaf Badouh13633282016-07-05 12:24:14 +00009648}
9649
9650static __inline__ __m512d __DEFAULT_FN_ATTRS
Asaf Badouh356bb762016-08-21 07:56:47 +00009651_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
Asaf Badouh13633282016-07-05 12:24:14 +00009652{
Asaf Badouh356bb762016-08-21 07:56:47 +00009653 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
Asaf Badouh13633282016-07-05 12:24:14 +00009654}
9655
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
 * outputs. This class of vector operation forms the basis of many scientific
 * computations. In vector-reduction arithmetic, the evaluation is independent
 * of the order of the input elements of the vector V.
 *
 * The bisection method is used. At each step, the vector from the previous
 * step is split in half and the operation is performed on its two halves.
 * This takes log2(n) steps, where n is the number of elements in the vector.
 *
 * Vec512 - Vector with size 512.
 * Operator - Can be one of following: +,*,&,|
 * T2 - Can get 'i' for int and 'f' for float.
 * T1 - Can get 'i' for int and 'd' for double.
 */
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009670
9671#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1) \
9672 __extension__({ \
9673 __m256##T1 Vec256 = __builtin_shufflevector( \
9674 (__v8d##T2)Vec512, \
9675 (__v8d##T2)Vec512, \
9676 0, 1, 2, 3) \
9677 Operator \
9678 __builtin_shufflevector( \
9679 (__v8d##T2)Vec512, \
9680 (__v8d##T2)Vec512, \
9681 4, 5, 6, 7); \
9682 __m128##T1 Vec128 = __builtin_shufflevector( \
9683 (__v4d##T2)Vec256, \
9684 (__v4d##T2)Vec256, \
9685 0, 1) \
9686 Operator \
9687 __builtin_shufflevector( \
9688 (__v4d##T2)Vec256, \
9689 (__v4d##T2)Vec256, \
9690 2, 3); \
9691 Vec128 = __builtin_shufflevector((__v2d##T2)Vec128, \
9692 (__v2d##T2)Vec128, 0, -1) \
9693 Operator \
9694 __builtin_shufflevector((__v2d##T2)Vec128, \
9695 (__v2d##T2)Vec128, 1, -1); \
9696 return Vec128[0]; \
9697 })
9698
9699static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
9700 _mm512_reduce_operator_64bit(__W, +, i, i);
9701}
9702
9703static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
9704 _mm512_reduce_operator_64bit(__W, *, i, i);
9705}
9706
9707static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
9708 _mm512_reduce_operator_64bit(__W, &, i, i);
9709}
9710
9711static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
9712 _mm512_reduce_operator_64bit(__W, |, i, i);
9713}
9714
9715static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
9716 _mm512_reduce_operator_64bit(__W, +, f, d);
9717}
9718
9719static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
9720 _mm512_reduce_operator_64bit(__W, *, f, d);
9721}
9722
Craig Topper73d1d402018-05-30 22:33:21 +00009723/* Vec512 - Vector with size 512.
9724 * Vec512Neutral - All vector elements set to the identity element.
9725 * Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
9726 * Operator - Can be one of following: +,*,&,|
9727 * Mask - Intrinsic Mask
9728 * T2 - Can get 'i' for int and 'f' for float.
9729 * T1 - Can get 'i' for int and 'd' for packed double-precision.
9730 * T3 - Can be Pd for packed double or q for q-word.
9731 */
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009732
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009733#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator, \
9734 Mask, T2, T1, T3) \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009735 __extension__({ \
9736 Vec512 = __builtin_ia32_select##T3##_512( \
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009737 (__mmask8)Mask, \
9738 (__v8d##T2)Vec512, \
9739 (__v8d##T2)Vec512Neutral); \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009740 _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1); \
9741 })
9742
9743static __inline__ long long __DEFAULT_FN_ATTRS
9744_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009745 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009746}
9747
9748static __inline__ long long __DEFAULT_FN_ATTRS
9749_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009750 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009751}
9752
9753static __inline__ long long __DEFAULT_FN_ATTRS
9754_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009755 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009756 &, __M, i, i, q);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009757}
9758
9759static __inline__ long long __DEFAULT_FN_ATTRS
9760_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009761 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009762 i, i, q);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009763}
9764
9765static __inline__ double __DEFAULT_FN_ATTRS
9766_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009767 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009768 f, d, pd);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009769}
9770
9771static __inline__ double __DEFAULT_FN_ATTRS
9772_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009773 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
9774 f, d, pd);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009775}
Craig Topper9bed2e62018-05-23 06:31:36 +00009776#undef _mm512_reduce_operator_64bit
9777#undef _mm512_mask_reduce_operator_64bit
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009778
/* Vec512 - Vector with size 512.
 * Operator - Can be one of following: +,*,&,|
 * T2 - Element-type suffix: 'i' for int and 'f' for float.
 * T1 - Vector-type suffix: 'i' for int and ' ' (empty) for packed single.
 */
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009784
9785#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
9786 __m256##T1 Vec256 = \
Michael Zuckermand3436972016-10-30 14:54:05 +00009787 (__m256##T1)(__builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009788 (__v16s##T2)Vec512, \
9789 (__v16s##T2)Vec512, \
9790 0, 1, 2, 3, 4, 5, 6, 7) \
9791 Operator \
Michael Zuckermand3436972016-10-30 14:54:05 +00009792 __builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009793 (__v16s##T2)Vec512, \
9794 (__v16s##T2)Vec512, \
Michael Zuckermand3436972016-10-30 14:54:05 +00009795 8, 9, 10, 11, 12, 13, 14, 15)); \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009796 __m128##T1 Vec128 = \
Michael Zuckermand3436972016-10-30 14:54:05 +00009797 (__m128##T1)(__builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009798 (__v8s##T2)Vec256, \
9799 (__v8s##T2)Vec256, \
9800 0, 1, 2, 3) \
9801 Operator \
Michael Zuckermand3436972016-10-30 14:54:05 +00009802 __builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009803 (__v8s##T2)Vec256, \
9804 (__v8s##T2)Vec256, \
Michael Zuckermand3436972016-10-30 14:54:05 +00009805 4, 5, 6, 7)); \
9806 Vec128 = (__m128##T1)(__builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009807 (__v4s##T2)Vec128, \
9808 (__v4s##T2)Vec128, \
9809 0, 1, -1, -1) \
9810 Operator \
Michael Zuckermand3436972016-10-30 14:54:05 +00009811 __builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009812 (__v4s##T2)Vec128, \
9813 (__v4s##T2)Vec128, \
Michael Zuckermand3436972016-10-30 14:54:05 +00009814 2, 3, -1, -1)); \
9815 Vec128 = (__m128##T1)(__builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009816 (__v4s##T2)Vec128, \
9817 (__v4s##T2)Vec128, \
9818 0, -1, -1, -1) \
9819 Operator \
Michael Zuckermand3436972016-10-30 14:54:05 +00009820 __builtin_shufflevector( \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009821 (__v4s##T2)Vec128, \
9822 (__v4s##T2)Vec128, \
Michael Zuckermand3436972016-10-30 14:54:05 +00009823 1, -1, -1, -1)); \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009824 return Vec128[0]; \
9825 })
9826
9827static __inline__ int __DEFAULT_FN_ATTRS
9828_mm512_reduce_add_epi32(__m512i __W) {
9829 _mm512_reduce_operator_32bit(__W, +, i, i);
9830}
9831
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009832static __inline__ int __DEFAULT_FN_ATTRS
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009833_mm512_reduce_mul_epi32(__m512i __W) {
9834 _mm512_reduce_operator_32bit(__W, *, i, i);
9835}
9836
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009837static __inline__ int __DEFAULT_FN_ATTRS
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009838_mm512_reduce_and_epi32(__m512i __W) {
9839 _mm512_reduce_operator_32bit(__W, &, i, i);
9840}
9841
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009842static __inline__ int __DEFAULT_FN_ATTRS
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009843_mm512_reduce_or_epi32(__m512i __W) {
9844 _mm512_reduce_operator_32bit(__W, |, i, i);
9845}
9846
9847static __inline__ float __DEFAULT_FN_ATTRS
9848_mm512_reduce_add_ps(__m512 __W) {
9849 _mm512_reduce_operator_32bit(__W, +, f, );
9850}
9851
9852static __inline__ float __DEFAULT_FN_ATTRS
9853_mm512_reduce_mul_ps(__m512 __W) {
9854 _mm512_reduce_operator_32bit(__W, *, f, );
9855}
9856
Craig Topper73d1d402018-05-30 22:33:21 +00009857/* Vec512 - Vector with size 512.
9858 * Vec512Neutral - All vector elements set to the identity element.
9859 * Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
9860 * Operator - Can be one of following: +,*,&,|
9861 * Mask - Intrinsic Mask
9862 * T2 - Can get 'i' for int and 'f' for float.
9863 * T1 - Can get 'i' for int and 'd' for double.
9864 * T3 - Can be Ps for packed single or d for d-word.
9865 */
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009866
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009867#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator, \
9868 Mask, T2, T1, T3) \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009869 __extension__({ \
9870 Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009871 (__mmask16)Mask, \
9872 (__v16s##T2)Vec512, \
9873 (__v16s##T2)Vec512Neutral); \
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009874 _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1); \
9875 })
9876
9877static __inline__ int __DEFAULT_FN_ATTRS
9878_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009879 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009880}
9881
9882static __inline__ int __DEFAULT_FN_ATTRS
9883_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009884 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009885}
9886
9887static __inline__ int __DEFAULT_FN_ATTRS
9888_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009889 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009890 i, i, d);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009891}
9892
9893static __inline__ int __DEFAULT_FN_ATTRS
9894_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009895 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009896}
9897
9898static __inline__ float __DEFAULT_FN_ATTRS
9899_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009900 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009901}
9902
9903static __inline__ float __DEFAULT_FN_ATTRS
9904_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
Michael Zuckermanedd99eb2016-10-28 15:16:03 +00009905 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009906}
Craig Topper9bed2e62018-05-23 06:31:36 +00009907#undef _mm512_reduce_operator_32bit
9908#undef _mm512_mask_reduce_operator_32bit
Michael Zuckermanfacb37c2016-10-25 07:56:04 +00009909
/* Uses the bisection method. At each step, the vector from the previous step
 * is split in half and the operation is performed on its two halves.
 * This takes log2(n) steps where n is the number of elements in the vector.
 * This macro uses only intrinsics from the AVX512F feature.
 *
 * Vec512 - Vector with size of 512.
 * IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
 *              _mm512_max_epi64
 * T1 - Can get 'i' for int and 'd' for double. [__m512{i|d}]
 * T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
 */
Craig Topperf99532f2018-05-26 18:57:41 +00009921
/* Folds the eight 64-bit lanes of Vec512 with _mm512_<IntrinName> in three
 * halving steps, then returns lane 0 from the enclosing function.
 *
 * Each step shuffles the lanes still in play into the low positions of two
 * temporaries and combines them; index -1 is used for the lanes that no
 * longer matter. Vec512 is overwritten at every step, so it must be a
 * modifiable lvalue, and because of the trailing `return` the macro can only
 * be expanded as the body of a function.
 */
#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) \
  __extension__({ \
    /* 8 -> 4 lanes: {0,1,2,3} against {4,5,6,7}. */ \
    Vec512 = _mm512_##IntrinName( \
        (__m512##T1)__builtin_shufflevector( \
            (__v8d##T2)Vec512, (__v8d##T2)Vec512, \
            0, 1, 2, 3, -1, -1, -1, -1), \
        (__m512##T1)__builtin_shufflevector( \
            (__v8d##T2)Vec512, (__v8d##T2)Vec512, \
            4, 5, 6, 7, -1, -1, -1, -1)); \
    /* 4 -> 2 lanes: {0,1} against {2,3}. */ \
    Vec512 = _mm512_##IntrinName( \
        (__m512##T1)__builtin_shufflevector( \
            (__v8d##T2)Vec512, (__v8d##T2)Vec512, \
            0, 1, -1, -1, -1, -1, -1, -1), \
        (__m512##T1)__builtin_shufflevector( \
            (__v8d##T2)Vec512, (__v8d##T2)Vec512, \
            2, 3, -1, -1, -1, -1, -1, -1)); \
    /* 2 -> 1 lane: {0} against {1}. */ \
    Vec512 = _mm512_##IntrinName( \
        (__m512##T1)__builtin_shufflevector( \
            (__v8d##T2)Vec512, (__v8d##T2)Vec512, \
            0, -1, -1, -1, -1, -1, -1, -1), \
        (__m512##T1)__builtin_shufflevector( \
            (__v8d##T2)Vec512, (__v8d##T2)Vec512, \
            1, -1, -1, -1, -1, -1, -1, -1)); \
    return Vec512[0]; \
  })
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009954
Simon Pilgrim0b37ffb2017-07-28 14:01:51 +00009955static __inline__ long long __DEFAULT_FN_ATTRS
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009956_mm512_reduce_max_epi64(__m512i __V) {
Craig Topperf99532f2018-05-26 18:57:41 +00009957 _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009958}
9959
9960static __inline__ unsigned long long __DEFAULT_FN_ATTRS
9961_mm512_reduce_max_epu64(__m512i __V) {
Craig Topperf99532f2018-05-26 18:57:41 +00009962 _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009963}
9964
Craig Topperf99532f2018-05-26 18:57:41 +00009965static __inline__ double __DEFAULT_FN_ATTRS
9966_mm512_reduce_max_pd(__m512d __V) {
9967 _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
9968}
9969
9970static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64
9971(__m512i __V) {
9972 _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
Michael Zuckerman25eb4202016-10-29 10:29:20 +00009973}
9974
9975static __inline__ unsigned long long __DEFAULT_FN_ATTRS
9976_mm512_reduce_min_epu64(__m512i __V) {
Craig Topperf99532f2018-05-26 18:57:41 +00009977 _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
Craig Toppere0915232018-05-26 18:55:24 +00009978}
9979
9980static __inline__ double __DEFAULT_FN_ATTRS
9981_mm512_reduce_min_pd(__m512d __V) {
Craig Topperf99532f2018-05-26 18:57:41 +00009982 _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
9983}
9984
/* Vec512 - Vector with size 512.
 * Vec512Neutral - A 512-bit vector with elements set to the identity element
 * Identity element: {max_epi,0x8000000000000000}
 *                   {max_epu,0x0000000000000000}
 *                   {max_pd, 0xFFF0000000000000}
 *                   {min_epi,0x7FFFFFFFFFFFFFFF}
 *                   {min_epu,0xFFFFFFFFFFFFFFFF}
 *                   {min_pd, 0x7FF0000000000000}
 *
 * IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
 *              _mm512_max_epi64
 * T1 - Can get 'i' for int and 'd' for double. [__m512{i|d}]
 * T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
 * T3 - Can get 'q' for q-word and 'pd' for packed double.
 *      [__builtin_ia32_select{q|pd}_512]
 * Mask - Intrinsic Mask
 */
Craig Topperf99532f2018-05-26 18:57:41 +000010002
/* Masked front end for _mm512_reduce_maxMin_64bit.
 *
 * Overwrites Vec512 with __builtin_ia32_select<T3>_512(Mask, Vec512,
 * Vec512Neutral) and then expands _mm512_reduce_maxMin_64bit on the result,
 * so Vec512 must be a modifiable lvalue and the expansion ends in a `return`.
 * Arguments used as expressions are parenthesized so each cast applies to the
 * whole argument instead of only its first operand.
 */
#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask) \
  __extension__({ \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \
        (__mmask8)(Mask), \
        (__v8d##T2)(Vec512), \
        (__v8d##T2)(Vec512Neutral)); \
    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2); \
  })
10012
10013static __inline__ long long __DEFAULT_FN_ATTRS
10014_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
10015 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
10016 max_epi64, i, i, q, __M);
10017}
10018
10019static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10020_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
10021 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
10022 max_epu64, i, i, q, __M);
Craig Toppere0915232018-05-26 18:55:24 +000010023}
10024
10025static __inline__ double __DEFAULT_FN_ATTRS
10026_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
Craig Topperf99532f2018-05-26 18:57:41 +000010027 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(-__builtin_inf()),
10028 max_pd, d, f, pd, __M);
10029}
10030
10031static __inline__ long long __DEFAULT_FN_ATTRS
10032_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
10033 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
10034 min_epi64, i, i, q, __M);
10035}
10036
10037static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10038_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
10039 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
10040 min_epu64, i, i, q, __M);
Craig Toppere0915232018-05-26 18:55:24 +000010041}
10042
10043static __inline__ double __DEFAULT_FN_ATTRS
10044_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
Craig Topperf99532f2018-05-26 18:57:41 +000010045 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
10046 min_pd, d, f, pd, __M);
Craig Toppere0915232018-05-26 18:55:24 +000010047}
Craig Topperf99532f2018-05-26 18:57:41 +000010048#undef _mm512_reduce_maxMin_64bit
10049#undef _mm512_mask_reduce_maxMin_64bit
Craig Toppere0915232018-05-26 18:55:24 +000010050
/* Vec512 - Vector with size 512.
 * IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
 *              _mm512_max_epi32
 * T1 - Can get 'i' for int and empty for float. [__m512{i|}]
 * T2 - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
 */
Craig Toppere0915232018-05-26 18:55:24 +000010057
/* Folds the sixteen 32-bit lanes of Vec512 with _mm512_<IntrinName> in four
 * halving steps, then returns lane 0 from the enclosing function.
 *
 * Each step shuffles the lanes still in play into the low positions of two
 * temporaries and combines them; index -1 is used for the lanes that no
 * longer matter. Vec512 is overwritten at every step, so it must be a
 * modifiable lvalue, and because of the trailing `return` the macro can only
 * be expanded as the body of a function.
 */
#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) \
  __extension__({ \
    /* 16 -> 8 lanes: {0..7} against {8..15}. */ \
    Vec512 = _mm512_##IntrinName( \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512, \
            0, 1, 2, 3, 4, 5, 6, 7, \
            -1, -1, -1, -1, -1, -1, -1, -1), \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512, \
            8, 9, 10, 11, 12, 13, 14, 15, \
            -1, -1, -1, -1, -1, -1, -1, -1)); \
    /* 8 -> 4 lanes: {0..3} against {4..7}. */ \
    Vec512 = _mm512_##IntrinName( \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512, \
            0, 1, 2, 3, -1, -1, -1, -1, \
            -1, -1, -1, -1, -1, -1, -1, -1), \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512, \
            4, 5, 6, 7, -1, -1, -1, -1, \
            -1, -1, -1, -1, -1, -1, -1, -1)); \
    /* 4 -> 2 lanes: {0,1} against {2,3}. */ \
    Vec512 = _mm512_##IntrinName( \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512, \
            0, 1, -1, -1, -1, -1, -1, -1, \
            -1, -1, -1, -1, -1, -1, -1, -1), \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512, \
            2, 3, -1, -1, -1, -1, -1, -1, \
            -1, -1, -1, -1, -1, -1, -1, -1)); \
    /* 2 -> 1 lane: {0} against {1}. */ \
    Vec512 = _mm512_##IntrinName( \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512, \
            0, -1, -1, -1, -1, -1, -1, -1, \
            -1, -1, -1, -1, -1, -1, -1, -1), \
        (__m512##T1)__builtin_shufflevector( \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512, \
            1, -1, -1, -1, -1, -1, -1, -1, \
            -1, -1, -1, -1, -1, -1, -1, -1)); \
    return Vec512[0]; \
  })
10105
10106static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) {
10107 _mm512_reduce_maxMin_32bit(a, max_epi32, i, i);
Craig Toppere0915232018-05-26 18:55:24 +000010108}
10109
Craig Topperf99532f2018-05-26 18:57:41 +000010110static __inline__ unsigned int __DEFAULT_FN_ATTRS
10111_mm512_reduce_max_epu32(__m512i a) {
10112 _mm512_reduce_maxMin_32bit(a, max_epu32, i, i);
10113}
10114
10115static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) {
10116 _mm512_reduce_maxMin_32bit(a, max_ps, , f);
10117}
10118
10119static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) {
10120 _mm512_reduce_maxMin_32bit(a, min_epi32, i, i);
10121}
10122
10123static __inline__ unsigned int __DEFAULT_FN_ATTRS
10124_mm512_reduce_min_epu32(__m512i a) {
10125 _mm512_reduce_maxMin_32bit(a, min_epu32, i, i);
10126}
10127
10128static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) {
10129 _mm512_reduce_maxMin_32bit(a, min_ps, , f);
10130}
10131
/* Vec512 - Vector with size 512.
 * Vec512Neutral - A 512-bit vector with elements set to the identity element
 * Identity element: {max_epi,0x80000000}
 *                   {max_epu,0x00000000}
 *                   {max_ps, 0xFF800000}
 *                   {min_epi,0x7FFFFFFF}
 *                   {min_epu,0xFFFFFFFF}
 *                   {min_ps, 0x7F800000}
 *
 * IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
 *              _mm512_max_epi32
 * T1 - Can get 'i' for int and empty for float. [__m512{i|}]
 * T2 - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
 * T3 - Can get 'd' for d-word and 'ps' for packed single.
 *      [__builtin_ia32_select{d|ps}_512]
 * Mask - Intrinsic Mask
 */
Craig Topperf99532f2018-05-26 18:57:41 +000010149
/* Masked front end for _mm512_reduce_maxMin_32bit.
 *
 * Overwrites Vec512 with __builtin_ia32_select<T3>_512(Mask, Vec512,
 * Vec512Neutral) and then expands _mm512_reduce_maxMin_32bit on the result,
 * so Vec512 must be a modifiable lvalue and the expansion ends in a `return`.
 * Arguments used as expressions are parenthesized so each cast applies to the
 * whole argument instead of only its first operand.
 */
#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask) \
  __extension__({ \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \
        (__mmask16)(Mask), \
        (__v16s##T2)(Vec512), \
        (__v16s##T2)(Vec512Neutral)); \
    _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2); \
  })
10159
10160static __inline__ int __DEFAULT_FN_ATTRS
10161_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
10162 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
10163 i, i, d, __M);
10164}
10165
10166static __inline__ unsigned int __DEFAULT_FN_ATTRS
10167_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
10168 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
10169 i, i, d, __M);
Craig Toppere0915232018-05-26 18:55:24 +000010170}
10171
10172static __inline__ float __DEFAULT_FN_ATTRS
10173_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
Craig Topperf99532f2018-05-26 18:57:41 +000010174 _mm512_mask_reduce_maxMin_32bit(__V,_mm512_set1_ps(-__builtin_inff()), max_ps, , f,
10175 ps, __M);
10176}
10177
10178static __inline__ int __DEFAULT_FN_ATTRS
10179_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
10180 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
10181 i, i, d, __M);
10182}
10183
10184static __inline__ unsigned int __DEFAULT_FN_ATTRS
10185_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
10186 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
10187 i, i, d, __M);
Michael Zuckerman25eb4202016-10-29 10:29:20 +000010188}
10189
10190static __inline__ float __DEFAULT_FN_ATTRS
10191_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
Craig Topperf99532f2018-05-26 18:57:41 +000010192 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f,
10193 ps, __M);
Michael Zuckerman25eb4202016-10-29 10:29:20 +000010194}
Craig Topperf99532f2018-05-26 18:57:41 +000010195#undef _mm512_reduce_maxMin_32bit
10196#undef _mm512_mask_reduce_maxMin_32bit
Michael Zuckerman25eb4202016-10-29 10:29:20 +000010197
Michael Kupersteine45af542015-06-30 13:36:19 +000010198#undef __DEFAULT_FN_ATTRS
Eric Christopher4d1851682015-06-17 07:09:20 +000010199
Craig Topper73d1d402018-05-30 22:33:21 +000010200#endif /* __AVX512FINTRIN_H */