/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
15
/* Internal 64-byte vector views, one per element type (signed/FP). */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types, declared with 64-byte alignment. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* Same element layouts, declared with 1-byte alignment. */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Lane masks: __mmask8 is used with 8-lane (64-bit element) operations,
 * __mmask16 with 16-lane (32-bit element) operations. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;

/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04

/* Constants for integer comparison predicates.
 * Values are spelled out explicitly; they are the same ones the implicit
 * numbering produced (0..6). _MM_CMPINT_GE and _MM_CMPINT_GT are macro
 * aliases for the NLT and NLE enumerators. */
typedef enum {
  _MM_CMPINT_EQ     = 0, /* Equal */
  _MM_CMPINT_LT     = 1, /* Less than */
  _MM_CMPINT_LE     = 2, /* Less than or Equal */
  _MM_CMPINT_UNUSED = 3,
  _MM_CMPINT_NE     = 4, /* Not Equal */
  _MM_CMPINT_NLT    = 5, /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
  _MM_CMPINT_NLE    = 6  /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;

/* Four-element permutation selectors.
 * Each letter encodes a 2-bit index (A = 0, B = 1, C = 2, D = 3); the
 * leftmost letter occupies bits 7:6 and the rightmost bits 1:0, so the
 * enumerators cover 0x00..0xFF. One row below = one three-letter prefix. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;

/* Mantissa normalization intervals. Values are written out explicitly and
 * match the original implicit numbering (0..3). */
typedef enum
{
  _MM_MANT_NORM_1_2     = 0, /* interval [1, 2) */
  _MM_MANT_NORM_p5_2    = 1, /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1    = 2, /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3  /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Mantissa sign-control selectors. Values are written out explicitly and
 * match the original implicit numbering (0..2). */
typedef enum
{
  _MM_MANT_SIGN_src  = 0, /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1, /* sign = 0 */
  _MM_MANT_SIGN_nan  = 2  /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;

/* Define the default attributes for the functions in this file.
 * All three force inlining, suppress debug info and target "avx512f";
 * the 512/128 variants additionally declare a minimum vector width. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

/* Create vectors with repeated elements */

Logan Chien55afb0a2018-10-15 10:42:14 +0800172static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800173_mm512_setzero_si512(void)
174{
Logan Chien55afb0a2018-10-15 10:42:14 +0800175 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
Logan Chien2833ffb2018-10-09 10:03:24 +0800176}
177
178#define _mm512_setzero_epi32 _mm512_setzero_si512
179
Logan Chien55afb0a2018-10-15 10:42:14 +0800180static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800181_mm512_undefined_pd(void)
182{
183 return (__m512d)__builtin_ia32_undef512();
184}
185
Logan Chien55afb0a2018-10-15 10:42:14 +0800186static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800187_mm512_undefined(void)
188{
189 return (__m512)__builtin_ia32_undef512();
190}
191
Logan Chien55afb0a2018-10-15 10:42:14 +0800192static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800193_mm512_undefined_ps(void)
194{
195 return (__m512)__builtin_ia32_undef512();
196}
197
Logan Chien55afb0a2018-10-15 10:42:14 +0800198static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800199_mm512_undefined_epi32(void)
200{
201 return (__m512i)__builtin_ia32_undef512();
202}
203
Logan Chien55afb0a2018-10-15 10:42:14 +0800204static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800205_mm512_broadcastd_epi32 (__m128i __A)
206{
Logan Chien55afb0a2018-10-15 10:42:14 +0800207 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
Logan Chien2833ffb2018-10-09 10:03:24 +0800208 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
209}
210
Logan Chien55afb0a2018-10-15 10:42:14 +0800211static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800212_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
213{
214 return (__m512i)__builtin_ia32_selectd_512(__M,
215 (__v16si) _mm512_broadcastd_epi32(__A),
216 (__v16si) __O);
217}
218
Logan Chien55afb0a2018-10-15 10:42:14 +0800219static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800220_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
221{
222 return (__m512i)__builtin_ia32_selectd_512(__M,
223 (__v16si) _mm512_broadcastd_epi32(__A),
224 (__v16si) _mm512_setzero_si512());
225}
226
Logan Chien55afb0a2018-10-15 10:42:14 +0800227static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800228_mm512_broadcastq_epi64 (__m128i __A)
229{
Logan Chien55afb0a2018-10-15 10:42:14 +0800230 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
Logan Chien2833ffb2018-10-09 10:03:24 +0800231 0, 0, 0, 0, 0, 0, 0, 0);
232}
233
Logan Chien55afb0a2018-10-15 10:42:14 +0800234static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800235_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
236{
237 return (__m512i)__builtin_ia32_selectq_512(__M,
238 (__v8di) _mm512_broadcastq_epi64(__A),
239 (__v8di) __O);
240
241}
242
Logan Chien55afb0a2018-10-15 10:42:14 +0800243static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800244_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
245{
246 return (__m512i)__builtin_ia32_selectq_512(__M,
247 (__v8di) _mm512_broadcastq_epi64(__A),
248 (__v8di) _mm512_setzero_si512());
249}
250
Logan Chien2833ffb2018-10-09 10:03:24 +0800251
Logan Chien55afb0a2018-10-15 10:42:14 +0800252static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800253_mm512_setzero_ps(void)
254{
Logan Chien55afb0a2018-10-15 10:42:14 +0800255 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
256 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
Logan Chien2833ffb2018-10-09 10:03:24 +0800257}
258
259#define _mm512_setzero _mm512_setzero_ps
260
Logan Chien55afb0a2018-10-15 10:42:14 +0800261static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800262_mm512_setzero_pd(void)
263{
Logan Chien55afb0a2018-10-15 10:42:14 +0800264 return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
Logan Chien2833ffb2018-10-09 10:03:24 +0800265}
266
Logan Chien55afb0a2018-10-15 10:42:14 +0800267static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800268_mm512_set1_ps(float __w)
269{
Logan Chien55afb0a2018-10-15 10:42:14 +0800270 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
271 __w, __w, __w, __w, __w, __w, __w, __w };
Logan Chien2833ffb2018-10-09 10:03:24 +0800272}
273
Logan Chien55afb0a2018-10-15 10:42:14 +0800274static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800275_mm512_set1_pd(double __w)
276{
Logan Chien55afb0a2018-10-15 10:42:14 +0800277 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
Logan Chien2833ffb2018-10-09 10:03:24 +0800278}
279
Logan Chien55afb0a2018-10-15 10:42:14 +0800280static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800281_mm512_set1_epi8(char __w)
282{
Logan Chien55afb0a2018-10-15 10:42:14 +0800283 return __extension__ (__m512i)(__v64qi){
284 __w, __w, __w, __w, __w, __w, __w, __w,
285 __w, __w, __w, __w, __w, __w, __w, __w,
286 __w, __w, __w, __w, __w, __w, __w, __w,
287 __w, __w, __w, __w, __w, __w, __w, __w,
288 __w, __w, __w, __w, __w, __w, __w, __w,
289 __w, __w, __w, __w, __w, __w, __w, __w,
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w };
Logan Chien2833ffb2018-10-09 10:03:24 +0800292}
293
Logan Chien55afb0a2018-10-15 10:42:14 +0800294static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800295_mm512_set1_epi16(short __w)
296{
Logan Chien55afb0a2018-10-15 10:42:14 +0800297 return __extension__ (__m512i)(__v32hi){
298 __w, __w, __w, __w, __w, __w, __w, __w,
299 __w, __w, __w, __w, __w, __w, __w, __w,
300 __w, __w, __w, __w, __w, __w, __w, __w,
301 __w, __w, __w, __w, __w, __w, __w, __w };
Logan Chien2833ffb2018-10-09 10:03:24 +0800302}
303
Logan Chien55afb0a2018-10-15 10:42:14 +0800304static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800305_mm512_set1_epi32(int __s)
306{
Logan Chien55afb0a2018-10-15 10:42:14 +0800307 return __extension__ (__m512i)(__v16si){
308 __s, __s, __s, __s, __s, __s, __s, __s,
309 __s, __s, __s, __s, __s, __s, __s, __s };
Logan Chien2833ffb2018-10-09 10:03:24 +0800310}
311
Logan Chien55afb0a2018-10-15 10:42:14 +0800312static __inline __m512i __DEFAULT_FN_ATTRS512
313_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
314{
315 return (__m512i)__builtin_ia32_selectd_512(__M,
316 (__v16si)_mm512_set1_epi32(__A),
317 (__v16si)_mm512_setzero_si512());
318}
319
320static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800321_mm512_set1_epi64(long long __d)
322{
Logan Chien55afb0a2018-10-15 10:42:14 +0800323 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
Logan Chien2833ffb2018-10-09 10:03:24 +0800324}
325
Logan Chien55afb0a2018-10-15 10:42:14 +0800326static __inline __m512i __DEFAULT_FN_ATTRS512
327_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
328{
329 return (__m512i)__builtin_ia32_selectq_512(__M,
330 (__v8di)_mm512_set1_epi64(__A),
331 (__v8di)_mm512_setzero_si512());
332}
333
334static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800335_mm512_broadcastss_ps(__m128 __A)
336{
Logan Chien55afb0a2018-10-15 10:42:14 +0800337 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
Logan Chien2833ffb2018-10-09 10:03:24 +0800338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
339}
340
Logan Chien55afb0a2018-10-15 10:42:14 +0800341static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800342_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
343{
Logan Chien55afb0a2018-10-15 10:42:14 +0800344 return __extension__ (__m512i)(__v16si)
Logan Chien2833ffb2018-10-09 10:03:24 +0800345 { __D, __C, __B, __A, __D, __C, __B, __A,
346 __D, __C, __B, __A, __D, __C, __B, __A };
347}
348
Logan Chien55afb0a2018-10-15 10:42:14 +0800349static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800350_mm512_set4_epi64 (long long __A, long long __B, long long __C,
351 long long __D)
352{
Logan Chien55afb0a2018-10-15 10:42:14 +0800353 return __extension__ (__m512i) (__v8di)
Logan Chien2833ffb2018-10-09 10:03:24 +0800354 { __D, __C, __B, __A, __D, __C, __B, __A };
355}
356
Logan Chien55afb0a2018-10-15 10:42:14 +0800357static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800358_mm512_set4_pd (double __A, double __B, double __C, double __D)
359{
Logan Chien55afb0a2018-10-15 10:42:14 +0800360 return __extension__ (__m512d)
Logan Chien2833ffb2018-10-09 10:03:24 +0800361 { __D, __C, __B, __A, __D, __C, __B, __A };
362}
363
Logan Chien55afb0a2018-10-15 10:42:14 +0800364static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800365_mm512_set4_ps (float __A, float __B, float __C, float __D)
366{
Logan Chien55afb0a2018-10-15 10:42:14 +0800367 return __extension__ (__m512)
Logan Chien2833ffb2018-10-09 10:03:24 +0800368 { __D, __C, __B, __A, __D, __C, __B, __A,
369 __D, __C, __B, __A, __D, __C, __B, __A };
370}
371
/* Reversed-argument forms of the _mm512_set4_* functions: the arguments are
 * forwarded in opposite order, so e0 ends up in the lowest lane of each
 * group of four and e3 in the highest. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                  \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                  \
  _mm512_set4_ps((e3),(e2),(e1),(e0))

Logan Chien55afb0a2018-10-15 10:42:14 +0800384static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800385_mm512_broadcastsd_pd(__m128d __A)
386{
Logan Chien55afb0a2018-10-15 10:42:14 +0800387 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
Logan Chien2833ffb2018-10-09 10:03:24 +0800388 0, 0, 0, 0, 0, 0, 0, 0);
389}
390
/* Cast between vector types */

Logan Chien55afb0a2018-10-15 10:42:14 +0800393static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800394_mm512_castpd256_pd512(__m256d __a)
395{
396 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
397}
398
Logan Chien55afb0a2018-10-15 10:42:14 +0800399static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800400_mm512_castps256_ps512(__m256 __a)
401{
402 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
403 -1, -1, -1, -1, -1, -1, -1, -1);
404}
405
Logan Chien55afb0a2018-10-15 10:42:14 +0800406static __inline __m128d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800407_mm512_castpd512_pd128(__m512d __a)
408{
409 return __builtin_shufflevector(__a, __a, 0, 1);
410}
411
Logan Chien55afb0a2018-10-15 10:42:14 +0800412static __inline __m256d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800413_mm512_castpd512_pd256 (__m512d __A)
414{
415 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
416}
417
Logan Chien55afb0a2018-10-15 10:42:14 +0800418static __inline __m128 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800419_mm512_castps512_ps128(__m512 __a)
420{
421 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
422}
423
Logan Chien55afb0a2018-10-15 10:42:14 +0800424static __inline __m256 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800425_mm512_castps512_ps256 (__m512 __A)
426{
427 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
428}
429
Logan Chien55afb0a2018-10-15 10:42:14 +0800430static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800431_mm512_castpd_ps (__m512d __A)
432{
433 return (__m512) (__A);
434}
435
Logan Chien55afb0a2018-10-15 10:42:14 +0800436static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800437_mm512_castpd_si512 (__m512d __A)
438{
439 return (__m512i) (__A);
440}
441
Logan Chien55afb0a2018-10-15 10:42:14 +0800442static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800443_mm512_castpd128_pd512 (__m128d __A)
444{
445 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
446}
447
Logan Chien55afb0a2018-10-15 10:42:14 +0800448static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800449_mm512_castps_pd (__m512 __A)
450{
451 return (__m512d) (__A);
452}
453
Logan Chien55afb0a2018-10-15 10:42:14 +0800454static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800455_mm512_castps_si512 (__m512 __A)
456{
457 return (__m512i) (__A);
458}
459
Logan Chien55afb0a2018-10-15 10:42:14 +0800460static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800461_mm512_castps128_ps512 (__m128 __A)
462{
463 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
464}
465
Logan Chien55afb0a2018-10-15 10:42:14 +0800466static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800467_mm512_castsi128_si512 (__m128i __A)
468{
469 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
470}
471
Logan Chien55afb0a2018-10-15 10:42:14 +0800472static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800473_mm512_castsi256_si512 (__m256i __A)
474{
475 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
476}
477
Logan Chien55afb0a2018-10-15 10:42:14 +0800478static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800479_mm512_castsi512_ps (__m512i __A)
480{
481 return (__m512) (__A);
482}
483
Logan Chien55afb0a2018-10-15 10:42:14 +0800484static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800485_mm512_castsi512_pd (__m512i __A)
486{
487 return (__m512d) (__A);
488}
489
Logan Chien55afb0a2018-10-15 10:42:14 +0800490static __inline __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800491_mm512_castsi512_si128 (__m512i __A)
492{
493 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
494}
495
Logan Chien55afb0a2018-10-15 10:42:14 +0800496static __inline __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800497_mm512_castsi512_si256 (__m512i __A)
498{
499 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
500}
501
Logan Chienb0c84022018-11-09 16:19:54 +0800502static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Logan Chien55afb0a2018-10-15 10:42:14 +0800503_mm512_int2mask(int __a)
504{
505 return (__mmask16)__a;
506}
507
Logan Chienb0c84022018-11-09 16:19:54 +0800508static __inline__ int __DEFAULT_FN_ATTRS
Logan Chien55afb0a2018-10-15 10:42:14 +0800509_mm512_mask2int(__mmask16 __a)
510{
511 return (int)__a;
512}
513
514/// Constructs a 512-bit floating-point vector of [8 x double] from a
515/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
516/// contain the value of the source vector. The upper 384 bits are set
517/// to zero.
518///
519/// \headerfile <x86intrin.h>
520///
521/// This intrinsic has no corresponding instruction.
522///
523/// \param __a
524/// A 128-bit vector of [2 x double].
525/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
526/// contain the value of the parameter. The upper 384 bits are set to zero.
527static __inline __m512d __DEFAULT_FN_ATTRS512
528_mm512_zextpd128_pd512(__m128d __a)
529{
530 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
531}
532
533/// Constructs a 512-bit floating-point vector of [8 x double] from a
534/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
535/// contain the value of the source vector. The upper 256 bits are set
536/// to zero.
537///
538/// \headerfile <x86intrin.h>
539///
540/// This intrinsic has no corresponding instruction.
541///
542/// \param __a
543/// A 256-bit vector of [4 x double].
544/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
545/// contain the value of the parameter. The upper 256 bits are set to zero.
546static __inline __m512d __DEFAULT_FN_ATTRS512
547_mm512_zextpd256_pd512(__m256d __a)
548{
549 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
550}
551
552/// Constructs a 512-bit floating-point vector of [16 x float] from a
553/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
554/// the value of the source vector. The upper 384 bits are set to zero.
555///
556/// \headerfile <x86intrin.h>
557///
558/// This intrinsic has no corresponding instruction.
559///
560/// \param __a
561/// A 128-bit vector of [4 x float].
562/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
563/// contain the value of the parameter. The upper 384 bits are set to zero.
564static __inline __m512 __DEFAULT_FN_ATTRS512
565_mm512_zextps128_ps512(__m128 __a)
566{
567 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
568}
569
570/// Constructs a 512-bit floating-point vector of [16 x float] from a
571/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
572/// the value of the source vector. The upper 256 bits are set to zero.
573///
574/// \headerfile <x86intrin.h>
575///
576/// This intrinsic has no corresponding instruction.
577///
578/// \param __a
579/// A 256-bit vector of [8 x float].
580/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
581/// contain the value of the parameter. The upper 256 bits are set to zero.
582static __inline __m512 __DEFAULT_FN_ATTRS512
583_mm512_zextps256_ps512(__m256 __a)
584{
585 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
586}
587
588/// Constructs a 512-bit integer vector from a 128-bit integer vector.
589/// The lower 128 bits contain the value of the source vector. The upper
590/// 384 bits are set to zero.
591///
592/// \headerfile <x86intrin.h>
593///
594/// This intrinsic has no corresponding instruction.
595///
596/// \param __a
597/// A 128-bit integer vector.
598/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
599/// the parameter. The upper 384 bits are set to zero.
600static __inline __m512i __DEFAULT_FN_ATTRS512
601_mm512_zextsi128_si512(__m128i __a)
602{
603 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
604}
605
606/// Constructs a 512-bit integer vector from a 256-bit integer vector.
607/// The lower 256 bits contain the value of the source vector. The upper
608/// 256 bits are set to zero.
609///
610/// \headerfile <x86intrin.h>
611///
612/// This intrinsic has no corresponding instruction.
613///
614/// \param __a
615/// A 256-bit integer vector.
616/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
617/// the parameter. The upper 256 bits are set to zero.
618static __inline __m512i __DEFAULT_FN_ATTRS512
619_mm512_zextsi256_si512(__m256i __a)
620{
621 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
622}
623
/* Bitwise operators */
Logan Chien55afb0a2018-10-15 10:42:14 +0800625static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800626_mm512_and_epi32(__m512i __a, __m512i __b)
627{
628 return (__m512i)((__v16su)__a & (__v16su)__b);
629}
630
Logan Chien55afb0a2018-10-15 10:42:14 +0800631static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800632_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
633{
634 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
635 (__v16si) _mm512_and_epi32(__a, __b),
636 (__v16si) __src);
637}
638
Logan Chien55afb0a2018-10-15 10:42:14 +0800639static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800640_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
641{
642 return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
643 __k, __a, __b);
644}
645
Logan Chien55afb0a2018-10-15 10:42:14 +0800646static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800647_mm512_and_epi64(__m512i __a, __m512i __b)
648{
649 return (__m512i)((__v8du)__a & (__v8du)__b);
650}
651
Logan Chien55afb0a2018-10-15 10:42:14 +0800652static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800653_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
654{
655 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
656 (__v8di) _mm512_and_epi64(__a, __b),
657 (__v8di) __src);
658}
659
Logan Chien55afb0a2018-10-15 10:42:14 +0800660static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800661_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
662{
663 return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
664 __k, __a, __b);
665}
666
Logan Chien55afb0a2018-10-15 10:42:14 +0800667static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800668_mm512_andnot_si512 (__m512i __A, __m512i __B)
669{
Logan Chien55afb0a2018-10-15 10:42:14 +0800670 return (__m512i)(~(__v8du)__A & (__v8du)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800671}
672
Logan Chien55afb0a2018-10-15 10:42:14 +0800673static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800674_mm512_andnot_epi32 (__m512i __A, __m512i __B)
675{
Logan Chien55afb0a2018-10-15 10:42:14 +0800676 return (__m512i)(~(__v16su)__A & (__v16su)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800677}
678
Logan Chien55afb0a2018-10-15 10:42:14 +0800679static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800680_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
681{
682 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
683 (__v16si)_mm512_andnot_epi32(__A, __B),
684 (__v16si)__W);
685}
686
Logan Chien55afb0a2018-10-15 10:42:14 +0800687static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800688_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
689{
690 return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
691 __U, __A, __B);
692}
693
Logan Chien55afb0a2018-10-15 10:42:14 +0800694static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800695_mm512_andnot_epi64(__m512i __A, __m512i __B)
696{
Logan Chien55afb0a2018-10-15 10:42:14 +0800697 return (__m512i)(~(__v8du)__A & (__v8du)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800698}
699
Logan Chien55afb0a2018-10-15 10:42:14 +0800700static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800701_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702{
703 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
704 (__v8di)_mm512_andnot_epi64(__A, __B),
705 (__v8di)__W);
706}
707
Logan Chien55afb0a2018-10-15 10:42:14 +0800708static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800709_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
710{
711 return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
712 __U, __A, __B);
713}
714
Logan Chien55afb0a2018-10-15 10:42:14 +0800715static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800716_mm512_or_epi32(__m512i __a, __m512i __b)
717{
718 return (__m512i)((__v16su)__a | (__v16su)__b);
719}
720
Logan Chien55afb0a2018-10-15 10:42:14 +0800721static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800722_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
723{
724 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
725 (__v16si)_mm512_or_epi32(__a, __b),
726 (__v16si)__src);
727}
728
Logan Chien55afb0a2018-10-15 10:42:14 +0800729static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800730_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
731{
732 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
733}
734
Logan Chien55afb0a2018-10-15 10:42:14 +0800735static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800736_mm512_or_epi64(__m512i __a, __m512i __b)
737{
738 return (__m512i)((__v8du)__a | (__v8du)__b);
739}
740
Logan Chien55afb0a2018-10-15 10:42:14 +0800741static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800742_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
743{
744 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
745 (__v8di)_mm512_or_epi64(__a, __b),
746 (__v8di)__src);
747}
748
Logan Chien55afb0a2018-10-15 10:42:14 +0800749static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800750_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
751{
752 return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
753}
754
Logan Chien55afb0a2018-10-15 10:42:14 +0800755static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800756_mm512_xor_epi32(__m512i __a, __m512i __b)
757{
758 return (__m512i)((__v16su)__a ^ (__v16su)__b);
759}
760
Logan Chien55afb0a2018-10-15 10:42:14 +0800761static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800762_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
763{
764 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
765 (__v16si)_mm512_xor_epi32(__a, __b),
766 (__v16si)__src);
767}
768
Logan Chien55afb0a2018-10-15 10:42:14 +0800769static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800770_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
771{
772 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
773}
774
Logan Chien55afb0a2018-10-15 10:42:14 +0800775static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800776_mm512_xor_epi64(__m512i __a, __m512i __b)
777{
778 return (__m512i)((__v8du)__a ^ (__v8du)__b);
779}
780
Logan Chien55afb0a2018-10-15 10:42:14 +0800781static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800782_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
783{
784 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
785 (__v8di)_mm512_xor_epi64(__a, __b),
786 (__v8di)__src);
787}
788
Logan Chien55afb0a2018-10-15 10:42:14 +0800789static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800790_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
791{
792 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
793}
794
Logan Chien55afb0a2018-10-15 10:42:14 +0800795static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800796_mm512_and_si512(__m512i __a, __m512i __b)
797{
798 return (__m512i)((__v8du)__a & (__v8du)__b);
799}
800
Logan Chien55afb0a2018-10-15 10:42:14 +0800801static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800802_mm512_or_si512(__m512i __a, __m512i __b)
803{
804 return (__m512i)((__v8du)__a | (__v8du)__b);
805}
806
Logan Chien55afb0a2018-10-15 10:42:14 +0800807static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800808_mm512_xor_si512(__m512i __a, __m512i __b)
809{
810 return (__m512i)((__v8du)__a ^ (__v8du)__b);
811}
812
/* Arithmetic */
814
Logan Chien55afb0a2018-10-15 10:42:14 +0800815static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800816_mm512_add_pd(__m512d __a, __m512d __b)
817{
818 return (__m512d)((__v8df)__a + (__v8df)__b);
819}
820
Logan Chien55afb0a2018-10-15 10:42:14 +0800821static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800822_mm512_add_ps(__m512 __a, __m512 __b)
823{
824 return (__m512)((__v16sf)__a + (__v16sf)__b);
825}
826
Logan Chien55afb0a2018-10-15 10:42:14 +0800827static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800828_mm512_mul_pd(__m512d __a, __m512d __b)
829{
830 return (__m512d)((__v8df)__a * (__v8df)__b);
831}
832
Logan Chien55afb0a2018-10-15 10:42:14 +0800833static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800834_mm512_mul_ps(__m512 __a, __m512 __b)
835{
836 return (__m512)((__v16sf)__a * (__v16sf)__b);
837}
838
Logan Chien55afb0a2018-10-15 10:42:14 +0800839static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800840_mm512_sub_pd(__m512d __a, __m512d __b)
841{
842 return (__m512d)((__v8df)__a - (__v8df)__b);
843}
844
Logan Chien55afb0a2018-10-15 10:42:14 +0800845static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800846_mm512_sub_ps(__m512 __a, __m512 __b)
847{
848 return (__m512)((__v16sf)__a - (__v16sf)__b);
849}
850
Logan Chien55afb0a2018-10-15 10:42:14 +0800851static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800852_mm512_add_epi64 (__m512i __A, __m512i __B)
853{
854 return (__m512i) ((__v8du) __A + (__v8du) __B);
855}
856
Logan Chien55afb0a2018-10-15 10:42:14 +0800857static __inline__ __m512i __DEFAULT_FN_ATTRS512
858_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800859{
Logan Chien55afb0a2018-10-15 10:42:14 +0800860 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
861 (__v8di)_mm512_add_epi64(__A, __B),
862 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800863}
864
Logan Chien55afb0a2018-10-15 10:42:14 +0800865static __inline__ __m512i __DEFAULT_FN_ATTRS512
866_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800867{
Logan Chien55afb0a2018-10-15 10:42:14 +0800868 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
869 (__v8di)_mm512_add_epi64(__A, __B),
870 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800871}
872
Logan Chien55afb0a2018-10-15 10:42:14 +0800873static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800874_mm512_sub_epi64 (__m512i __A, __m512i __B)
875{
876 return (__m512i) ((__v8du) __A - (__v8du) __B);
877}
878
Logan Chien55afb0a2018-10-15 10:42:14 +0800879static __inline__ __m512i __DEFAULT_FN_ATTRS512
880_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800881{
Logan Chien55afb0a2018-10-15 10:42:14 +0800882 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
883 (__v8di)_mm512_sub_epi64(__A, __B),
884 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800885}
886
Logan Chien55afb0a2018-10-15 10:42:14 +0800887static __inline__ __m512i __DEFAULT_FN_ATTRS512
888_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800889{
Logan Chien55afb0a2018-10-15 10:42:14 +0800890 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
891 (__v8di)_mm512_sub_epi64(__A, __B),
892 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800893}
894
Logan Chien55afb0a2018-10-15 10:42:14 +0800895static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800896_mm512_add_epi32 (__m512i __A, __m512i __B)
897{
898 return (__m512i) ((__v16su) __A + (__v16su) __B);
899}
900
Logan Chien55afb0a2018-10-15 10:42:14 +0800901static __inline__ __m512i __DEFAULT_FN_ATTRS512
902_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800903{
Logan Chien55afb0a2018-10-15 10:42:14 +0800904 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
905 (__v16si)_mm512_add_epi32(__A, __B),
906 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800907}
908
Logan Chien55afb0a2018-10-15 10:42:14 +0800909static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800910_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
911{
Logan Chien55afb0a2018-10-15 10:42:14 +0800912 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
913 (__v16si)_mm512_add_epi32(__A, __B),
914 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800915}
916
Logan Chien55afb0a2018-10-15 10:42:14 +0800917static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800918_mm512_sub_epi32 (__m512i __A, __m512i __B)
919{
920 return (__m512i) ((__v16su) __A - (__v16su) __B);
921}
922
Logan Chien55afb0a2018-10-15 10:42:14 +0800923static __inline__ __m512i __DEFAULT_FN_ATTRS512
924_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800925{
Logan Chien55afb0a2018-10-15 10:42:14 +0800926 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
927 (__v16si)_mm512_sub_epi32(__A, __B),
928 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800929}
930
Logan Chien55afb0a2018-10-15 10:42:14 +0800931static __inline__ __m512i __DEFAULT_FN_ATTRS512
932_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800933{
Logan Chien55afb0a2018-10-15 10:42:14 +0800934 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
935 (__v16si)_mm512_sub_epi32(__A, __B),
936 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800937}
938
/* Packed double-precision maximum of A and B via __builtin_ia32_maxpd512,
 * with R forwarded as the builtin's int rounding/exception-control operand.
 * This is a macro, presumably so that R stays a constant expression at the
 * builtin call. */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800942
/* Merge-masking form of _mm512_max_round_pd: its result and W are passed to
 * the double-precision lane select builtin under mask U; lanes whose mask
 * bit is clear are taken from W. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                   (__v8df)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800947
/* Zero-masking form of _mm512_max_round_pd: same select as the merge-masking
 * macro, with _mm512_setzero_pd() as the fallback operand. */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                   (__v8df)_mm512_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +0800952
Logan Chien55afb0a2018-10-15 10:42:14 +0800953static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800954_mm512_max_pd(__m512d __A, __m512d __B)
955{
Logan Chien55afb0a2018-10-15 10:42:14 +0800956 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
957 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +0800958}
959
Logan Chien55afb0a2018-10-15 10:42:14 +0800960static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800961_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
962{
Logan Chien55afb0a2018-10-15 10:42:14 +0800963 return (__m512d)__builtin_ia32_selectpd_512(__U,
964 (__v8df)_mm512_max_pd(__A, __B),
965 (__v8df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800966}
967
Logan Chien55afb0a2018-10-15 10:42:14 +0800968static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800969_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
970{
Logan Chien55afb0a2018-10-15 10:42:14 +0800971 return (__m512d)__builtin_ia32_selectpd_512(__U,
972 (__v8df)_mm512_max_pd(__A, __B),
973 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800974}
975
/* Packed single-precision maximum of A and B via __builtin_ia32_maxps512,
 * with R forwarded as the builtin's int rounding/exception-control
 * operand. */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800979
/* Merge-masking form of _mm512_max_round_ps: its result and W are passed to
 * the single-precision lane select builtin under mask U; lanes whose mask
 * bit is clear are taken from W. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                  (__v16sf)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800984
/* Zero-masking form of _mm512_max_round_ps: same select as the merge-masking
 * macro, with _mm512_setzero_ps() as the fallback operand. */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                  (__v16sf)_mm512_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +0800989
Logan Chien55afb0a2018-10-15 10:42:14 +0800990static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800991_mm512_max_ps(__m512 __A, __m512 __B)
992{
Logan Chien55afb0a2018-10-15 10:42:14 +0800993 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
994 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +0800995}
996
Logan Chien55afb0a2018-10-15 10:42:14 +0800997static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800998_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
999{
Logan Chien55afb0a2018-10-15 10:42:14 +08001000 return (__m512)__builtin_ia32_selectps_512(__U,
1001 (__v16sf)_mm512_max_ps(__A, __B),
1002 (__v16sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001003}
1004
Logan Chien55afb0a2018-10-15 10:42:14 +08001005static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001006_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1007{
Logan Chien55afb0a2018-10-15 10:42:14 +08001008 return (__m512)__builtin_ia32_selectps_512(__U,
1009 (__v16sf)_mm512_max_ps(__A, __B),
1010 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001011}
1012
Logan Chien55afb0a2018-10-15 10:42:14 +08001013static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001014_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1015 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1016 (__v4sf) __B,
1017 (__v4sf) __W,
1018 (__mmask8) __U,
1019 _MM_FROUND_CUR_DIRECTION);
1020}
1021
Logan Chien55afb0a2018-10-15 10:42:14 +08001022static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001023_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1024 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1025 (__v4sf) __B,
1026 (__v4sf) _mm_setzero_ps (),
1027 (__mmask8) __U,
1028 _MM_FROUND_CUR_DIRECTION);
1029}
1030
/* Unmasked scalar single-precision maximum with explicit rounding operand R:
 * calls __builtin_ia32_maxss_round_mask with an all-ones mask
 * ((__mmask8)-1) and a zero pass-through vector. */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001036
/* Masked scalar single-precision maximum with explicit rounding operand R:
 * forwards A, B, the pass-through vector W and the mask U to
 * __builtin_ia32_maxss_round_mask. */
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001042
/* Zero-masked scalar single-precision maximum with explicit rounding operand
 * R: as _mm_mask_max_round_ss, but the pass-through operand is
 * _mm_setzero_ps(). */
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001048
Logan Chien55afb0a2018-10-15 10:42:14 +08001049static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001050_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1051 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1052 (__v2df) __B,
1053 (__v2df) __W,
1054 (__mmask8) __U,
1055 _MM_FROUND_CUR_DIRECTION);
1056}
1057
Logan Chien55afb0a2018-10-15 10:42:14 +08001058static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001059_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1060 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1061 (__v2df) __B,
1062 (__v2df) _mm_setzero_pd (),
1063 (__mmask8) __U,
1064 _MM_FROUND_CUR_DIRECTION);
1065}
1066
/* Unmasked scalar double-precision maximum with explicit rounding operand R:
 * calls __builtin_ia32_maxsd_round_mask with an all-ones mask
 * ((__mmask8)-1) and a zero pass-through vector. */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001072
/* Masked scalar double-precision maximum with explicit rounding operand R:
 * forwards A, B, the pass-through vector W and the mask U to
 * __builtin_ia32_maxsd_round_mask. */
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001078
/* Zero-masked scalar double-precision maximum with explicit rounding operand
 * R: as _mm_mask_max_round_sd, but the pass-through operand is
 * _mm_setzero_pd(). */
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001084
1085static __inline __m512i
Logan Chien55afb0a2018-10-15 10:42:14 +08001086__DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001087_mm512_max_epi32(__m512i __A, __m512i __B)
1088{
Logan Chien55afb0a2018-10-15 10:42:14 +08001089 return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001090}
1091
Logan Chien55afb0a2018-10-15 10:42:14 +08001092static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001093_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1094{
Logan Chien55afb0a2018-10-15 10:42:14 +08001095 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1096 (__v16si)_mm512_max_epi32(__A, __B),
1097 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001098}
1099
Logan Chien55afb0a2018-10-15 10:42:14 +08001100static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001101_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1102{
Logan Chien55afb0a2018-10-15 10:42:14 +08001103 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1104 (__v16si)_mm512_max_epi32(__A, __B),
1105 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001106}
1107
Logan Chien55afb0a2018-10-15 10:42:14 +08001108static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001109_mm512_max_epu32(__m512i __A, __m512i __B)
1110{
Logan Chien55afb0a2018-10-15 10:42:14 +08001111 return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001112}
1113
Logan Chien55afb0a2018-10-15 10:42:14 +08001114static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001115_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1116{
Logan Chien55afb0a2018-10-15 10:42:14 +08001117 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1118 (__v16si)_mm512_max_epu32(__A, __B),
1119 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001120}
1121
Logan Chien55afb0a2018-10-15 10:42:14 +08001122static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001123_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1124{
Logan Chien55afb0a2018-10-15 10:42:14 +08001125 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1126 (__v16si)_mm512_max_epu32(__A, __B),
1127 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001128}
1129
Logan Chien55afb0a2018-10-15 10:42:14 +08001130static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001131_mm512_max_epi64(__m512i __A, __m512i __B)
1132{
Logan Chien55afb0a2018-10-15 10:42:14 +08001133 return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001134}
1135
Logan Chien55afb0a2018-10-15 10:42:14 +08001136static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001137_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1138{
Logan Chien55afb0a2018-10-15 10:42:14 +08001139 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1140 (__v8di)_mm512_max_epi64(__A, __B),
1141 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001142}
1143
Logan Chien55afb0a2018-10-15 10:42:14 +08001144static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001145_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1146{
Logan Chien55afb0a2018-10-15 10:42:14 +08001147 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1148 (__v8di)_mm512_max_epi64(__A, __B),
1149 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001150}
1151
Logan Chien55afb0a2018-10-15 10:42:14 +08001152static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001153_mm512_max_epu64(__m512i __A, __m512i __B)
1154{
Logan Chien55afb0a2018-10-15 10:42:14 +08001155 return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001156}
1157
Logan Chien55afb0a2018-10-15 10:42:14 +08001158static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001159_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1160{
Logan Chien55afb0a2018-10-15 10:42:14 +08001161 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1162 (__v8di)_mm512_max_epu64(__A, __B),
1163 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001164}
1165
Logan Chien55afb0a2018-10-15 10:42:14 +08001166static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001167_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1168{
Logan Chien55afb0a2018-10-15 10:42:14 +08001169 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1170 (__v8di)_mm512_max_epu64(__A, __B),
1171 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001172}
1173
/* Packed double-precision minimum of A and B via __builtin_ia32_minpd512,
 * with R forwarded as the builtin's int rounding/exception-control
 * operand. */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001177
/* Merge-masking form of _mm512_min_round_pd: its result and W are passed to
 * the double-precision lane select builtin under mask U; lanes whose mask
 * bit is clear are taken from W. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                   (__v8df)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001182
/* Zero-masking form of _mm512_min_round_pd: same select as the merge-masking
 * macro, with _mm512_setzero_pd() as the fallback operand. */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                   (__v8df)_mm512_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08001187
Logan Chien55afb0a2018-10-15 10:42:14 +08001188static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001189_mm512_min_pd(__m512d __A, __m512d __B)
1190{
Logan Chien55afb0a2018-10-15 10:42:14 +08001191 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1192 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08001193}
1194
Logan Chien55afb0a2018-10-15 10:42:14 +08001195static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001196_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1197{
Logan Chien55afb0a2018-10-15 10:42:14 +08001198 return (__m512d)__builtin_ia32_selectpd_512(__U,
1199 (__v8df)_mm512_min_pd(__A, __B),
1200 (__v8df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001201}
1202
Logan Chien55afb0a2018-10-15 10:42:14 +08001203static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001204_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1205{
Logan Chien55afb0a2018-10-15 10:42:14 +08001206 return (__m512d)__builtin_ia32_selectpd_512(__U,
1207 (__v8df)_mm512_min_pd(__A, __B),
1208 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001209}
1210
/* Packed single-precision minimum of A and B via __builtin_ia32_minps512,
 * with R forwarded as the builtin's int rounding/exception-control
 * operand. */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08001214
/* Merge-masking form of _mm512_min_round_ps: its result and W are passed to
 * the single-precision lane select builtin under mask U; lanes whose mask
 * bit is clear are taken from W. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                  (__v16sf)(W)))
Logan Chien55afb0a2018-10-15 10:42:14 +08001219
/* Zero-masking form of _mm512_min_round_ps: same select as the merge-masking
 * macro, with _mm512_setzero_ps() as the fallback operand. */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                  (__v16sf)_mm512_setzero_ps()))
Logan Chien55afb0a2018-10-15 10:42:14 +08001224
1225static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001226_mm512_min_ps(__m512 __A, __m512 __B)
1227{
Logan Chien55afb0a2018-10-15 10:42:14 +08001228 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1229 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08001230}
1231
Logan Chien55afb0a2018-10-15 10:42:14 +08001232static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001233_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1234{
Logan Chien55afb0a2018-10-15 10:42:14 +08001235 return (__m512)__builtin_ia32_selectps_512(__U,
1236 (__v16sf)_mm512_min_ps(__A, __B),
1237 (__v16sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001238}
1239
Logan Chien55afb0a2018-10-15 10:42:14 +08001240static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001241_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1242{
Logan Chien55afb0a2018-10-15 10:42:14 +08001243 return (__m512)__builtin_ia32_selectps_512(__U,
1244 (__v16sf)_mm512_min_ps(__A, __B),
1245 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001246}
1247
Logan Chien55afb0a2018-10-15 10:42:14 +08001248static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001249_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1250 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1251 (__v4sf) __B,
1252 (__v4sf) __W,
1253 (__mmask8) __U,
1254 _MM_FROUND_CUR_DIRECTION);
1255}
1256
Logan Chien55afb0a2018-10-15 10:42:14 +08001257static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001258_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1259 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1260 (__v4sf) __B,
1261 (__v4sf) _mm_setzero_ps (),
1262 (__mmask8) __U,
1263 _MM_FROUND_CUR_DIRECTION);
1264}
1265
/* Unmasked scalar single-precision minimum with explicit rounding operand R:
 * calls __builtin_ia32_minss_round_mask with an all-ones mask
 * ((__mmask8)-1) and a zero pass-through vector. */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001271
/* Masked scalar single-precision minimum with explicit rounding operand R:
 * forwards A, B, the pass-through vector W and the mask U to
 * __builtin_ia32_minss_round_mask. */
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001277
/* Zero-masked scalar single-precision minimum with explicit rounding operand
 * R: as _mm_mask_min_round_ss, but the pass-through operand is
 * _mm_setzero_ps(). */
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001283
Logan Chien55afb0a2018-10-15 10:42:14 +08001284static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001285_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1286 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1287 (__v2df) __B,
1288 (__v2df) __W,
1289 (__mmask8) __U,
1290 _MM_FROUND_CUR_DIRECTION);
1291}
1292
Logan Chien55afb0a2018-10-15 10:42:14 +08001293static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001294_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1295 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1296 (__v2df) __B,
1297 (__v2df) _mm_setzero_pd (),
1298 (__mmask8) __U,
1299 _MM_FROUND_CUR_DIRECTION);
1300}
1301
/* Scalar double minimum with caller-supplied rounding argument R. Expands to
 * __builtin_ia32_minsd_round_mask with a zero third operand and an all-ones
 * mask ((__mmask8)-1). */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001307
/* Masked scalar double minimum with caller-supplied rounding argument R.
 * Expands to __builtin_ia32_minsd_round_mask with W as the third vector
 * operand and U as the mask. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001313
/* Zero-masked scalar double minimum with caller-supplied rounding argument R.
 * Expands to __builtin_ia32_minsd_round_mask with a zero third operand and U
 * as the mask. */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001319
1320static __inline __m512i
Logan Chien55afb0a2018-10-15 10:42:14 +08001321__DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001322_mm512_min_epi32(__m512i __A, __m512i __B)
1323{
Logan Chien55afb0a2018-10-15 10:42:14 +08001324 return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001325}
1326
Logan Chien55afb0a2018-10-15 10:42:14 +08001327static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001328_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1329{
Logan Chien55afb0a2018-10-15 10:42:14 +08001330 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1331 (__v16si)_mm512_min_epi32(__A, __B),
1332 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001333}
1334
Logan Chien55afb0a2018-10-15 10:42:14 +08001335static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001336_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1337{
Logan Chien55afb0a2018-10-15 10:42:14 +08001338 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1339 (__v16si)_mm512_min_epi32(__A, __B),
1340 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001341}
1342
Logan Chien55afb0a2018-10-15 10:42:14 +08001343static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001344_mm512_min_epu32(__m512i __A, __m512i __B)
1345{
Logan Chien55afb0a2018-10-15 10:42:14 +08001346 return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001347}
1348
Logan Chien55afb0a2018-10-15 10:42:14 +08001349static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001350_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1351{
Logan Chien55afb0a2018-10-15 10:42:14 +08001352 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1353 (__v16si)_mm512_min_epu32(__A, __B),
1354 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001355}
1356
Logan Chien55afb0a2018-10-15 10:42:14 +08001357static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001358_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1359{
Logan Chien55afb0a2018-10-15 10:42:14 +08001360 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1361 (__v16si)_mm512_min_epu32(__A, __B),
1362 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001363}
1364
Logan Chien55afb0a2018-10-15 10:42:14 +08001365static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001366_mm512_min_epi64(__m512i __A, __m512i __B)
1367{
Logan Chien55afb0a2018-10-15 10:42:14 +08001368 return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001369}
1370
Logan Chien55afb0a2018-10-15 10:42:14 +08001371static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001372_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1373{
Logan Chien55afb0a2018-10-15 10:42:14 +08001374 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1375 (__v8di)_mm512_min_epi64(__A, __B),
1376 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001377}
1378
Logan Chien55afb0a2018-10-15 10:42:14 +08001379static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001380_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1381{
Logan Chien55afb0a2018-10-15 10:42:14 +08001382 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1383 (__v8di)_mm512_min_epi64(__A, __B),
1384 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001385}
1386
Logan Chien55afb0a2018-10-15 10:42:14 +08001387static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001388_mm512_min_epu64(__m512i __A, __m512i __B)
1389{
Logan Chien55afb0a2018-10-15 10:42:14 +08001390 return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001391}
1392
Logan Chien55afb0a2018-10-15 10:42:14 +08001393static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001394_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1395{
Logan Chien55afb0a2018-10-15 10:42:14 +08001396 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1397 (__v8di)_mm512_min_epu64(__A, __B),
1398 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001399}
1400
Logan Chien55afb0a2018-10-15 10:42:14 +08001401static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001402_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1403{
Logan Chien55afb0a2018-10-15 10:42:14 +08001404 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1405 (__v8di)_mm512_min_epu64(__A, __B),
1406 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001407}
1408
Logan Chien55afb0a2018-10-15 10:42:14 +08001409static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001410_mm512_mul_epi32(__m512i __X, __m512i __Y)
1411{
Logan Chien55afb0a2018-10-15 10:42:14 +08001412 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
Logan Chien2833ffb2018-10-09 10:03:24 +08001413}
1414
Logan Chien55afb0a2018-10-15 10:42:14 +08001415static __inline __m512i __DEFAULT_FN_ATTRS512
1416_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08001417{
Logan Chien55afb0a2018-10-15 10:42:14 +08001418 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1419 (__v8di)_mm512_mul_epi32(__X, __Y),
1420 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001421}
1422
Logan Chien55afb0a2018-10-15 10:42:14 +08001423static __inline __m512i __DEFAULT_FN_ATTRS512
1424_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08001425{
Logan Chien55afb0a2018-10-15 10:42:14 +08001426 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1427 (__v8di)_mm512_mul_epi32(__X, __Y),
1428 (__v8di)_mm512_setzero_si512 ());
Logan Chien2833ffb2018-10-09 10:03:24 +08001429}
1430
Logan Chien55afb0a2018-10-15 10:42:14 +08001431static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001432_mm512_mul_epu32(__m512i __X, __m512i __Y)
1433{
Logan Chien55afb0a2018-10-15 10:42:14 +08001434 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
Logan Chien2833ffb2018-10-09 10:03:24 +08001435}
1436
Logan Chien55afb0a2018-10-15 10:42:14 +08001437static __inline __m512i __DEFAULT_FN_ATTRS512
1438_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08001439{
Logan Chien55afb0a2018-10-15 10:42:14 +08001440 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1441 (__v8di)_mm512_mul_epu32(__X, __Y),
1442 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001443}
1444
Logan Chien55afb0a2018-10-15 10:42:14 +08001445static __inline __m512i __DEFAULT_FN_ATTRS512
1446_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08001447{
Logan Chien55afb0a2018-10-15 10:42:14 +08001448 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1449 (__v8di)_mm512_mul_epu32(__X, __Y),
1450 (__v8di)_mm512_setzero_si512 ());
Logan Chien2833ffb2018-10-09 10:03:24 +08001451}
1452
Logan Chien55afb0a2018-10-15 10:42:14 +08001453static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001454_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1455{
1456 return (__m512i) ((__v16su) __A * (__v16su) __B);
1457}
1458
Logan Chien55afb0a2018-10-15 10:42:14 +08001459static __inline __m512i __DEFAULT_FN_ATTRS512
1460_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001461{
Logan Chien55afb0a2018-10-15 10:42:14 +08001462 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1463 (__v16si)_mm512_mullo_epi32(__A, __B),
1464 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001465}
1466
Logan Chien55afb0a2018-10-15 10:42:14 +08001467static __inline __m512i __DEFAULT_FN_ATTRS512
1468_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001469{
Logan Chien55afb0a2018-10-15 10:42:14 +08001470 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1471 (__v16si)_mm512_mullo_epi32(__A, __B),
1472 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001473}
1474
Logan Chien55afb0a2018-10-15 10:42:14 +08001475static __inline__ __m512i __DEFAULT_FN_ATTRS512
1476_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1477 return (__m512i) ((__v8du) __A * (__v8du) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001478}
1479
Logan Chien55afb0a2018-10-15 10:42:14 +08001480static __inline__ __m512i __DEFAULT_FN_ATTRS512
1481_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1482 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1483 (__v8di)_mm512_mullox_epi64(__A, __B),
1484 (__v8di)__W);
1485}
1486
/* Expands to __builtin_ia32_sqrtpd512 on A (as __v8df) with the
 * caller-supplied rounding argument R. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08001489
/* Masked form of _mm512_sqrt_round_pd: __builtin_ia32_selectpd_512 chooses,
 * under mask U, between _mm512_sqrt_round_pd(A, R) and W. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)(__m512d)(W)))
Logan Chien55afb0a2018-10-15 10:42:14 +08001494
/* Zero-masked form of _mm512_sqrt_round_pd: __builtin_ia32_selectpd_512
 * chooses, under mask U, between _mm512_sqrt_round_pd(A, R) and zero. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)_mm512_setzero_pd()))
Logan Chien55afb0a2018-10-15 10:42:14 +08001499
1500static __inline__ __m512d __DEFAULT_FN_ATTRS512
1501_mm512_sqrt_pd(__m512d __A)
1502{
1503 return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1504 _MM_FROUND_CUR_DIRECTION);
1505}
1506
1507static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001508_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1509{
Logan Chien55afb0a2018-10-15 10:42:14 +08001510 return (__m512d)__builtin_ia32_selectpd_512(__U,
1511 (__v8df)_mm512_sqrt_pd(__A),
1512 (__v8df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001513}
1514
Logan Chien55afb0a2018-10-15 10:42:14 +08001515static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001516_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1517{
Logan Chien55afb0a2018-10-15 10:42:14 +08001518 return (__m512d)__builtin_ia32_selectpd_512(__U,
1519 (__v8df)_mm512_sqrt_pd(__A),
1520 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001521}
1522
/* Expands to __builtin_ia32_sqrtps512 on A (as __v16sf) with the
 * caller-supplied rounding argument R. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001525
/* Masked form of _mm512_sqrt_round_ps: __builtin_ia32_selectps_512 chooses,
 * under mask U, between _mm512_sqrt_round_ps(A, R) and W. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)(__m512)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001530
/* Zero-masked form of _mm512_sqrt_round_ps: __builtin_ia32_selectps_512
 * chooses, under mask U, between _mm512_sqrt_round_ps(A, R) and zero. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)_mm512_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08001535
Logan Chien55afb0a2018-10-15 10:42:14 +08001536static __inline__ __m512 __DEFAULT_FN_ATTRS512
1537_mm512_sqrt_ps(__m512 __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001538{
Logan Chien55afb0a2018-10-15 10:42:14 +08001539 return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1540 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08001541}
1542
Logan Chien55afb0a2018-10-15 10:42:14 +08001543static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001544_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1545{
Logan Chien55afb0a2018-10-15 10:42:14 +08001546 return (__m512)__builtin_ia32_selectps_512(__U,
1547 (__v16sf)_mm512_sqrt_ps(__A),
1548 (__v16sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001549}
1550
Logan Chien55afb0a2018-10-15 10:42:14 +08001551static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001552_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1553{
Logan Chien55afb0a2018-10-15 10:42:14 +08001554 return (__m512)__builtin_ia32_selectps_512(__U,
1555 (__v16sf)_mm512_sqrt_ps(__A),
1556 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001557}
1558
Logan Chien55afb0a2018-10-15 10:42:14 +08001559static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001560_mm512_rsqrt14_pd(__m512d __A)
1561{
1562 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1563 (__v8df)
1564 _mm512_setzero_pd (),
1565 (__mmask8) -1);}
1566
Logan Chien55afb0a2018-10-15 10:42:14 +08001567static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001568_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1569{
1570 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1571 (__v8df) __W,
1572 (__mmask8) __U);
1573}
1574
Logan Chien55afb0a2018-10-15 10:42:14 +08001575static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001576_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1577{
1578 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1579 (__v8df)
1580 _mm512_setzero_pd (),
1581 (__mmask8) __U);
1582}
1583
Logan Chien55afb0a2018-10-15 10:42:14 +08001584static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001585_mm512_rsqrt14_ps(__m512 __A)
1586{
1587 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1588 (__v16sf)
1589 _mm512_setzero_ps (),
1590 (__mmask16) -1);
1591}
1592
Logan Chien55afb0a2018-10-15 10:42:14 +08001593static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001594_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1595{
1596 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1597 (__v16sf) __W,
1598 (__mmask16) __U);
1599}
1600
Logan Chien55afb0a2018-10-15 10:42:14 +08001601static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001602_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1603{
1604 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1605 (__v16sf)
1606 _mm512_setzero_ps (),
1607 (__mmask16) __U);
1608}
1609
Logan Chien55afb0a2018-10-15 10:42:14 +08001610static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001611_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1612{
1613 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1614 (__v4sf) __B,
1615 (__v4sf)
1616 _mm_setzero_ps (),
1617 (__mmask8) -1);
1618}
1619
Logan Chien55afb0a2018-10-15 10:42:14 +08001620static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001621_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1622{
1623 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1624 (__v4sf) __B,
1625 (__v4sf) __W,
1626 (__mmask8) __U);
1627}
1628
Logan Chien55afb0a2018-10-15 10:42:14 +08001629static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001630_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1631{
1632 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1633 (__v4sf) __B,
1634 (__v4sf) _mm_setzero_ps (),
1635 (__mmask8) __U);
1636}
1637
Logan Chien55afb0a2018-10-15 10:42:14 +08001638static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001639_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1640{
1641 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1642 (__v2df) __B,
1643 (__v2df)
1644 _mm_setzero_pd (),
1645 (__mmask8) -1);
1646}
1647
Logan Chien55afb0a2018-10-15 10:42:14 +08001648static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001649_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1650{
1651 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1652 (__v2df) __B,
1653 (__v2df) __W,
1654 (__mmask8) __U);
1655}
1656
Logan Chien55afb0a2018-10-15 10:42:14 +08001657static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001658_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1659{
1660 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1661 (__v2df) __B,
1662 (__v2df) _mm_setzero_pd (),
1663 (__mmask8) __U);
1664}
1665
Logan Chien55afb0a2018-10-15 10:42:14 +08001666static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001667_mm512_rcp14_pd(__m512d __A)
1668{
1669 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1670 (__v8df)
1671 _mm512_setzero_pd (),
1672 (__mmask8) -1);
1673}
1674
Logan Chien55afb0a2018-10-15 10:42:14 +08001675static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001676_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1677{
1678 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1679 (__v8df) __W,
1680 (__mmask8) __U);
1681}
1682
Logan Chien55afb0a2018-10-15 10:42:14 +08001683static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001684_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1685{
1686 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1687 (__v8df)
1688 _mm512_setzero_pd (),
1689 (__mmask8) __U);
1690}
1691
Logan Chien55afb0a2018-10-15 10:42:14 +08001692static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001693_mm512_rcp14_ps(__m512 __A)
1694{
1695 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1696 (__v16sf)
1697 _mm512_setzero_ps (),
1698 (__mmask16) -1);
1699}
1700
Logan Chien55afb0a2018-10-15 10:42:14 +08001701static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001702_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1703{
1704 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1705 (__v16sf) __W,
1706 (__mmask16) __U);
1707}
1708
Logan Chien55afb0a2018-10-15 10:42:14 +08001709static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001710_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1711{
1712 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1713 (__v16sf)
1714 _mm512_setzero_ps (),
1715 (__mmask16) __U);
1716}
1717
Logan Chien55afb0a2018-10-15 10:42:14 +08001718static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001719_mm_rcp14_ss(__m128 __A, __m128 __B)
1720{
1721 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1722 (__v4sf) __B,
1723 (__v4sf)
1724 _mm_setzero_ps (),
1725 (__mmask8) -1);
1726}
1727
Logan Chien55afb0a2018-10-15 10:42:14 +08001728static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001729_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1730{
1731 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1732 (__v4sf) __B,
1733 (__v4sf) __W,
1734 (__mmask8) __U);
1735}
1736
Logan Chien55afb0a2018-10-15 10:42:14 +08001737static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001738_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1739{
1740 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1741 (__v4sf) __B,
1742 (__v4sf) _mm_setzero_ps (),
1743 (__mmask8) __U);
1744}
1745
Logan Chien55afb0a2018-10-15 10:42:14 +08001746static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001747_mm_rcp14_sd(__m128d __A, __m128d __B)
1748{
1749 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1750 (__v2df) __B,
1751 (__v2df)
1752 _mm_setzero_pd (),
1753 (__mmask8) -1);
1754}
1755
Logan Chien55afb0a2018-10-15 10:42:14 +08001756static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001757_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1758{
1759 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1760 (__v2df) __B,
1761 (__v2df) __W,
1762 (__mmask8) __U);
1763}
1764
Logan Chien55afb0a2018-10-15 10:42:14 +08001765static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001766_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1767{
1768 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1769 (__v2df) __B,
1770 (__v2df) _mm_setzero_pd (),
1771 (__mmask8) __U);
1772}
1773
Logan Chien55afb0a2018-10-15 10:42:14 +08001774static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001775_mm512_floor_ps(__m512 __A)
1776{
1777 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1778 _MM_FROUND_FLOOR,
1779 (__v16sf) __A, -1,
1780 _MM_FROUND_CUR_DIRECTION);
1781}
1782
Logan Chien55afb0a2018-10-15 10:42:14 +08001783static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001784_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1785{
1786 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1787 _MM_FROUND_FLOOR,
1788 (__v16sf) __W, __U,
1789 _MM_FROUND_CUR_DIRECTION);
1790}
1791
Logan Chien55afb0a2018-10-15 10:42:14 +08001792static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001793_mm512_floor_pd(__m512d __A)
1794{
1795 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1796 _MM_FROUND_FLOOR,
1797 (__v8df) __A, -1,
1798 _MM_FROUND_CUR_DIRECTION);
1799}
1800
Logan Chien55afb0a2018-10-15 10:42:14 +08001801static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001802_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1803{
1804 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1805 _MM_FROUND_FLOOR,
1806 (__v8df) __W, __U,
1807 _MM_FROUND_CUR_DIRECTION);
1808}
1809
Logan Chien55afb0a2018-10-15 10:42:14 +08001810static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001811_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1812{
1813 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1814 _MM_FROUND_CEIL,
1815 (__v16sf) __W, __U,
1816 _MM_FROUND_CUR_DIRECTION);
1817}
1818
Logan Chien55afb0a2018-10-15 10:42:14 +08001819static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001820_mm512_ceil_ps(__m512 __A)
1821{
1822 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1823 _MM_FROUND_CEIL,
1824 (__v16sf) __A, -1,
1825 _MM_FROUND_CUR_DIRECTION);
1826}
1827
Logan Chien55afb0a2018-10-15 10:42:14 +08001828static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001829_mm512_ceil_pd(__m512d __A)
1830{
1831 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1832 _MM_FROUND_CEIL,
1833 (__v8df) __A, -1,
1834 _MM_FROUND_CUR_DIRECTION);
1835}
1836
Logan Chien55afb0a2018-10-15 10:42:14 +08001837static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001838_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1839{
1840 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1841 _MM_FROUND_CEIL,
1842 (__v8df) __W, __U,
1843 _MM_FROUND_CUR_DIRECTION);
1844}
1845
Logan Chien55afb0a2018-10-15 10:42:14 +08001846static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001847_mm512_abs_epi64(__m512i __A)
1848{
Logan Chien55afb0a2018-10-15 10:42:14 +08001849 return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001850}
1851
Logan Chien55afb0a2018-10-15 10:42:14 +08001852static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001853_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1854{
Logan Chien55afb0a2018-10-15 10:42:14 +08001855 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1856 (__v8di)_mm512_abs_epi64(__A),
1857 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001858}
1859
Logan Chien55afb0a2018-10-15 10:42:14 +08001860static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001861_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1862{
Logan Chien55afb0a2018-10-15 10:42:14 +08001863 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1864 (__v8di)_mm512_abs_epi64(__A),
1865 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001866}
1867
Logan Chien55afb0a2018-10-15 10:42:14 +08001868static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001869_mm512_abs_epi32(__m512i __A)
1870{
Logan Chien55afb0a2018-10-15 10:42:14 +08001871 return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001872}
1873
Logan Chien55afb0a2018-10-15 10:42:14 +08001874static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001875_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1876{
Logan Chien55afb0a2018-10-15 10:42:14 +08001877 return (__m512i)__builtin_ia32_selectd_512(__U,
1878 (__v16si)_mm512_abs_epi32(__A),
1879 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001880}
1881
Logan Chien55afb0a2018-10-15 10:42:14 +08001882static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001883_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1884{
Logan Chien55afb0a2018-10-15 10:42:14 +08001885 return (__m512i)__builtin_ia32_selectd_512(__U,
1886 (__v16si)_mm512_abs_epi32(__A),
1887 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001888}
1889
Logan Chien55afb0a2018-10-15 10:42:14 +08001890static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001891_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001892 __A = _mm_add_ss(__A, __B);
1893 return __builtin_ia32_selectss_128(__U, __A, __W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001894}
1895
Logan Chien55afb0a2018-10-15 10:42:14 +08001896static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001897_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001898 __A = _mm_add_ss(__A, __B);
1899 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001900}
1901
/* Scalar float add with caller-supplied rounding argument R. Expands to
 * __builtin_ia32_addss_round_mask with a zero third operand and an all-ones
 * mask ((__mmask8)-1). */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001907
/* Masked scalar float add with caller-supplied rounding argument R. Expands
 * to __builtin_ia32_addss_round_mask with W as the third vector operand and
 * U as the mask. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001913
/* Zero-masked scalar float add with caller-supplied rounding argument R.
 * Expands to __builtin_ia32_addss_round_mask with a zero third operand and U
 * as the mask. */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001919
Logan Chien55afb0a2018-10-15 10:42:14 +08001920static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001921_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001922 __A = _mm_add_sd(__A, __B);
1923 return __builtin_ia32_selectsd_128(__U, __A, __W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001924}
1925
Logan Chien55afb0a2018-10-15 10:42:14 +08001926static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001927_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001928 __A = _mm_add_sd(__A, __B);
1929 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001930}
/* Scalar double add with caller-supplied rounding argument R. Expands to
 * __builtin_ia32_addsd_round_mask with a zero third operand and an all-ones
 * mask ((__mmask8)-1). */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001936
/* Masked scalar double add with caller-supplied rounding argument R. Expands
 * to __builtin_ia32_addsd_round_mask with W as the third vector operand and
 * U as the mask. */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001942
/* Zero-masked scalar double add with caller-supplied rounding argument R.
 * Expands to __builtin_ia32_addsd_round_mask with a zero third operand and U
 * as the mask. */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001948
Logan Chien55afb0a2018-10-15 10:42:14 +08001949static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001950_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001951 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1952 (__v8df)_mm512_add_pd(__A, __B),
1953 (__v8df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001954}
1955
Logan Chien55afb0a2018-10-15 10:42:14 +08001956static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001957_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001958 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1959 (__v8df)_mm512_add_pd(__A, __B),
1960 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001961}
1962
Logan Chien55afb0a2018-10-15 10:42:14 +08001963static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001964_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001965 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1966 (__v16sf)_mm512_add_ps(__A, __B),
1967 (__v16sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001968}
1969
Logan Chien55afb0a2018-10-15 10:42:14 +08001970static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001971_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001972 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1973 (__v16sf)_mm512_add_ps(__A, __B),
1974 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001975}
1976
/* Expands to __builtin_ia32_addpd512 on A and B (each viewed as __v8df)
 * with R converted to int; the result is cast to __m512d. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001980
/* Masked form of _mm512_add_round_pd: its result and W are passed to
 * __builtin_ia32_selectpd_512 under mask U. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_add_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001985
/* Zero-masked form of _mm512_add_round_pd: its result and
 * _mm512_setzero_pd() are passed to __builtin_ia32_selectpd_512 under
 * mask U. */
#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_add_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08001990
/* Expands to __builtin_ia32_addps512 on A and B (each viewed as __v16sf)
 * with R converted to int; the result is cast to __m512. */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001994
/* Masked form of _mm512_add_round_ps: its result and W are passed to
 * __builtin_ia32_selectps_512 under mask U. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001999
/* Zero-masked form of _mm512_add_round_ps: its result and
 * _mm512_setzero_ps() are passed to __builtin_ia32_selectps_512 under
 * mask U. */
#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08002004
Logan Chien55afb0a2018-10-15 10:42:14 +08002005static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002006_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002007 __A = _mm_sub_ss(__A, __B);
2008 return __builtin_ia32_selectss_128(__U, __A, __W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002009}
2010
Logan Chien55afb0a2018-10-15 10:42:14 +08002011static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002012_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002013 __A = _mm_sub_ss(__A, __B);
2014 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002015}
/* Subtract with an explicit rounding argument and no caller-supplied mask:
 * expands to __builtin_ia32_subss_round_mask on A and B, with
 * _mm_setzero_ps() as the third operand, (__mmask8)-1 as the mask and R
 * converted to int. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002021
/* Masked subtract with an explicit rounding argument: expands to
 * __builtin_ia32_subss_round_mask on A and B, with W as the third operand,
 * U as the mask and R converted to int. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002027
/* Zero-masked subtract with an explicit rounding argument: expands to
 * __builtin_ia32_subss_round_mask on A and B, with _mm_setzero_ps() as the
 * third operand, U as the mask and R converted to int. */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002033
Logan Chien55afb0a2018-10-15 10:42:14 +08002034static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002035_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002036 __A = _mm_sub_sd(__A, __B);
2037 return __builtin_ia32_selectsd_128(__U, __A, __W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002038}
2039
Logan Chien55afb0a2018-10-15 10:42:14 +08002040static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002041_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002042 __A = _mm_sub_sd(__A, __B);
2043 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002044}
2045
/* Subtract with an explicit rounding argument and no caller-supplied mask:
 * expands to __builtin_ia32_subsd_round_mask on A and B, with
 * _mm_setzero_pd() as the third operand, (__mmask8)-1 as the mask and R
 * converted to int. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002051
/* Masked subtract with an explicit rounding argument: expands to
 * __builtin_ia32_subsd_round_mask on A and B, with W as the third operand,
 * U as the mask and R converted to int. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002057
/* Zero-masked subtract with an explicit rounding argument: expands to
 * __builtin_ia32_subsd_round_mask on A and B, with _mm_setzero_pd() as the
 * third operand, U as the mask and R converted to int. */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002063
Logan Chien55afb0a2018-10-15 10:42:14 +08002064static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002065_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002066 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2067 (__v8df)_mm512_sub_pd(__A, __B),
2068 (__v8df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002069}
2070
Logan Chien55afb0a2018-10-15 10:42:14 +08002071static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002072_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002073 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2074 (__v8df)_mm512_sub_pd(__A, __B),
2075 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002076}
2077
Logan Chien55afb0a2018-10-15 10:42:14 +08002078static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002079_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002080 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2081 (__v16sf)_mm512_sub_ps(__A, __B),
2082 (__v16sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002083}
2084
Logan Chien55afb0a2018-10-15 10:42:14 +08002085static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002086_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002087 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2088 (__v16sf)_mm512_sub_ps(__A, __B),
2089 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002090}
2091
/* Expands to __builtin_ia32_subpd512 on A and B (each viewed as __v8df)
 * with R converted to int; the result is cast to __m512d. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002095
/* Masked form of _mm512_sub_round_pd: its result and W are passed to
 * __builtin_ia32_selectpd_512 under mask U. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002100
/* Zero-masked form of _mm512_sub_round_pd: its result and
 * _mm512_setzero_pd() are passed to __builtin_ia32_selectpd_512 under
 * mask U. */
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08002105
/* Expands to __builtin_ia32_subps512 on A and B (each viewed as __v16sf)
 * with R converted to int; the result is cast to __m512. */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002109
/* Masked form of _mm512_sub_round_ps: its result and W are passed to
 * __builtin_ia32_selectps_512 under mask U. */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002114
/* Zero-masked form of _mm512_sub_round_ps: its result and
 * _mm512_setzero_ps() are passed to __builtin_ia32_selectps_512 under
 * mask U. */
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08002119
Logan Chien55afb0a2018-10-15 10:42:14 +08002120static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002121_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002122 __A = _mm_mul_ss(__A, __B);
2123 return __builtin_ia32_selectss_128(__U, __A, __W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002124}
2125
Logan Chien55afb0a2018-10-15 10:42:14 +08002126static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002127_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002128 __A = _mm_mul_ss(__A, __B);
2129 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002130}
/* Multiply with an explicit rounding argument and no caller-supplied mask:
 * expands to __builtin_ia32_mulss_round_mask on A and B, with
 * _mm_setzero_ps() as the third operand, (__mmask8)-1 as the mask and R
 * converted to int. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002136
/* Masked multiply with an explicit rounding argument: expands to
 * __builtin_ia32_mulss_round_mask on A and B, with W as the third operand,
 * U as the mask and R converted to int. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002142
/* Zero-masked multiply with an explicit rounding argument: expands to
 * __builtin_ia32_mulss_round_mask on A and B, with _mm_setzero_ps() as the
 * third operand, U as the mask and R converted to int. */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002148
Logan Chien55afb0a2018-10-15 10:42:14 +08002149static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002150_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002151 __A = _mm_mul_sd(__A, __B);
2152 return __builtin_ia32_selectsd_128(__U, __A, __W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002153}
2154
Logan Chien55afb0a2018-10-15 10:42:14 +08002155static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002156_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002157 __A = _mm_mul_sd(__A, __B);
2158 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002159}
2160
/* Multiply with an explicit rounding argument and no caller-supplied mask:
 * expands to __builtin_ia32_mulsd_round_mask on A and B, with
 * _mm_setzero_pd() as the third operand, (__mmask8)-1 as the mask and R
 * converted to int. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002166
/* Masked multiply with an explicit rounding argument: expands to
 * __builtin_ia32_mulsd_round_mask on A and B, with W as the third operand,
 * U as the mask and R converted to int. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002172
/* Zero-masked multiply with an explicit rounding argument: expands to
 * __builtin_ia32_mulsd_round_mask on A and B, with _mm_setzero_pd() as the
 * third operand, U as the mask and R converted to int. */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002178
Logan Chien55afb0a2018-10-15 10:42:14 +08002179static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002180_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002181 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2182 (__v8df)_mm512_mul_pd(__A, __B),
2183 (__v8df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002184}
2185
Logan Chien55afb0a2018-10-15 10:42:14 +08002186static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002187_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002188 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2189 (__v8df)_mm512_mul_pd(__A, __B),
2190 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002191}
2192
Logan Chien55afb0a2018-10-15 10:42:14 +08002193static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002194_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002195 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2196 (__v16sf)_mm512_mul_ps(__A, __B),
2197 (__v16sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002198}
2199
Logan Chien55afb0a2018-10-15 10:42:14 +08002200static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002201_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002202 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2203 (__v16sf)_mm512_mul_ps(__A, __B),
2204 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002205}
2206
/* Expands to __builtin_ia32_mulpd512 on A and B (each viewed as __v8df)
 * with R converted to int; the result is cast to __m512d. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002210
/* Masked form of _mm512_mul_round_pd: its result and W are passed to
 * __builtin_ia32_selectpd_512 under mask U. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002215
/* Zero-masked form of _mm512_mul_round_pd: its result and
 * _mm512_setzero_pd() are passed to __builtin_ia32_selectpd_512 under
 * mask U. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08002220
/* Expands to __builtin_ia32_mulps512 on A and B (each viewed as __v16sf)
 * with R converted to int; the result is cast to __m512. */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002224
/* Masked form of _mm512_mul_round_ps: its result and W are passed to
 * __builtin_ia32_selectps_512 under mask U. */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002229
/* Zero-masked form of _mm512_mul_round_ps: its result and
 * _mm512_setzero_ps() are passed to __builtin_ia32_selectps_512 under
 * mask U. */
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08002234
Logan Chien55afb0a2018-10-15 10:42:14 +08002235static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002236_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002237 __A = _mm_div_ss(__A, __B);
2238 return __builtin_ia32_selectss_128(__U, __A, __W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002239}
2240
Logan Chien55afb0a2018-10-15 10:42:14 +08002241static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002242_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002243 __A = _mm_div_ss(__A, __B);
2244 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002245}
2246
/* Divide with an explicit rounding argument and no caller-supplied mask:
 * expands to __builtin_ia32_divss_round_mask on A and B, with
 * _mm_setzero_ps() as the third operand, (__mmask8)-1 as the mask and R
 * converted to int. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002252
/* Masked divide with an explicit rounding argument: expands to
 * __builtin_ia32_divss_round_mask on A and B, with W as the third operand,
 * U as the mask and R converted to int. */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002258
/* Zero-masked divide with an explicit rounding argument: expands to
 * __builtin_ia32_divss_round_mask on A and B, with _mm_setzero_ps() as the
 * third operand, U as the mask and R converted to int. */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002264
Logan Chien55afb0a2018-10-15 10:42:14 +08002265static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002266_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002267 __A = _mm_div_sd(__A, __B);
2268 return __builtin_ia32_selectsd_128(__U, __A, __W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002269}
2270
Logan Chien55afb0a2018-10-15 10:42:14 +08002271static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002272_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002273 __A = _mm_div_sd(__A, __B);
2274 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002275}
2276
/* Divide with an explicit rounding argument and no caller-supplied mask:
 * expands to __builtin_ia32_divsd_round_mask on A and B, with
 * _mm_setzero_pd() as the third operand, (__mmask8)-1 as the mask and R
 * converted to int. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002282
/* Masked divide with an explicit rounding argument: expands to
 * __builtin_ia32_divsd_round_mask on A and B, with W as the third operand,
 * U as the mask and R converted to int. */
#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002288
/* Zero-masked divide with an explicit rounding argument: expands to
 * __builtin_ia32_divsd_round_mask on A and B, with _mm_setzero_pd() as the
 * third operand, U as the mask and R converted to int. */
#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002294
Logan Chien55afb0a2018-10-15 10:42:14 +08002295static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002296_mm512_div_pd(__m512d __a, __m512d __b)
2297{
2298 return (__m512d)((__v8df)__a/(__v8df)__b);
2299}
2300
Logan Chien55afb0a2018-10-15 10:42:14 +08002301static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002302_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002303 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2304 (__v8df)_mm512_div_pd(__A, __B),
2305 (__v8df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002306}
2307
Logan Chien55afb0a2018-10-15 10:42:14 +08002308static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002309_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002310 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2311 (__v8df)_mm512_div_pd(__A, __B),
2312 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002313}
2314
Logan Chien55afb0a2018-10-15 10:42:14 +08002315static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002316_mm512_div_ps(__m512 __a, __m512 __b)
2317{
2318 return (__m512)((__v16sf)__a/(__v16sf)__b);
2319}
2320
Logan Chien55afb0a2018-10-15 10:42:14 +08002321static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002322_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002323 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2324 (__v16sf)_mm512_div_ps(__A, __B),
2325 (__v16sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002326}
2327
Logan Chien55afb0a2018-10-15 10:42:14 +08002328static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002329_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002330 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2331 (__v16sf)_mm512_div_ps(__A, __B),
2332 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002333}
2334
/* Expands to __builtin_ia32_divpd512 on A and B (each viewed as __v8df)
 * with R converted to int; the result is cast to __m512d. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002338
/* Masked form of _mm512_div_round_pd: its result and W are passed to
 * __builtin_ia32_selectpd_512 under mask U. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_div_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002343
/* Zero-masked form of _mm512_div_round_pd: its result and
 * _mm512_setzero_pd() are passed to __builtin_ia32_selectpd_512 under
 * mask U. */
#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_div_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08002348
/* Expands to __builtin_ia32_divps512 on A and B (each viewed as __v16sf)
 * with R converted to int; the result is cast to __m512. */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002352
/* Masked form of _mm512_div_round_ps: its result and W are passed to
 * __builtin_ia32_selectps_512 under mask U. */
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002357
/* Zero-masked form of _mm512_div_round_ps: its result and
 * _mm512_setzero_ps() are passed to __builtin_ia32_selectps_512 under
 * mask U. */
#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08002362
/* Expands to __builtin_ia32_rndscaleps_mask with A as the vector operand,
 * B converted to int as the second operand, _mm512_undefined_ps() as the
 * third operand, (__mmask16)-1 as the mask and _MM_FROUND_CUR_DIRECTION as
 * the final argument. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(B), (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08002368
/* Masked variant. Note the argument roles: C is the vector operand, imm the
 * int second operand, A the third (vector) operand and B the mask passed to
 * __builtin_ia32_rndscaleps_mask; the final argument is
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), (int)(imm), (__v16sf)(__m512)(A), \
      (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08002373
/* Zero-masked variant. Note the argument roles: B is the vector operand,
 * imm the int second operand and A the mask passed to
 * __builtin_ia32_rndscaleps_mask; the third operand is _mm512_setzero_ps()
 * and the final argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), (int)(imm), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08002379
/* Masked variant with a caller-supplied final argument. Argument roles: C
 * is the vector operand, imm the int second operand, A the third (vector)
 * operand, B the mask and R (converted to int) the final argument of
 * __builtin_ia32_rndscaleps_mask. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), (int)(imm), (__v16sf)(__m512)(A), \
      (__mmask16)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002384
/* Zero-masked variant with a caller-supplied final argument. Argument
 * roles: B is the vector operand, imm the int second operand, A the mask
 * and R (converted to int) the final argument of
 * __builtin_ia32_rndscaleps_mask; the third operand is
 * _mm512_setzero_ps(). */
#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), (int)(imm), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(A), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002389
/* Expands to __builtin_ia32_rndscaleps_mask with A as the vector operand,
 * imm converted to int as the second operand, _mm512_undefined_ps() as the
 * third operand, (__mmask16)-1 as the mask and R converted to int as the
 * final argument. */
#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(imm), (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002394
/* Expands to __builtin_ia32_rndscalepd_mask with A as the vector operand,
 * B converted to int as the second operand, _mm512_undefined_pd() as the
 * third operand, (__mmask8)-1 as the mask and _MM_FROUND_CUR_DIRECTION as
 * the final argument. */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), (int)(B), (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08002400
/* Masked variant. Note the argument roles: C is the vector operand, imm the
 * int second operand, A the third (vector) operand and B the mask passed to
 * __builtin_ia32_rndscalepd_mask; the final argument is
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), (int)(imm), (__v8df)(__m512d)(A), \
      (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08002405
/* Zero-masked variant. Note the argument roles: B is the vector operand,
 * imm the int second operand and A the mask passed to
 * __builtin_ia32_rndscalepd_mask; the third operand is _mm512_setzero_pd()
 * and the final argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), (int)(imm), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08002411
/* Masked variant with a caller-supplied final argument. Argument roles: C
 * is the vector operand, imm the int second operand, A the third (vector)
 * operand, B the mask and R (converted to int) the final argument of
 * __builtin_ia32_rndscalepd_mask. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), (int)(imm), (__v8df)(__m512d)(A), \
      (__mmask8)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002416
/* Zero-masked variant with a caller-supplied final argument. Argument
 * roles: B is the vector operand, imm the int second operand, A the mask
 * and R (converted to int) the final argument of
 * __builtin_ia32_rndscalepd_mask; the third operand is
 * _mm512_setzero_pd(). */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), (int)(imm), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(A), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002421
/* Unmasked round-scale of 8 packed doubles with explicit rounding argument R:
 * passes A and imm to __builtin_ia32_rndscalepd_mask with an undefined
 * pass-through operand and an all-ones mask. */
#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))

/* Unmasked fmadd on 8 packed doubles: forwards (A, B, C) to the vfmaddpd512
 * "mask" builtin with an all-ones mask and rounding argument R. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

/* Masked fmadd on 8 packed doubles: forwards (A, B, C) to the vfmaddpd512
 * "mask" builtin with mask U and rounding argument R. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

/* Masked fmadd on 8 packed doubles using the "mask3" builtin variant:
 * forwards (A, B, C) with mask U and rounding argument R. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

/* Masked fmadd on 8 packed doubles using the "maskz" builtin variant:
 * forwards (A, B, C) with mask U and rounding argument R. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

/* Unmasked fmsub on 8 packed doubles, expressed as fmadd(A, B, -C): C is
 * negated before the vfmaddpd512 "mask" builtin is called with an all-ones
 * mask and rounding argument R. */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

/* Masked fmsub on 8 packed doubles, expressed as fmadd(A, B, -C) through the
 * vfmaddpd512 "mask" builtin with mask U and rounding argument R. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

/* Masked fmsub on 8 packed doubles, expressed as fmadd(A, B, -C) through the
 * vfmaddpd512 "maskz" builtin with mask U and rounding argument R. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

/* Unmasked fnmadd on 8 packed doubles, expressed as fmadd(-A, B, C) through
 * the vfmaddpd512 "mask" builtin with an all-ones mask and rounding
 * argument R. */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

/* Masked fnmadd on 8 packed doubles, expressed as fmadd(-A, B, C) through the
 * vfmaddpd512 "mask3" builtin with mask U and rounding argument R. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

/* Masked fnmadd on 8 packed doubles, expressed as fmadd(-A, B, C) through the
 * vfmaddpd512 "maskz" builtin with mask U and rounding argument R. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

/* Unmasked fnmsub on 8 packed doubles, expressed as fmadd(-A, B, -C) through
 * the vfmaddpd512 "mask" builtin with an all-ones mask and rounding
 * argument R. */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

/* Masked fnmsub on 8 packed doubles, expressed as fmadd(-A, B, -C) through
 * the vfmaddpd512 "maskz" builtin with mask U and rounding argument R. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

Logan Chien55afb0a2018-10-15 10:42:14 +08002511static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002512_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2513{
2514 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2515 (__v8df) __B,
2516 (__v8df) __C,
2517 (__mmask8) -1,
2518 _MM_FROUND_CUR_DIRECTION);
2519}
2520
Logan Chien55afb0a2018-10-15 10:42:14 +08002521static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002522_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2523{
2524 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2525 (__v8df) __B,
2526 (__v8df) __C,
2527 (__mmask8) __U,
2528 _MM_FROUND_CUR_DIRECTION);
2529}
2530
Logan Chien55afb0a2018-10-15 10:42:14 +08002531static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002532_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2533{
2534 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2535 (__v8df) __B,
2536 (__v8df) __C,
2537 (__mmask8) __U,
2538 _MM_FROUND_CUR_DIRECTION);
2539}
2540
Logan Chien55afb0a2018-10-15 10:42:14 +08002541static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002542_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2543{
2544 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2545 (__v8df) __B,
2546 (__v8df) __C,
2547 (__mmask8) __U,
2548 _MM_FROUND_CUR_DIRECTION);
2549}
2550
Logan Chien55afb0a2018-10-15 10:42:14 +08002551static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002552_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2553{
2554 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2555 (__v8df) __B,
2556 -(__v8df) __C,
2557 (__mmask8) -1,
2558 _MM_FROUND_CUR_DIRECTION);
2559}
2560
Logan Chien55afb0a2018-10-15 10:42:14 +08002561static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002562_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2563{
2564 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2565 (__v8df) __B,
2566 -(__v8df) __C,
2567 (__mmask8) __U,
2568 _MM_FROUND_CUR_DIRECTION);
2569}
2570
Logan Chien55afb0a2018-10-15 10:42:14 +08002571static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002572_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2573{
2574 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2575 (__v8df) __B,
2576 -(__v8df) __C,
2577 (__mmask8) __U,
2578 _MM_FROUND_CUR_DIRECTION);
2579}
2580
Logan Chien55afb0a2018-10-15 10:42:14 +08002581static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002582_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2583{
Logan Chien55afb0a2018-10-15 10:42:14 +08002584 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2585 -(__v8df) __B,
Logan Chien2833ffb2018-10-09 10:03:24 +08002586 (__v8df) __C,
2587 (__mmask8) -1,
2588 _MM_FROUND_CUR_DIRECTION);
2589}
2590
Logan Chien55afb0a2018-10-15 10:42:14 +08002591static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002592_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2593{
2594 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2595 (__v8df) __B,
2596 (__v8df) __C,
2597 (__mmask8) __U,
2598 _MM_FROUND_CUR_DIRECTION);
2599}
2600
Logan Chien55afb0a2018-10-15 10:42:14 +08002601static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002602_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2603{
2604 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2605 (__v8df) __B,
2606 (__v8df) __C,
2607 (__mmask8) __U,
2608 _MM_FROUND_CUR_DIRECTION);
2609}
2610
Logan Chien55afb0a2018-10-15 10:42:14 +08002611static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002612_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2613{
Logan Chien55afb0a2018-10-15 10:42:14 +08002614 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2615 -(__v8df) __B,
Logan Chien2833ffb2018-10-09 10:03:24 +08002616 -(__v8df) __C,
2617 (__mmask8) -1,
2618 _MM_FROUND_CUR_DIRECTION);
2619}
2620
Logan Chien55afb0a2018-10-15 10:42:14 +08002621static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002622_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2623{
2624 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2625 (__v8df) __B,
2626 -(__v8df) __C,
2627 (__mmask8) __U,
2628 _MM_FROUND_CUR_DIRECTION);
2629}
2630
/* Unmasked fmadd on 16 packed floats: forwards (A, B, C) to the vfmaddps512
 * "mask" builtin with an all-ones mask and rounding argument R. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

/* Masked fmadd on 16 packed floats: forwards (A, B, C) to the vfmaddps512
 * "mask" builtin with mask U and rounding argument R. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

/* Masked fmadd on 16 packed floats using the "mask3" builtin variant:
 * forwards (A, B, C) with mask U and rounding argument R. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

/* Masked fmadd on 16 packed floats using the "maskz" builtin variant:
 * forwards (A, B, C) with mask U and rounding argument R. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

/* Unmasked fmsub on 16 packed floats, expressed as fmadd(A, B, -C) through
 * the vfmaddps512 "mask" builtin with an all-ones mask and rounding
 * argument R. */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

/* Masked fmsub on 16 packed floats, expressed as fmadd(A, B, -C) through the
 * vfmaddps512 "mask" builtin with mask U and rounding argument R. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

/* Masked fmsub on 16 packed floats, expressed as fmadd(A, B, -C) through the
 * vfmaddps512 "maskz" builtin with mask U and rounding argument R. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

/* Unmasked fnmadd on 16 packed floats, expressed as fmadd(A, -B, C) through
 * the vfmaddps512 "mask" builtin with an all-ones mask and rounding
 * argument R. Note the negation is applied to B here. */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

/* Masked fnmadd on 16 packed floats, expressed as fmadd(-A, B, C) through the
 * vfmaddps512 "mask3" builtin with mask U and rounding argument R. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

/* Masked fnmadd on 16 packed floats, expressed as fmadd(-A, B, C) through the
 * vfmaddps512 "maskz" builtin with mask U and rounding argument R. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

/* Unmasked fnmsub on 16 packed floats, expressed as fmadd(A, -B, -C) through
 * the vfmaddps512 "mask" builtin with an all-ones mask and rounding
 * argument R. Note the negation is applied to B here. */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

/* Masked fnmsub on 16 packed floats, expressed as fmadd(-A, B, -C) through
 * the vfmaddps512 "maskz" builtin with mask U and rounding argument R. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

Logan Chien55afb0a2018-10-15 10:42:14 +08002715static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002716_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2717{
2718 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2719 (__v16sf) __B,
2720 (__v16sf) __C,
2721 (__mmask16) -1,
2722 _MM_FROUND_CUR_DIRECTION);
2723}
2724
Logan Chien55afb0a2018-10-15 10:42:14 +08002725static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002726_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2727{
2728 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2729 (__v16sf) __B,
2730 (__v16sf) __C,
2731 (__mmask16) __U,
2732 _MM_FROUND_CUR_DIRECTION);
2733}
2734
Logan Chien55afb0a2018-10-15 10:42:14 +08002735static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002736_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2737{
2738 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2739 (__v16sf) __B,
2740 (__v16sf) __C,
2741 (__mmask16) __U,
2742 _MM_FROUND_CUR_DIRECTION);
2743}
2744
Logan Chien55afb0a2018-10-15 10:42:14 +08002745static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002746_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2747{
2748 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2749 (__v16sf) __B,
2750 (__v16sf) __C,
2751 (__mmask16) __U,
2752 _MM_FROUND_CUR_DIRECTION);
2753}
2754
Logan Chien55afb0a2018-10-15 10:42:14 +08002755static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002756_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2757{
2758 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2759 (__v16sf) __B,
2760 -(__v16sf) __C,
2761 (__mmask16) -1,
2762 _MM_FROUND_CUR_DIRECTION);
2763}
2764
Logan Chien55afb0a2018-10-15 10:42:14 +08002765static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002766_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2767{
2768 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2769 (__v16sf) __B,
2770 -(__v16sf) __C,
2771 (__mmask16) __U,
2772 _MM_FROUND_CUR_DIRECTION);
2773}
2774
Logan Chien55afb0a2018-10-15 10:42:14 +08002775static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002776_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2777{
2778 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2779 (__v16sf) __B,
2780 -(__v16sf) __C,
2781 (__mmask16) __U,
2782 _MM_FROUND_CUR_DIRECTION);
2783}
2784
Logan Chien55afb0a2018-10-15 10:42:14 +08002785static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002786_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2787{
Logan Chien55afb0a2018-10-15 10:42:14 +08002788 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2789 -(__v16sf) __B,
Logan Chien2833ffb2018-10-09 10:03:24 +08002790 (__v16sf) __C,
2791 (__mmask16) -1,
2792 _MM_FROUND_CUR_DIRECTION);
2793}
2794
Logan Chien55afb0a2018-10-15 10:42:14 +08002795static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002796_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2797{
2798 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2799 (__v16sf) __B,
2800 (__v16sf) __C,
2801 (__mmask16) __U,
2802 _MM_FROUND_CUR_DIRECTION);
2803}
2804
Logan Chien55afb0a2018-10-15 10:42:14 +08002805static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002806_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2807{
2808 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2809 (__v16sf) __B,
2810 (__v16sf) __C,
2811 (__mmask16) __U,
2812 _MM_FROUND_CUR_DIRECTION);
2813}
2814
Logan Chien55afb0a2018-10-15 10:42:14 +08002815static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002816_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2817{
Logan Chien55afb0a2018-10-15 10:42:14 +08002818 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2819 -(__v16sf) __B,
Logan Chien2833ffb2018-10-09 10:03:24 +08002820 -(__v16sf) __C,
2821 (__mmask16) -1,
2822 _MM_FROUND_CUR_DIRECTION);
2823}
2824
Logan Chien55afb0a2018-10-15 10:42:14 +08002825static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002826_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2827{
2828 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2829 (__v16sf) __B,
2830 -(__v16sf) __C,
2831 (__mmask16) __U,
2832 _MM_FROUND_CUR_DIRECTION);
2833}
2834
/* Unmasked fmaddsub on 8 packed doubles: forwards (A, B, C) to the
 * vfmaddsubpd512 "mask" builtin with an all-ones mask and rounding
 * argument R. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))

/* Masked fmaddsub on 8 packed doubles: forwards (A, B, C) to the
 * vfmaddsubpd512 "mask" builtin with mask U and rounding argument R. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))

/* Masked fmaddsub on 8 packed doubles using the "mask3" builtin variant:
 * forwards (A, B, C) with mask U and rounding argument R. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

/* Masked fmaddsub on 8 packed doubles using the "maskz" builtin variant:
 * forwards (A, B, C) with mask U and rounding argument R. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

/* Unmasked fmsubadd on 8 packed doubles, expressed as fmaddsub(A, B, -C)
 * through the vfmaddsubpd512 "mask" builtin with an all-ones mask and
 * rounding argument R. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))

/* Masked fmsubadd on 8 packed doubles, expressed as fmaddsub(A, B, -C)
 * through the vfmaddsubpd512 "mask" builtin with mask U and rounding
 * argument R. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))

/* Masked fmsubadd on 8 packed doubles, expressed as fmaddsub(A, B, -C)
 * through the vfmaddsubpd512 "maskz" builtin with mask U and rounding
 * argument R. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

Logan Chien55afb0a2018-10-15 10:42:14 +08002884static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002885_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2886{
2887 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08002888 (__v8df) __B,
2889 (__v8df) __C,
2890 (__mmask8) -1,
2891 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08002892}
2893
Logan Chien55afb0a2018-10-15 10:42:14 +08002894static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002895_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2896{
2897 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08002898 (__v8df) __B,
2899 (__v8df) __C,
2900 (__mmask8) __U,
2901 _MM_FROUND_CUR_DIRECTION);
2902}
2903
2904static __inline__ __m512d __DEFAULT_FN_ATTRS512
2905_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2906{
2907 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
Logan Chien2833ffb2018-10-09 10:03:24 +08002908 (__v8df) __B,
2909 (__v8df) __C,
2910 (__mmask8) __U,
2911 _MM_FROUND_CUR_DIRECTION);
2912}
2913
Logan Chien55afb0a2018-10-15 10:42:14 +08002914static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002915_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2916{
2917 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08002918 (__v8df) __B,
2919 (__v8df) __C,
2920 (__mmask8) __U,
2921 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08002922}
2923
Logan Chien55afb0a2018-10-15 10:42:14 +08002924static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002925_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2926{
2927 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2928 (__v8df) __B,
2929 -(__v8df) __C,
2930 (__mmask8) -1,
2931 _MM_FROUND_CUR_DIRECTION);
2932}
2933
Logan Chien55afb0a2018-10-15 10:42:14 +08002934static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002935_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2936{
2937 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2938 (__v8df) __B,
2939 -(__v8df) __C,
2940 (__mmask8) __U,
2941 _MM_FROUND_CUR_DIRECTION);
2942}
2943
Logan Chien55afb0a2018-10-15 10:42:14 +08002944static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002945_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2946{
2947 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2948 (__v8df) __B,
2949 -(__v8df) __C,
2950 (__mmask8) __U,
2951 _MM_FROUND_CUR_DIRECTION);
2952}
2953
/* Unmasked fmaddsub on 16 packed floats: forwards (A, B, C) to the
 * vfmaddsubps512 "mask" builtin with an all-ones mask and rounding
 * argument R. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))

/* Masked fmaddsub on 16 packed floats: forwards (A, B, C) to the
 * vfmaddsubps512 "mask" builtin with mask U and rounding argument R. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))

/* Masked fmaddsub on 16 packed floats using the "mask3" builtin variant:
 * forwards (A, B, C) with mask U and rounding argument R. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

/* Masked fmaddsub on 16 packed floats using the "maskz" builtin variant:
 * forwards (A, B, C) with mask U and rounding argument R. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

/* Unmasked fmsubadd on 16 packed floats, expressed as fmaddsub(A, B, -C)
 * through the vfmaddsubps512 "mask" builtin with an all-ones mask and
 * rounding argument R. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))

/* Masked fmsubadd on 16 packed floats, expressed as fmaddsub(A, B, -C)
 * through the vfmaddsubps512 "mask" builtin with mask U and rounding
 * argument R. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))

/* Masked fmsubadd on 16 packed floats, expressed as fmaddsub(A, B, -C)
 * through the vfmaddsubps512 "maskz" builtin with mask U and rounding
 * argument R. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

Logan Chien55afb0a2018-10-15 10:42:14 +08003003static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003004_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3005{
3006 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3007 (__v16sf) __B,
3008 (__v16sf) __C,
3009 (__mmask16) -1,
3010 _MM_FROUND_CUR_DIRECTION);
3011}
3012
Logan Chien55afb0a2018-10-15 10:42:14 +08003013static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003014_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3015{
3016 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3017 (__v16sf) __B,
3018 (__v16sf) __C,
3019 (__mmask16) __U,
3020 _MM_FROUND_CUR_DIRECTION);
3021}
3022
Logan Chien55afb0a2018-10-15 10:42:14 +08003023static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003024_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3025{
3026 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3027 (__v16sf) __B,
3028 (__v16sf) __C,
3029 (__mmask16) __U,
3030 _MM_FROUND_CUR_DIRECTION);
3031}
3032
Logan Chien55afb0a2018-10-15 10:42:14 +08003033static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003034_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3035{
3036 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3037 (__v16sf) __B,
3038 (__v16sf) __C,
3039 (__mmask16) __U,
3040 _MM_FROUND_CUR_DIRECTION);
3041}
3042
Logan Chien55afb0a2018-10-15 10:42:14 +08003043static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003044_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3045{
3046 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3047 (__v16sf) __B,
3048 -(__v16sf) __C,
3049 (__mmask16) -1,
3050 _MM_FROUND_CUR_DIRECTION);
3051}
3052
Logan Chien55afb0a2018-10-15 10:42:14 +08003053static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003054_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3055{
3056 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3057 (__v16sf) __B,
3058 -(__v16sf) __C,
3059 (__mmask16) __U,
3060 _MM_FROUND_CUR_DIRECTION);
3061}
3062
Logan Chien55afb0a2018-10-15 10:42:14 +08003063static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003064_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3065{
3066 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3067 (__v16sf) __B,
3068 -(__v16sf) __C,
3069 (__mmask16) __U,
3070 _MM_FROUND_CUR_DIRECTION);
3071}
3072
/* Masked fmsub on 8 packed doubles via the dedicated vfmsubpd512 "mask3"
 * builtin: forwards (A, B, C) un-negated with mask U and rounding
 * argument R. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

Logan Chien55afb0a2018-10-15 10:42:14 +08003080static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003081_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3082{
Logan Chien55afb0a2018-10-15 10:42:14 +08003083 return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3084 (__v8df) __B,
3085 (__v8df) __C,
3086 (__mmask8) __U,
3087 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08003088}
3089
/* Masked fmsub on 16 packed floats via the dedicated vfmsubps512 "mask3"
 * builtin: forwards (A, B, C) un-negated with mask U and rounding
 * argument R. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

Logan Chien55afb0a2018-10-15 10:42:14 +08003096static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003097_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3098{
Logan Chien55afb0a2018-10-15 10:42:14 +08003099 return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3100 (__v16sf) __B,
3101 (__v16sf) __C,
3102 (__mmask16) __U,
3103 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08003104}
3105
/* Masked fmsubadd on 8 packed doubles via the dedicated vfmsubaddpd512
 * "mask3" builtin: forwards (A, B, C) un-negated with mask U and rounding
 * argument R. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

Logan Chien55afb0a2018-10-15 10:42:14 +08003113static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003114_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3115{
Logan Chien55afb0a2018-10-15 10:42:14 +08003116 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3117 (__v8df) __B,
3118 (__v8df) __C,
3119 (__mmask8) __U,
Logan Chien2833ffb2018-10-09 10:03:24 +08003120 _MM_FROUND_CUR_DIRECTION);
3121}
3122
/* Expands to __builtin_ia32_vfmsubaddps512_mask3 on A, B, C with mask U;
 * R is passed through as the rounding argument. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

Logan Chien55afb0a2018-10-15 10:42:14 +08003130static __inline__ __m512 __DEFAULT_FN_ATTRS512
3131_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
Logan Chien2833ffb2018-10-09 10:03:24 +08003132{
Logan Chien55afb0a2018-10-15 10:42:14 +08003133 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3134 (__v16sf) __B,
3135 (__v16sf) __C,
3136 (__mmask16) __U,
Logan Chien2833ffb2018-10-09 10:03:24 +08003137 _MM_FROUND_CUR_DIRECTION);
3138}
3139
/* Expands to __builtin_ia32_vfmaddpd512_mask with B negated: operands are
 * A, -B, C under mask U; R is passed through as the rounding argument. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), -(__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

3147static __inline__ __m512d __DEFAULT_FN_ATTRS512
3148_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3149{
3150 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3151 -(__v8df) __B,
3152 (__v8df) __C,
3153 (__mmask8) __U,
3154 _MM_FROUND_CUR_DIRECTION);
3155}
3156
/* Expands to __builtin_ia32_vfmaddps512_mask with B negated: operands are
 * A, -B, C under mask U; R is passed through as the rounding argument. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

3164static __inline__ __m512 __DEFAULT_FN_ATTRS512
3165_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3166{
3167 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3168 -(__v16sf) __B,
3169 (__v16sf) __C,
3170 (__mmask16) __U,
3171 _MM_FROUND_CUR_DIRECTION);
3172}
3173
/* Expands to __builtin_ia32_vfmaddpd512_mask with B and C both negated:
 * operands are A, -B, -C under mask U; R is the rounding argument. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), -(__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_vfmsubpd512_mask3 with A negated: operands are
 * -A, B, C under mask U; R is passed through as the rounding argument. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

3188static __inline__ __m512d __DEFAULT_FN_ATTRS512
3189_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3190{
3191 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3192 -(__v8df) __B,
3193 -(__v8df) __C,
3194 (__mmask8) __U,
3195 _MM_FROUND_CUR_DIRECTION);
3196}
3197
3198static __inline__ __m512d __DEFAULT_FN_ATTRS512
3199_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3200{
3201 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3202 (__v8df) __B,
3203 (__v8df) __C,
3204 (__mmask8) __U,
3205 _MM_FROUND_CUR_DIRECTION);
3206}
3207
/* Expands to __builtin_ia32_vfmaddps512_mask with B and C both negated:
 * operands are A, -B, -C under mask U; R is the rounding argument. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

/* Expands to __builtin_ia32_vfmsubps512_mask3 with A negated: operands are
 * -A, B, C under mask U; R is passed through as the rounding argument. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

Logan Chien55afb0a2018-10-15 10:42:14 +08003222static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003223_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3224{
Logan Chien55afb0a2018-10-15 10:42:14 +08003225 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3226 -(__v16sf) __B,
3227 -(__v16sf) __C,
3228 (__mmask16) __U,
3229 _MM_FROUND_CUR_DIRECTION);
3230}
3231
3232static __inline__ __m512 __DEFAULT_FN_ATTRS512
3233_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3234{
3235 return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
Logan Chien2833ffb2018-10-09 10:03:24 +08003236 (__v16sf) __B,
3237 (__v16sf) __C,
3238 (__mmask16) __U,
3239 _MM_FROUND_CUR_DIRECTION);
3240}
3241
Logan Chien2833ffb2018-10-09 10:03:24 +08003242
3243
/* Vector permutations */

Logan Chien55afb0a2018-10-15 10:42:14 +08003246static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003247_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3248{
Logan Chien55afb0a2018-10-15 10:42:14 +08003249 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3250 (__v16si) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +08003251}
3252
Logan Chien55afb0a2018-10-15 10:42:14 +08003253static __inline__ __m512i __DEFAULT_FN_ATTRS512
3254_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3255 __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08003256{
Logan Chien55afb0a2018-10-15 10:42:14 +08003257 return (__m512i)__builtin_ia32_selectd_512(__U,
3258 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3259 (__v16si)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +08003260}
3261
Logan Chien55afb0a2018-10-15 10:42:14 +08003262static __inline__ __m512i __DEFAULT_FN_ATTRS512
3263_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
Logan Chien2833ffb2018-10-09 10:03:24 +08003264 __m512i __B)
3265{
Logan Chien55afb0a2018-10-15 10:42:14 +08003266 return (__m512i)__builtin_ia32_selectd_512(__U,
3267 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3268 (__v16si)__I);
Logan Chien2833ffb2018-10-09 10:03:24 +08003269}
3270
Logan Chien55afb0a2018-10-15 10:42:14 +08003271static __inline__ __m512i __DEFAULT_FN_ATTRS512
3272_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3273 __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08003274{
Logan Chien55afb0a2018-10-15 10:42:14 +08003275 return (__m512i)__builtin_ia32_selectd_512(__U,
3276 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3277 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08003278}
3279
Logan Chien55afb0a2018-10-15 10:42:14 +08003280static __inline __m512i __DEFAULT_FN_ATTRS512
3281_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3282{
3283 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3284 (__v8di) __B);
3285}
Logan Chien2833ffb2018-10-09 10:03:24 +08003286
Logan Chien55afb0a2018-10-15 10:42:14 +08003287static __inline__ __m512i __DEFAULT_FN_ATTRS512
3288_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3289 __m512i __B)
3290{
3291 return (__m512i)__builtin_ia32_selectq_512(__U,
3292 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3293 (__v8di)__A);
3294}
Logan Chien2833ffb2018-10-09 10:03:24 +08003295
Logan Chien55afb0a2018-10-15 10:42:14 +08003296static __inline__ __m512i __DEFAULT_FN_ATTRS512
3297_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3298 __m512i __B)
3299{
3300 return (__m512i)__builtin_ia32_selectq_512(__U,
3301 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3302 (__v8di)__I);
3303}
Logan Chien2833ffb2018-10-09 10:03:24 +08003304
Logan Chien55afb0a2018-10-15 10:42:14 +08003305static __inline__ __m512i __DEFAULT_FN_ATTRS512
3306_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3307 __m512i __B)
3308{
3309 return (__m512i)__builtin_ia32_selectq_512(__U,
3310 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3311 (__v8di)_mm512_setzero_si512());
3312}
Logan Chien2833ffb2018-10-09 10:03:24 +08003313
/* Expands to __builtin_ia32_alignq512 on A and B (as 64-bit element
 * vectors) with I cast to int as the last argument. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

/* Masked form: __builtin_ia32_selectq_512 chooses under U between
 * _mm512_alignr_epi64(A, B, imm) (first select operand) and W (second). */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))

/* Zero-fallback form: __builtin_ia32_selectq_512 chooses under U between
 * _mm512_alignr_epi64(A, B, imm) (first select operand) and
 * _mm512_setzero_si512() (second). */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))

/* Expands to __builtin_ia32_alignd512 on A and B (as 32-bit element
 * vectors) with I cast to int as the last argument. */
#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

/* Masked form: __builtin_ia32_selectd_512 chooses under U between
 * _mm512_alignr_epi32(A, B, imm) (first select operand) and W (second). */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))

/* Zero-fallback form: __builtin_ia32_selectd_512 chooses under U between
 * _mm512_alignr_epi32(A, B, imm) (first select operand) and
 * _mm512_setzero_si512() (second). */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
/* Vector Extract */

/* Expands to __builtin_ia32_extractf64x4_mask on A with index I, an
 * undefined pass-through vector and an all-ones mask. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (int)(I), (__v4df)_mm256_undefined_pd(), \
      (__mmask8)-1))

/* Expands to __builtin_ia32_extractf64x4_mask on A with index imm,
 * pass-through vector W and mask U. */
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (int)(imm), (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

/* Expands to __builtin_ia32_extractf64x4_mask on A with index imm, a
 * zero pass-through vector and mask U. */
#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
      (__mmask8)(U)))

/* Expands to __builtin_ia32_extractf32x4_mask on A with index I, an
 * undefined pass-through vector and an all-ones mask. */
#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (int)(I), (__v4sf)_mm_undefined_ps(), \
      (__mmask8)-1))

/* Expands to __builtin_ia32_extractf32x4_mask on A with index imm,
 * pass-through vector W and mask U. */
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (int)(imm), (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

/* Expands to __builtin_ia32_extractf32x4_mask on A with index imm, a
 * zero pass-through vector and mask U. */
#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)))

/* Vector Blend */

Logan Chien55afb0a2018-10-15 10:42:14 +08003375static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003376_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3377{
3378 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3379 (__v8df) __W,
3380 (__v8df) __A);
3381}
3382
Logan Chien55afb0a2018-10-15 10:42:14 +08003383static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003384_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3385{
3386 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3387 (__v16sf) __W,
3388 (__v16sf) __A);
3389}
3390
Logan Chien55afb0a2018-10-15 10:42:14 +08003391static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003392_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3393{
3394 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3395 (__v8di) __W,
3396 (__v8di) __A);
3397}
3398
Logan Chien55afb0a2018-10-15 10:42:14 +08003399static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003400_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3401{
3402 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3403 (__v16si) __W,
3404 (__v16si) __A);
3405}
3406
/* Compare */

/* Expands to __builtin_ia32_cmpps512_mask on A and B with predicate P, an
 * all-ones input mask and R as the last argument; yields a __mmask16. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(P), \
      (__mmask16)-1, (int)(R)))

/* Expands to __builtin_ia32_cmpps512_mask on A and B with predicate P,
 * input mask U and R as the last argument; yields a __mmask16. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(P), \
      (__mmask16)(U), (int)(R)))

/* Non-rounding float compares: delegate to the *_round_ps_mask macros with
 * _MM_FROUND_CUR_DIRECTION as the rounding argument. */
#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Float compare shortcuts fixed to predicate _CMP_EQ_OQ. */
#define _mm512_cmpeq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

/* Float compare shortcuts fixed to predicate _CMP_LT_OS. */
#define _mm512_cmplt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

/* Float compare shortcuts fixed to predicate _CMP_LE_OS. */
#define _mm512_cmple_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

/* Float compare shortcuts fixed to predicate _CMP_UNORD_Q. */
#define _mm512_cmpunord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

/* Float compare shortcuts fixed to predicate _CMP_NEQ_UQ. */
#define _mm512_cmpneq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

/* Float compare shortcuts fixed to predicate _CMP_NLT_US. */
#define _mm512_cmpnlt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

/* Float compare shortcuts fixed to predicate _CMP_NLE_US. */
#define _mm512_cmpnle_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

/* Float compare shortcuts fixed to predicate _CMP_ORD_Q. */
#define _mm512_cmpord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

/* Expands to __builtin_ia32_cmppd512_mask on A and B with predicate P, an
 * all-ones input mask and R as the last argument; yields a __mmask8. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(P), \
      (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cmppd512_mask on A and B with predicate P,
 * input mask U and R as the last argument; yields a __mmask8. */
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(P), \
      (__mmask8)(U), (int)(R)))

/* Non-rounding double compares: delegate to the *_round_pd_mask macros with
 * _MM_FROUND_CUR_DIRECTION as the rounding argument. */
#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Double compare shortcuts fixed to predicate _CMP_EQ_OQ. */
#define _mm512_cmpeq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

/* Double compare shortcuts fixed to predicate _CMP_LT_OS. */
#define _mm512_cmplt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

/* Double compare shortcuts fixed to predicate _CMP_LE_OS. */
#define _mm512_cmple_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

/* Double compare shortcuts fixed to predicate _CMP_UNORD_Q. */
#define _mm512_cmpunord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

/* Double compare shortcuts fixed to predicate _CMP_NEQ_UQ. */
#define _mm512_cmpneq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

/* Double compare shortcuts fixed to predicate _CMP_NLT_US. */
#define _mm512_cmpnlt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

/* Double compare shortcuts fixed to predicate _CMP_NLE_US. */
#define _mm512_cmpnle_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

/* Double compare shortcuts fixed to predicate _CMP_ORD_Q. */
#define _mm512_cmpord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)

/* Conversion */

/* Expands to __builtin_ia32_cvttps2udq512_mask on A with an undefined
 * pass-through vector, an all-ones mask and R as the last argument. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32(), \
      (__mmask16)-1, (int)(R)))

/* Expands to __builtin_ia32_cvttps2udq512_mask on A with pass-through
 * vector W, mask U and R as the last argument. */
#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)))

/* Expands to __builtin_ia32_cvttps2udq512_mask on A with a zero
 * pass-through vector, mask U and R as the last argument. */
#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)))

Logan Chien55afb0a2018-10-15 10:42:14 +08003537static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003538_mm512_cvttps_epu32(__m512 __A)
3539{
3540 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3541 (__v16si)
3542 _mm512_setzero_si512 (),
3543 (__mmask16) -1,
3544 _MM_FROUND_CUR_DIRECTION);
3545}
3546
Logan Chien55afb0a2018-10-15 10:42:14 +08003547static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003548_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3549{
3550 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3551 (__v16si) __W,
3552 (__mmask16) __U,
3553 _MM_FROUND_CUR_DIRECTION);
3554}
3555
Logan Chien55afb0a2018-10-15 10:42:14 +08003556static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003557_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3558{
3559 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3560 (__v16si) _mm512_setzero_si512 (),
3561 (__mmask16) __U,
3562 _MM_FROUND_CUR_DIRECTION);
3563}
3564
/* Expands to __builtin_ia32_cvtdq2ps512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)))

/* Expands to __builtin_ia32_cvtdq2ps512_mask on A with pass-through vector
 * W, mask U and R as the last argument. */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

/* Expands to __builtin_ia32_cvtdq2ps512_mask on A with a zero pass-through
 * vector, mask U and R as the last argument. */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))

/* Expands to __builtin_ia32_cvtudq2ps512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. */
#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)))

/* Expands to __builtin_ia32_cvtudq2ps512_mask on A with pass-through vector
 * W, mask U and R as the last argument. */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

/* Expands to __builtin_ia32_cvtudq2ps512_mask on A with a zero pass-through
 * vector, mask U and R as the last argument. */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))

Logan Chien55afb0a2018-10-15 10:42:14 +08003595static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003596_mm512_cvtepu32_ps (__m512i __A)
3597{
Logan Chien55afb0a2018-10-15 10:42:14 +08003598 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
Logan Chien2833ffb2018-10-09 10:03:24 +08003599}
3600
Logan Chien55afb0a2018-10-15 10:42:14 +08003601static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003602_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3603{
Logan Chien55afb0a2018-10-15 10:42:14 +08003604 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3605 (__v16sf)_mm512_cvtepu32_ps(__A),
3606 (__v16sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003607}
3608
Logan Chien55afb0a2018-10-15 10:42:14 +08003609static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003610_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3611{
Logan Chien55afb0a2018-10-15 10:42:14 +08003612 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3613 (__v16sf)_mm512_cvtepu32_ps(__A),
3614 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08003615}
3616
Logan Chien55afb0a2018-10-15 10:42:14 +08003617static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003618_mm512_cvtepi32_pd(__m256i __A)
3619{
Logan Chien55afb0a2018-10-15 10:42:14 +08003620 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
Logan Chien2833ffb2018-10-09 10:03:24 +08003621}
3622
Logan Chien55afb0a2018-10-15 10:42:14 +08003623static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003624_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3625{
Logan Chien55afb0a2018-10-15 10:42:14 +08003626 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3627 (__v8df)_mm512_cvtepi32_pd(__A),
3628 (__v8df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003629}
3630
Logan Chien55afb0a2018-10-15 10:42:14 +08003631static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003632_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3633{
Logan Chien55afb0a2018-10-15 10:42:14 +08003634 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3635 (__v8df)_mm512_cvtepi32_pd(__A),
3636 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08003637}
3638
Logan Chien55afb0a2018-10-15 10:42:14 +08003639static __inline__ __m512d __DEFAULT_FN_ATTRS512
3640_mm512_cvtepi32lo_pd(__m512i __A)
3641{
3642 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3643}
3644
3645static __inline__ __m512d __DEFAULT_FN_ATTRS512
3646_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3647{
3648 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3649}
3650
3651static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003652_mm512_cvtepi32_ps (__m512i __A)
3653{
Logan Chien55afb0a2018-10-15 10:42:14 +08003654 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
Logan Chien2833ffb2018-10-09 10:03:24 +08003655}
3656
Logan Chien55afb0a2018-10-15 10:42:14 +08003657static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003658_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3659{
Logan Chien55afb0a2018-10-15 10:42:14 +08003660 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3661 (__v16sf)_mm512_cvtepi32_ps(__A),
3662 (__v16sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003663}
3664
Logan Chien55afb0a2018-10-15 10:42:14 +08003665static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003666_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3667{
Logan Chien55afb0a2018-10-15 10:42:14 +08003668 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3669 (__v16sf)_mm512_cvtepi32_ps(__A),
3670 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08003671}
3672
Logan Chien55afb0a2018-10-15 10:42:14 +08003673static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003674_mm512_cvtepu32_pd(__m256i __A)
3675{
Logan Chien55afb0a2018-10-15 10:42:14 +08003676 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
Logan Chien2833ffb2018-10-09 10:03:24 +08003677}
3678
Logan Chien55afb0a2018-10-15 10:42:14 +08003679static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003680_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3681{
Logan Chien55afb0a2018-10-15 10:42:14 +08003682 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3683 (__v8df)_mm512_cvtepu32_pd(__A),
3684 (__v8df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003685}
3686
Logan Chien55afb0a2018-10-15 10:42:14 +08003687static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003688_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3689{
Logan Chien55afb0a2018-10-15 10:42:14 +08003690 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3691 (__v8df)_mm512_cvtepu32_pd(__A),
3692 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08003693}
3694
Logan Chien55afb0a2018-10-15 10:42:14 +08003695static __inline__ __m512d __DEFAULT_FN_ATTRS512
3696_mm512_cvtepu32lo_pd(__m512i __A)
3697{
3698 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3699}
3700
3701static __inline__ __m512d __DEFAULT_FN_ATTRS512
3702_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3703{
3704 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3705}
3706
/* Expands to __builtin_ia32_cvtpd2ps512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument; yields a __m256. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cvtpd2ps512_mask on A with pass-through vector
 * W, mask U and R as the last argument; yields a __m256. */
#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), (__v8sf)(__m256)(W), \
      (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_cvtpd2ps512_mask on A with a zero pass-through
 * vector, mask U and R as the last argument; yields a __m256. */
#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), (int)(R)))

Logan Chien55afb0a2018-10-15 10:42:14 +08003722static __inline__ __m256 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003723_mm512_cvtpd_ps (__m512d __A)
3724{
3725 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3726 (__v8sf) _mm256_undefined_ps (),
3727 (__mmask8) -1,
3728 _MM_FROUND_CUR_DIRECTION);
3729}
3730
Logan Chien55afb0a2018-10-15 10:42:14 +08003731static __inline__ __m256 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003732_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3733{
3734 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3735 (__v8sf) __W,
3736 (__mmask8) __U,
3737 _MM_FROUND_CUR_DIRECTION);
3738}
3739
Logan Chien55afb0a2018-10-15 10:42:14 +08003740static __inline__ __m256 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003741_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3742{
3743 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3744 (__v8sf) _mm256_setzero_ps (),
3745 (__mmask8) __U,
3746 _MM_FROUND_CUR_DIRECTION);
3747}
3748
Logan Chien55afb0a2018-10-15 10:42:14 +08003749static __inline__ __m512 __DEFAULT_FN_ATTRS512
3750_mm512_cvtpd_pslo (__m512d __A)
3751{
3752 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3753 (__v8sf) _mm256_setzero_ps (),
3754 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3755}
3756
3757static __inline__ __m512 __DEFAULT_FN_ATTRS512
3758_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3759{
3760 return (__m512) __builtin_shufflevector (
3761 (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
3762 __U, __A),
3763 (__v8sf) _mm256_setzero_ps (),
3764 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3765}
3766
/* Expands to __builtin_ia32_vcvtps2ph512_mask on A with I cast to int, an
 * undefined pass-through vector and an all-ones mask; yields a __m256i. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask( \
      (__v16sf)(__m512)(A), (int)(I), (__v16hi)_mm256_undefined_si256(), \
      (__mmask16)-1))

/* Expands to __builtin_ia32_vcvtps2ph512_mask on A with I cast to int,
 * pass-through vector W (first argument) and mask U (second argument).
 * Parameter names follow the W/U convention of the other masked macros;
 * argument positions are unchanged. */
#define _mm512_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask( \
      (__v16sf)(__m512)(A), (int)(I), (__v16hi)(__m256i)(W), \
      (__mmask16)(U)))

Logan Chien55afb0a2018-10-15 10:42:14 +08003777#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08003778 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3779 (__v16hi)_mm256_setzero_si256(), \
3780 (__mmask16)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003781
/* The non-"round" spellings are plain aliases for the _cvt_roundps_ph
 * macros, so they take exactly the same argument lists. */
#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
Logan Chien2833ffb2018-10-09 10:03:24 +08003785
/* Conversions from 16 packed 16-bit values (A, a __m256i) to 16 floats via
 * __builtin_ia32_vcvtph2ps512_mask.  R is cast to int and passed as the
 * builtin's last operand.
 *
 * Unmasked form: all-ones mask, undefined pass-through. */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the 16-bit mask. */
#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

/* Zero-masked form: all-zero pass-through, U the 16-bit mask. */
#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003800
3801
Logan Chien55afb0a2018-10-15 10:42:14 +08003802static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003803_mm512_cvtph_ps(__m256i __A)
3804{
3805 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3806 (__v16sf)
3807 _mm512_setzero_ps (),
3808 (__mmask16) -1,
3809 _MM_FROUND_CUR_DIRECTION);
3810}
3811
Logan Chien55afb0a2018-10-15 10:42:14 +08003812static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003813_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3814{
3815 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3816 (__v16sf) __W,
3817 (__mmask16) __U,
3818 _MM_FROUND_CUR_DIRECTION);
3819}
3820
Logan Chien55afb0a2018-10-15 10:42:14 +08003821static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003822_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3823{
3824 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3825 (__v16sf) _mm512_setzero_ps (),
3826 (__mmask16) __U,
3827 _MM_FROUND_CUR_DIRECTION);
3828}
3829
/* Conversions of 8 doubles (A) to 8 32-bit integers (returned as __m256i)
 * through __builtin_ia32_cvttpd2dq512_mask; R is cast to int and passed as
 * the builtin's last operand.
 * NOTE(review): the "tt" in the name suggests truncation -- confirm against
 * the Intel documentation.
 *
 * Unmasked form: all-ones mask, zero pass-through. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the 8-bit mask. */
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: zero pass-through, U the 8-bit mask. */
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003844
Logan Chien55afb0a2018-10-15 10:42:14 +08003845static __inline __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003846_mm512_cvttpd_epi32(__m512d __a)
3847{
3848 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3849 (__v8si)_mm256_setzero_si256(),
3850 (__mmask8) -1,
3851 _MM_FROUND_CUR_DIRECTION);
3852}
3853
Logan Chien55afb0a2018-10-15 10:42:14 +08003854static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003855_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3856{
3857 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3858 (__v8si) __W,
3859 (__mmask8) __U,
3860 _MM_FROUND_CUR_DIRECTION);
3861}
3862
Logan Chien55afb0a2018-10-15 10:42:14 +08003863static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003864_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
3865{
3866 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3867 (__v8si) _mm256_setzero_si256 (),
3868 (__mmask8) __U,
3869 _MM_FROUND_CUR_DIRECTION);
3870}
3871
/* Conversions of 16 floats (A) to 16 32-bit integers through
 * __builtin_ia32_cvttps2dq512_mask; R is cast to int and passed as the
 * builtin's last operand.
 *
 * Unmasked form: all-ones mask, zero pass-through. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the 16-bit mask. */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)))

/* Zero-masked form: zero pass-through, U the 16-bit mask. */
#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003886
Logan Chien55afb0a2018-10-15 10:42:14 +08003887static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003888_mm512_cvttps_epi32(__m512 __a)
3889{
3890 return (__m512i)
3891 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3892 (__v16si) _mm512_setzero_si512 (),
3893 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
3894}
3895
Logan Chien55afb0a2018-10-15 10:42:14 +08003896static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003897_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3898{
3899 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3900 (__v16si) __W,
3901 (__mmask16) __U,
3902 _MM_FROUND_CUR_DIRECTION);
3903}
3904
Logan Chien55afb0a2018-10-15 10:42:14 +08003905static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003906_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
3907{
3908 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3909 (__v16si) _mm512_setzero_si512 (),
3910 (__mmask16) __U,
3911 _MM_FROUND_CUR_DIRECTION);
3912}
3913
/* Conversions of 16 floats (A) to 16 32-bit integers through
 * __builtin_ia32_cvtps2dq512_mask; R is cast to int and passed as the
 * builtin's rounding operand.
 *
 * Unmasked form: all-ones mask, zero pass-through. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the 16-bit mask. */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)))

/* Zero-masked form: zero pass-through, U the 16-bit mask. */
#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003928
Logan Chien55afb0a2018-10-15 10:42:14 +08003929static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003930_mm512_cvtps_epi32 (__m512 __A)
3931{
3932 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3933 (__v16si) _mm512_undefined_epi32 (),
3934 (__mmask16) -1,
3935 _MM_FROUND_CUR_DIRECTION);
3936}
3937
Logan Chien55afb0a2018-10-15 10:42:14 +08003938static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003939_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3940{
3941 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3942 (__v16si) __W,
3943 (__mmask16) __U,
3944 _MM_FROUND_CUR_DIRECTION);
3945}
3946
Logan Chien55afb0a2018-10-15 10:42:14 +08003947static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003948_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
3949{
3950 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3951 (__v16si)
3952 _mm512_setzero_si512 (),
3953 (__mmask16) __U,
3954 _MM_FROUND_CUR_DIRECTION);
3955}
3956
/* Conversions of 8 doubles (A) to 8 32-bit integers (returned as __m256i)
 * through __builtin_ia32_cvtpd2dq512_mask; R is cast to int and passed as
 * the builtin's rounding operand.
 *
 * Unmasked form: all-ones mask, zero pass-through. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the 8-bit mask. */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: zero pass-through, U the 8-bit mask. */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003971
Logan Chien55afb0a2018-10-15 10:42:14 +08003972static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003973_mm512_cvtpd_epi32 (__m512d __A)
3974{
3975 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3976 (__v8si)
3977 _mm256_undefined_si256 (),
3978 (__mmask8) -1,
3979 _MM_FROUND_CUR_DIRECTION);
3980}
3981
Logan Chien55afb0a2018-10-15 10:42:14 +08003982static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003983_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3984{
3985 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3986 (__v8si) __W,
3987 (__mmask8) __U,
3988 _MM_FROUND_CUR_DIRECTION);
3989}
3990
Logan Chien55afb0a2018-10-15 10:42:14 +08003991static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08003992_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
3993{
3994 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3995 (__v8si)
3996 _mm256_setzero_si256 (),
3997 (__mmask8) __U,
3998 _MM_FROUND_CUR_DIRECTION);
3999}
4000
/* Conversions of 16 floats (A) to 16 32-bit lanes through
 * __builtin_ia32_cvtps2udq512_mask; R is cast to int and passed as the
 * builtin's rounding operand.
 * NOTE(review): "epu32"/"udq" indicate unsigned results by naming
 * convention; the vectors are still typed __v16si here.
 *
 * Unmasked form: all-ones mask, zero pass-through. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the 16-bit mask. */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)))

/* Zero-masked form: zero pass-through, U the 16-bit mask. */
#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004015
Logan Chien55afb0a2018-10-15 10:42:14 +08004016static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004017_mm512_cvtps_epu32 ( __m512 __A)
4018{
4019 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4020 (__v16si)\
Logan Chien55afb0a2018-10-15 10:42:14 +08004021 _mm512_undefined_epi32 (),
Logan Chien2833ffb2018-10-09 10:03:24 +08004022 (__mmask16) -1,\
Logan Chien55afb0a2018-10-15 10:42:14 +08004023 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08004024}
4025
Logan Chien55afb0a2018-10-15 10:42:14 +08004026static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004027_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4028{
4029 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4030 (__v16si) __W,
4031 (__mmask16) __U,
4032 _MM_FROUND_CUR_DIRECTION);
4033}
4034
Logan Chien55afb0a2018-10-15 10:42:14 +08004035static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004036_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4037{
4038 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08004039 (__v16si)
Logan Chien2833ffb2018-10-09 10:03:24 +08004040 _mm512_setzero_si512 (),
4041 (__mmask16) __U ,
4042 _MM_FROUND_CUR_DIRECTION);
4043}
4044
/* Conversions of 8 doubles (A) to 8 32-bit lanes (returned as __m256i)
 * through __builtin_ia32_cvtpd2udq512_mask; R is cast to int and passed as
 * the builtin's rounding operand.
 *
 * Unmasked form: all-ones mask, zero pass-through. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the 8-bit mask. */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: zero pass-through, U the 8-bit mask. */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004059
Logan Chien55afb0a2018-10-15 10:42:14 +08004060static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004061_mm512_cvtpd_epu32 (__m512d __A)
4062{
4063 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4064 (__v8si)
4065 _mm256_undefined_si256 (),
4066 (__mmask8) -1,
4067 _MM_FROUND_CUR_DIRECTION);
4068}
4069
Logan Chien55afb0a2018-10-15 10:42:14 +08004070static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004071_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4072{
4073 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4074 (__v8si) __W,
4075 (__mmask8) __U,
4076 _MM_FROUND_CUR_DIRECTION);
4077}
4078
Logan Chien55afb0a2018-10-15 10:42:14 +08004079static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004080_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4081{
4082 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4083 (__v8si)
4084 _mm256_setzero_si256 (),
4085 (__mmask8) __U,
4086 _MM_FROUND_CUR_DIRECTION);
4087}
4088
Logan Chien55afb0a2018-10-15 10:42:14 +08004089static __inline__ double __DEFAULT_FN_ATTRS512
4090_mm512_cvtsd_f64(__m512d __a)
4091{
4092 return __a[0];
4093}
4094
4095static __inline__ float __DEFAULT_FN_ATTRS512
4096_mm512_cvtss_f32(__m512 __a)
4097{
4098 return __a[0];
4099}
4100
/* Unpack and Interleave */
4102
Logan Chien55afb0a2018-10-15 10:42:14 +08004103static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004104_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4105{
4106 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4107 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4108}
4109
Logan Chien55afb0a2018-10-15 10:42:14 +08004110static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004111_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4112{
4113 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4114 (__v8df)_mm512_unpackhi_pd(__A, __B),
4115 (__v8df)__W);
4116}
4117
Logan Chien55afb0a2018-10-15 10:42:14 +08004118static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004119_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4120{
4121 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4122 (__v8df)_mm512_unpackhi_pd(__A, __B),
4123 (__v8df)_mm512_setzero_pd());
4124}
4125
Logan Chien55afb0a2018-10-15 10:42:14 +08004126static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004127_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4128{
4129 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4130 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4131}
4132
Logan Chien55afb0a2018-10-15 10:42:14 +08004133static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004134_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4135{
4136 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4137 (__v8df)_mm512_unpacklo_pd(__A, __B),
4138 (__v8df)__W);
4139}
4140
Logan Chien55afb0a2018-10-15 10:42:14 +08004141static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004142_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4143{
4144 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4145 (__v8df)_mm512_unpacklo_pd(__A, __B),
4146 (__v8df)_mm512_setzero_pd());
4147}
4148
Logan Chien55afb0a2018-10-15 10:42:14 +08004149static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004150_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4151{
4152 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4153 2, 18, 3, 19,
4154 2+4, 18+4, 3+4, 19+4,
4155 2+8, 18+8, 3+8, 19+8,
4156 2+12, 18+12, 3+12, 19+12);
4157}
4158
Logan Chien55afb0a2018-10-15 10:42:14 +08004159static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004160_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4161{
4162 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4163 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4164 (__v16sf)__W);
4165}
4166
Logan Chien55afb0a2018-10-15 10:42:14 +08004167static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004168_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4169{
4170 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4171 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4172 (__v16sf)_mm512_setzero_ps());
4173}
4174
Logan Chien55afb0a2018-10-15 10:42:14 +08004175static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004176_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4177{
4178 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4179 0, 16, 1, 17,
4180 0+4, 16+4, 1+4, 17+4,
4181 0+8, 16+8, 1+8, 17+8,
4182 0+12, 16+12, 1+12, 17+12);
4183}
4184
Logan Chien55afb0a2018-10-15 10:42:14 +08004185static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004186_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4187{
4188 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4189 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4190 (__v16sf)__W);
4191}
4192
Logan Chien55afb0a2018-10-15 10:42:14 +08004193static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004194_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4195{
4196 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4197 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4198 (__v16sf)_mm512_setzero_ps());
4199}
4200
Logan Chien55afb0a2018-10-15 10:42:14 +08004201static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004202_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4203{
4204 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4205 2, 18, 3, 19,
4206 2+4, 18+4, 3+4, 19+4,
4207 2+8, 18+8, 3+8, 19+8,
4208 2+12, 18+12, 3+12, 19+12);
4209}
4210
Logan Chien55afb0a2018-10-15 10:42:14 +08004211static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004212_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4213{
4214 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4215 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4216 (__v16si)__W);
4217}
4218
Logan Chien55afb0a2018-10-15 10:42:14 +08004219static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004220_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4221{
4222 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4223 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4224 (__v16si)_mm512_setzero_si512());
4225}
4226
Logan Chien55afb0a2018-10-15 10:42:14 +08004227static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004228_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4229{
4230 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4231 0, 16, 1, 17,
4232 0+4, 16+4, 1+4, 17+4,
4233 0+8, 16+8, 1+8, 17+8,
4234 0+12, 16+12, 1+12, 17+12);
4235}
4236
Logan Chien55afb0a2018-10-15 10:42:14 +08004237static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004238_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4239{
4240 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4241 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4242 (__v16si)__W);
4243}
4244
Logan Chien55afb0a2018-10-15 10:42:14 +08004245static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004246_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4247{
4248 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4249 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4250 (__v16si)_mm512_setzero_si512());
4251}
4252
Logan Chien55afb0a2018-10-15 10:42:14 +08004253static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004254_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4255{
4256 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4257 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4258}
4259
Logan Chien55afb0a2018-10-15 10:42:14 +08004260static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004261_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4262{
4263 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4264 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4265 (__v8di)__W);
4266}
4267
Logan Chien55afb0a2018-10-15 10:42:14 +08004268static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004269_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4270{
4271 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4272 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4273 (__v8di)_mm512_setzero_si512());
4274}
4275
Logan Chien55afb0a2018-10-15 10:42:14 +08004276static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004277_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4278{
4279 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4280 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4281}
4282
Logan Chien55afb0a2018-10-15 10:42:14 +08004283static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004284_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4285{
4286 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4287 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4288 (__v8di)__W);
4289}
4290
Logan Chien55afb0a2018-10-15 10:42:14 +08004291static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004292_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4293{
4294 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4295 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4296 (__v8di)_mm512_setzero_si512());
4297}
4298
Logan Chien2833ffb2018-10-09 10:03:24 +08004299
/* SIMD load ops */
4301
Logan Chien55afb0a2018-10-15 10:42:14 +08004302static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004303_mm512_loadu_si512 (void const *__P)
4304{
Logan Chien55afb0a2018-10-15 10:42:14 +08004305 struct __loadu_si512 {
Logan Chiendbcf4122019-03-21 10:50:25 +08004306 __m512i_u __v;
Logan Chien55afb0a2018-10-15 10:42:14 +08004307 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07004308 return ((const struct __loadu_si512*)__P)->__v;
Logan Chien2833ffb2018-10-09 10:03:24 +08004309}
4310
Logan Chien55afb0a2018-10-15 10:42:14 +08004311static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien969aea62018-12-05 18:40:57 +08004312_mm512_loadu_epi32 (void const *__P)
4313{
4314 struct __loadu_epi32 {
Logan Chiendbcf4122019-03-21 10:50:25 +08004315 __m512i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08004316 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07004317 return ((const struct __loadu_epi32*)__P)->__v;
Logan Chien969aea62018-12-05 18:40:57 +08004318}
4319
4320static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004321_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4322{
4323 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4324 (__v16si) __W,
4325 (__mmask16) __U);
4326}
4327
4328
Logan Chien55afb0a2018-10-15 10:42:14 +08004329static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004330_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4331{
4332 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4333 (__v16si)
4334 _mm512_setzero_si512 (),
4335 (__mmask16) __U);
4336}
4337
Logan Chien55afb0a2018-10-15 10:42:14 +08004338static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien969aea62018-12-05 18:40:57 +08004339_mm512_loadu_epi64 (void const *__P)
4340{
4341 struct __loadu_epi64 {
Logan Chiendbcf4122019-03-21 10:50:25 +08004342 __m512i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08004343 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07004344 return ((const struct __loadu_epi64*)__P)->__v;
Logan Chien969aea62018-12-05 18:40:57 +08004345}
4346
4347static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004348_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4349{
4350 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4351 (__v8di) __W,
4352 (__mmask8) __U);
4353}
4354
Logan Chien55afb0a2018-10-15 10:42:14 +08004355static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004356_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4357{
4358 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4359 (__v8di)
4360 _mm512_setzero_si512 (),
4361 (__mmask8) __U);
4362}
4363
Logan Chien55afb0a2018-10-15 10:42:14 +08004364static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004365_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4366{
4367 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4368 (__v16sf) __W,
4369 (__mmask16) __U);
4370}
4371
Logan Chien55afb0a2018-10-15 10:42:14 +08004372static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004373_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4374{
4375 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4376 (__v16sf)
4377 _mm512_setzero_ps (),
4378 (__mmask16) __U);
4379}
4380
Logan Chien55afb0a2018-10-15 10:42:14 +08004381static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004382_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4383{
4384 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4385 (__v8df) __W,
4386 (__mmask8) __U);
4387}
4388
Logan Chien55afb0a2018-10-15 10:42:14 +08004389static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004390_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4391{
4392 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4393 (__v8df)
4394 _mm512_setzero_pd (),
4395 (__mmask8) __U);
4396}
4397
Logan Chien55afb0a2018-10-15 10:42:14 +08004398static __inline __m512d __DEFAULT_FN_ATTRS512
4399_mm512_loadu_pd(void const *__p)
Logan Chien2833ffb2018-10-09 10:03:24 +08004400{
4401 struct __loadu_pd {
Logan Chiendbcf4122019-03-21 10:50:25 +08004402 __m512d_u __v;
Logan Chien2833ffb2018-10-09 10:03:24 +08004403 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07004404 return ((const struct __loadu_pd*)__p)->__v;
Logan Chien2833ffb2018-10-09 10:03:24 +08004405}
4406
Logan Chien55afb0a2018-10-15 10:42:14 +08004407static __inline __m512 __DEFAULT_FN_ATTRS512
4408_mm512_loadu_ps(void const *__p)
Logan Chien2833ffb2018-10-09 10:03:24 +08004409{
4410 struct __loadu_ps {
Logan Chiendbcf4122019-03-21 10:50:25 +08004411 __m512_u __v;
Logan Chien2833ffb2018-10-09 10:03:24 +08004412 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07004413 return ((const struct __loadu_ps*)__p)->__v;
Logan Chien2833ffb2018-10-09 10:03:24 +08004414}
4415
Logan Chien55afb0a2018-10-15 10:42:14 +08004416static __inline __m512 __DEFAULT_FN_ATTRS512
4417_mm512_load_ps(void const *__p)
Logan Chien2833ffb2018-10-09 10:03:24 +08004418{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07004419 return *(const __m512*)__p;
Logan Chien2833ffb2018-10-09 10:03:24 +08004420}
4421
Logan Chien55afb0a2018-10-15 10:42:14 +08004422static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004423_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4424{
4425 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4426 (__v16sf) __W,
4427 (__mmask16) __U);
4428}
4429
Logan Chien55afb0a2018-10-15 10:42:14 +08004430static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004431_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4432{
4433 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4434 (__v16sf)
4435 _mm512_setzero_ps (),
4436 (__mmask16) __U);
4437}
4438
Logan Chien55afb0a2018-10-15 10:42:14 +08004439static __inline __m512d __DEFAULT_FN_ATTRS512
4440_mm512_load_pd(void const *__p)
Logan Chien2833ffb2018-10-09 10:03:24 +08004441{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07004442 return *(const __m512d*)__p;
Logan Chien2833ffb2018-10-09 10:03:24 +08004443}
4444
Logan Chien55afb0a2018-10-15 10:42:14 +08004445static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004446_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4447{
4448 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4449 (__v8df) __W,
4450 (__mmask8) __U);
4451}
4452
Logan Chien55afb0a2018-10-15 10:42:14 +08004453static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004454_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4455{
4456 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4457 (__v8df)
4458 _mm512_setzero_pd (),
4459 (__mmask8) __U);
4460}
4461
Logan Chien55afb0a2018-10-15 10:42:14 +08004462static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004463_mm512_load_si512 (void const *__P)
4464{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07004465 return *(const __m512i *) __P;
Logan Chien2833ffb2018-10-09 10:03:24 +08004466}
4467
Logan Chien55afb0a2018-10-15 10:42:14 +08004468static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004469_mm512_load_epi32 (void const *__P)
4470{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07004471 return *(const __m512i *) __P;
Logan Chien2833ffb2018-10-09 10:03:24 +08004472}
4473
Logan Chien55afb0a2018-10-15 10:42:14 +08004474static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004475_mm512_load_epi64 (void const *__P)
4476{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07004477 return *(const __m512i *) __P;
Logan Chien2833ffb2018-10-09 10:03:24 +08004478}
4479
4480/* SIMD store ops */
4481
Logan Chien55afb0a2018-10-15 10:42:14 +08004482static __inline void __DEFAULT_FN_ATTRS512
Logan Chien969aea62018-12-05 18:40:57 +08004483_mm512_storeu_epi64 (void *__P, __m512i __A)
4484{
4485 struct __storeu_epi64 {
Logan Chiendbcf4122019-03-21 10:50:25 +08004486 __m512i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08004487 } __attribute__((__packed__, __may_alias__));
4488 ((struct __storeu_epi64*)__P)->__v = __A;
4489}
4490
4491static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004492_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4493{
4494 __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4495 (__mmask8) __U);
4496}
4497
Logan Chien55afb0a2018-10-15 10:42:14 +08004498static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004499_mm512_storeu_si512 (void *__P, __m512i __A)
4500{
Logan Chien55afb0a2018-10-15 10:42:14 +08004501 struct __storeu_si512 {
Logan Chiendbcf4122019-03-21 10:50:25 +08004502 __m512i_u __v;
Logan Chien55afb0a2018-10-15 10:42:14 +08004503 } __attribute__((__packed__, __may_alias__));
4504 ((struct __storeu_si512*)__P)->__v = __A;
Logan Chien2833ffb2018-10-09 10:03:24 +08004505}
4506
Logan Chien55afb0a2018-10-15 10:42:14 +08004507static __inline void __DEFAULT_FN_ATTRS512
Logan Chien969aea62018-12-05 18:40:57 +08004508_mm512_storeu_epi32 (void *__P, __m512i __A)
4509{
4510 struct __storeu_epi32 {
Logan Chiendbcf4122019-03-21 10:50:25 +08004511 __m512i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08004512 } __attribute__((__packed__, __may_alias__));
4513 ((struct __storeu_epi32*)__P)->__v = __A;
4514}
4515
4516static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004517_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4518{
4519 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4520 (__mmask16) __U);
4521}
4522
Logan Chien55afb0a2018-10-15 10:42:14 +08004523static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004524_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4525{
4526 __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4527}
4528
Logan Chien55afb0a2018-10-15 10:42:14 +08004529static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004530_mm512_storeu_pd(void *__P, __m512d __A)
4531{
Logan Chien55afb0a2018-10-15 10:42:14 +08004532 struct __storeu_pd {
Logan Chiendbcf4122019-03-21 10:50:25 +08004533 __m512d_u __v;
Logan Chien55afb0a2018-10-15 10:42:14 +08004534 } __attribute__((__packed__, __may_alias__));
4535 ((struct __storeu_pd*)__P)->__v = __A;
Logan Chien2833ffb2018-10-09 10:03:24 +08004536}
4537
Logan Chien55afb0a2018-10-15 10:42:14 +08004538static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004539_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4540{
4541 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4542 (__mmask16) __U);
4543}
4544
Logan Chien55afb0a2018-10-15 10:42:14 +08004545static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004546_mm512_storeu_ps(void *__P, __m512 __A)
4547{
Logan Chien55afb0a2018-10-15 10:42:14 +08004548 struct __storeu_ps {
Logan Chiendbcf4122019-03-21 10:50:25 +08004549 __m512_u __v;
Logan Chien55afb0a2018-10-15 10:42:14 +08004550 } __attribute__((__packed__, __may_alias__));
4551 ((struct __storeu_ps*)__P)->__v = __A;
Logan Chien2833ffb2018-10-09 10:03:24 +08004552}
4553
Logan Chien55afb0a2018-10-15 10:42:14 +08004554static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004555_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4556{
4557 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4558}
4559
Logan Chien55afb0a2018-10-15 10:42:14 +08004560static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004561_mm512_store_pd(void *__P, __m512d __A)
4562{
4563 *(__m512d*)__P = __A;
4564}
4565
Logan Chien55afb0a2018-10-15 10:42:14 +08004566static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004567_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4568{
4569 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4570 (__mmask16) __U);
4571}
4572
Logan Chien55afb0a2018-10-15 10:42:14 +08004573static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004574_mm512_store_ps(void *__P, __m512 __A)
4575{
4576 *(__m512*)__P = __A;
4577}
4578
Logan Chien55afb0a2018-10-15 10:42:14 +08004579static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004580_mm512_store_si512 (void *__P, __m512i __A)
4581{
4582 *(__m512i *) __P = __A;
4583}
4584
Logan Chien55afb0a2018-10-15 10:42:14 +08004585static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004586_mm512_store_epi32 (void *__P, __m512i __A)
4587{
4588 *(__m512i *) __P = __A;
4589}
4590
Logan Chien55afb0a2018-10-15 10:42:14 +08004591static __inline void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004592_mm512_store_epi64 (void *__P, __m512i __A)
4593{
4594 *(__m512i *) __P = __A;
4595}
4596
4597/* Mask ops */
4598
Logan Chienb0c84022018-11-09 16:19:54 +08004599static __inline __mmask16 __DEFAULT_FN_ATTRS
Logan Chien2833ffb2018-10-09 10:03:24 +08004600_mm512_knot(__mmask16 __M)
4601{
4602 return __builtin_ia32_knothi(__M);
4603}
4604
4605/* Integer compare */
4606
/* Signed 32-bit compare shorthands.  Each macro forwards to
   _mm512_cmp_epi32_mask (or _mm512_mask_cmp_epi32_mask when a mask is given)
   with a fixed _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT, NE. */
#define _mm512_cmpeq_epi32_mask(__a, __b) \
  _mm512_cmp_epi32_mask((__a), (__b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(__k, __a, __b) \
  _mm512_mask_cmp_epi32_mask((__k), (__a), (__b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(__a, __b) \
  _mm512_cmp_epi32_mask((__a), (__b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(__k, __a, __b) \
  _mm512_mask_cmp_epi32_mask((__k), (__a), (__b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(__a, __b) \
  _mm512_cmp_epi32_mask((__a), (__b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(__k, __a, __b) \
  _mm512_mask_cmp_epi32_mask((__k), (__a), (__b), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(__a, __b) \
  _mm512_cmp_epi32_mask((__a), (__b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(__k, __a, __b) \
  _mm512_mask_cmp_epi32_mask((__k), (__a), (__b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(__a, __b) \
  _mm512_cmp_epi32_mask((__a), (__b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(__k, __a, __b) \
  _mm512_mask_cmp_epi32_mask((__k), (__a), (__b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(__a, __b) \
  _mm512_cmp_epi32_mask((__a), (__b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(__k, __a, __b) \
  _mm512_mask_cmp_epi32_mask((__k), (__a), (__b), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +08004631
/* Unsigned 32-bit compare shorthands.  Each macro forwards to
   _mm512_cmp_epu32_mask (or _mm512_mask_cmp_epu32_mask when a mask is given)
   with a fixed _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT, NE. */
#define _mm512_cmpeq_epu32_mask(__a, __b) \
  _mm512_cmp_epu32_mask((__a), (__b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(__k, __a, __b) \
  _mm512_mask_cmp_epu32_mask((__k), (__a), (__b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(__a, __b) \
  _mm512_cmp_epu32_mask((__a), (__b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(__k, __a, __b) \
  _mm512_mask_cmp_epu32_mask((__k), (__a), (__b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(__a, __b) \
  _mm512_cmp_epu32_mask((__a), (__b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(__k, __a, __b) \
  _mm512_mask_cmp_epu32_mask((__k), (__a), (__b), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(__a, __b) \
  _mm512_cmp_epu32_mask((__a), (__b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(__k, __a, __b) \
  _mm512_mask_cmp_epu32_mask((__k), (__a), (__b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(__a, __b) \
  _mm512_cmp_epu32_mask((__a), (__b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(__k, __a, __b) \
  _mm512_mask_cmp_epu32_mask((__k), (__a), (__b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(__a, __b) \
  _mm512_cmp_epu32_mask((__a), (__b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(__k, __a, __b) \
  _mm512_mask_cmp_epu32_mask((__k), (__a), (__b), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +08004656
/* Signed 64-bit compare shorthands.  Each macro forwards to
   _mm512_cmp_epi64_mask (or _mm512_mask_cmp_epi64_mask when a mask is given)
   with a fixed _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT, NE. */
#define _mm512_cmpeq_epi64_mask(__a, __b) \
  _mm512_cmp_epi64_mask((__a), (__b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(__k, __a, __b) \
  _mm512_mask_cmp_epi64_mask((__k), (__a), (__b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(__a, __b) \
  _mm512_cmp_epi64_mask((__a), (__b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(__k, __a, __b) \
  _mm512_mask_cmp_epi64_mask((__k), (__a), (__b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(__a, __b) \
  _mm512_cmp_epi64_mask((__a), (__b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(__k, __a, __b) \
  _mm512_mask_cmp_epi64_mask((__k), (__a), (__b), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(__a, __b) \
  _mm512_cmp_epi64_mask((__a), (__b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(__k, __a, __b) \
  _mm512_mask_cmp_epi64_mask((__k), (__a), (__b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(__a, __b) \
  _mm512_cmp_epi64_mask((__a), (__b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(__k, __a, __b) \
  _mm512_mask_cmp_epi64_mask((__k), (__a), (__b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(__a, __b) \
  _mm512_cmp_epi64_mask((__a), (__b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(__k, __a, __b) \
  _mm512_mask_cmp_epi64_mask((__k), (__a), (__b), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +08004681
/* Unsigned 64-bit compare shorthands.  Each macro forwards to
   _mm512_cmp_epu64_mask (or _mm512_mask_cmp_epu64_mask when a mask is given)
   with a fixed _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT, NE. */
#define _mm512_cmpeq_epu64_mask(__a, __b) \
  _mm512_cmp_epu64_mask((__a), (__b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(__k, __a, __b) \
  _mm512_mask_cmp_epu64_mask((__k), (__a), (__b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(__a, __b) \
  _mm512_cmp_epu64_mask((__a), (__b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(__k, __a, __b) \
  _mm512_mask_cmp_epu64_mask((__k), (__a), (__b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(__a, __b) \
  _mm512_cmp_epu64_mask((__a), (__b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(__k, __a, __b) \
  _mm512_mask_cmp_epu64_mask((__k), (__a), (__b), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(__a, __b) \
  _mm512_cmp_epu64_mask((__a), (__b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(__k, __a, __b) \
  _mm512_mask_cmp_epu64_mask((__k), (__a), (__b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(__a, __b) \
  _mm512_cmp_epu64_mask((__a), (__b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(__k, __a, __b) \
  _mm512_mask_cmp_epu64_mask((__k), (__a), (__b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(__a, __b) \
  _mm512_cmp_epu64_mask((__a), (__b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(__k, __a, __b) \
  _mm512_mask_cmp_epu64_mask((__k), (__a), (__b), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +08004706
Logan Chien55afb0a2018-10-15 10:42:14 +08004707static __inline__ __m512i __DEFAULT_FN_ATTRS512
4708_mm512_cvtepi8_epi32(__m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004709{
Logan Chien55afb0a2018-10-15 10:42:14 +08004710 /* This function always performs a signed extension, but __v16qi is a char
4711 which may be signed or unsigned, so use __v16qs. */
4712 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
Logan Chien2833ffb2018-10-09 10:03:24 +08004713}
4714
Logan Chien55afb0a2018-10-15 10:42:14 +08004715static __inline__ __m512i __DEFAULT_FN_ATTRS512
4716_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004717{
Logan Chien55afb0a2018-10-15 10:42:14 +08004718 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4719 (__v16si)_mm512_cvtepi8_epi32(__A),
4720 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004721}
4722
Logan Chien55afb0a2018-10-15 10:42:14 +08004723static __inline__ __m512i __DEFAULT_FN_ATTRS512
4724_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004725{
Logan Chien55afb0a2018-10-15 10:42:14 +08004726 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4727 (__v16si)_mm512_cvtepi8_epi32(__A),
4728 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08004729}
4730
Logan Chien55afb0a2018-10-15 10:42:14 +08004731static __inline__ __m512i __DEFAULT_FN_ATTRS512
4732_mm512_cvtepi8_epi64(__m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004733{
Logan Chien55afb0a2018-10-15 10:42:14 +08004734 /* This function always performs a signed extension, but __v16qi is a char
4735 which may be signed or unsigned, so use __v16qs. */
4736 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
Logan Chien2833ffb2018-10-09 10:03:24 +08004737}
4738
Logan Chien55afb0a2018-10-15 10:42:14 +08004739static __inline__ __m512i __DEFAULT_FN_ATTRS512
4740_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004741{
Logan Chien55afb0a2018-10-15 10:42:14 +08004742 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4743 (__v8di)_mm512_cvtepi8_epi64(__A),
4744 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004745}
4746
Logan Chien55afb0a2018-10-15 10:42:14 +08004747static __inline__ __m512i __DEFAULT_FN_ATTRS512
4748_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004749{
Logan Chien55afb0a2018-10-15 10:42:14 +08004750 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4751 (__v8di)_mm512_cvtepi8_epi64(__A),
4752 (__v8di)_mm512_setzero_si512 ());
Logan Chien2833ffb2018-10-09 10:03:24 +08004753}
4754
Logan Chien55afb0a2018-10-15 10:42:14 +08004755static __inline__ __m512i __DEFAULT_FN_ATTRS512
4756_mm512_cvtepi32_epi64(__m256i __X)
Logan Chien2833ffb2018-10-09 10:03:24 +08004757{
Logan Chien55afb0a2018-10-15 10:42:14 +08004758 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
Logan Chien2833ffb2018-10-09 10:03:24 +08004759}
4760
Logan Chien55afb0a2018-10-15 10:42:14 +08004761static __inline__ __m512i __DEFAULT_FN_ATTRS512
4762_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
Logan Chien2833ffb2018-10-09 10:03:24 +08004763{
Logan Chien55afb0a2018-10-15 10:42:14 +08004764 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4765 (__v8di)_mm512_cvtepi32_epi64(__X),
4766 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004767}
4768
Logan Chien55afb0a2018-10-15 10:42:14 +08004769static __inline__ __m512i __DEFAULT_FN_ATTRS512
4770_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
Logan Chien2833ffb2018-10-09 10:03:24 +08004771{
Logan Chien55afb0a2018-10-15 10:42:14 +08004772 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4773 (__v8di)_mm512_cvtepi32_epi64(__X),
4774 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08004775}
4776
Logan Chien55afb0a2018-10-15 10:42:14 +08004777static __inline__ __m512i __DEFAULT_FN_ATTRS512
4778_mm512_cvtepi16_epi32(__m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004779{
Logan Chien55afb0a2018-10-15 10:42:14 +08004780 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
Logan Chien2833ffb2018-10-09 10:03:24 +08004781}
4782
Logan Chien55afb0a2018-10-15 10:42:14 +08004783static __inline__ __m512i __DEFAULT_FN_ATTRS512
4784_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004785{
Logan Chien55afb0a2018-10-15 10:42:14 +08004786 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4787 (__v16si)_mm512_cvtepi16_epi32(__A),
4788 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004789}
4790
Logan Chien55afb0a2018-10-15 10:42:14 +08004791static __inline__ __m512i __DEFAULT_FN_ATTRS512
4792_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004793{
Logan Chien55afb0a2018-10-15 10:42:14 +08004794 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4795 (__v16si)_mm512_cvtepi16_epi32(__A),
4796 (__v16si)_mm512_setzero_si512 ());
Logan Chien2833ffb2018-10-09 10:03:24 +08004797}
4798
Logan Chien55afb0a2018-10-15 10:42:14 +08004799static __inline__ __m512i __DEFAULT_FN_ATTRS512
4800_mm512_cvtepi16_epi64(__m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004801{
Logan Chien55afb0a2018-10-15 10:42:14 +08004802 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
Logan Chien2833ffb2018-10-09 10:03:24 +08004803}
4804
Logan Chien55afb0a2018-10-15 10:42:14 +08004805static __inline__ __m512i __DEFAULT_FN_ATTRS512
4806_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004807{
Logan Chien55afb0a2018-10-15 10:42:14 +08004808 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4809 (__v8di)_mm512_cvtepi16_epi64(__A),
4810 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004811}
4812
Logan Chien55afb0a2018-10-15 10:42:14 +08004813static __inline__ __m512i __DEFAULT_FN_ATTRS512
4814_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004815{
Logan Chien55afb0a2018-10-15 10:42:14 +08004816 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4817 (__v8di)_mm512_cvtepi16_epi64(__A),
4818 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08004819}
4820
Logan Chien55afb0a2018-10-15 10:42:14 +08004821static __inline__ __m512i __DEFAULT_FN_ATTRS512
4822_mm512_cvtepu8_epi32(__m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004823{
Logan Chien55afb0a2018-10-15 10:42:14 +08004824 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
Logan Chien2833ffb2018-10-09 10:03:24 +08004825}
4826
Logan Chien55afb0a2018-10-15 10:42:14 +08004827static __inline__ __m512i __DEFAULT_FN_ATTRS512
4828_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004829{
Logan Chien55afb0a2018-10-15 10:42:14 +08004830 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4831 (__v16si)_mm512_cvtepu8_epi32(__A),
4832 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004833}
4834
Logan Chien55afb0a2018-10-15 10:42:14 +08004835static __inline__ __m512i __DEFAULT_FN_ATTRS512
4836_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004837{
Logan Chien55afb0a2018-10-15 10:42:14 +08004838 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4839 (__v16si)_mm512_cvtepu8_epi32(__A),
4840 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08004841}
4842
Logan Chien55afb0a2018-10-15 10:42:14 +08004843static __inline__ __m512i __DEFAULT_FN_ATTRS512
4844_mm512_cvtepu8_epi64(__m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004845{
Logan Chien55afb0a2018-10-15 10:42:14 +08004846 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
Logan Chien2833ffb2018-10-09 10:03:24 +08004847}
4848
Logan Chien55afb0a2018-10-15 10:42:14 +08004849static __inline__ __m512i __DEFAULT_FN_ATTRS512
4850_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004851{
Logan Chien55afb0a2018-10-15 10:42:14 +08004852 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4853 (__v8di)_mm512_cvtepu8_epi64(__A),
4854 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004855}
4856
Logan Chien55afb0a2018-10-15 10:42:14 +08004857static __inline__ __m512i __DEFAULT_FN_ATTRS512
4858_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004859{
Logan Chien55afb0a2018-10-15 10:42:14 +08004860 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4861 (__v8di)_mm512_cvtepu8_epi64(__A),
4862 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08004863}
4864
Logan Chien55afb0a2018-10-15 10:42:14 +08004865static __inline__ __m512i __DEFAULT_FN_ATTRS512
4866_mm512_cvtepu32_epi64(__m256i __X)
Logan Chien2833ffb2018-10-09 10:03:24 +08004867{
Logan Chien55afb0a2018-10-15 10:42:14 +08004868 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
Logan Chien2833ffb2018-10-09 10:03:24 +08004869}
4870
Logan Chien55afb0a2018-10-15 10:42:14 +08004871static __inline__ __m512i __DEFAULT_FN_ATTRS512
4872_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
Logan Chien2833ffb2018-10-09 10:03:24 +08004873{
Logan Chien55afb0a2018-10-15 10:42:14 +08004874 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4875 (__v8di)_mm512_cvtepu32_epi64(__X),
4876 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004877}
4878
Logan Chien55afb0a2018-10-15 10:42:14 +08004879static __inline__ __m512i __DEFAULT_FN_ATTRS512
4880_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
Logan Chien2833ffb2018-10-09 10:03:24 +08004881{
Logan Chien55afb0a2018-10-15 10:42:14 +08004882 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4883 (__v8di)_mm512_cvtepu32_epi64(__X),
4884 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08004885}
4886
Logan Chien55afb0a2018-10-15 10:42:14 +08004887static __inline__ __m512i __DEFAULT_FN_ATTRS512
4888_mm512_cvtepu16_epi32(__m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004889{
Logan Chien55afb0a2018-10-15 10:42:14 +08004890 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
Logan Chien2833ffb2018-10-09 10:03:24 +08004891}
4892
Logan Chien55afb0a2018-10-15 10:42:14 +08004893static __inline__ __m512i __DEFAULT_FN_ATTRS512
4894_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004895{
Logan Chien55afb0a2018-10-15 10:42:14 +08004896 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4897 (__v16si)_mm512_cvtepu16_epi32(__A),
4898 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004899}
4900
Logan Chien55afb0a2018-10-15 10:42:14 +08004901static __inline__ __m512i __DEFAULT_FN_ATTRS512
4902_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004903{
Logan Chien55afb0a2018-10-15 10:42:14 +08004904 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4905 (__v16si)_mm512_cvtepu16_epi32(__A),
4906 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08004907}
4908
Logan Chien55afb0a2018-10-15 10:42:14 +08004909static __inline__ __m512i __DEFAULT_FN_ATTRS512
4910_mm512_cvtepu16_epi64(__m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004911{
Logan Chien55afb0a2018-10-15 10:42:14 +08004912 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
Logan Chien2833ffb2018-10-09 10:03:24 +08004913}
4914
Logan Chien55afb0a2018-10-15 10:42:14 +08004915static __inline__ __m512i __DEFAULT_FN_ATTRS512
4916_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004917{
Logan Chien55afb0a2018-10-15 10:42:14 +08004918 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4919 (__v8di)_mm512_cvtepu16_epi64(__A),
4920 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004921}
4922
Logan Chien55afb0a2018-10-15 10:42:14 +08004923static __inline__ __m512i __DEFAULT_FN_ATTRS512
4924_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08004925{
Logan Chien55afb0a2018-10-15 10:42:14 +08004926 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4927 (__v8di)_mm512_cvtepu16_epi64(__A),
4928 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08004929}
4930
Logan Chien55afb0a2018-10-15 10:42:14 +08004931static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004932_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4933{
Logan Chien55afb0a2018-10-15 10:42:14 +08004934 return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08004935}
4936
Logan Chien55afb0a2018-10-15 10:42:14 +08004937static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004938_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4939{
Logan Chien55afb0a2018-10-15 10:42:14 +08004940 return (__m512i)__builtin_ia32_selectd_512(__U,
4941 (__v16si)_mm512_rorv_epi32(__A, __B),
4942 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004943}
4944
Logan Chien55afb0a2018-10-15 10:42:14 +08004945static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004946_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4947{
Logan Chien55afb0a2018-10-15 10:42:14 +08004948 return (__m512i)__builtin_ia32_selectd_512(__U,
4949 (__v16si)_mm512_rorv_epi32(__A, __B),
4950 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08004951}
4952
Logan Chien55afb0a2018-10-15 10:42:14 +08004953static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004954_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4955{
Logan Chien55afb0a2018-10-15 10:42:14 +08004956 return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08004957}
4958
Logan Chien55afb0a2018-10-15 10:42:14 +08004959static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004960_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4961{
Logan Chien55afb0a2018-10-15 10:42:14 +08004962 return (__m512i)__builtin_ia32_selectq_512(__U,
4963 (__v8di)_mm512_rorv_epi64(__A, __B),
4964 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004965}
4966
Logan Chien55afb0a2018-10-15 10:42:14 +08004967static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08004968_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4969{
Logan Chien55afb0a2018-10-15 10:42:14 +08004970 return (__m512i)__builtin_ia32_selectq_512(__U,
4971 (__v8di)_mm512_rorv_epi64(__A, __B),
4972 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08004973}
4974
4975
4976
/* Unmasked integer compares.  Each macro casts both operands to the element
   vector type, passes the predicate as an int, and supplies an all-ones
   mask ((__mmask16)-1 or (__mmask8)-1) to the masked compare builtin.  These
   stay macros because the predicate is forwarded textually to the builtin. */
#define _mm512_cmp_epi32_mask(__a, __b, __pred) \
  ((__mmask16)__builtin_ia32_cmpd512_mask( \
      (__v16si)(__m512i)(__a), (__v16si)(__m512i)(__b), (int)(__pred), \
      (__mmask16)-1))

#define _mm512_cmp_epu32_mask(__a, __b, __pred) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask( \
      (__v16si)(__m512i)(__a), (__v16si)(__m512i)(__b), (int)(__pred), \
      (__mmask16)-1))

#define _mm512_cmp_epi64_mask(__a, __b, __pred) \
  ((__mmask8)__builtin_ia32_cmpq512_mask( \
      (__v8di)(__m512i)(__a), (__v8di)(__m512i)(__b), (int)(__pred), \
      (__mmask8)-1))

#define _mm512_cmp_epu64_mask(__a, __b, __pred) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask( \
      (__v8di)(__m512i)(__a), (__v8di)(__m512i)(__b), (int)(__pred), \
      (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08004996
/* Masked integer compares.  Same builtins as the unmasked forms, but the
   caller's mask is passed as the final operand instead of all-ones. */
#define _mm512_mask_cmp_epi32_mask(__m, __a, __b, __pred) \
  ((__mmask16)__builtin_ia32_cmpd512_mask( \
      (__v16si)(__m512i)(__a), (__v16si)(__m512i)(__b), (int)(__pred), \
      (__mmask16)(__m)))

#define _mm512_mask_cmp_epu32_mask(__m, __a, __b, __pred) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask( \
      (__v16si)(__m512i)(__a), (__v16si)(__m512i)(__b), (int)(__pred), \
      (__mmask16)(__m)))

#define _mm512_mask_cmp_epi64_mask(__m, __a, __b, __pred) \
  ((__mmask8)__builtin_ia32_cmpq512_mask( \
      (__v8di)(__m512i)(__a), (__v8di)(__m512i)(__b), (int)(__pred), \
      (__mmask8)(__m)))

#define _mm512_mask_cmp_epu64_mask(__m, __a, __b, __pred) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask( \
      (__v8di)(__m512i)(__a), (__v8di)(__m512i)(__b), (int)(__pred), \
      (__mmask8)(__m)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005016
/* Rotate-left of 32-bit elements by the count __n, passed to
   __builtin_ia32_prold512 as an int.  The mask/maskz forms combine the
   rotated vector with __w or an all-zero vector through
   __builtin_ia32_selectd_512. */
#define _mm512_rol_epi32(__v, __n) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(__v), (int)(__n)))

#define _mm512_mask_rol_epi32(__w, __u, __v, __n) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(__u), \
      (__v16si)_mm512_rol_epi32((__v), (__n)), \
      (__v16si)(__m512i)(__w)))

#define _mm512_maskz_rol_epi32(__u, __v, __n) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(__u), \
      (__v16si)_mm512_rol_epi32((__v), (__n)), \
      (__v16si)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08005029
/* Rotate-left of 64-bit elements by the count __n, passed to
   __builtin_ia32_prolq512 as an int.  The mask/maskz forms combine the
   rotated vector with __w or an all-zero vector through
   __builtin_ia32_selectq_512. */
#define _mm512_rol_epi64(__v, __n) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(__v), (int)(__n)))

#define _mm512_mask_rol_epi64(__w, __u, __v, __n) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(__u), \
      (__v8di)_mm512_rol_epi64((__v), (__n)), \
      (__v8di)(__m512i)(__w)))

#define _mm512_maskz_rol_epi64(__u, __v, __n) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(__u), \
      (__v8di)_mm512_rol_epi64((__v), (__n)), \
      (__v8di)_mm512_setzero_si512()))
Logan Chien55afb0a2018-10-15 10:42:14 +08005042
5043static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005044_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5045{
Logan Chien55afb0a2018-10-15 10:42:14 +08005046 return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08005047}
5048
Logan Chien55afb0a2018-10-15 10:42:14 +08005049static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005050_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5051{
Logan Chien55afb0a2018-10-15 10:42:14 +08005052 return (__m512i)__builtin_ia32_selectd_512(__U,
5053 (__v16si)_mm512_rolv_epi32(__A, __B),
5054 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005055}
5056
Logan Chien55afb0a2018-10-15 10:42:14 +08005057static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005058_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5059{
Logan Chien55afb0a2018-10-15 10:42:14 +08005060 return (__m512i)__builtin_ia32_selectd_512(__U,
5061 (__v16si)_mm512_rolv_epi32(__A, __B),
5062 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005063}
5064
Logan Chien55afb0a2018-10-15 10:42:14 +08005065static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005066_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5067{
Logan Chien55afb0a2018-10-15 10:42:14 +08005068 return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08005069}
5070
Logan Chien55afb0a2018-10-15 10:42:14 +08005071static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005072_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5073{
Logan Chien55afb0a2018-10-15 10:42:14 +08005074 return (__m512i)__builtin_ia32_selectq_512(__U,
5075 (__v8di)_mm512_rolv_epi64(__A, __B),
5076 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005077}
5078
Logan Chien55afb0a2018-10-15 10:42:14 +08005079static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005080_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5081{
Logan Chien55afb0a2018-10-15 10:42:14 +08005082 return (__m512i)__builtin_ia32_selectq_512(__U,
5083 (__v8di)_mm512_rolv_epi64(__A, __B),
5084 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005085}
5086
/* Expands to __builtin_ia32_prord512 on `A` viewed as 16 x int, with `B`
 * converted to int. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005089
/* Masked form of _mm512_ror_epi32: passes mask U, the result of
 * _mm512_ror_epi32(A, B) and W (both viewed as 16 x int) to the 32-bit-lane
 * select builtin.
 * NOTE(review): lanes not picked by U presumably come from W - confirm. */
#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005094
/* Zero-masked form of _mm512_ror_epi32: the select's alternative operand is
 * _mm512_setzero_si512(). */
#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08005099
/* Expands to __builtin_ia32_prorq512 on `A` viewed as 8 x long long, with `B`
 * converted to int. */
#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005102
/* Masked form of _mm512_ror_epi64: passes mask U, the result of
 * _mm512_ror_epi64(A, B) and W (both viewed as 8 x long long) to the
 * 64-bit-lane select builtin.
 * NOTE(review): lanes not picked by U presumably come from W - confirm. */
#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005107
/* Zero-masked form of _mm512_ror_epi64: the select's alternative operand is
 * _mm512_setzero_si512(). */
#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08005112
Logan Chien55afb0a2018-10-15 10:42:14 +08005113static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07005114_mm512_slli_epi32(__m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08005115{
5116 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5117}
Logan Chien2833ffb2018-10-09 10:03:24 +08005118
Logan Chien55afb0a2018-10-15 10:42:14 +08005119static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07005120_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
5121 unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08005122{
5123 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5124 (__v16si)_mm512_slli_epi32(__A, __B),
5125 (__v16si)__W);
5126}
Logan Chien2833ffb2018-10-09 10:03:24 +08005127
Logan Chien55afb0a2018-10-15 10:42:14 +08005128static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07005129_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08005130 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5131 (__v16si)_mm512_slli_epi32(__A, __B),
5132 (__v16si)_mm512_setzero_si512());
5133}
Logan Chien2833ffb2018-10-09 10:03:24 +08005134
Logan Chien55afb0a2018-10-15 10:42:14 +08005135static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07005136_mm512_slli_epi64(__m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08005137{
5138 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5139}
Logan Chien2833ffb2018-10-09 10:03:24 +08005140
Logan Chien55afb0a2018-10-15 10:42:14 +08005141static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07005142_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08005143{
5144 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5145 (__v8di)_mm512_slli_epi64(__A, __B),
5146 (__v8di)__W);
5147}
Logan Chien2833ffb2018-10-09 10:03:24 +08005148
Logan Chien55afb0a2018-10-15 10:42:14 +08005149static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07005150_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08005151{
5152 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5153 (__v8di)_mm512_slli_epi64(__A, __B),
5154 (__v8di)_mm512_setzero_si512());
5155}
Logan Chien2833ffb2018-10-09 10:03:24 +08005156
Logan Chien55afb0a2018-10-15 10:42:14 +08005157static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07005158_mm512_srli_epi32(__m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08005159{
5160 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5161}
Logan Chien2833ffb2018-10-09 10:03:24 +08005162
Logan Chien55afb0a2018-10-15 10:42:14 +08005163static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07005164_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
5165 unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08005166{
5167 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5168 (__v16si)_mm512_srli_epi32(__A, __B),
5169 (__v16si)__W);
5170}
Logan Chien2833ffb2018-10-09 10:03:24 +08005171
Logan Chien55afb0a2018-10-15 10:42:14 +08005172static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07005173_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08005174 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5175 (__v16si)_mm512_srli_epi32(__A, __B),
5176 (__v16si)_mm512_setzero_si512());
5177}
Logan Chien2833ffb2018-10-09 10:03:24 +08005178
Logan Chien55afb0a2018-10-15 10:42:14 +08005179static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07005180_mm512_srli_epi64(__m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08005181{
5182 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5183}
Logan Chien2833ffb2018-10-09 10:03:24 +08005184
Logan Chien55afb0a2018-10-15 10:42:14 +08005185static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07005186_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
5187 unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08005188{
5189 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5190 (__v8di)_mm512_srli_epi64(__A, __B),
5191 (__v8di)__W);
5192}
Logan Chien2833ffb2018-10-09 10:03:24 +08005193
Logan Chien55afb0a2018-10-15 10:42:14 +08005194static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07005195_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A,
5196 unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08005197{
5198 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5199 (__v8di)_mm512_srli_epi64(__A, __B),
5200 (__v8di)_mm512_setzero_si512());
5201}
Logan Chien2833ffb2018-10-09 10:03:24 +08005202
Logan Chien55afb0a2018-10-15 10:42:14 +08005203static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005204_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5205{
5206 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5207 (__v16si) __W,
5208 (__mmask16) __U);
5209}
5210
Logan Chien55afb0a2018-10-15 10:42:14 +08005211static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005212_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5213{
5214 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5215 (__v16si)
5216 _mm512_setzero_si512 (),
5217 (__mmask16) __U);
5218}
5219
Logan Chien55afb0a2018-10-15 10:42:14 +08005220static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005221_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5222{
5223 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5224 (__mmask16) __U);
5225}
5226
Logan Chien55afb0a2018-10-15 10:42:14 +08005227static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005228_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5229{
5230 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5231 (__v16si) __A,
5232 (__v16si) __W);
5233}
5234
Logan Chien55afb0a2018-10-15 10:42:14 +08005235static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005236_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5237{
5238 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5239 (__v16si) __A,
5240 (__v16si) _mm512_setzero_si512 ());
5241}
5242
Logan Chien55afb0a2018-10-15 10:42:14 +08005243static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005244_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5245{
5246 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5247 (__v8di) __A,
5248 (__v8di) __W);
5249}
5250
Logan Chien55afb0a2018-10-15 10:42:14 +08005251static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005252_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5253{
5254 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5255 (__v8di) __A,
5256 (__v8di) _mm512_setzero_si512 ());
5257}
5258
Logan Chien55afb0a2018-10-15 10:42:14 +08005259static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005260_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5261{
5262 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5263 (__v8di) __W,
5264 (__mmask8) __U);
5265}
5266
Logan Chien55afb0a2018-10-15 10:42:14 +08005267static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005268_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5269{
5270 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5271 (__v8di)
5272 _mm512_setzero_si512 (),
5273 (__mmask8) __U);
5274}
5275
Logan Chien55afb0a2018-10-15 10:42:14 +08005276static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005277_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5278{
5279 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5280 (__mmask8) __U);
5281}
5282
Logan Chien55afb0a2018-10-15 10:42:14 +08005283static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005284_mm512_movedup_pd (__m512d __A)
5285{
5286 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5287 0, 0, 2, 2, 4, 4, 6, 6);
5288}
5289
Logan Chien55afb0a2018-10-15 10:42:14 +08005290static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005291_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5292{
5293 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5294 (__v8df)_mm512_movedup_pd(__A),
5295 (__v8df)__W);
5296}
5297
Logan Chien55afb0a2018-10-15 10:42:14 +08005298static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005299_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5300{
5301 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5302 (__v8df)_mm512_movedup_pd(__A),
5303 (__v8df)_mm512_setzero_pd());
5304}
5305
/* Expands to __builtin_ia32_fixupimmpd512_mask with A and B as 8 x double,
 * C as 8 x long long, imm and R as int, and an all-ones (__mmask8)-1 mask. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005311
/* Expands to __builtin_ia32_fixupimmpd512_mask with caller-supplied mask U
 * and rounding argument R; A, B are passed as 8 x double, C as 8 x long long. */
#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005317
/* Expands to __builtin_ia32_fixupimmpd512_mask with an all-ones
 * (__mmask8)-1 mask and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005324
/* Expands to __builtin_ia32_fixupimmpd512_mask with caller-supplied mask U
 * and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005331
/* Expands to the _maskz flavour of the builtin,
 * __builtin_ia32_fixupimmpd512_maskz, with mask U and rounding argument R. */
#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005338
/* Expands to __builtin_ia32_fixupimmpd512_maskz with mask U and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005345
/* Expands to __builtin_ia32_fixupimmps512_mask with A and B as 16 x float,
 * C as 16 x int, imm and R as int, and an all-ones (__mmask16)-1 mask. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005351
/* Expands to __builtin_ia32_fixupimmps512_mask with caller-supplied mask U
 * and rounding argument R; A, B are passed as 16 x float, C as 16 x int. */
#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005357
/* Expands to __builtin_ia32_fixupimmps512_mask with an all-ones
 * (__mmask16)-1 mask and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005364
/* Expands to __builtin_ia32_fixupimmps512_mask with caller-supplied mask U
 * and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005371
/* Expands to the _maskz flavour of the builtin,
 * __builtin_ia32_fixupimmps512_maskz, with mask U and rounding argument R. */
#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005378
/* Expands to __builtin_ia32_fixupimmps512_maskz with mask U and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005385
/* Expands to __builtin_ia32_fixupimmsd_mask with A and B as __v2df, C as
 * __v2di, imm and R as int, and an all-ones (__mmask8)-1 mask. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005391
/* Expands to __builtin_ia32_fixupimmsd_mask with caller-supplied mask U and
 * rounding argument R. */
#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))
5397
/* Expands to __builtin_ia32_fixupimmsd_mask with an all-ones (__mmask8)-1
 * mask and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))
5404
/* Expands to __builtin_ia32_fixupimmsd_mask with caller-supplied mask U and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
5411
/* Expands to the _maskz flavour of the builtin,
 * __builtin_ia32_fixupimmsd_maskz, with mask U and rounding argument R. */
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))
5417
/* Expands to __builtin_ia32_fixupimmsd_maskz with mask U and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005424
/* Expands to __builtin_ia32_fixupimmss_mask with A and B as __v4sf, C as
 * __v4si, imm and R as int, and an all-ones (__mmask8)-1 mask. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005430
/* Expands to __builtin_ia32_fixupimmss_mask with caller-supplied mask U and
 * rounding argument R. */
#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))
5436
/* Expands to __builtin_ia32_fixupimmss_mask with an all-ones (__mmask8)-1
 * mask and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION))
5443
/* Expands to __builtin_ia32_fixupimmss_mask with caller-supplied mask U and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION))
5450
/* Expands to the _maskz flavour of the builtin,
 * __builtin_ia32_fixupimmss_maskz, with mask U and rounding argument R. */
#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))
5456
/* Expands to __builtin_ia32_fixupimmss_maskz with mask U and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005463
/* Expands to __builtin_ia32_getexpsd128_round_mask with A and B as __v2df,
 * _mm_setzero_pd() as the third vector operand, an all-ones (__mmask8)-1
 * mask and R as the rounding argument. */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005469
5470
Logan Chien55afb0a2018-10-15 10:42:14 +08005471static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005472_mm_getexp_sd (__m128d __A, __m128d __B)
5473{
5474 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5475 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5476}
5477
Logan Chien55afb0a2018-10-15 10:42:14 +08005478static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005479_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5480{
5481 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5482 (__v2df) __B,
5483 (__v2df) __W,
5484 (__mmask8) __U,
5485 _MM_FROUND_CUR_DIRECTION);
5486}
5487
/* Expands to __builtin_ia32_getexpsd128_round_mask with W as the third
 * vector operand, mask U and rounding argument R. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)(__m128d)(W), \
                                                  (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005493
Logan Chien55afb0a2018-10-15 10:42:14 +08005494static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005495_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5496{
5497 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5498 (__v2df) __B,
5499 (__v2df) _mm_setzero_pd (),
5500 (__mmask8) __U,
5501 _MM_FROUND_CUR_DIRECTION);
5502}
5503
/* Expands to __builtin_ia32_getexpsd128_round_mask with a _mm_setzero_pd()
 * third operand, mask U and rounding argument R. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005509
/* Expands to __builtin_ia32_getexpss128_round_mask with A and B as __v4sf,
 * _mm_setzero_ps() as the third vector operand, an all-ones (__mmask8)-1
 * mask and R as the rounding argument. */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005515
Logan Chien55afb0a2018-10-15 10:42:14 +08005516static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005517_mm_getexp_ss (__m128 __A, __m128 __B)
5518{
5519 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5520 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5521}
5522
Logan Chien55afb0a2018-10-15 10:42:14 +08005523static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005524_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5525{
5526 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5527 (__v4sf) __B,
5528 (__v4sf) __W,
5529 (__mmask8) __U,
5530 _MM_FROUND_CUR_DIRECTION);
5531}
5532
/* Expands to __builtin_ia32_getexpss128_round_mask with W as the third
 * vector operand, mask U and rounding argument R. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)(__m128)(W), \
                                                 (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005538
Logan Chien55afb0a2018-10-15 10:42:14 +08005539static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005540_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5541{
5542 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5543 (__v4sf) __B,
Logan Chien55afb0a2018-10-15 10:42:14 +08005544 (__v4sf) _mm_setzero_ps (),
Logan Chien2833ffb2018-10-09 10:03:24 +08005545 (__mmask8) __U,
5546 _MM_FROUND_CUR_DIRECTION);
5547}
5548
/* Expands to __builtin_ia32_getexpss128_round_mask with a _mm_setzero_ps()
 * third operand, mask U and rounding argument R. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005554
/* Expands to __builtin_ia32_getmantsd_round_mask. C and D are packed into a
 * single int immediate as ((D) << 2) | (C); the pass-through operand is
 * _mm_setzero_pd(), the mask is all ones and R is the rounding argument. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005561
/* Expands to __builtin_ia32_getmantsd_round_mask. C and D are packed into a
 * single int immediate as ((D) << 2) | (C); uses a _mm_setzero_pd() operand,
 * an all-ones mask and _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005569
/* Expands to __builtin_ia32_getmantsd_round_mask. C and D are packed into a
 * single int immediate as ((D) << 2) | (C); W and U are the vector and mask
 * operands and _MM_FROUND_CUR_DIRECTION is the final argument. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005577
/* Expands to __builtin_ia32_getmantsd_round_mask. C and D are packed into a
 * single int immediate as ((D) << 2) | (C); W and U are the vector and mask
 * operands and R is the rounding argument. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005584
/* Expands to __builtin_ia32_getmantsd_round_mask. C and D are packed into a
 * single int immediate as ((D) << 2) | (C); uses a _mm_setzero_pd() operand,
 * mask U and _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005592
/* Expands to __builtin_ia32_getmantsd_round_mask. C and D are packed into a
 * single int immediate as ((D) << 2) | (C); uses a _mm_setzero_pd() operand,
 * mask U and rounding argument R. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005599
/* Expands to __builtin_ia32_getmantss_round_mask. C and D are packed into a
 * single int immediate as ((D) << 2) | (C); the pass-through operand is
 * _mm_setzero_ps(), the mask is all ones and R is the rounding argument. */
#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005606
/* Expands to __builtin_ia32_getmantss_round_mask. C and D are packed into a
 * single int immediate as ((D) << 2) | (C); uses a _mm_setzero_ps() operand,
 * an all-ones mask and _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005614
/* Expands to __builtin_ia32_getmantss_round_mask. C and D are packed into a
 * single int immediate as ((D) << 2) | (C); W and U are the vector and mask
 * operands and _MM_FROUND_CUR_DIRECTION is the final argument. */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005622
/* Expands to __builtin_ia32_getmantss_round_mask. C and D are packed into a
 * single int immediate as ((D) << 2) | (C); W and U are the vector and mask
 * operands and R is the rounding argument. */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005629
/* Expands to __builtin_ia32_getmantss_round_mask. C and D are packed into a
 * single int immediate as ((D) << 2) | (C); uses a _mm_setzero_ps() operand,
 * mask U and _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08005637
/* Expands to __builtin_ia32_getmantss_round_mask. C and D are packed into a
 * single int immediate as ((D) << 2) | (C); uses a _mm_setzero_ps() operand,
 * mask U and rounding argument R. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08005644
Logan Chienb0c84022018-11-09 16:19:54 +08005645static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Logan Chien2833ffb2018-10-09 10:03:24 +08005646_mm512_kmov (__mmask16 __A)
5647{
5648 return __A;
5649}
5650
/* Expands to __builtin_ia32_vcomisd on A and B (as __v2df) with P and R
 * converted to int; the result is cast to int. */
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005654
/* Expands to __builtin_ia32_vcomiss on A and B (as __v4sf) with P and R
 * converted to int; the result is cast to int. */
#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005658
/* Only defined when __x86_64__ is set. Expands to
 * __builtin_ia32_vcvtsd2si64 on A (as __v2df) with R converted to int; the
 * result is cast to long long. */
#ifdef __x86_64__
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08005663
Logan Chien55afb0a2018-10-15 10:42:14 +08005664static __inline__ __m512i __DEFAULT_FN_ATTRS512
5665_mm512_sll_epi32(__m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005666{
Logan Chien55afb0a2018-10-15 10:42:14 +08005667 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08005668}
5669
Logan Chien55afb0a2018-10-15 10:42:14 +08005670static __inline__ __m512i __DEFAULT_FN_ATTRS512
5671_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005672{
Logan Chien55afb0a2018-10-15 10:42:14 +08005673 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5674 (__v16si)_mm512_sll_epi32(__A, __B),
5675 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005676}
5677
Logan Chien55afb0a2018-10-15 10:42:14 +08005678static __inline__ __m512i __DEFAULT_FN_ATTRS512
5679_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005680{
Logan Chien55afb0a2018-10-15 10:42:14 +08005681 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5682 (__v16si)_mm512_sll_epi32(__A, __B),
5683 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005684}
5685
Logan Chien55afb0a2018-10-15 10:42:14 +08005686static __inline__ __m512i __DEFAULT_FN_ATTRS512
5687_mm512_sll_epi64(__m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005688{
Logan Chien55afb0a2018-10-15 10:42:14 +08005689 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08005690}
5691
Logan Chien55afb0a2018-10-15 10:42:14 +08005692static __inline__ __m512i __DEFAULT_FN_ATTRS512
5693_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005694{
Logan Chien55afb0a2018-10-15 10:42:14 +08005695 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5696 (__v8di)_mm512_sll_epi64(__A, __B),
5697 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005698}
5699
Logan Chien55afb0a2018-10-15 10:42:14 +08005700static __inline__ __m512i __DEFAULT_FN_ATTRS512
5701_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005702{
Logan Chien55afb0a2018-10-15 10:42:14 +08005703 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5704 (__v8di)_mm512_sll_epi64(__A, __B),
5705 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005706}
5707
Logan Chien55afb0a2018-10-15 10:42:14 +08005708static __inline__ __m512i __DEFAULT_FN_ATTRS512
5709_mm512_sllv_epi32(__m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005710{
Logan Chien55afb0a2018-10-15 10:42:14 +08005711 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
Logan Chien2833ffb2018-10-09 10:03:24 +08005712}
5713
Logan Chien55afb0a2018-10-15 10:42:14 +08005714static __inline__ __m512i __DEFAULT_FN_ATTRS512
5715_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005716{
Logan Chien55afb0a2018-10-15 10:42:14 +08005717 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5718 (__v16si)_mm512_sllv_epi32(__X, __Y),
5719 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005720}
5721
Logan Chien55afb0a2018-10-15 10:42:14 +08005722static __inline__ __m512i __DEFAULT_FN_ATTRS512
5723_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005724{
Logan Chien55afb0a2018-10-15 10:42:14 +08005725 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5726 (__v16si)_mm512_sllv_epi32(__X, __Y),
5727 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005728}
5729
Logan Chien55afb0a2018-10-15 10:42:14 +08005730static __inline__ __m512i __DEFAULT_FN_ATTRS512
5731_mm512_sllv_epi64(__m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005732{
Logan Chien55afb0a2018-10-15 10:42:14 +08005733 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
Logan Chien2833ffb2018-10-09 10:03:24 +08005734}
5735
Logan Chien55afb0a2018-10-15 10:42:14 +08005736static __inline__ __m512i __DEFAULT_FN_ATTRS512
5737_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005738{
Logan Chien55afb0a2018-10-15 10:42:14 +08005739 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5740 (__v8di)_mm512_sllv_epi64(__X, __Y),
5741 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005742}
5743
Logan Chien55afb0a2018-10-15 10:42:14 +08005744static __inline__ __m512i __DEFAULT_FN_ATTRS512
5745_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005746{
Logan Chien55afb0a2018-10-15 10:42:14 +08005747 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5748 (__v8di)_mm512_sllv_epi64(__X, __Y),
5749 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005750}
5751
Logan Chien55afb0a2018-10-15 10:42:14 +08005752static __inline__ __m512i __DEFAULT_FN_ATTRS512
5753_mm512_sra_epi32(__m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005754{
Logan Chien55afb0a2018-10-15 10:42:14 +08005755 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08005756}
5757
Logan Chien55afb0a2018-10-15 10:42:14 +08005758static __inline__ __m512i __DEFAULT_FN_ATTRS512
5759_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005760{
Logan Chien55afb0a2018-10-15 10:42:14 +08005761 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5762 (__v16si)_mm512_sra_epi32(__A, __B),
5763 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005764}
5765
Logan Chien55afb0a2018-10-15 10:42:14 +08005766static __inline__ __m512i __DEFAULT_FN_ATTRS512
5767_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005768{
Logan Chien55afb0a2018-10-15 10:42:14 +08005769 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5770 (__v16si)_mm512_sra_epi32(__A, __B),
5771 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005772}
5773
Logan Chien55afb0a2018-10-15 10:42:14 +08005774static __inline__ __m512i __DEFAULT_FN_ATTRS512
5775_mm512_sra_epi64(__m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005776{
Logan Chien55afb0a2018-10-15 10:42:14 +08005777 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08005778}
5779
Logan Chien55afb0a2018-10-15 10:42:14 +08005780static __inline__ __m512i __DEFAULT_FN_ATTRS512
5781_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005782{
Logan Chien55afb0a2018-10-15 10:42:14 +08005783 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5784 (__v8di)_mm512_sra_epi64(__A, __B),
5785 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005786}
5787
Logan Chien55afb0a2018-10-15 10:42:14 +08005788static __inline__ __m512i __DEFAULT_FN_ATTRS512
5789_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005790{
Logan Chien55afb0a2018-10-15 10:42:14 +08005791 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5792 (__v8di)_mm512_sra_epi64(__A, __B),
5793 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005794}
5795
Logan Chien55afb0a2018-10-15 10:42:14 +08005796static __inline__ __m512i __DEFAULT_FN_ATTRS512
5797_mm512_srav_epi32(__m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005798{
Logan Chien55afb0a2018-10-15 10:42:14 +08005799 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
Logan Chien2833ffb2018-10-09 10:03:24 +08005800}
5801
Logan Chien55afb0a2018-10-15 10:42:14 +08005802static __inline__ __m512i __DEFAULT_FN_ATTRS512
5803_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005804{
Logan Chien55afb0a2018-10-15 10:42:14 +08005805 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5806 (__v16si)_mm512_srav_epi32(__X, __Y),
5807 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005808}
5809
Logan Chien55afb0a2018-10-15 10:42:14 +08005810static __inline__ __m512i __DEFAULT_FN_ATTRS512
5811_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005812{
Logan Chien55afb0a2018-10-15 10:42:14 +08005813 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5814 (__v16si)_mm512_srav_epi32(__X, __Y),
5815 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005816}
5817
Logan Chien55afb0a2018-10-15 10:42:14 +08005818static __inline__ __m512i __DEFAULT_FN_ATTRS512
5819_mm512_srav_epi64(__m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005820{
Logan Chien55afb0a2018-10-15 10:42:14 +08005821 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
Logan Chien2833ffb2018-10-09 10:03:24 +08005822}
5823
Logan Chien55afb0a2018-10-15 10:42:14 +08005824static __inline__ __m512i __DEFAULT_FN_ATTRS512
5825_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005826{
Logan Chien55afb0a2018-10-15 10:42:14 +08005827 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5828 (__v8di)_mm512_srav_epi64(__X, __Y),
5829 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005830}
5831
Logan Chien55afb0a2018-10-15 10:42:14 +08005832static __inline__ __m512i __DEFAULT_FN_ATTRS512
5833_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005834{
Logan Chien55afb0a2018-10-15 10:42:14 +08005835 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5836 (__v8di)_mm512_srav_epi64(__X, __Y),
5837 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005838}
5839
Logan Chien55afb0a2018-10-15 10:42:14 +08005840static __inline__ __m512i __DEFAULT_FN_ATTRS512
5841_mm512_srl_epi32(__m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005842{
Logan Chien55afb0a2018-10-15 10:42:14 +08005843 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08005844}
5845
Logan Chien55afb0a2018-10-15 10:42:14 +08005846static __inline__ __m512i __DEFAULT_FN_ATTRS512
5847_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005848{
Logan Chien55afb0a2018-10-15 10:42:14 +08005849 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5850 (__v16si)_mm512_srl_epi32(__A, __B),
5851 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005852}
5853
Logan Chien55afb0a2018-10-15 10:42:14 +08005854static __inline__ __m512i __DEFAULT_FN_ATTRS512
5855_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005856{
Logan Chien55afb0a2018-10-15 10:42:14 +08005857 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5858 (__v16si)_mm512_srl_epi32(__A, __B),
5859 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005860}
5861
Logan Chien55afb0a2018-10-15 10:42:14 +08005862static __inline__ __m512i __DEFAULT_FN_ATTRS512
5863_mm512_srl_epi64(__m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005864{
Logan Chien55afb0a2018-10-15 10:42:14 +08005865 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08005866}
5867
Logan Chien55afb0a2018-10-15 10:42:14 +08005868static __inline__ __m512i __DEFAULT_FN_ATTRS512
5869_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005870{
Logan Chien55afb0a2018-10-15 10:42:14 +08005871 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5872 (__v8di)_mm512_srl_epi64(__A, __B),
5873 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005874}
5875
Logan Chien55afb0a2018-10-15 10:42:14 +08005876static __inline__ __m512i __DEFAULT_FN_ATTRS512
5877_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08005878{
Logan Chien55afb0a2018-10-15 10:42:14 +08005879 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5880 (__v8di)_mm512_srl_epi64(__A, __B),
5881 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005882}
5883
Logan Chien55afb0a2018-10-15 10:42:14 +08005884static __inline__ __m512i __DEFAULT_FN_ATTRS512
5885_mm512_srlv_epi32(__m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005886{
Logan Chien55afb0a2018-10-15 10:42:14 +08005887 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
Logan Chien2833ffb2018-10-09 10:03:24 +08005888}
5889
Logan Chien55afb0a2018-10-15 10:42:14 +08005890static __inline__ __m512i __DEFAULT_FN_ATTRS512
5891_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005892{
Logan Chien55afb0a2018-10-15 10:42:14 +08005893 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5894 (__v16si)_mm512_srlv_epi32(__X, __Y),
5895 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005896}
5897
Logan Chien55afb0a2018-10-15 10:42:14 +08005898static __inline__ __m512i __DEFAULT_FN_ATTRS512
5899_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005900{
Logan Chien55afb0a2018-10-15 10:42:14 +08005901 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5902 (__v16si)_mm512_srlv_epi32(__X, __Y),
5903 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005904}
5905
Logan Chien55afb0a2018-10-15 10:42:14 +08005906static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08005907_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5908{
Logan Chien55afb0a2018-10-15 10:42:14 +08005909 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
Logan Chien2833ffb2018-10-09 10:03:24 +08005910}
5911
Logan Chien55afb0a2018-10-15 10:42:14 +08005912static __inline__ __m512i __DEFAULT_FN_ATTRS512
5913_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005914{
Logan Chien55afb0a2018-10-15 10:42:14 +08005915 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5916 (__v8di)_mm512_srlv_epi64(__X, __Y),
5917 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08005918}
5919
Logan Chien55afb0a2018-10-15 10:42:14 +08005920static __inline__ __m512i __DEFAULT_FN_ATTRS512
5921_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08005922{
Logan Chien55afb0a2018-10-15 10:42:14 +08005923 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5924 (__v8di)_mm512_srlv_epi64(__X, __Y),
5925 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08005926}
5927
/* Expands to __builtin_ia32_pternlogd512_mask on A, B and C (as __v16si)
 * with imm as the int operand and (__mmask16)-1 as the mask argument. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08005933
/* Expands to __builtin_ia32_pternlogd512_mask on A, B and C (as __v16si)
 * with imm as the int operand and U as the 16-bit mask argument. */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005939
/* Expands to __builtin_ia32_pternlogd512_maskz on A, B and C (as __v16si)
 * with imm as the int operand and U as the 16-bit mask argument. */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005945
/* Expands to __builtin_ia32_pternlogq512_mask on A, B and C (as __v8di)
 * with imm as the int operand and (__mmask8)-1 as the mask argument. */
#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)-1))
5951
/* Expands to __builtin_ia32_pternlogq512_mask on A, B and C (as __v8di)
 * with imm as the int operand and U as the 8-bit mask argument. */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5957
/* Expands to __builtin_ia32_pternlogq512_maskz on A, B and C (as __v8di)
 * with imm as the int operand and U as the 8-bit mask argument. */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005963
#ifdef __x86_64__
/* Only defined when __x86_64__ is set. Expands to
 * __builtin_ia32_vcvtsd2si64 on A (as __v2df) with R as the int argument;
 * the result is cast to long long. */
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08005968
/* Expands to __builtin_ia32_vcvtsd2si32 on A (as __v2df) with R as the int
 * argument; the result is cast to int. */
#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005971
/* Expands to __builtin_ia32_vcvtsd2si32 on A (as __v2df) with R as the int
 * argument; the result is cast to int. */
#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005974
/* Expands to __builtin_ia32_vcvtsd2usi32 on A (as __v2df) with R as the int
 * argument; the result is cast to unsigned int. */
#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005977
Logan Chien55afb0a2018-10-15 10:42:14 +08005978static __inline__ unsigned __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005979_mm_cvtsd_u32 (__m128d __A)
5980{
5981 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5982 _MM_FROUND_CUR_DIRECTION);
5983}
5984
Logan Chien55afb0a2018-10-15 10:42:14 +08005985#ifdef __x86_64__
5986#define _mm_cvt_roundsd_u64(A, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08005987 ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5988 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005989
Logan Chien55afb0a2018-10-15 10:42:14 +08005990static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005991_mm_cvtsd_u64 (__m128d __A)
5992{
5993 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5994 __A,
5995 _MM_FROUND_CUR_DIRECTION);
5996}
Logan Chien55afb0a2018-10-15 10:42:14 +08005997#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08005998
/* Expands to __builtin_ia32_vcvtss2si32 on A (as __v4sf) with R as the int
 * argument; the result is cast to int. */
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006001
/* Expands to __builtin_ia32_vcvtss2si32 on A (as __v4sf) with R as the int
 * argument; the result is cast to int. */
#define _mm_cvt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006004
#ifdef __x86_64__
/* Expands to __builtin_ia32_vcvtss2si64 on A (as __v4sf) with R as the int
 * argument; the result is cast to long long. */
#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64( \
      (__v4sf)(__m128)(A), \
      (int)(R)))

/* Same expansion as _mm_cvt_roundss_si64 under a second name. */
#define _mm_cvt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08006012
/* Expands to __builtin_ia32_vcvtss2usi32 on A (as __v4sf) with R as the int
 * argument; the result is cast to unsigned int. */
#define _mm_cvt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtss2usi32( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006015
Logan Chien55afb0a2018-10-15 10:42:14 +08006016static __inline__ unsigned __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006017_mm_cvtss_u32 (__m128 __A)
6018{
6019 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6020 _MM_FROUND_CUR_DIRECTION);
6021}
6022
Logan Chien55afb0a2018-10-15 10:42:14 +08006023#ifdef __x86_64__
6024#define _mm_cvt_roundss_u64(A, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006025 ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6026 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006027
Logan Chien55afb0a2018-10-15 10:42:14 +08006028static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006029_mm_cvtss_u64 (__m128 __A)
6030{
6031 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6032 __A,
6033 _MM_FROUND_CUR_DIRECTION);
6034}
Logan Chien55afb0a2018-10-15 10:42:14 +08006035#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08006036
/* Expands to __builtin_ia32_vcvttsd2si32 on A (as __v2df) with R as the int
 * argument; the result is cast to int. */
#define _mm_cvtt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006039
/* Expands to __builtin_ia32_vcvttsd2si32 on A (as __v2df) with R as the int
 * argument; the result is cast to int. */
#define _mm_cvtt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006042
Logan Chien55afb0a2018-10-15 10:42:14 +08006043static __inline__ int __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006044_mm_cvttsd_i32 (__m128d __A)
6045{
6046 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6047 _MM_FROUND_CUR_DIRECTION);
6048}
6049
Logan Chien55afb0a2018-10-15 10:42:14 +08006050#ifdef __x86_64__
6051#define _mm_cvtt_roundsd_si64(A, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006052 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006053
Logan Chien55afb0a2018-10-15 10:42:14 +08006054#define _mm_cvtt_roundsd_i64(A, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006055 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006056
Logan Chien55afb0a2018-10-15 10:42:14 +08006057static __inline__ long long __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006058_mm_cvttsd_i64 (__m128d __A)
6059{
6060 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6061 _MM_FROUND_CUR_DIRECTION);
6062}
Logan Chien55afb0a2018-10-15 10:42:14 +08006063#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08006064
/* Expands to __builtin_ia32_vcvttsd2usi32 on A (as __v2df) with R as the int
 * argument; the result is cast to unsigned int. */
#define _mm_cvtt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttsd2usi32( \
      (__v2df)(__m128d)(A), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006067
Logan Chien55afb0a2018-10-15 10:42:14 +08006068static __inline__ unsigned __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006069_mm_cvttsd_u32 (__m128d __A)
6070{
6071 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6072 _MM_FROUND_CUR_DIRECTION);
6073}
6074
Logan Chien55afb0a2018-10-15 10:42:14 +08006075#ifdef __x86_64__
6076#define _mm_cvtt_roundsd_u64(A, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006077 ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6078 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006079
Logan Chien55afb0a2018-10-15 10:42:14 +08006080static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006081_mm_cvttsd_u64 (__m128d __A)
6082{
6083 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6084 __A,
6085 _MM_FROUND_CUR_DIRECTION);
6086}
Logan Chien55afb0a2018-10-15 10:42:14 +08006087#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08006088
/* Expands to __builtin_ia32_vcvttss2si32 on A (as __v4sf) with R as the int
 * argument; the result is cast to int. */
#define _mm_cvtt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006091
/* Expands to __builtin_ia32_vcvttss2si32 on A (as __v4sf) with R as the int
 * argument; the result is cast to int. */
#define _mm_cvtt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006094
Logan Chien55afb0a2018-10-15 10:42:14 +08006095static __inline__ int __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006096_mm_cvttss_i32 (__m128 __A)
6097{
6098 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6099 _MM_FROUND_CUR_DIRECTION);
6100}
6101
Logan Chien55afb0a2018-10-15 10:42:14 +08006102#ifdef __x86_64__
6103#define _mm_cvtt_roundss_i64(A, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006104 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006105
Logan Chien55afb0a2018-10-15 10:42:14 +08006106#define _mm_cvtt_roundss_si64(A, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006107 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006108
Logan Chien55afb0a2018-10-15 10:42:14 +08006109static __inline__ long long __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006110_mm_cvttss_i64 (__m128 __A)
6111{
6112 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6113 _MM_FROUND_CUR_DIRECTION);
6114}
Logan Chien55afb0a2018-10-15 10:42:14 +08006115#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08006116
/* Expands to __builtin_ia32_vcvttss2usi32 on A (as __v4sf) with R as the int
 * argument; the result is cast to unsigned int. */
#define _mm_cvtt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttss2usi32( \
      (__v4sf)(__m128)(A), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006119
Logan Chien55afb0a2018-10-15 10:42:14 +08006120static __inline__ unsigned __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006121_mm_cvttss_u32 (__m128 __A)
6122{
6123 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6124 _MM_FROUND_CUR_DIRECTION);
6125}
6126
Logan Chien55afb0a2018-10-15 10:42:14 +08006127#ifdef __x86_64__
6128#define _mm_cvtt_roundss_u64(A, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006129 ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6130 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006131
Logan Chien55afb0a2018-10-15 10:42:14 +08006132static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006133_mm_cvttss_u64 (__m128 __A)
6134{
6135 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6136 __A,
6137 _MM_FROUND_CUR_DIRECTION);
6138}
Logan Chien55afb0a2018-10-15 10:42:14 +08006139#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08006140
/* Expands to __builtin_ia32_vpermilpd512 on X (as __v8df) with C as the int
 * operand; the result is cast to __m512d. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512( \
      (__v8df)(__m512d)(X), \
      (int)(C)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006143
/* Masked form of _mm512_permute_pd: the unmasked result and W are handed to
 * __builtin_ia32_selectpd_512 together with the 8-bit mask U. */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permute_pd((X), (C)), \
      (__v8df)(__m512d)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006148
/* Zero-masked form of _mm512_permute_pd: the unmasked result and the vector
 * returned by _mm512_setzero_pd() are handed to __builtin_ia32_selectpd_512
 * together with the 8-bit mask U. */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permute_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006153
/* Expands to __builtin_ia32_vpermilps512 on X (as __v16sf) with C as the int
 * operand; the result is cast to __m512. */
#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512( \
      (__v16sf)(__m512)(X), \
      (int)(C)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006156
/* Masked form of _mm512_permute_ps: the unmasked result and W are handed to
 * __builtin_ia32_selectps_512 together with the 16-bit mask U. */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_permute_ps((X), (C)), \
      (__v16sf)(__m512)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006161
/* Zero-masked form of _mm512_permute_ps: the unmasked result and the vector
 * returned by _mm512_setzero_ps() are handed to __builtin_ia32_selectps_512
 * together with the 16-bit mask U. */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_permute_ps((X), (C)), \
      (__v16sf)_mm512_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006166
Logan Chien55afb0a2018-10-15 10:42:14 +08006167static __inline__ __m512d __DEFAULT_FN_ATTRS512
6168_mm512_permutevar_pd(__m512d __A, __m512i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006169{
Logan Chien55afb0a2018-10-15 10:42:14 +08006170 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +08006171}
6172
Logan Chien55afb0a2018-10-15 10:42:14 +08006173static __inline__ __m512d __DEFAULT_FN_ATTRS512
6174_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006175{
Logan Chien55afb0a2018-10-15 10:42:14 +08006176 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6177 (__v8df)_mm512_permutevar_pd(__A, __C),
6178 (__v8df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08006179}
6180
Logan Chien55afb0a2018-10-15 10:42:14 +08006181static __inline__ __m512d __DEFAULT_FN_ATTRS512
6182_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006183{
Logan Chien55afb0a2018-10-15 10:42:14 +08006184 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6185 (__v8df)_mm512_permutevar_pd(__A, __C),
6186 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08006187}
6188
Logan Chien55afb0a2018-10-15 10:42:14 +08006189static __inline__ __m512 __DEFAULT_FN_ATTRS512
6190_mm512_permutevar_ps(__m512 __A, __m512i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006191{
Logan Chien55afb0a2018-10-15 10:42:14 +08006192 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +08006193}
6194
Logan Chien55afb0a2018-10-15 10:42:14 +08006195static __inline__ __m512 __DEFAULT_FN_ATTRS512
6196_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006197{
Logan Chien55afb0a2018-10-15 10:42:14 +08006198 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6199 (__v16sf)_mm512_permutevar_ps(__A, __C),
6200 (__v16sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08006201}
6202
Logan Chien55afb0a2018-10-15 10:42:14 +08006203static __inline__ __m512 __DEFAULT_FN_ATTRS512
6204_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006205{
Logan Chien55afb0a2018-10-15 10:42:14 +08006206 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6207 (__v16sf)_mm512_permutevar_ps(__A, __C),
6208 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08006209}
6210
Logan Chien55afb0a2018-10-15 10:42:14 +08006211static __inline __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006212_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6213{
Logan Chien55afb0a2018-10-15 10:42:14 +08006214 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6215 (__v8df)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08006216}
6217
Logan Chien55afb0a2018-10-15 10:42:14 +08006218static __inline__ __m512d __DEFAULT_FN_ATTRS512
6219_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006220{
Logan Chien55afb0a2018-10-15 10:42:14 +08006221 return (__m512d)__builtin_ia32_selectpd_512(__U,
6222 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6223 (__v8df)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +08006224}
6225
Logan Chien55afb0a2018-10-15 10:42:14 +08006226static __inline__ __m512d __DEFAULT_FN_ATTRS512
6227_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
6228 __m512d __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006229{
Logan Chien55afb0a2018-10-15 10:42:14 +08006230 return (__m512d)__builtin_ia32_selectpd_512(__U,
6231 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6232 (__v8df)(__m512d)__I);
Logan Chien2833ffb2018-10-09 10:03:24 +08006233}
6234
Logan Chien55afb0a2018-10-15 10:42:14 +08006235static __inline__ __m512d __DEFAULT_FN_ATTRS512
6236_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
6237 __m512d __B)
6238{
6239 return (__m512d)__builtin_ia32_selectpd_512(__U,
6240 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6241 (__v8df)_mm512_setzero_pd());
6242}
6243
6244static __inline __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006245_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6246{
Logan Chien55afb0a2018-10-15 10:42:14 +08006247 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6248 (__v16sf) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +08006249}
6250
Logan Chien55afb0a2018-10-15 10:42:14 +08006251static __inline__ __m512 __DEFAULT_FN_ATTRS512
6252_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006253{
Logan Chien55afb0a2018-10-15 10:42:14 +08006254 return (__m512)__builtin_ia32_selectps_512(__U,
6255 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6256 (__v16sf)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +08006257}
6258
Logan Chien55afb0a2018-10-15 10:42:14 +08006259static __inline__ __m512 __DEFAULT_FN_ATTRS512
6260_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006261{
Logan Chien55afb0a2018-10-15 10:42:14 +08006262 return (__m512)__builtin_ia32_selectps_512(__U,
6263 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6264 (__v16sf)(__m512)__I);
Logan Chien2833ffb2018-10-09 10:03:24 +08006265}
6266
Logan Chien55afb0a2018-10-15 10:42:14 +08006267static __inline__ __m512 __DEFAULT_FN_ATTRS512
6268_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006269{
Logan Chien55afb0a2018-10-15 10:42:14 +08006270 return (__m512)__builtin_ia32_selectps_512(__U,
6271 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6272 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08006273}
6274
Logan Chien2833ffb2018-10-09 10:03:24 +08006275
/* Wrappers over __builtin_ia32_cvttpd2udq512_mask that take the rounding
 * argument R explicitly.
 *
 * A  source vector, cast to eight doubles
 * W  pass-through __m256i (mask form only)
 * U  8-bit mask (mask/maskz forms)
 * R  rounding argument, forwarded as an int
 *
 * Unmasked form: all-ones mask with an undefined pass-through vector. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))

/* Merge-masked form: pass-through is W. */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))

/* Zero-masked form: pass-through is an all-zero vector. */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006290
Logan Chien55afb0a2018-10-15 10:42:14 +08006291static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006292_mm512_cvttpd_epu32 (__m512d __A)
6293{
6294 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6295 (__v8si)
6296 _mm256_undefined_si256 (),
6297 (__mmask8) -1,
6298 _MM_FROUND_CUR_DIRECTION);
6299}
6300
Logan Chien55afb0a2018-10-15 10:42:14 +08006301static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006302_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6303{
6304 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6305 (__v8si) __W,
6306 (__mmask8) __U,
6307 _MM_FROUND_CUR_DIRECTION);
6308}
6309
Logan Chien55afb0a2018-10-15 10:42:14 +08006310static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006311_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6312{
6313 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6314 (__v8si)
6315 _mm256_setzero_si256 (),
6316 (__mmask8) __U,
6317 _MM_FROUND_CUR_DIRECTION);
6318}
6319
/* Scalar-double roundscale family; every macro expands to a call of
 * __builtin_ia32_rndscalesd_round_mask.
 *
 * A, B     the two __m128d operands, forwarded in that order
 * W        pass-through __m128d (mask forms)
 * U        8-bit mask (mask/maskz forms); unmasked forms pass (__mmask8)-1
 * imm / I  immediate forwarded as an int
 * R        rounding argument; forms without R pass _MM_FROUND_CUR_DIRECTION
 *
 * Unmasked, explicit R; pass-through is an all-zero vector. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

/* Unmasked, current rounding direction. */
#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

/* Merge-masked (pass-through W), current rounding direction. */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

/* Merge-masked (pass-through W), explicit R. */
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

/* Zero-masked, current rounding direction. */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

/* Zero-masked, explicit R. */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006361
/* Scalar-float roundscale family; every macro expands to a call of
 * __builtin_ia32_rndscaless_round_mask.
 *
 * A, B     the two __m128 operands, forwarded in that order
 * W        pass-through __m128 (mask forms)
 * U        8-bit mask (mask/maskz forms); unmasked forms pass (__mmask8)-1
 * imm / I  immediate forwarded as an int
 * R        rounding argument; forms without R pass _MM_FROUND_CUR_DIRECTION
 *
 * Unmasked, explicit R; pass-through is an all-zero vector. */
#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

/* Unmasked, current rounding direction. */
#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

/* Merge-masked (pass-through W), current rounding direction. */
#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

/* Merge-masked (pass-through W), explicit R. */
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

/* Zero-masked, current rounding direction. */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

/* Zero-masked, explicit R. */
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006403
/* Packed-double scalef with an explicit rounding argument; each macro
 * expands to __builtin_ia32_scalefpd512_mask(A, B, pass-through, mask, R).
 *
 * A, B  __m512d operands
 * W     pass-through __m512d (mask form)
 * U     8-bit mask (mask/maskz forms)
 * R     rounding argument, forwarded as an int
 *
 * Unmasked form: all-ones mask with an undefined pass-through vector. */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked form: pass-through is W. */
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked form: pass-through is an all-zero vector. */
#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006421
Logan Chien55afb0a2018-10-15 10:42:14 +08006422static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006423_mm512_scalef_pd (__m512d __A, __m512d __B)
6424{
6425 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6426 (__v8df) __B,
6427 (__v8df)
6428 _mm512_undefined_pd (),
6429 (__mmask8) -1,
6430 _MM_FROUND_CUR_DIRECTION);
6431}
6432
Logan Chien55afb0a2018-10-15 10:42:14 +08006433static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006434_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6435{
6436 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6437 (__v8df) __B,
6438 (__v8df) __W,
6439 (__mmask8) __U,
6440 _MM_FROUND_CUR_DIRECTION);
6441}
6442
Logan Chien55afb0a2018-10-15 10:42:14 +08006443static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006444_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6445{
6446 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6447 (__v8df) __B,
6448 (__v8df)
6449 _mm512_setzero_pd (),
6450 (__mmask8) __U,
6451 _MM_FROUND_CUR_DIRECTION);
6452}
6453
/* Packed-float scalef with an explicit rounding argument; each macro
 * expands to __builtin_ia32_scalefps512_mask(A, B, pass-through, mask, R).
 *
 * A, B  __m512 operands
 * W     pass-through __m512 (mask form)
 * U     16-bit mask (mask/maskz forms)
 * R     rounding argument, forwarded as an int
 *
 * Unmasked form: all-ones mask with an undefined pass-through vector. */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

/* Merge-masked form: pass-through is W. */
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

/* Zero-masked form: pass-through is an all-zero vector. */
#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006471
Logan Chien55afb0a2018-10-15 10:42:14 +08006472static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006473_mm512_scalef_ps (__m512 __A, __m512 __B)
6474{
6475 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6476 (__v16sf) __B,
6477 (__v16sf)
6478 _mm512_undefined_ps (),
6479 (__mmask16) -1,
6480 _MM_FROUND_CUR_DIRECTION);
6481}
6482
Logan Chien55afb0a2018-10-15 10:42:14 +08006483static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006484_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6485{
6486 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6487 (__v16sf) __B,
6488 (__v16sf) __W,
6489 (__mmask16) __U,
6490 _MM_FROUND_CUR_DIRECTION);
6491}
6492
Logan Chien55afb0a2018-10-15 10:42:14 +08006493static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006494_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6495{
6496 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6497 (__v16sf) __B,
6498 (__v16sf)
6499 _mm512_setzero_ps (),
6500 (__mmask16) __U,
6501 _MM_FROUND_CUR_DIRECTION);
6502}
6503
Logan Chien55afb0a2018-10-15 10:42:14 +08006504#define _mm_scalef_round_sd(A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006505 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6506 (__v2df)(__m128d)(B), \
6507 (__v2df)_mm_setzero_pd(), \
6508 (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006509
Logan Chien55afb0a2018-10-15 10:42:14 +08006510static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006511_mm_scalef_sd (__m128d __A, __m128d __B)
6512{
6513 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6514 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6515 (__mmask8) -1,
6516 _MM_FROUND_CUR_DIRECTION);
6517}
6518
Logan Chien55afb0a2018-10-15 10:42:14 +08006519static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006520_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6521{
6522 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6523 (__v2df) __B,
6524 (__v2df) __W,
6525 (__mmask8) __U,
6526 _MM_FROUND_CUR_DIRECTION);
6527}
6528
Logan Chien55afb0a2018-10-15 10:42:14 +08006529#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006530 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6531 (__v2df)(__m128d)(B), \
6532 (__v2df)(__m128d)(W), \
6533 (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006534
Logan Chien55afb0a2018-10-15 10:42:14 +08006535static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006536_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6537{
6538 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6539 (__v2df) __B,
6540 (__v2df) _mm_setzero_pd (),
6541 (__mmask8) __U,
6542 _MM_FROUND_CUR_DIRECTION);
6543}
6544
Logan Chien55afb0a2018-10-15 10:42:14 +08006545#define _mm_maskz_scalef_round_sd(U, A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006546 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6547 (__v2df)(__m128d)(B), \
6548 (__v2df)_mm_setzero_pd(), \
6549 (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006550
Logan Chien55afb0a2018-10-15 10:42:14 +08006551#define _mm_scalef_round_ss(A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006552 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6553 (__v4sf)(__m128)(B), \
6554 (__v4sf)_mm_setzero_ps(), \
6555 (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006556
Logan Chien55afb0a2018-10-15 10:42:14 +08006557static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006558_mm_scalef_ss (__m128 __A, __m128 __B)
6559{
6560 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6561 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6562 (__mmask8) -1,
6563 _MM_FROUND_CUR_DIRECTION);
6564}
6565
Logan Chien55afb0a2018-10-15 10:42:14 +08006566static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006567_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6568{
6569 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6570 (__v4sf) __B,
6571 (__v4sf) __W,
6572 (__mmask8) __U,
6573 _MM_FROUND_CUR_DIRECTION);
6574}
6575
Logan Chien55afb0a2018-10-15 10:42:14 +08006576#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006577 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6578 (__v4sf)(__m128)(B), \
6579 (__v4sf)(__m128)(W), \
6580 (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006581
Logan Chien55afb0a2018-10-15 10:42:14 +08006582static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006583_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6584{
6585 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6586 (__v4sf) __B,
6587 (__v4sf) _mm_setzero_ps (),
6588 (__mmask8) __U,
6589 _MM_FROUND_CUR_DIRECTION);
6590}
6591
Logan Chien55afb0a2018-10-15 10:42:14 +08006592#define _mm_maskz_scalef_round_ss(U, A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006593 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6594 (__v4sf)(__m128)(B), \
6595 (__v4sf)_mm_setzero_ps(), \
6596 (__mmask8)(U), \
6597 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006598
Logan Chien55afb0a2018-10-15 10:42:14 +08006599static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006600_mm512_srai_epi32(__m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08006601{
6602 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6603}
Logan Chien2833ffb2018-10-09 10:03:24 +08006604
Logan Chien55afb0a2018-10-15 10:42:14 +08006605static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006606_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
6607 unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08006608{
6609 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6610 (__v16si)_mm512_srai_epi32(__A, __B),
6611 (__v16si)__W);
6612}
Logan Chien2833ffb2018-10-09 10:03:24 +08006613
Logan Chien55afb0a2018-10-15 10:42:14 +08006614static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006615_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A,
6616 unsigned int __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08006617 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6618 (__v16si)_mm512_srai_epi32(__A, __B),
6619 (__v16si)_mm512_setzero_si512());
6620}
Logan Chien2833ffb2018-10-09 10:03:24 +08006621
Logan Chien55afb0a2018-10-15 10:42:14 +08006622static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006623_mm512_srai_epi64(__m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08006624{
6625 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6626}
Logan Chien2833ffb2018-10-09 10:03:24 +08006627
Logan Chien55afb0a2018-10-15 10:42:14 +08006628static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006629_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08006630{
6631 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6632 (__v8di)_mm512_srai_epi64(__A, __B),
6633 (__v8di)__W);
6634}
Logan Chien2833ffb2018-10-09 10:03:24 +08006635
Logan Chien55afb0a2018-10-15 10:42:14 +08006636static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006637_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08006638{
6639 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6640 (__v8di)_mm512_srai_epi64(__A, __B),
6641 (__v8di)_mm512_setzero_si512());
6642}
Logan Chien2833ffb2018-10-09 10:03:24 +08006643
/* Shuffle of A and B (each viewed as sixteen floats) under the immediate
 * imm, via __builtin_ia32_shuf_f32x4. imm is forwarded as an int. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

/* Merge-masked form: select between the shuffle result and W under the
 * 16-bit mask U. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                (__v16sf)(__m512)(W)))

/* Zero-masked form: the pass-through operand is an all-zero vector. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                (__v16sf)_mm512_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006657
/* Shuffle of A and B (each viewed as eight doubles) under the immediate
 * imm, via __builtin_ia32_shuf_f64x2. imm is forwarded as an int. */
#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

/* Merge-masked form: select between the shuffle result and W under the
 * 8-bit mask U. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                 (__v8df)(__m512d)(W)))

/* Zero-masked form: the pass-through operand is an all-zero vector. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                 (__v8df)_mm512_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006671
/* Shuffle of A and B (each viewed as sixteen 32-bit integers) under the
 * immediate imm, via __builtin_ia32_shuf_i32x4. imm is forwarded as an
 * int. */
#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

/* Merge-masked form: select between the shuffle result and W under the
 * 16-bit mask U. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

/* Zero-masked form: the pass-through operand is an all-zero vector. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006685
/* Shuffle of A and B (each viewed as eight 64-bit integers) under the
 * immediate imm, via __builtin_ia32_shuf_i64x2. imm is forwarded as an
 * int. */
#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

/* Merge-masked form: select between the shuffle result and W under the
 * 8-bit mask U. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                (__v8di)(__m512i)(W)))

/* Zero-masked form: the pass-through operand is an all-zero vector. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                (__v8di)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006699
/* Shuffle of the doubles of A and B under the immediate M, via
 * __builtin_ia32_shufpd512. M is forwarded as an int. */
#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

/* Merge-masked form: select between the shuffle result and W under the
 * 8-bit mask U. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                      (__v8df)(__m512d)(W)))

/* Zero-masked form: the pass-through operand is an all-zero vector. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                      (__v8df)_mm512_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006713
/* Shuffle of the floats of A and B under the immediate M, via
 * __builtin_ia32_shufps512. M is forwarded as an int. */
#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

/* Merge-masked form: select between the shuffle result and W under the
 * 16-bit mask U. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                     (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                     (__v16sf)(__m512)(W)))

/* Zero-masked form: the pass-through operand is an all-zero vector. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                     (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                     (__v16sf)_mm512_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006727
Logan Chien55afb0a2018-10-15 10:42:14 +08006728#define _mm_sqrt_round_sd(A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006729 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6730 (__v2df)(__m128d)(B), \
6731 (__v2df)_mm_setzero_pd(), \
6732 (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006733
Logan Chien55afb0a2018-10-15 10:42:14 +08006734static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006735_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6736{
6737 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6738 (__v2df) __B,
6739 (__v2df) __W,
6740 (__mmask8) __U,
6741 _MM_FROUND_CUR_DIRECTION);
6742}
6743
Logan Chien55afb0a2018-10-15 10:42:14 +08006744#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006745 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6746 (__v2df)(__m128d)(B), \
6747 (__v2df)(__m128d)(W), \
6748 (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006749
Logan Chien55afb0a2018-10-15 10:42:14 +08006750static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006751_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6752{
6753 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6754 (__v2df) __B,
6755 (__v2df) _mm_setzero_pd (),
6756 (__mmask8) __U,
6757 _MM_FROUND_CUR_DIRECTION);
6758}
6759
Logan Chien55afb0a2018-10-15 10:42:14 +08006760#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006761 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6762 (__v2df)(__m128d)(B), \
6763 (__v2df)_mm_setzero_pd(), \
6764 (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006765
Logan Chien55afb0a2018-10-15 10:42:14 +08006766#define _mm_sqrt_round_ss(A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006767 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6768 (__v4sf)(__m128)(B), \
6769 (__v4sf)_mm_setzero_ps(), \
6770 (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006771
Logan Chien55afb0a2018-10-15 10:42:14 +08006772static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006773_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6774{
6775 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6776 (__v4sf) __B,
6777 (__v4sf) __W,
6778 (__mmask8) __U,
6779 _MM_FROUND_CUR_DIRECTION);
6780}
6781
Logan Chien55afb0a2018-10-15 10:42:14 +08006782#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006783 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6784 (__v4sf)(__m128)(B), \
6785 (__v4sf)(__m128)(W), (__mmask8)(U), \
6786 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006787
Logan Chien55afb0a2018-10-15 10:42:14 +08006788static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006789_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6790{
6791 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6792 (__v4sf) __B,
6793 (__v4sf) _mm_setzero_ps (),
6794 (__mmask8) __U,
6795 _MM_FROUND_CUR_DIRECTION);
6796}
6797
Logan Chien55afb0a2018-10-15 10:42:14 +08006798#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08006799 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6800 (__v4sf)(__m128)(B), \
6801 (__v4sf)_mm_setzero_ps(), \
6802 (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006803
Logan Chien55afb0a2018-10-15 10:42:14 +08006804static __inline__ __m512 __DEFAULT_FN_ATTRS512
6805_mm512_broadcast_f32x4(__m128 __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006806{
Logan Chien55afb0a2018-10-15 10:42:14 +08006807 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6808 0, 1, 2, 3, 0, 1, 2, 3,
6809 0, 1, 2, 3, 0, 1, 2, 3);
Logan Chien2833ffb2018-10-09 10:03:24 +08006810}
6811
Logan Chien55afb0a2018-10-15 10:42:14 +08006812static __inline__ __m512 __DEFAULT_FN_ATTRS512
6813_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006814{
Logan Chien55afb0a2018-10-15 10:42:14 +08006815 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6816 (__v16sf)_mm512_broadcast_f32x4(__A),
6817 (__v16sf)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +08006818}
6819
Logan Chien55afb0a2018-10-15 10:42:14 +08006820static __inline__ __m512 __DEFAULT_FN_ATTRS512
6821_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006822{
Logan Chien55afb0a2018-10-15 10:42:14 +08006823 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6824 (__v16sf)_mm512_broadcast_f32x4(__A),
6825 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08006826}
6827
Logan Chien55afb0a2018-10-15 10:42:14 +08006828static __inline__ __m512d __DEFAULT_FN_ATTRS512
6829_mm512_broadcast_f64x4(__m256d __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006830{
Logan Chien55afb0a2018-10-15 10:42:14 +08006831 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6832 0, 1, 2, 3, 0, 1, 2, 3);
Logan Chien2833ffb2018-10-09 10:03:24 +08006833}
6834
Logan Chien55afb0a2018-10-15 10:42:14 +08006835static __inline__ __m512d __DEFAULT_FN_ATTRS512
6836_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006837{
Logan Chien55afb0a2018-10-15 10:42:14 +08006838 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6839 (__v8df)_mm512_broadcast_f64x4(__A),
6840 (__v8df)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +08006841}
6842
Logan Chien55afb0a2018-10-15 10:42:14 +08006843static __inline__ __m512d __DEFAULT_FN_ATTRS512
6844_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006845{
Logan Chien55afb0a2018-10-15 10:42:14 +08006846 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6847 (__v8df)_mm512_broadcast_f64x4(__A),
6848 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08006849}
6850
Logan Chien55afb0a2018-10-15 10:42:14 +08006851static __inline__ __m512i __DEFAULT_FN_ATTRS512
6852_mm512_broadcast_i32x4(__m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006853{
Logan Chien55afb0a2018-10-15 10:42:14 +08006854 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6855 0, 1, 2, 3, 0, 1, 2, 3,
6856 0, 1, 2, 3, 0, 1, 2, 3);
Logan Chien2833ffb2018-10-09 10:03:24 +08006857}
6858
Logan Chien55afb0a2018-10-15 10:42:14 +08006859static __inline__ __m512i __DEFAULT_FN_ATTRS512
6860_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006861{
Logan Chien55afb0a2018-10-15 10:42:14 +08006862 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6863 (__v16si)_mm512_broadcast_i32x4(__A),
6864 (__v16si)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +08006865}
6866
Logan Chien55afb0a2018-10-15 10:42:14 +08006867static __inline__ __m512i __DEFAULT_FN_ATTRS512
6868_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006869{
Logan Chien55afb0a2018-10-15 10:42:14 +08006870 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6871 (__v16si)_mm512_broadcast_i32x4(__A),
6872 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08006873}
6874
Logan Chien55afb0a2018-10-15 10:42:14 +08006875static __inline__ __m512i __DEFAULT_FN_ATTRS512
6876_mm512_broadcast_i64x4(__m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006877{
Logan Chien55afb0a2018-10-15 10:42:14 +08006878 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6879 0, 1, 2, 3, 0, 1, 2, 3);
Logan Chien2833ffb2018-10-09 10:03:24 +08006880}
6881
Logan Chien55afb0a2018-10-15 10:42:14 +08006882static __inline__ __m512i __DEFAULT_FN_ATTRS512
6883_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006884{
Logan Chien55afb0a2018-10-15 10:42:14 +08006885 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6886 (__v8di)_mm512_broadcast_i64x4(__A),
6887 (__v8di)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +08006888}
6889
Logan Chien55afb0a2018-10-15 10:42:14 +08006890static __inline__ __m512i __DEFAULT_FN_ATTRS512
6891_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006892{
Logan Chien55afb0a2018-10-15 10:42:14 +08006893 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6894 (__v8di)_mm512_broadcast_i64x4(__A),
6895 (__v8di)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08006896}
6897
Logan Chien55afb0a2018-10-15 10:42:14 +08006898static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006899_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
6900{
6901 return (__m512d)__builtin_ia32_selectpd_512(__M,
6902 (__v8df) _mm512_broadcastsd_pd(__A),
6903 (__v8df) __O);
6904}
6905
Logan Chien55afb0a2018-10-15 10:42:14 +08006906static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006907_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6908{
6909 return (__m512d)__builtin_ia32_selectpd_512(__M,
6910 (__v8df) _mm512_broadcastsd_pd(__A),
6911 (__v8df) _mm512_setzero_pd());
6912}
6913
Logan Chien55afb0a2018-10-15 10:42:14 +08006914static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006915_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
6916{
6917 return (__m512)__builtin_ia32_selectps_512(__M,
6918 (__v16sf) _mm512_broadcastss_ps(__A),
6919 (__v16sf) __O);
6920}
6921
Logan Chien55afb0a2018-10-15 10:42:14 +08006922static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006923_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
6924{
6925 return (__m512)__builtin_ia32_selectps_512(__M,
6926 (__v16sf) _mm512_broadcastss_ps(__A),
6927 (__v16sf) _mm512_setzero_ps());
6928}
6929
Logan Chien55afb0a2018-10-15 10:42:14 +08006930static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006931_mm512_cvtsepi32_epi8 (__m512i __A)
6932{
6933 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6934 (__v16qi) _mm_undefined_si128 (),
6935 (__mmask16) -1);
6936}
6937
Logan Chien55afb0a2018-10-15 10:42:14 +08006938static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006939_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6940{
6941 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6942 (__v16qi) __O, __M);
6943}
6944
Logan Chien55afb0a2018-10-15 10:42:14 +08006945static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006946_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
6947{
6948 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6949 (__v16qi) _mm_setzero_si128 (),
6950 __M);
6951}
6952
Logan Chien55afb0a2018-10-15 10:42:14 +08006953static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006954_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6955{
6956 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6957}
6958
Logan Chien55afb0a2018-10-15 10:42:14 +08006959static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006960_mm512_cvtsepi32_epi16 (__m512i __A)
6961{
6962 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6963 (__v16hi) _mm256_undefined_si256 (),
6964 (__mmask16) -1);
6965}
6966
Logan Chien55afb0a2018-10-15 10:42:14 +08006967static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006968_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6969{
6970 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6971 (__v16hi) __O, __M);
6972}
6973
Logan Chien55afb0a2018-10-15 10:42:14 +08006974static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006975_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
6976{
6977 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6978 (__v16hi) _mm256_setzero_si256 (),
6979 __M);
6980}
6981
Logan Chien55afb0a2018-10-15 10:42:14 +08006982static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006983_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
6984{
6985 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6986}
6987
Logan Chien55afb0a2018-10-15 10:42:14 +08006988static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006989_mm512_cvtsepi64_epi8 (__m512i __A)
6990{
6991 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6992 (__v16qi) _mm_undefined_si128 (),
6993 (__mmask8) -1);
6994}
6995
Logan Chien55afb0a2018-10-15 10:42:14 +08006996static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08006997_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6998{
6999 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7000 (__v16qi) __O, __M);
7001}
7002
Logan Chien55afb0a2018-10-15 10:42:14 +08007003static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007004_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7005{
7006 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7007 (__v16qi) _mm_setzero_si128 (),
7008 __M);
7009}
7010
Logan Chien55afb0a2018-10-15 10:42:14 +08007011static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007012_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7013{
7014 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7015}
7016
Logan Chien55afb0a2018-10-15 10:42:14 +08007017static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007018_mm512_cvtsepi64_epi32 (__m512i __A)
7019{
Logan Chien2833ffb2018-10-09 10:03:24 +08007020 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7021 (__v8si) _mm256_undefined_si256 (),
7022 (__mmask8) -1);
7023}
7024
Logan Chien55afb0a2018-10-15 10:42:14 +08007025static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007026_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7027{
7028 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7029 (__v8si) __O, __M);
7030}
7031
Logan Chien55afb0a2018-10-15 10:42:14 +08007032static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007033_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7034{
7035 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7036 (__v8si) _mm256_setzero_si256 (),
7037 __M);
7038}
7039
Logan Chien55afb0a2018-10-15 10:42:14 +08007040static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007041_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7042{
7043 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7044}
7045
Logan Chien55afb0a2018-10-15 10:42:14 +08007046static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007047_mm512_cvtsepi64_epi16 (__m512i __A)
7048{
7049 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7050 (__v8hi) _mm_undefined_si128 (),
7051 (__mmask8) -1);
7052}
7053
Logan Chien55afb0a2018-10-15 10:42:14 +08007054static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007055_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7056{
7057 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7058 (__v8hi) __O, __M);
7059}
7060
Logan Chien55afb0a2018-10-15 10:42:14 +08007061static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007062_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7063{
7064 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7065 (__v8hi) _mm_setzero_si128 (),
7066 __M);
7067}
7068
Logan Chien55afb0a2018-10-15 10:42:14 +08007069static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007070_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7071{
7072 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7073}
7074
Logan Chien55afb0a2018-10-15 10:42:14 +08007075static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007076_mm512_cvtusepi32_epi8 (__m512i __A)
7077{
7078 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7079 (__v16qi) _mm_undefined_si128 (),
7080 (__mmask16) -1);
7081}
7082
Logan Chien55afb0a2018-10-15 10:42:14 +08007083static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007084_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7085{
7086 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7087 (__v16qi) __O,
7088 __M);
7089}
7090
Logan Chien55afb0a2018-10-15 10:42:14 +08007091static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007092_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7093{
7094 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7095 (__v16qi) _mm_setzero_si128 (),
7096 __M);
7097}
7098
Logan Chien55afb0a2018-10-15 10:42:14 +08007099static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007100_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7101{
7102 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7103}
7104
Logan Chien55afb0a2018-10-15 10:42:14 +08007105static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007106_mm512_cvtusepi32_epi16 (__m512i __A)
7107{
7108 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7109 (__v16hi) _mm256_undefined_si256 (),
7110 (__mmask16) -1);
7111}
7112
Logan Chien55afb0a2018-10-15 10:42:14 +08007113static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007114_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7115{
7116 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7117 (__v16hi) __O,
7118 __M);
7119}
7120
Logan Chien55afb0a2018-10-15 10:42:14 +08007121static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007122_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7123{
7124 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7125 (__v16hi) _mm256_setzero_si256 (),
7126 __M);
7127}
7128
Logan Chien55afb0a2018-10-15 10:42:14 +08007129static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007130_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7131{
7132 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7133}
7134
Logan Chien55afb0a2018-10-15 10:42:14 +08007135static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007136_mm512_cvtusepi64_epi8 (__m512i __A)
7137{
7138 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7139 (__v16qi) _mm_undefined_si128 (),
7140 (__mmask8) -1);
7141}
7142
Logan Chien55afb0a2018-10-15 10:42:14 +08007143static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007144_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7145{
7146 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7147 (__v16qi) __O,
7148 __M);
7149}
7150
Logan Chien55afb0a2018-10-15 10:42:14 +08007151static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007152_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7153{
7154 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7155 (__v16qi) _mm_setzero_si128 (),
7156 __M);
7157}
7158
Logan Chien55afb0a2018-10-15 10:42:14 +08007159static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007160_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7161{
7162 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7163}
7164
Logan Chien55afb0a2018-10-15 10:42:14 +08007165static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007166_mm512_cvtusepi64_epi32 (__m512i __A)
7167{
7168 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7169 (__v8si) _mm256_undefined_si256 (),
7170 (__mmask8) -1);
7171}
7172
Logan Chien55afb0a2018-10-15 10:42:14 +08007173static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007174_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7175{
7176 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7177 (__v8si) __O, __M);
7178}
7179
Logan Chien55afb0a2018-10-15 10:42:14 +08007180static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007181_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7182{
7183 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7184 (__v8si) _mm256_setzero_si256 (),
7185 __M);
7186}
7187
Logan Chien55afb0a2018-10-15 10:42:14 +08007188static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007189_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7190{
7191 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7192}
7193
Logan Chien55afb0a2018-10-15 10:42:14 +08007194static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007195_mm512_cvtusepi64_epi16 (__m512i __A)
7196{
7197 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7198 (__v8hi) _mm_undefined_si128 (),
7199 (__mmask8) -1);
7200}
7201
Logan Chien55afb0a2018-10-15 10:42:14 +08007202static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007203_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7204{
7205 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7206 (__v8hi) __O, __M);
7207}
7208
Logan Chien55afb0a2018-10-15 10:42:14 +08007209static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007210_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7211{
7212 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7213 (__v8hi) _mm_setzero_si128 (),
7214 __M);
7215}
7216
Logan Chien55afb0a2018-10-15 10:42:14 +08007217static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007218_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7219{
7220 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7221}
7222
Logan Chien55afb0a2018-10-15 10:42:14 +08007223static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007224_mm512_cvtepi32_epi8 (__m512i __A)
7225{
7226 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7227 (__v16qi) _mm_undefined_si128 (),
7228 (__mmask16) -1);
7229}
7230
Logan Chien55afb0a2018-10-15 10:42:14 +08007231static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007232_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7233{
7234 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7235 (__v16qi) __O, __M);
7236}
7237
Logan Chien55afb0a2018-10-15 10:42:14 +08007238static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007239_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7240{
7241 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7242 (__v16qi) _mm_setzero_si128 (),
7243 __M);
7244}
7245
Logan Chien55afb0a2018-10-15 10:42:14 +08007246static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007247_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7248{
7249 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7250}
7251
Logan Chien55afb0a2018-10-15 10:42:14 +08007252static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007253_mm512_cvtepi32_epi16 (__m512i __A)
7254{
7255 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7256 (__v16hi) _mm256_undefined_si256 (),
7257 (__mmask16) -1);
7258}
7259
Logan Chien55afb0a2018-10-15 10:42:14 +08007260static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007261_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7262{
7263 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7264 (__v16hi) __O, __M);
7265}
7266
Logan Chien55afb0a2018-10-15 10:42:14 +08007267static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007268_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7269{
7270 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7271 (__v16hi) _mm256_setzero_si256 (),
7272 __M);
7273}
7274
Logan Chien55afb0a2018-10-15 10:42:14 +08007275static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007276_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7277{
7278 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7279}
7280
Logan Chien55afb0a2018-10-15 10:42:14 +08007281static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007282_mm512_cvtepi64_epi8 (__m512i __A)
7283{
7284 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7285 (__v16qi) _mm_undefined_si128 (),
7286 (__mmask8) -1);
7287}
7288
Logan Chien55afb0a2018-10-15 10:42:14 +08007289static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007290_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7291{
7292 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7293 (__v16qi) __O, __M);
7294}
7295
Logan Chien55afb0a2018-10-15 10:42:14 +08007296static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007297_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7298{
7299 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7300 (__v16qi) _mm_setzero_si128 (),
7301 __M);
7302}
7303
Logan Chien55afb0a2018-10-15 10:42:14 +08007304static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007305_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7306{
7307 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7308}
7309
Logan Chien55afb0a2018-10-15 10:42:14 +08007310static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007311_mm512_cvtepi64_epi32 (__m512i __A)
7312{
7313 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7314 (__v8si) _mm256_undefined_si256 (),
7315 (__mmask8) -1);
7316}
7317
Logan Chien55afb0a2018-10-15 10:42:14 +08007318static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007319_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7320{
7321 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7322 (__v8si) __O, __M);
7323}
7324
Logan Chien55afb0a2018-10-15 10:42:14 +08007325static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007326_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7327{
7328 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7329 (__v8si) _mm256_setzero_si256 (),
7330 __M);
7331}
7332
Logan Chien55afb0a2018-10-15 10:42:14 +08007333static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007334_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7335{
7336 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7337}
7338
Logan Chien55afb0a2018-10-15 10:42:14 +08007339static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007340_mm512_cvtepi64_epi16 (__m512i __A)
7341{
7342 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7343 (__v8hi) _mm_undefined_si128 (),
7344 (__mmask8) -1);
7345}
7346
Logan Chien55afb0a2018-10-15 10:42:14 +08007347static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007348_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7349{
7350 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7351 (__v8hi) __O, __M);
7352}
7353
Logan Chien55afb0a2018-10-15 10:42:14 +08007354static __inline__ __m128i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007355_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7356{
7357 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7358 (__v8hi) _mm_setzero_si128 (),
7359 __M);
7360}
7361
Logan Chien55afb0a2018-10-15 10:42:14 +08007362static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007363_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7364{
7365 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7366}
7367
/* Extracts four 32-bit integers (a __m128i) from the 512-bit vector A via
 * __builtin_ia32_extracti32x4_mask. imm is forwarded as the builtin's int
 * immediate; the pass-through vector is undefined and the mask is all ones.
 * NOTE(review): imm presumably selects which 128-bit group is returned --
 * confirm against the Intel reference. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), \
                                             (int)(imm), \
                                             (__v4si)_mm_undefined_si128(), \
                                             (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08007372
/* Merge-masked extract of four 32-bit integers from A via
 * __builtin_ia32_extracti32x4_mask: imm is the int immediate, W the
 * pass-through vector and U the 8-bit mask.
 * NOTE(review): lanes whose mask bit is clear presumably come from W --
 * confirm against the Intel reference. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), \
                                             (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007377
/* Zero-masked extract of four 32-bit integers from A via
 * __builtin_ia32_extracti32x4_mask: imm is the int immediate, the
 * pass-through vector is zero and U is the 8-bit mask.
 * NOTE(review): lanes whose mask bit is clear presumably become zero --
 * confirm against the Intel reference. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), \
                                             (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007382
/* Extracts four 64-bit integers (a __m256i) from the 512-bit vector A via
 * __builtin_ia32_extracti64x4_mask. imm is forwarded as the builtin's int
 * immediate; the pass-through vector is undefined and the mask is all ones.
 * NOTE(review): imm presumably selects which 256-bit half is returned --
 * confirm against the Intel reference. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), \
                                             (int)(imm), \
                                             (__v4di)_mm256_undefined_si256(), \
                                             (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08007387
/* Merge-masked extract of four 64-bit integers from A via
 * __builtin_ia32_extracti64x4_mask: imm is the int immediate, W the
 * pass-through vector and U the 8-bit mask.
 * NOTE(review): lanes whose mask bit is clear presumably come from W --
 * confirm against the Intel reference. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), \
                                             (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007392
/* Zero-masked extract of four 64-bit integers from A via
 * __builtin_ia32_extracti64x4_mask: imm is the int immediate, the
 * pass-through vector is zero and U is the 8-bit mask.
 * NOTE(review): lanes whose mask bit is clear presumably become zero --
 * confirm against the Intel reference. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), \
                                             (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007397
/* Inserts the four doubles of B (a __m256d) into the 512-bit vector A via
 * __builtin_ia32_insertf64x4; imm is forwarded as the int immediate.
 * NOTE(review): imm presumably selects which 256-bit half of A is replaced --
 * confirm against the Intel reference. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), \
                                       (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007401
/* Merge-masked 4-double insert: computes _mm512_insertf64x4(A, B, imm) and
 * passes it, with the 8-bit mask U and fallback vector W, to
 * __builtin_ia32_selectpd_512.
 * NOTE(review): lanes whose mask bit is clear presumably come from W --
 * confirm against the builtin's definition. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)(__m512d)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007406
/* Zero-masked 4-double insert: computes _mm512_insertf64x4(A, B, imm) and
 * passes it, with the 8-bit mask U and a zero vector, to
 * __builtin_ia32_selectpd_512.
 * NOTE(review): lanes whose mask bit is clear presumably become 0.0 --
 * confirm against the builtin's definition. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08007411
/* Inserts the four 64-bit integers of B (a __m256i) into the 512-bit vector A
 * via __builtin_ia32_inserti64x4; imm is forwarded as the int immediate.
 * NOTE(review): imm presumably selects which 256-bit half of A is replaced --
 * confirm against the Intel reference. */
#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), \
                                       (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007415
/* Merge-masked insert of four 64-bit integers: computes
 * _mm512_inserti64x4(A, B, imm) and passes it, with the 8-bit mask U and
 * fallback vector W, to __builtin_ia32_selectq_512.
 * NOTE(review): lanes whose mask bit is clear presumably come from W --
 * confirm against the builtin's definition. */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007420
/* Zero-masked insert of four 64-bit integers: computes
 * _mm512_inserti64x4(A, B, imm) and passes it, with the 8-bit mask U and a
 * zero vector, to __builtin_ia32_selectq_512.
 * NOTE(review): lanes whose mask bit is clear presumably become zero --
 * confirm against the builtin's definition. */
#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08007425
/* Inserts the four floats of B (a __m128) into the 512-bit vector A via
 * __builtin_ia32_insertf32x4; imm is forwarded as the int immediate.
 * NOTE(review): imm presumably selects which 128-bit group of A is replaced
 * -- confirm against the Intel reference. */
#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), \
                                      (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007429
/* Merge-masked 4-float insert: computes _mm512_insertf32x4(A, B, imm) and
 * passes it, with the 16-bit mask U and fallback vector W, to
 * __builtin_ia32_selectps_512.
 * NOTE(review): lanes whose mask bit is clear presumably come from W --
 * confirm against the builtin's definition. */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)(__m512)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007434
/* Zero-masked 4-float insert: computes _mm512_insertf32x4(A, B, imm) and
 * passes it, with the 16-bit mask U and a zero vector, to
 * __builtin_ia32_selectps_512.
 * NOTE(review): lanes whose mask bit is clear presumably become 0.0f --
 * confirm against the builtin's definition. */
#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08007439
/* Inserts the four 32-bit integers of B (a __m128i) into the 512-bit vector A
 * via __builtin_ia32_inserti32x4; imm is forwarded as the int immediate.
 * NOTE(review): imm presumably selects which 128-bit group of A is replaced
 * -- confirm against the Intel reference. */
#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), \
                                       (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007443
/* Merge-masked insert of four 32-bit integers: computes
 * _mm512_inserti32x4(A, B, imm) and passes it, with the 16-bit mask U and
 * fallback vector W, to __builtin_ia32_selectd_512.
 * NOTE(review): lanes whose mask bit is clear presumably come from W --
 * confirm against the builtin's definition. */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007448
/* Zero-masked insert of four 32-bit integers: computes
 * _mm512_inserti32x4(A, B, imm) and passes it, with the 16-bit mask U and a
 * zero vector, to __builtin_ia32_selectd_512.
 * NOTE(review): lanes whose mask bit is clear presumably become zero --
 * confirm against the builtin's definition. */
#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08007453
/* Packed-double getmant with explicit rounding argument R.
 *
 * B and C are packed into a single int immediate as ((C) << 2) | (B), i.e. B
 * occupies the low two bits and C the bits above them. The call goes to
 * __builtin_ia32_getmantpd512_mask with an undefined pass-through vector and
 * an all-ones 8-bit mask.
 * NOTE(review): B and C are presumably the normalization-interval and sign
 * controls -- confirm against the Intel reference. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, \
                                             (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007459
/* Merge-masked packed-double getmant with explicit rounding argument R.
 *
 * B and C are packed into one int immediate as ((C) << 2) | (B). The call
 * goes to __builtin_ia32_getmantpd512_mask with W as the pass-through vector
 * and U as the 8-bit mask.
 * NOTE(review): lanes whose mask bit is clear presumably come from W --
 * confirm against the Intel reference. */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007465
/* Zero-masked packed-double getmant with explicit rounding argument R.
 *
 * B and C are packed into one int immediate as ((C) << 2) | (B). The call
 * goes to __builtin_ia32_getmantpd512_mask with a zero pass-through vector
 * and U as the 8-bit mask.
 * NOTE(review): lanes whose mask bit is clear presumably become 0.0 --
 * confirm against the Intel reference. */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007471
/* Calls __builtin_ia32_getmantpd512_mask on A with the control immediate
 * assembled as ((C) << 2) | (B), an all-ones mask and
 * _MM_FROUND_CUR_DIRECTION.
 *
 * The pass-through operand is _mm512_undefined_pd(), the same choice made by
 * _mm512_getmant_round_pd and _mm512_getmant_ps for their all-ones-mask
 * forms; with every mask bit set no pass-through element is expected to
 * reach the result, so no particular value is required. */
#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C) << 2) | (B)), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08007478
/* Masked getmant on A: control immediate ((C) << 2) | (B), pass-through W,
 * mask U, rounding argument fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C) << 2) | (B)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

/* Zero-masked form: the pass-through operand is _mm512_setzero_pd(). */
#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C) << 2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08007492
/* Calls __builtin_ia32_getmantps512_mask on A with the control immediate
 * assembled as ((C) << 2) | (B), an undefined pass-through vector, an
 * all-ones mask and the rounding argument R. */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C) << 2) | (B)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)))

/* Masked form: W is the pass-through operand and U the mask. */
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C) << 2) | (B)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)))

/* Zero-masked form: the pass-through operand is _mm512_setzero_ps(). */
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C) << 2) | (B)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007510
/* Same builtin as the _round_ variants, with the rounding argument fixed to
 * _MM_FROUND_CUR_DIRECTION.  Unmasked form: undefined pass-through vector
 * and an all-ones mask. */
#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C) << 2) | (B)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

/* Masked form: W is the pass-through operand and U the mask. */
#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C) << 2) | (B)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

/* Zero-masked form: the pass-through operand is _mm512_setzero_ps(). */
#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C) << 2) | (B)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08007531
/* Calls __builtin_ia32_getexppd512_mask on A with an undefined pass-through
 * vector, an all-ones mask and the rounding argument R. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)))

/* Masked form: W is the pass-through operand and U the mask. */
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)))

/* Zero-masked form: the pass-through operand is _mm512_setzero_pd(). */
#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007546
Logan Chien55afb0a2018-10-15 10:42:14 +08007547static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007548_mm512_getexp_pd (__m512d __A)
7549{
7550 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7551 (__v8df) _mm512_undefined_pd (),
7552 (__mmask8) -1,
7553 _MM_FROUND_CUR_DIRECTION);
7554}
7555
Logan Chien55afb0a2018-10-15 10:42:14 +08007556static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007557_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7558{
7559 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7560 (__v8df) __W,
7561 (__mmask8) __U,
7562 _MM_FROUND_CUR_DIRECTION);
7563}
7564
Logan Chien55afb0a2018-10-15 10:42:14 +08007565static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007566_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
7567{
7568 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7569 (__v8df) _mm512_setzero_pd (),
7570 (__mmask8) __U,
7571 _MM_FROUND_CUR_DIRECTION);
7572}
7573
/* Calls __builtin_ia32_getexpps512_mask on A with an undefined pass-through
 * vector, an all-ones mask and the rounding argument R. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)))

/* Masked form: W is the pass-through operand and U the mask. */
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)))

/* Zero-masked form: the pass-through operand is _mm512_setzero_ps(). */
#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007588
Logan Chien55afb0a2018-10-15 10:42:14 +08007589static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007590_mm512_getexp_ps (__m512 __A)
7591{
7592 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7593 (__v16sf) _mm512_undefined_ps (),
7594 (__mmask16) -1,
7595 _MM_FROUND_CUR_DIRECTION);
7596}
7597
Logan Chien55afb0a2018-10-15 10:42:14 +08007598static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007599_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7600{
7601 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7602 (__v16sf) __W,
7603 (__mmask16) __U,
7604 _MM_FROUND_CUR_DIRECTION);
7605}
7606
Logan Chien55afb0a2018-10-15 10:42:14 +08007607static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08007608_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
7609{
7610 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7611 (__v16sf) _mm512_setzero_ps (),
7612 (__mmask16) __U,
7613 _MM_FROUND_CUR_DIRECTION);
7614}
7615
/* Unmasked gather through __builtin_ia32_gatherdiv16sf: undefined __v8sf
 * pass-through, base pointer addr, index viewed as __v8di, all-ones mask and
 * the scale argument.  The result is typed __m256. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf( \
      (__v8sf)_mm256_undefined_ps(), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)))

/* Masked form: v1_old is the pass-through operand and mask the __mmask8. */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf( \
      (__v8sf)(__m256)(v1_old), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007627
/* Unmasked gather through __builtin_ia32_gatherdiv16si: undefined __v8si
 * pass-through, base pointer addr, index viewed as __v8di, all-ones mask and
 * the scale argument.  The result is typed __m256i. */
#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si( \
      (__v8si)_mm256_undefined_si256(), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)))

/* Masked form: v1_old is the pass-through operand and mask the __mmask8. */
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si( \
      (__v8si)(__m256i)(v1_old), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007639
/* Unmasked gather through __builtin_ia32_gatherdiv8df: undefined __v8df
 * pass-through, base pointer addr, index viewed as __v8di, all-ones mask and
 * the scale argument.  The result is typed __m512d. */
#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df( \
      (__v8df)_mm512_undefined_pd(), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)))

/* Masked form: v1_old is the pass-through operand and mask the __mmask8. */
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df( \
      (__v8df)(__m512d)(v1_old), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007651
/* Unmasked gather through __builtin_ia32_gatherdiv8di: undefined pass-through
 * (obtained from _mm512_undefined_epi32() and viewed as __v8di), base pointer
 * addr, index viewed as __v8di, all-ones mask and the scale argument. */
#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di( \
      (__v8di)_mm512_undefined_epi32(), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)))

/* Masked form: v1_old is the pass-through operand and mask the __mmask8. */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di( \
      (__v8di)(__m512i)(v1_old), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007663
/* Unmasked gather through __builtin_ia32_gathersiv16sf: undefined __v16sf
 * pass-through, base pointer addr, index viewed as __v16si, all-ones mask
 * and the scale argument.  The result is typed __m512.
 *
 * index is an integer vector, so it is cast through __m512i (not the float
 * type __m512) before being viewed as __v16si, in the same way
 * _mm512_i32gather_epi32 treats its index.  Both spellings reinterpret the
 * same 64 bytes, so existing callers are unaffected. */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf( \
      (__v16sf)_mm512_undefined_ps(), \
      (void const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)-1, \
      (int)(scale)))

/* Masked form: v1_old is the pass-through operand and mask the __mmask16.
 * The index is likewise cast through __m512i. */
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf( \
      (__v16sf)(__m512)(v1_old), \
      (void const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)(mask), \
      (int)(scale)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007675
/* Unmasked gather through __builtin_ia32_gathersiv16si: undefined __v16si
 * pass-through, base pointer addr, index viewed as __v16si, all-ones mask
 * and the scale argument.  The result is typed __m512i. */
#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si( \
      (__v16si)_mm512_undefined_epi32(), \
      (void const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)-1, \
      (int)(scale)))

/* Masked form: v1_old is the pass-through operand and mask the __mmask16. */
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si( \
      (__v16si)(__m512i)(v1_old), \
      (void const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)(mask), \
      (int)(scale)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007687
/* Unmasked gather through __builtin_ia32_gathersiv8df: undefined __v8df
 * pass-through, base pointer addr, a 256-bit index viewed as __v8si,
 * all-ones mask and the scale argument.  The result is typed __m512d. */
#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df( \
      (__v8df)_mm512_undefined_pd(), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)-1, \
      (int)(scale)))

/* Masked form: v1_old is the pass-through operand and mask the __mmask8. */
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df( \
      (__v8df)(__m512d)(v1_old), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007699
/* Unmasked gather through __builtin_ia32_gathersiv8di: undefined pass-through
 * (obtained from _mm512_undefined_epi32() and viewed as __v8di), base pointer
 * addr, a 256-bit index viewed as __v8si, all-ones mask and the scale. */
#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di( \
      (__v8di)_mm512_undefined_epi32(), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)-1, \
      (int)(scale)))

/* Masked form: v1_old is the pass-through operand and mask the __mmask8. */
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di( \
      (__v8di)(__m512i)(v1_old), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007711
/* Unmasked scatter through __builtin_ia32_scatterdiv16sf: base pointer addr,
 * all-ones mask, index viewed as __v8di, data v1 viewed as __v8sf, scale. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale))

/* Masked form: the caller-supplied mask replaces the all-ones mask. */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08007721
/* Unmasked scatter through __builtin_ia32_scatterdiv16si: base pointer addr,
 * all-ones mask, index viewed as __v8di, data v1 viewed as __v8si, scale. */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))

/* Masked form: the caller-supplied mask replaces the all-ones mask. */
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08007731
/* Unmasked scatter through __builtin_ia32_scatterdiv8df: base pointer addr,
 * all-ones mask, index viewed as __v8di, data v1 viewed as __v8df, scale. */
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))

/* Masked form: the caller-supplied mask replaces the all-ones mask. */
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08007741
/* Unmasked scatter through __builtin_ia32_scatterdiv8di: base pointer addr,
 * all-ones mask, index viewed as __v8di, data v1 viewed as __v8di, scale. */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))

/* Masked form: the caller-supplied mask replaces the all-ones mask. */
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08007751
/* Unmasked scatter through __builtin_ia32_scattersiv16sf: base pointer addr,
 * all-ones __mmask16, index viewed as __v16si, data v1 viewed as __v16sf,
 * scale. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf( \
      (void *)(addr), \
      (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale))

/* Masked form: the caller-supplied mask replaces the all-ones mask. */
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf( \
      (void *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08007761
/* Unmasked scatter through __builtin_ia32_scattersiv16si: base pointer addr,
 * all-ones __mmask16, index viewed as __v16si, data v1 viewed as __v16si,
 * scale. */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si( \
      (void *)(addr), \
      (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale))

/* Masked form: the caller-supplied mask replaces the all-ones mask. */
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si( \
      (void *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08007771
/* Unmasked scatter through __builtin_ia32_scattersiv8df: base pointer addr,
 * all-ones mask, a 256-bit index viewed as __v8si, data v1 viewed as __v8df,
 * scale. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))

/* Masked form: the caller-supplied mask replaces the all-ones mask. */
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08007781
/* Unmasked scatter through __builtin_ia32_scattersiv8di: base pointer addr,
 * all-ones mask, a 256-bit index viewed as __v8si, data v1 viewed as __v8di,
 * scale. */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))

/* Masked form: the caller-supplied mask replaces the all-ones mask. */
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08007791
Logan Chien55afb0a2018-10-15 10:42:14 +08007792static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007793_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7794{
Logan Chien55afb0a2018-10-15 10:42:14 +08007795 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7796 (__v4sf)__A,
7797 (__v4sf)__B,
7798 (__mmask8)__U,
7799 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08007800}
7801
Logan Chien55afb0a2018-10-15 10:42:14 +08007802#define _mm_fmadd_round_ss(A, B, C, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007803 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7804 (__v4sf)(__m128)(B), \
7805 (__v4sf)(__m128)(C), (__mmask8)-1, \
7806 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007807
Logan Chien55afb0a2018-10-15 10:42:14 +08007808#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007809 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7810 (__v4sf)(__m128)(A), \
7811 (__v4sf)(__m128)(B), (__mmask8)(U), \
7812 (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08007813
7814static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007815_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7816{
Logan Chien55afb0a2018-10-15 10:42:14 +08007817 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7818 (__v4sf)__B,
7819 (__v4sf)__C,
7820 (__mmask8)__U,
7821 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08007822}
7823
Logan Chien55afb0a2018-10-15 10:42:14 +08007824#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007825 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7826 (__v4sf)(__m128)(B), \
7827 (__v4sf)(__m128)(C), (__mmask8)(U), \
7828 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007829
Logan Chien55afb0a2018-10-15 10:42:14 +08007830static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007831_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7832{
Logan Chien55afb0a2018-10-15 10:42:14 +08007833 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7834 (__v4sf)__X,
7835 (__v4sf)__Y,
7836 (__mmask8)__U,
7837 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08007838}
7839
Logan Chien55afb0a2018-10-15 10:42:14 +08007840#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007841 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7842 (__v4sf)(__m128)(X), \
7843 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7844 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007845
Logan Chien55afb0a2018-10-15 10:42:14 +08007846static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007847_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7848{
Logan Chien55afb0a2018-10-15 10:42:14 +08007849 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7850 (__v4sf)__A,
7851 -(__v4sf)__B,
7852 (__mmask8)__U,
7853 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08007854}
7855
Logan Chien55afb0a2018-10-15 10:42:14 +08007856#define _mm_fmsub_round_ss(A, B, C, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007857 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7858 (__v4sf)(__m128)(B), \
7859 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7860 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007861
Logan Chien55afb0a2018-10-15 10:42:14 +08007862#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007863 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7864 (__v4sf)(__m128)(A), \
7865 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7866 (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08007867
7868static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007869_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7870{
Logan Chien55afb0a2018-10-15 10:42:14 +08007871 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7872 (__v4sf)__B,
7873 -(__v4sf)__C,
7874 (__mmask8)__U,
7875 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08007876}
7877
Logan Chien55afb0a2018-10-15 10:42:14 +08007878#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007879 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7880 (__v4sf)(__m128)(B), \
7881 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7882 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007883
Logan Chien55afb0a2018-10-15 10:42:14 +08007884static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007885_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7886{
Logan Chien55afb0a2018-10-15 10:42:14 +08007887 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7888 (__v4sf)__X,
7889 (__v4sf)__Y,
7890 (__mmask8)__U,
7891 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08007892}
7893
Logan Chien55afb0a2018-10-15 10:42:14 +08007894#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007895 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7896 (__v4sf)(__m128)(X), \
7897 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7898 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007899
Logan Chien55afb0a2018-10-15 10:42:14 +08007900static __inline__ __m128 __DEFAULT_FN_ATTRS128
7901_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7902{
7903 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7904 -(__v4sf)__A,
7905 (__v4sf)__B,
7906 (__mmask8)__U,
7907 _MM_FROUND_CUR_DIRECTION);
7908}
7909
7910#define _mm_fnmadd_round_ss(A, B, C, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007911 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7912 -(__v4sf)(__m128)(B), \
7913 (__v4sf)(__m128)(C), (__mmask8)-1, \
7914 (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08007915
7916#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007917 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7918 -(__v4sf)(__m128)(A), \
7919 (__v4sf)(__m128)(B), (__mmask8)(U), \
7920 (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08007921
7922static __inline__ __m128 __DEFAULT_FN_ATTRS128
7923_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7924{
7925 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7926 -(__v4sf)__B,
7927 (__v4sf)__C,
7928 (__mmask8)__U,
7929 _MM_FROUND_CUR_DIRECTION);
7930}
7931
7932#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007933 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7934 -(__v4sf)(__m128)(B), \
7935 (__v4sf)(__m128)(C), (__mmask8)(U), \
7936 (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08007937
7938static __inline__ __m128 __DEFAULT_FN_ATTRS128
7939_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7940{
7941 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7942 -(__v4sf)__X,
7943 (__v4sf)__Y,
7944 (__mmask8)__U,
7945 _MM_FROUND_CUR_DIRECTION);
7946}
7947
7948#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007949 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7950 -(__v4sf)(__m128)(X), \
7951 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7952 (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08007953
7954static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007955_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7956{
Logan Chien55afb0a2018-10-15 10:42:14 +08007957 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7958 -(__v4sf)__A,
7959 -(__v4sf)__B,
7960 (__mmask8)__U,
7961 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08007962}
7963
Logan Chien55afb0a2018-10-15 10:42:14 +08007964#define _mm_fnmsub_round_ss(A, B, C, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007965 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7966 -(__v4sf)(__m128)(B), \
7967 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7968 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007969
Logan Chien55afb0a2018-10-15 10:42:14 +08007970#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007971 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7972 -(__v4sf)(__m128)(A), \
7973 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7974 (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08007975
7976static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007977_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7978{
Logan Chien55afb0a2018-10-15 10:42:14 +08007979 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7980 -(__v4sf)__B,
7981 -(__v4sf)__C,
7982 (__mmask8)__U,
7983 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08007984}
7985
Logan Chien55afb0a2018-10-15 10:42:14 +08007986#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08007987 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7988 -(__v4sf)(__m128)(B), \
7989 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7990 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08007991
Logan Chien55afb0a2018-10-15 10:42:14 +08007992static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007993_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7994{
Logan Chien55afb0a2018-10-15 10:42:14 +08007995 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7996 -(__v4sf)__X,
7997 (__v4sf)__Y,
7998 (__mmask8)__U,
7999 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008000}
8001
Logan Chien55afb0a2018-10-15 10:42:14 +08008002#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08008003 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
8004 -(__v4sf)(__m128)(X), \
8005 (__v4sf)(__m128)(Y), (__mmask8)(U), \
8006 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008007
Logan Chien55afb0a2018-10-15 10:42:14 +08008008static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008009_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8010{
Logan Chien55afb0a2018-10-15 10:42:14 +08008011 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8012 (__v2df)__A,
8013 (__v2df)__B,
8014 (__mmask8)__U,
8015 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008016}
8017
Logan Chien55afb0a2018-10-15 10:42:14 +08008018#define _mm_fmadd_round_sd(A, B, C, R) \
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08008019 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8020 (__v2df)(__m128d)(B), \
8021 (__v2df)(__m128d)(C), (__mmask8)-1, \
8022 (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008023
/* mask form of fmadd_round_sd: expands to
 * __builtin_ia32_vfmaddsd3_mask(W, A, B, U, R). */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08008029
8030static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008031_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8032{
Logan Chien55afb0a2018-10-15 10:42:14 +08008033 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8034 (__v2df)__B,
8035 (__v2df)__C,
8036 (__mmask8)__U,
8037 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008038}
8039
/* maskz form of fmadd_round_sd: expands to
 * __builtin_ia32_vfmaddsd3_maskz(A, B, C, U, R). */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008045
Logan Chien55afb0a2018-10-15 10:42:14 +08008046static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008047_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8048{
Logan Chien55afb0a2018-10-15 10:42:14 +08008049 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8050 (__v2df)__X,
8051 (__v2df)__Y,
8052 (__mmask8)__U,
8053 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008054}
8055
/* mask3 form of fmadd_round_sd: expands to
 * __builtin_ia32_vfmaddsd3_mask3(W, X, Y, U, R). */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008061
Logan Chien55afb0a2018-10-15 10:42:14 +08008062static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008063_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8064{
Logan Chien55afb0a2018-10-15 10:42:14 +08008065 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8066 (__v2df)__A,
8067 -(__v2df)__B,
8068 (__mmask8)__U,
8069 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008070}
8071
/* fmsub_round_sd: expands to __builtin_ia32_vfmaddsd3_mask(A, B, -C, mask, R)
 * with (__mmask8)-1 supplied as the mask. */
#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), \
      (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008077
/* mask form of fmsub_round_sd: expands to
 * __builtin_ia32_vfmaddsd3_mask(W, A, -B, U, R). */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08008083
8084static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008085_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8086{
Logan Chien55afb0a2018-10-15 10:42:14 +08008087 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8088 (__v2df)__B,
8089 -(__v2df)__C,
8090 (__mmask8)__U,
8091 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008092}
8093
/* maskz form of fmsub_round_sd: expands to
 * __builtin_ia32_vfmaddsd3_maskz(A, B, -C, U, R). */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008099
Logan Chien55afb0a2018-10-15 10:42:14 +08008100static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008101_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8102{
Logan Chien55afb0a2018-10-15 10:42:14 +08008103 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8104 (__v2df)__X,
8105 (__v2df)__Y,
8106 (__mmask8)__U,
8107 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008108}
8109
/* mask3 form of fmsub_round_sd: expands to
 * __builtin_ia32_vfmsubsd3_mask3(W, X, Y, U, R). */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3( \
      (__v2df)(__m128d)(W), (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008115
Logan Chien55afb0a2018-10-15 10:42:14 +08008116static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008117_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8118{
Logan Chien55afb0a2018-10-15 10:42:14 +08008119 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8120 -(__v2df)__A,
8121 (__v2df)__B,
8122 (__mmask8)__U,
8123 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008124}
8125
/* fnmadd_round_sd: expands to __builtin_ia32_vfmaddsd3_mask(A, -B, C, mask, R)
 * with (__mmask8)-1 supplied as the mask. */
#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), (__v2df)(__m128d)(C), \
      (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008131
/* mask form of fnmadd_round_sd: expands to
 * __builtin_ia32_vfmaddsd3_mask(W, -A, B, U, R). */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), -(__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08008137
8138static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008139_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8140{
Logan Chien55afb0a2018-10-15 10:42:14 +08008141 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8142 -(__v2df)__B,
8143 (__v2df)__C,
8144 (__mmask8)__U,
8145 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008146}
8147
/* maskz form of fnmadd_round_sd: expands to
 * __builtin_ia32_vfmaddsd3_maskz(A, -B, C, U, R). */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), (__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008153
Logan Chien55afb0a2018-10-15 10:42:14 +08008154static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008155_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8156{
Logan Chien55afb0a2018-10-15 10:42:14 +08008157 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8158 -(__v2df)__X,
8159 (__v2df)__Y,
8160 (__mmask8)__U,
8161 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008162}
8163
/* mask3 form of fnmadd_round_sd: expands to
 * __builtin_ia32_vfmaddsd3_mask3(W, -X, Y, U, R). */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), -(__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008169
Logan Chien55afb0a2018-10-15 10:42:14 +08008170static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008171_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8172{
Logan Chien55afb0a2018-10-15 10:42:14 +08008173 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8174 -(__v2df)__A,
8175 -(__v2df)__B,
8176 (__mmask8)__U,
8177 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008178}
8179
/* fnmsub_round_sd: expands to
 * __builtin_ia32_vfmaddsd3_mask(A, -B, -C, mask, R) with (__mmask8)-1
 * supplied as the mask. */
#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), \
      (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008185
/* mask form of fnmsub_round_sd: expands to
 * __builtin_ia32_vfmaddsd3_mask(W, -A, -B, U, R). */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), -(__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
Logan Chien55afb0a2018-10-15 10:42:14 +08008191
8192static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008193_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8194{
Logan Chien55afb0a2018-10-15 10:42:14 +08008195 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8196 -(__v2df)__B,
8197 -(__v2df)__C,
8198 (__mmask8)__U,
8199 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008200}
8201
/* maskz form of fnmsub_round_sd: expands to
 * __builtin_ia32_vfmaddsd3_maskz(A, -B, -C, U, R). */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008208
Logan Chien55afb0a2018-10-15 10:42:14 +08008209static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008210_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8211{
Logan Chien55afb0a2018-10-15 10:42:14 +08008212 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8213 -(__v2df)__X,
8214 (__v2df)__Y,
8215 (__mmask8)__U,
8216 _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08008217}
8218
/* mask3 form of fnmsub_round_sd: expands to
 * __builtin_ia32_vfmsubsd3_mask3(W, -X, Y, U, R). */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3( \
      (__v2df)(__m128d)(W), -(__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008224
/* Expands to the permdf512 builtin applied to X (as __v8df) with control C
 * converted to int. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512( \
      (__v8df)(__m512d)(X), (int)(C)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008227
/* Passes the result of _mm512_permutex_pd(X, C) and W to the selectpd_512
 * builtin under mask U. */
#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)(__m512d)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008232
/* Passes the result of _mm512_permutex_pd(X, C) and _mm512_setzero_pd() to
 * the selectpd_512 builtin under mask U. */
#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08008237
/* Expands to the permdi512 builtin applied to X (as __v8di) with control C
 * converted to int. */
#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512( \
      (__v8di)(__m512i)(X), (int)(C)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008240
/* Passes the result of _mm512_permutex_epi64(X, C) and W to the selectq_512
 * builtin under mask U. */
#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)(__m512i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008245
/* Passes the result of _mm512_permutex_epi64(X, C) and
 * _mm512_setzero_si512() to the selectq_512 builtin under mask U. */
#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08008250
Logan Chien55afb0a2018-10-15 10:42:14 +08008251static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008252_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8253{
Logan Chien55afb0a2018-10-15 10:42:14 +08008254 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
Logan Chien2833ffb2018-10-09 10:03:24 +08008255}
8256
Logan Chien55afb0a2018-10-15 10:42:14 +08008257static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008258_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8259{
Logan Chien55afb0a2018-10-15 10:42:14 +08008260 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8261 (__v8df)_mm512_permutexvar_pd(__X, __Y),
8262 (__v8df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08008263}
8264
Logan Chien55afb0a2018-10-15 10:42:14 +08008265static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008266_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8267{
Logan Chien55afb0a2018-10-15 10:42:14 +08008268 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8269 (__v8df)_mm512_permutexvar_pd(__X, __Y),
8270 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08008271}
8272
Logan Chien55afb0a2018-10-15 10:42:14 +08008273static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008274_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8275{
Logan Chien55afb0a2018-10-15 10:42:14 +08008276 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
Logan Chien2833ffb2018-10-09 10:03:24 +08008277}
8278
Logan Chien55afb0a2018-10-15 10:42:14 +08008279static __inline__ __m512i __DEFAULT_FN_ATTRS512
8280_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8281{
8282 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8283 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8284 (__v8di)_mm512_setzero_si512());
8285}
8286
8287static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008288_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8289 __m512i __Y)
8290{
Logan Chien55afb0a2018-10-15 10:42:14 +08008291 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8292 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8293 (__v8di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08008294}
8295
Logan Chien55afb0a2018-10-15 10:42:14 +08008296static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008297_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8298{
Logan Chien55afb0a2018-10-15 10:42:14 +08008299 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
Logan Chien2833ffb2018-10-09 10:03:24 +08008300}
8301
Logan Chien55afb0a2018-10-15 10:42:14 +08008302static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008303_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8304{
Logan Chien55afb0a2018-10-15 10:42:14 +08008305 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8306 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8307 (__v16sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08008308}
8309
Logan Chien55afb0a2018-10-15 10:42:14 +08008310static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008311_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8312{
Logan Chien55afb0a2018-10-15 10:42:14 +08008313 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8314 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8315 (__v16sf)_mm512_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08008316}
8317
Logan Chien55afb0a2018-10-15 10:42:14 +08008318static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008319_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8320{
Logan Chien55afb0a2018-10-15 10:42:14 +08008321 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
Logan Chien2833ffb2018-10-09 10:03:24 +08008322}
8323
/* Alternate spelling: _mm512_permutevar_epi32 is a plain alias of
 * _mm512_permutexvar_epi32. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8325
8326static __inline__ __m512i __DEFAULT_FN_ATTRS512
8327_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8328{
8329 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8330 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8331 (__v16si)_mm512_setzero_si512());
8332}
8333
8334static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008335_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8336 __m512i __Y)
8337{
Logan Chien55afb0a2018-10-15 10:42:14 +08008338 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8339 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8340 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08008341}
8342
/* Alternate spelling: _mm512_mask_permutevar_epi32 is a plain alias of
 * _mm512_mask_permutexvar_epi32. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8344
Logan Chienb0c84022018-11-09 16:19:54 +08008345static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Logan Chien2833ffb2018-10-09 10:03:24 +08008346_mm512_kand (__mmask16 __A, __mmask16 __B)
8347{
8348 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8349}
8350
Logan Chienb0c84022018-11-09 16:19:54 +08008351static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Logan Chien2833ffb2018-10-09 10:03:24 +08008352_mm512_kandn (__mmask16 __A, __mmask16 __B)
8353{
8354 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8355}
8356
Logan Chienb0c84022018-11-09 16:19:54 +08008357static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Logan Chien2833ffb2018-10-09 10:03:24 +08008358_mm512_kor (__mmask16 __A, __mmask16 __B)
8359{
8360 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8361}
8362
Logan Chienb0c84022018-11-09 16:19:54 +08008363static __inline__ int __DEFAULT_FN_ATTRS
Logan Chien2833ffb2018-10-09 10:03:24 +08008364_mm512_kortestc (__mmask16 __A, __mmask16 __B)
8365{
8366 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8367}
8368
Logan Chienb0c84022018-11-09 16:19:54 +08008369static __inline__ int __DEFAULT_FN_ATTRS
Logan Chien2833ffb2018-10-09 10:03:24 +08008370_mm512_kortestz (__mmask16 __A, __mmask16 __B)
8371{
8372 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8373}
8374
Logan Chienb0c84022018-11-09 16:19:54 +08008375static __inline__ unsigned char __DEFAULT_FN_ATTRS
8376_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
8377{
8378 return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8379}
8380
8381static __inline__ unsigned char __DEFAULT_FN_ATTRS
8382_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
8383{
8384 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8385}
8386
8387static __inline__ unsigned char __DEFAULT_FN_ATTRS
8388_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8389 *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8390 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8391}
8392
8393static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Logan Chien2833ffb2018-10-09 10:03:24 +08008394_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8395{
8396 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8397}
8398
Logan Chienb0c84022018-11-09 16:19:54 +08008399static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Logan Chien2833ffb2018-10-09 10:03:24 +08008400_mm512_kxnor (__mmask16 __A, __mmask16 __B)
8401{
8402 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8403}
8404
Logan Chienb0c84022018-11-09 16:19:54 +08008405static __inline__ __mmask16 __DEFAULT_FN_ATTRS
Logan Chien2833ffb2018-10-09 10:03:24 +08008406_mm512_kxor (__mmask16 __A, __mmask16 __B)
8407{
8408 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8409}
8410
/* _k<op>_mask16 spellings; each is a plain alias of the _mm512_k<op> name. */
#define _kand_mask16  _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16  _mm512_knot
#define _kor_mask16   _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16  _mm512_kxor
8417
/* Expands to the kshiftlihi builtin on mask A with count I converted to
 * unsigned int. */
#define _kshiftli_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftlihi( \
      (__mmask16)(A), (unsigned int)(I)))
Logan Chienb0c84022018-11-09 16:19:54 +08008420
/* Expands to the kshiftrihi builtin on mask A with count I converted to
 * unsigned int. */
#define _kshiftri_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftrihi( \
      (__mmask16)(A), (unsigned int)(I)))
Logan Chienb0c84022018-11-09 16:19:54 +08008423
8424static __inline__ unsigned int __DEFAULT_FN_ATTRS
8425_cvtmask16_u32(__mmask16 __A) {
8426 return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8427}
8428
8429static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8430_cvtu32_mask16(unsigned int __A) {
8431 return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8432}
8433
8434static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8435_load_mask16(__mmask16 *__A) {
8436 return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8437}
8438
8439static __inline__ void __DEFAULT_FN_ATTRS
8440_store_mask16(__mmask16 *__A, __mmask16 __B) {
8441 *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8442}
8443
Logan Chien55afb0a2018-10-15 10:42:14 +08008444static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chienbedbf4f2020-01-06 19:35:19 -08008445_mm512_stream_si512 (void * __P, __m512i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08008446{
Logan Chien55afb0a2018-10-15 10:42:14 +08008447 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8448 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
Logan Chien2833ffb2018-10-09 10:03:24 +08008449}
8450
Logan Chien55afb0a2018-10-15 10:42:14 +08008451static __inline__ __m512i __DEFAULT_FN_ATTRS512
8452_mm512_stream_load_si512 (void const *__P)
Logan Chien2833ffb2018-10-09 10:03:24 +08008453{
Logan Chien55afb0a2018-10-15 10:42:14 +08008454 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8455 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
Logan Chien2833ffb2018-10-09 10:03:24 +08008456}
8457
Logan Chien55afb0a2018-10-15 10:42:14 +08008458static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chienbedbf4f2020-01-06 19:35:19 -08008459_mm512_stream_pd (void *__P, __m512d __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08008460{
Logan Chien55afb0a2018-10-15 10:42:14 +08008461 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8462 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
Logan Chien2833ffb2018-10-09 10:03:24 +08008463}
8464
Logan Chien55afb0a2018-10-15 10:42:14 +08008465static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chienbedbf4f2020-01-06 19:35:19 -08008466_mm512_stream_ps (void *__P, __m512 __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08008467{
Logan Chien55afb0a2018-10-15 10:42:14 +08008468 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8469 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
Logan Chien2833ffb2018-10-09 10:03:24 +08008470}
8471
Logan Chien55afb0a2018-10-15 10:42:14 +08008472static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008473_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8474{
8475 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8476 (__v8df) __W,
8477 (__mmask8) __U);
8478}
8479
Logan Chien55afb0a2018-10-15 10:42:14 +08008480static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008481_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
8482{
8483 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8484 (__v8df)
8485 _mm512_setzero_pd (),
8486 (__mmask8) __U);
8487}
8488
Logan Chien55afb0a2018-10-15 10:42:14 +08008489static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008490_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8491{
8492 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8493 (__v8di) __W,
8494 (__mmask8) __U);
8495}
8496
Logan Chien55afb0a2018-10-15 10:42:14 +08008497static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008498_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
8499{
8500 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8501 (__v8di)
8502 _mm512_setzero_si512 (),
8503 (__mmask8) __U);
8504}
8505
Logan Chien55afb0a2018-10-15 10:42:14 +08008506static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008507_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8508{
8509 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8510 (__v16sf) __W,
8511 (__mmask16) __U);
8512}
8513
Logan Chien55afb0a2018-10-15 10:42:14 +08008514static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008515_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
8516{
8517 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8518 (__v16sf)
8519 _mm512_setzero_ps (),
8520 (__mmask16) __U);
8521}
8522
Logan Chien55afb0a2018-10-15 10:42:14 +08008523static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008524_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8525{
8526 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8527 (__v16si) __W,
8528 (__mmask16) __U);
8529}
8530
Logan Chien55afb0a2018-10-15 10:42:14 +08008531static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008532_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
8533{
8534 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8535 (__v16si)
8536 _mm512_setzero_si512 (),
8537 (__mmask16) __U);
8538}
8539
/* Expands to the cmpss_mask builtin on X and Y with predicate P, (__mmask8)-1
 * as the mask, and R as the final int argument. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(P), (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008544
/* Expands to the cmpss_mask builtin on X and Y with predicate P, M as the
 * mask, and R as the final int argument. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(P), (__mmask8)(M), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008549
/* Expands to the cmpss_mask builtin on X and Y with predicate P, (__mmask8)-1
 * as the mask, and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(P), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08008555
/* Expands to the cmpss_mask builtin on X and Y with predicate P, M as the
 * mask, and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(P), (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08008561
/* Expands to the cmpsd_mask builtin on X and Y with predicate P, (__mmask8)-1
 * as the mask, and R as the final int argument. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(P), (__mmask8)-1, (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008566
/* Expands to the cmpsd_mask builtin on X and Y with predicate P, M as the
 * mask, and R as the final int argument. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(P), (__mmask8)(M), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008571
/* Expands to the cmpsd_mask builtin on X and Y with predicate P, (__mmask8)-1
 * as the mask, and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(P), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08008577
/* Expands to the cmpsd_mask builtin on X and Y with predicate P, M as the
 * mask, and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(P), (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
Logan Chien2833ffb2018-10-09 10:03:24 +08008583
Logan Chien55afb0a2018-10-15 10:42:14 +08008584/* Bit Test */
8585
8586static __inline __mmask16 __DEFAULT_FN_ATTRS512
8587_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8588{
8589 return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
8590 _mm512_setzero_si512());
8591}
8592
8593static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8594_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8595{
8596 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8597 _mm512_setzero_si512());
8598}
8599
8600static __inline __mmask8 __DEFAULT_FN_ATTRS512
8601_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8602{
8603 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8604 _mm512_setzero_si512());
8605}
8606
8607static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8608_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8609{
8610 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8611 _mm512_setzero_si512());
8612}
8613
8614static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8615_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8616{
8617 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8618 _mm512_setzero_si512());
8619}
8620
8621static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8622_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8623{
8624 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8625 _mm512_setzero_si512());
8626}
8627
8628static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8629_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8630{
8631 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8632 _mm512_setzero_si512());
8633}
8634
8635static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8636_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8637{
8638 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8639 _mm512_setzero_si512());
8640}
8641
8642static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008643_mm512_movehdup_ps (__m512 __A)
8644{
8645 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8646 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8647}
8648
Logan Chien55afb0a2018-10-15 10:42:14 +08008649static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008650_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8651{
8652 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8653 (__v16sf)_mm512_movehdup_ps(__A),
8654 (__v16sf)__W);
8655}
8656
Logan Chien55afb0a2018-10-15 10:42:14 +08008657static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008658_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
8659{
8660 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8661 (__v16sf)_mm512_movehdup_ps(__A),
8662 (__v16sf)_mm512_setzero_ps());
8663}
8664
Logan Chien55afb0a2018-10-15 10:42:14 +08008665static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008666_mm512_moveldup_ps (__m512 __A)
8667{
8668 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8669 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8670}
8671
Logan Chien55afb0a2018-10-15 10:42:14 +08008672static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008673_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8674{
8675 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8676 (__v16sf)_mm512_moveldup_ps(__A),
8677 (__v16sf)__W);
8678}
8679
Logan Chien55afb0a2018-10-15 10:42:14 +08008680static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008681_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
8682{
8683 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8684 (__v16sf)_mm512_moveldup_ps(__A),
8685 (__v16sf)_mm512_setzero_ps());
8686}
8687
Logan Chien55afb0a2018-10-15 10:42:14 +08008688static __inline__ __m128 __DEFAULT_FN_ATTRS128
8689_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8690{
8691 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8692}
Logan Chien2833ffb2018-10-09 10:03:24 +08008693
Logan Chien55afb0a2018-10-15 10:42:14 +08008694static __inline__ __m128 __DEFAULT_FN_ATTRS128
8695_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
8696{
8697 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8698 _mm_setzero_ps());
8699}
8700
8701static __inline__ __m128d __DEFAULT_FN_ATTRS128
8702_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8703{
8704 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8705}
8706
8707static __inline__ __m128d __DEFAULT_FN_ATTRS128
8708_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
8709{
8710 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8711 _mm_setzero_pd());
8712}
8713
8714static __inline__ void __DEFAULT_FN_ATTRS128
8715_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8716{
8717 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8718}
8719
8720static __inline__ void __DEFAULT_FN_ATTRS128
8721_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8722{
8723 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8724}
8725
8726static __inline__ __m128 __DEFAULT_FN_ATTRS128
8727_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8728{
8729 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8730 (__v4sf)_mm_setzero_ps(),
8731 0, 4, 4, 4);
8732
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07008733 return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
Logan Chien55afb0a2018-10-15 10:42:14 +08008734}
8735
8736static __inline__ __m128 __DEFAULT_FN_ATTRS128
8737_mm_maskz_load_ss (__mmask8 __U, const float* __A)
8738{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07008739 return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08008740 (__v4sf) _mm_setzero_ps(),
8741 __U & 1);
8742}
8743
8744static __inline__ __m128d __DEFAULT_FN_ATTRS128
8745_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8746{
8747 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8748 (__v2df)_mm_setzero_pd(),
8749 0, 2);
8750
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07008751 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
Logan Chien55afb0a2018-10-15 10:42:14 +08008752}
8753
8754static __inline__ __m128d __DEFAULT_FN_ATTRS128
8755_mm_maskz_load_sd (__mmask8 __U, const double* __A)
8756{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07008757 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08008758 (__v2df) _mm_setzero_pd(),
8759 __U & 1);
8760}
8761
/* Shuffles the 32-bit elements of A via __builtin_ia32_pshufd512, with I
 * forwarded (cast to int) as the control operand.
 * NOTE(review): presumably a macro because I must be a compile-time
 * constant - confirm against the builtin's definition. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

/* Merge-masked form: selects per 32-bit element, under mask U, between the
 * shuffled A and W. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

/* Zero-masked form: selects per 32-bit element, under mask U, between the
 * shuffled A and zero. */
#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08008774
Logan Chien55afb0a2018-10-15 10:42:14 +08008775static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008776_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8777{
8778 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8779 (__v8df) __W,
8780 (__mmask8) __U);
8781}
8782
Logan Chien55afb0a2018-10-15 10:42:14 +08008783static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008784_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
8785{
8786 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8787 (__v8df) _mm512_setzero_pd (),
8788 (__mmask8) __U);
8789}
8790
Logan Chien55afb0a2018-10-15 10:42:14 +08008791static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008792_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8793{
8794 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8795 (__v8di) __W,
8796 (__mmask8) __U);
8797}
8798
Logan Chien55afb0a2018-10-15 10:42:14 +08008799static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008800_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
8801{
8802 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08008803 (__v8di) _mm512_setzero_si512 (),
Logan Chien2833ffb2018-10-09 10:03:24 +08008804 (__mmask8) __U);
8805}
8806
Logan Chien55afb0a2018-10-15 10:42:14 +08008807static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008808_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8809{
8810 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8811 (__v8df) __W,
8812 (__mmask8) __U);
8813}
8814
Logan Chien55afb0a2018-10-15 10:42:14 +08008815static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008816_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
8817{
8818 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8819 (__v8df) _mm512_setzero_pd(),
8820 (__mmask8) __U);
8821}
8822
Logan Chien55afb0a2018-10-15 10:42:14 +08008823static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008824_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8825{
8826 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8827 (__v8di) __W,
8828 (__mmask8) __U);
8829}
8830
Logan Chien55afb0a2018-10-15 10:42:14 +08008831static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008832_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
8833{
8834 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
Logan Chien55afb0a2018-10-15 10:42:14 +08008835 (__v8di) _mm512_setzero_si512(),
Logan Chien2833ffb2018-10-09 10:03:24 +08008836 (__mmask8) __U);
8837}
8838
Logan Chien55afb0a2018-10-15 10:42:14 +08008839static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008840_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8841{
8842 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8843 (__v16sf) __W,
8844 (__mmask16) __U);
8845}
8846
Logan Chien55afb0a2018-10-15 10:42:14 +08008847static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008848_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
8849{
8850 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8851 (__v16sf) _mm512_setzero_ps(),
8852 (__mmask16) __U);
8853}
8854
Logan Chien55afb0a2018-10-15 10:42:14 +08008855static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008856_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8857{
8858 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8859 (__v16si) __W,
8860 (__mmask16) __U);
8861}
8862
Logan Chien55afb0a2018-10-15 10:42:14 +08008863static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008864_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
8865{
8866 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
Logan Chien55afb0a2018-10-15 10:42:14 +08008867 (__v16si) _mm512_setzero_si512(),
Logan Chien2833ffb2018-10-09 10:03:24 +08008868 (__mmask16) __U);
8869}
8870
Logan Chien55afb0a2018-10-15 10:42:14 +08008871static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008872_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8873{
8874 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8875 (__v16sf) __W,
8876 (__mmask16) __U);
8877}
8878
Logan Chien55afb0a2018-10-15 10:42:14 +08008879static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008880_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
8881{
8882 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8883 (__v16sf) _mm512_setzero_ps(),
8884 (__mmask16) __U);
8885}
8886
Logan Chien55afb0a2018-10-15 10:42:14 +08008887static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008888_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8889{
8890 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8891 (__v16si) __W,
8892 (__mmask16) __U);
8893}
8894
Logan Chien55afb0a2018-10-15 10:42:14 +08008895static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008896_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
8897{
8898 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08008899 (__v16si) _mm512_setzero_si512(),
Logan Chien2833ffb2018-10-09 10:03:24 +08008900 (__mmask16) __U);
8901}
8902
/* Converts the eight floats of the __m256 A to doubles via
 * __builtin_ia32_cvtps2pd512_mask, with R forwarded (cast to int) as the last
 * operand. Unmasked form: all-ones mask, undefined pass-through. */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked form: mask U, pass-through W. */
#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked form: mask U, zero pass-through. */
#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08008917
Logan Chien55afb0a2018-10-15 10:42:14 +08008918static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008919_mm512_cvtps_pd (__m256 __A)
8920{
Logan Chien55afb0a2018-10-15 10:42:14 +08008921 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
Logan Chien2833ffb2018-10-09 10:03:24 +08008922}
8923
Logan Chien55afb0a2018-10-15 10:42:14 +08008924static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008925_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
8926{
Logan Chien55afb0a2018-10-15 10:42:14 +08008927 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8928 (__v8df)_mm512_cvtps_pd(__A),
8929 (__v8df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08008930}
8931
Logan Chien55afb0a2018-10-15 10:42:14 +08008932static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008933_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
8934{
Logan Chien55afb0a2018-10-15 10:42:14 +08008935 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8936 (__v8df)_mm512_cvtps_pd(__A),
8937 (__v8df)_mm512_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08008938}
8939
Logan Chien55afb0a2018-10-15 10:42:14 +08008940static __inline__ __m512d __DEFAULT_FN_ATTRS512
8941_mm512_cvtpslo_pd (__m512 __A)
8942{
8943 return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8944}
8945
8946static __inline__ __m512d __DEFAULT_FN_ATTRS512
8947_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
8948{
8949 return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8950}
8951
8952static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008953_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
8954{
8955 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8956 (__v8df) __A,
8957 (__v8df) __W);
8958}
8959
Logan Chien55afb0a2018-10-15 10:42:14 +08008960static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008961_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
8962{
8963 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8964 (__v8df) __A,
8965 (__v8df) _mm512_setzero_pd ());
8966}
8967
Logan Chien55afb0a2018-10-15 10:42:14 +08008968static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008969_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
8970{
8971 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8972 (__v16sf) __A,
8973 (__v16sf) __W);
8974}
8975
Logan Chien55afb0a2018-10-15 10:42:14 +08008976static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008977_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
8978{
8979 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8980 (__v16sf) __A,
8981 (__v16sf) _mm512_setzero_ps ());
8982}
8983
Logan Chien55afb0a2018-10-15 10:42:14 +08008984static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008985_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
8986{
8987 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8988 (__mmask8) __U);
8989}
8990
Logan Chien55afb0a2018-10-15 10:42:14 +08008991static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008992_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
8993{
8994 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8995 (__mmask8) __U);
8996}
8997
Logan Chien55afb0a2018-10-15 10:42:14 +08008998static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08008999_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9000{
9001 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9002 (__mmask16) __U);
9003}
9004
Logan Chien55afb0a2018-10-15 10:42:14 +08009005static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08009006_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9007{
9008 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9009 (__mmask16) __U);
9010}
9011
/* Scalar double-to-float conversion via __builtin_ia32_cvtsd2ss_round_mask,
 * taking the float vector A and the double vector B, with R forwarded (cast
 * to int) as the last operand. Unmasked form: all-ones mask, undefined
 * pass-through. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_undefined_ps(), \
                                              (__mmask8)-1, (int)(R)))

/* Merge-masked form: mask U, pass-through W. */
#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

/* Zero-masked form: mask U, zero pass-through. */
#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08009029
Logan Chien55afb0a2018-10-15 10:42:14 +08009030static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08009031_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9032{
Logan Chien55afb0a2018-10-15 10:42:14 +08009033 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9034 (__v2df)__B,
9035 (__v4sf)__W,
9036 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08009037}
9038
Logan Chien55afb0a2018-10-15 10:42:14 +08009039static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08009040_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9041{
Logan Chien55afb0a2018-10-15 10:42:14 +08009042 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9043 (__v2df)__B,
9044 (__v4sf)_mm_setzero_ps(),
9045 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08009046}
9047
/* The _i32 spellings are plain aliases for the corresponding _si32
 * intrinsics. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
/* The _i64 aliases are only defined when __x86_64__ is defined. */
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08009058
#ifdef __x86_64__
/* Signed 64-bit integer B to double via __builtin_ia32_cvtsi2sd64, with A as
 * the vector operand and R forwarded (cast to int) as the last operand. */
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

/* Same expansion as _mm_cvt_roundi64_sd under the _si64 spelling. */
#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08009068
/* Signed 32-bit integer B to float via __builtin_ia32_cvtsi2ss32, with A as
 * the vector operand and R forwarded (cast to int) as the last operand. */
#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

/* Same expansion as _mm_cvt_roundsi32_ss under the _i32 spelling. */
#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08009074
#ifdef __x86_64__
/* Signed 64-bit integer B to float via __builtin_ia32_cvtsi2ss64, with A as
 * the vector operand and R forwarded (cast to int) as the last operand. */
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

/* Same expansion as _mm_cvt_roundsi64_ss under the _i64 spelling. */
#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08009084
/* Scalar float-to-double conversion via __builtin_ia32_cvtss2sd_round_mask,
 * taking the double vector A and the float vector B, with R forwarded (cast
 * to int) as the last operand. Unmasked form: all-ones mask, undefined
 * pass-through. */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_undefined_pd(), \
                                               (__mmask8)-1, (int)(R)))

/* Merge-masked form: mask U, pass-through W. */
#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

/* Zero-masked form: mask U, zero pass-through. */
#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08009102
Logan Chien55afb0a2018-10-15 10:42:14 +08009103static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08009104_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9105{
Logan Chien55afb0a2018-10-15 10:42:14 +08009106 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9107 (__v4sf)__B,
9108 (__v2df)__W,
9109 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08009110}
9111
Logan Chien55afb0a2018-10-15 10:42:14 +08009112static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08009113_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9114{
Logan Chien55afb0a2018-10-15 10:42:14 +08009115 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9116 (__v4sf)__B,
9117 (__v2df)_mm_setzero_pd(),
9118 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
Logan Chien2833ffb2018-10-09 10:03:24 +08009119}
9120
Logan Chien55afb0a2018-10-15 10:42:14 +08009121static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08009122_mm_cvtu32_sd (__m128d __A, unsigned __B)
9123{
Logan Chien55afb0a2018-10-15 10:42:14 +08009124 __A[0] = __B;
9125 return __A;
Logan Chien2833ffb2018-10-09 10:03:24 +08009126}
9127
Logan Chien55afb0a2018-10-15 10:42:14 +08009128#ifdef __x86_64__
/* Unsigned 64-bit integer B to double via __builtin_ia32_cvtusi2sd64, with A
 * as the vector operand and R forwarded (cast to int) as the last operand. */
#define _mm_cvt_roundu64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                       (unsigned long long)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08009132
Logan Chien55afb0a2018-10-15 10:42:14 +08009133static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08009134_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9135{
Logan Chien55afb0a2018-10-15 10:42:14 +08009136 __A[0] = __B;
9137 return __A;
Logan Chien2833ffb2018-10-09 10:03:24 +08009138}
Logan Chien55afb0a2018-10-15 10:42:14 +08009139#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08009140
/* Unsigned 32-bit integer B to float via __builtin_ia32_cvtusi2ss32, with A
 * as the vector operand and R forwarded (cast to int) as the last operand. */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08009144
Logan Chien55afb0a2018-10-15 10:42:14 +08009145static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08009146_mm_cvtu32_ss (__m128 __A, unsigned __B)
9147{
Logan Chien55afb0a2018-10-15 10:42:14 +08009148 __A[0] = __B;
9149 return __A;
Logan Chien2833ffb2018-10-09 10:03:24 +08009150}
9151
Logan Chien55afb0a2018-10-15 10:42:14 +08009152#ifdef __x86_64__
/* Unsigned 64-bit integer B to float via __builtin_ia32_cvtusi2ss64, with A
 * as the vector operand and R forwarded (cast to int) as the last operand. */
#define _mm_cvt_roundu64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                      (unsigned long long)(B), (int)(R)))
Logan Chien2833ffb2018-10-09 10:03:24 +08009156
Logan Chien55afb0a2018-10-15 10:42:14 +08009157static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08009158_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9159{
Logan Chien55afb0a2018-10-15 10:42:14 +08009160 __A[0] = __B;
9161 return __A;
Logan Chien2833ffb2018-10-09 10:03:24 +08009162}
Logan Chien55afb0a2018-10-15 10:42:14 +08009163#endif
Logan Chien2833ffb2018-10-09 10:03:24 +08009164
Logan Chien55afb0a2018-10-15 10:42:14 +08009165static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08009166_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9167{
Logan Chien55afb0a2018-10-15 10:42:14 +08009168 return (__m512i) __builtin_ia32_selectd_512(__M,
9169 (__v16si) _mm512_set1_epi32(__A),
9170 (__v16si) __O);
Logan Chien2833ffb2018-10-09 10:03:24 +08009171}
9172
Logan Chien55afb0a2018-10-15 10:42:14 +08009173static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08009174_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9175{
Logan Chien55afb0a2018-10-15 10:42:14 +08009176 return (__m512i) __builtin_ia32_selectq_512(__M,
9177 (__v8di) _mm512_set1_epi64(__A),
9178 (__v8di) __O);
Logan Chien2833ffb2018-10-09 10:03:24 +08009179}
9180
Logan Chien55afb0a2018-10-15 10:42:14 +08009181static __inline __m512i __DEFAULT_FN_ATTRS512
9182_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9183 char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9184 char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9185 char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9186 char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9187 char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9188 char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9189 char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9190 char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9191 char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9192 char __e4, char __e3, char __e2, char __e1, char __e0) {
9193
9194 return __extension__ (__m512i)(__v64qi)
9195 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9196 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9197 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9198 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9199 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9200 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9201 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9202 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9203}
9204
9205static __inline __m512i __DEFAULT_FN_ATTRS512
9206_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9207 short __e27, short __e26, short __e25, short __e24, short __e23,
9208 short __e22, short __e21, short __e20, short __e19, short __e18,
9209 short __e17, short __e16, short __e15, short __e14, short __e13,
9210 short __e12, short __e11, short __e10, short __e9, short __e8,
9211 short __e7, short __e6, short __e5, short __e4, short __e3,
9212 short __e2, short __e1, short __e0) {
9213 return __extension__ (__m512i)(__v32hi)
9214 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9215 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9216 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9217 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9218}
9219
9220static __inline __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08009221_mm512_set_epi32 (int __A, int __B, int __C, int __D,
9222 int __E, int __F, int __G, int __H,
9223 int __I, int __J, int __K, int __L,
9224 int __M, int __N, int __O, int __P)
9225{
9226 return __extension__ (__m512i)(__v16si)
9227 { __P, __O, __N, __M, __L, __K, __J, __I,
9228 __H, __G, __F, __E, __D, __C, __B, __A };
9229}
9230
/* Reversed-order form of _mm512_set_epi32: arguments are passed on in the
 * opposite order, so e0 is given as the last argument of _mm512_set_epi32. */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
                          e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))
9235
Logan Chien55afb0a2018-10-15 10:42:14 +08009236static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08009237_mm512_set_epi64 (long long __A, long long __B, long long __C,
9238 long long __D, long long __E, long long __F,
9239 long long __G, long long __H)
9240{
9241 return __extension__ (__m512i) (__v8di)
9242 { __H, __G, __F, __E, __D, __C, __B, __A };
9243}
9244
/* Reversed-order form of _mm512_set_epi64: e0 is passed as the last argument
 * of _mm512_set_epi64. */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9247
Logan Chien55afb0a2018-10-15 10:42:14 +08009248static __inline__ __m512d __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08009249_mm512_set_pd (double __A, double __B, double __C, double __D,
9250 double __E, double __F, double __G, double __H)
9251{
9252 return __extension__ (__m512d)
9253 { __H, __G, __F, __E, __D, __C, __B, __A };
9254}
9255
/* Reversed-order form of _mm512_set_pd: e0 is passed as the last argument of
 * _mm512_set_pd. */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9258
Logan Chien55afb0a2018-10-15 10:42:14 +08009259static __inline__ __m512 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08009260_mm512_set_ps (float __A, float __B, float __C, float __D,
9261 float __E, float __F, float __G, float __H,
9262 float __I, float __J, float __K, float __L,
9263 float __M, float __N, float __O, float __P)
9264{
9265 return __extension__ (__m512)
9266 { __P, __O, __N, __M, __L, __K, __J, __I,
9267 __H, __G, __F, __E, __D, __C, __B, __A };
9268}
9269
/* Reversed-order form of _mm512_set_ps: e0 is passed as the last argument of
 * _mm512_set_ps. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
9273
Logan Chien55afb0a2018-10-15 10:42:14 +08009274static __inline__ __m512 __DEFAULT_FN_ATTRS512
9275_mm512_abs_ps(__m512 __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08009276{
Logan Chien55afb0a2018-10-15 10:42:14 +08009277 return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
Logan Chien2833ffb2018-10-09 10:03:24 +08009278}
9279
Logan Chien55afb0a2018-10-15 10:42:14 +08009280static __inline__ __m512 __DEFAULT_FN_ATTRS512
9281_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08009282{
Logan Chien55afb0a2018-10-15 10:42:14 +08009283 return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
Logan Chien2833ffb2018-10-09 10:03:24 +08009284}
9285
Logan Chien55afb0a2018-10-15 10:42:14 +08009286static __inline__ __m512d __DEFAULT_FN_ATTRS512
9287_mm512_abs_pd(__m512d __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08009288{
Logan Chien55afb0a2018-10-15 10:42:14 +08009289 return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
Logan Chien2833ffb2018-10-09 10:03:24 +08009290}
9291
Logan Chien55afb0a2018-10-15 10:42:14 +08009292static __inline__ __m512d __DEFAULT_FN_ATTRS512
9293_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08009294{
Logan Chien55afb0a2018-10-15 10:42:14 +08009295 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +08009296}
9297
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
 * outputs. This class of vector operation forms the basis of many scientific
 * computations. In vector-reduction arithmetic, the evaluation order is
 * independent of the order of the input elements of V.
 *
 * For floating-point intrinsics:
 * 1. When using fadd/fmul intrinsics, the order of operations within the
 *    vector is unspecified (associative math).
 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
 *    produce unspecified results.
 *
 * The bisection method is used. At each step, the vector from the previous
 * step is partitioned in half, and the operation is performed on its two
 * halves. This takes log2(n) steps, where n is the number of elements in the
 * vector.
 */
9313
Logan Chien55afb0a2018-10-15 10:42:14 +08009314static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009315 return __builtin_ia32_reduce_add_q512(__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009316}
9317
9318static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009319 return __builtin_ia32_reduce_mul_q512(__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009320}
9321
9322static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009323 return __builtin_ia32_reduce_and_q512(__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009324}
9325
9326static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009327 return __builtin_ia32_reduce_or_q512(__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009328}
9329
9330static __inline__ long long __DEFAULT_FN_ATTRS512
9331_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
9332 __W = _mm512_maskz_mov_epi64(__M, __W);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009333 return __builtin_ia32_reduce_add_q512(__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009334}
9335
9336static __inline__ long long __DEFAULT_FN_ATTRS512
9337_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
9338 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009339 return __builtin_ia32_reduce_mul_q512(__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009340}
9341
9342static __inline__ long long __DEFAULT_FN_ATTRS512
9343_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
9344 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009345 return __builtin_ia32_reduce_and_q512(__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009346}
9347
9348static __inline__ long long __DEFAULT_FN_ATTRS512
9349_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
9350 __W = _mm512_maskz_mov_epi64(__M, __W);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009351 return __builtin_ia32_reduce_or_q512(__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009352}
Logan Chien55afb0a2018-10-15 10:42:14 +08009353
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009354// -0.0 is used to ignore the start value since it is the neutral value of
9355// floating point addition. For more information, please refer to
9356// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
Logan Chien55afb0a2018-10-15 10:42:14 +08009357static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009358 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009359}
9360
9361static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009362 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009363}
9364
9365static __inline__ double __DEFAULT_FN_ATTRS512
9366_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
9367 __W = _mm512_maskz_mov_pd(__M, __W);
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009368 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009369}
9370
9371static __inline__ double __DEFAULT_FN_ATTRS512
9372_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
9373 __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009374 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009375}
Logan Chien55afb0a2018-10-15 10:42:14 +08009376
9377static __inline__ int __DEFAULT_FN_ATTRS512
9378_mm512_reduce_add_epi32(__m512i __W) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009379 return __builtin_ia32_reduce_add_d512((__v16si)__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009380}
9381
9382static __inline__ int __DEFAULT_FN_ATTRS512
9383_mm512_reduce_mul_epi32(__m512i __W) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009384 return __builtin_ia32_reduce_mul_d512((__v16si)__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009385}
9386
9387static __inline__ int __DEFAULT_FN_ATTRS512
9388_mm512_reduce_and_epi32(__m512i __W) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009389 return __builtin_ia32_reduce_and_d512((__v16si)__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009390}
9391
9392static __inline__ int __DEFAULT_FN_ATTRS512
9393_mm512_reduce_or_epi32(__m512i __W) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009394 return __builtin_ia32_reduce_or_d512((__v16si)__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009395}
9396
9397static __inline__ int __DEFAULT_FN_ATTRS512
9398_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
9399 __W = _mm512_maskz_mov_epi32(__M, __W);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009400 return __builtin_ia32_reduce_add_d512((__v16si)__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009401}
9402
9403static __inline__ int __DEFAULT_FN_ATTRS512
9404_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
9405 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009406 return __builtin_ia32_reduce_mul_d512((__v16si)__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009407}
9408
9409static __inline__ int __DEFAULT_FN_ATTRS512
9410_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
9411 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009412 return __builtin_ia32_reduce_and_d512((__v16si)__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009413}
9414
9415static __inline__ int __DEFAULT_FN_ATTRS512
9416_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
9417 __W = _mm512_maskz_mov_epi32(__M, __W);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009418 return __builtin_ia32_reduce_or_d512((__v16si)__W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009419}
Logan Chien55afb0a2018-10-15 10:42:14 +08009420
9421static __inline__ float __DEFAULT_FN_ATTRS512
9422_mm512_reduce_add_ps(__m512 __W) {
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009423 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009424}
9425
9426static __inline__ float __DEFAULT_FN_ATTRS512
9427_mm512_reduce_mul_ps(__m512 __W) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009428 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009429}
9430
9431static __inline__ float __DEFAULT_FN_ATTRS512
9432_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
9433 __W = _mm512_maskz_mov_ps(__M, __W);
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009434 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009435}
9436
9437static __inline__ float __DEFAULT_FN_ATTRS512
9438_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
9439 __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009440 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
Logan Chien55afb0a2018-10-15 10:42:14 +08009441}
Logan Chien55afb0a2018-10-15 10:42:14 +08009442
9443static __inline__ long long __DEFAULT_FN_ATTRS512
9444_mm512_reduce_max_epi64(__m512i __V) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009445 return __builtin_ia32_reduce_smax_q512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009446}
9447
9448static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9449_mm512_reduce_max_epu64(__m512i __V) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009450 return __builtin_ia32_reduce_umax_q512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009451}
9452
9453static __inline__ long long __DEFAULT_FN_ATTRS512
9454_mm512_reduce_min_epi64(__m512i __V) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009455 return __builtin_ia32_reduce_smin_q512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009456}
9457
9458static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9459_mm512_reduce_min_epu64(__m512i __V) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009460 return __builtin_ia32_reduce_umin_q512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009461}
9462
9463static __inline__ long long __DEFAULT_FN_ATTRS512
9464_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
9465 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009466 return __builtin_ia32_reduce_smax_q512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009467}
9468
9469static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9470_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
9471 __V = _mm512_maskz_mov_epi64(__M, __V);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009472 return __builtin_ia32_reduce_umax_q512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009473}
9474
9475static __inline__ long long __DEFAULT_FN_ATTRS512
9476_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
9477 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009478 return __builtin_ia32_reduce_smin_q512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009479}
9480
9481static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9482_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
9483 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009484 return __builtin_ia32_reduce_umin_q512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009485}
Logan Chien55afb0a2018-10-15 10:42:14 +08009486static __inline__ int __DEFAULT_FN_ATTRS512
9487_mm512_reduce_max_epi32(__m512i __V) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009488 return __builtin_ia32_reduce_smax_d512((__v16si)__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009489}
9490
9491static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9492_mm512_reduce_max_epu32(__m512i __V) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009493 return __builtin_ia32_reduce_umax_d512((__v16si)__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009494}
9495
9496static __inline__ int __DEFAULT_FN_ATTRS512
9497_mm512_reduce_min_epi32(__m512i __V) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009498 return __builtin_ia32_reduce_smin_d512((__v16si)__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009499}
9500
9501static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9502_mm512_reduce_min_epu32(__m512i __V) {
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009503 return __builtin_ia32_reduce_umin_d512((__v16si)__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009504}
9505
9506static __inline__ int __DEFAULT_FN_ATTRS512
9507_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
9508 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009509 return __builtin_ia32_reduce_smax_d512((__v16si)__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009510}
9511
9512static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9513_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
9514 __V = _mm512_maskz_mov_epi32(__M, __V);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009515 return __builtin_ia32_reduce_umax_d512((__v16si)__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009516}
9517
9518static __inline__ int __DEFAULT_FN_ATTRS512
9519_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
9520 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009521 return __builtin_ia32_reduce_smin_d512((__v16si)__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009522}
9523
9524static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9525_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
9526 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V);
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07009527 return __builtin_ia32_reduce_umin_d512((__v16si)__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009528}
Logan Chien55afb0a2018-10-15 10:42:14 +08009529
Logan Chien55afb0a2018-10-15 10:42:14 +08009530static __inline__ double __DEFAULT_FN_ATTRS512
9531_mm512_reduce_max_pd(__m512d __V) {
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009532 return __builtin_ia32_reduce_fmax_pd512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009533}
9534
9535static __inline__ double __DEFAULT_FN_ATTRS512
9536_mm512_reduce_min_pd(__m512d __V) {
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009537 return __builtin_ia32_reduce_fmin_pd512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009538}
9539
9540static __inline__ double __DEFAULT_FN_ATTRS512
9541_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
9542 __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009543 return __builtin_ia32_reduce_fmax_pd512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009544}
9545
9546static __inline__ double __DEFAULT_FN_ATTRS512
9547_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
9548 __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009549 return __builtin_ia32_reduce_fmin_pd512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009550}
Logan Chien55afb0a2018-10-15 10:42:14 +08009551
9552static __inline__ float __DEFAULT_FN_ATTRS512
9553_mm512_reduce_max_ps(__m512 __V) {
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009554 return __builtin_ia32_reduce_fmax_ps512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009555}
9556
9557static __inline__ float __DEFAULT_FN_ATTRS512
9558_mm512_reduce_min_ps(__m512 __V) {
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009559 return __builtin_ia32_reduce_fmin_ps512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009560}
9561
9562static __inline__ float __DEFAULT_FN_ATTRS512
9563_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
9564 __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009565 return __builtin_ia32_reduce_fmax_ps512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009566}
9567
9568static __inline__ float __DEFAULT_FN_ATTRS512
9569_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
9570 __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -07009571 return __builtin_ia32_reduce_fmin_ps512(__V);
Logan Chien55afb0a2018-10-15 10:42:14 +08009572}
Logan Chien55afb0a2018-10-15 10:42:14 +08009573
Logan Chienbedbf4f2020-01-06 19:35:19 -08009574/// Moves the least significant 32 bits of a vector of [16 x i32] to a
9575/// 32-bit signed integer value.
9576///
9577/// \headerfile <x86intrin.h>
9578///
9579/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9580///
9581/// \param __A
9582/// A vector of [16 x i32]. The least significant 32 bits are moved to the
9583/// destination.
9584/// \returns A 32-bit signed integer containing the moved value.
9585static __inline__ int __DEFAULT_FN_ATTRS512
9586_mm512_cvtsi512_si32(__m512i __A) {
9587 __v16si __b = (__v16si)__A;
9588 return __b[0];
9589}
9590
/// Loads 8 double-precision (64-bit) floating-point elements from memory
/// starting at location \a base_addr at packed 32-bit integer indices stored in
/// the lower half of \a vindex scaled by \a scale and stores them in dst.
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \operation
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endoperation
#define _mm512_i32logather_pd(vindex, base_addr, scale)                        \
  _mm512_i32gather_pd(_mm512_castsi512_si256((vindex)), (base_addr), (scale))
9608
/// Loads 8 double-precision (64-bit) floating-point elements from memory
/// starting at location \a base_addr at packed 32-bit integer indices stored in
/// the lower half of \a vindex scaled by \a scale into dst using writemask
/// \a mask (elements are copied from \a src when the corresponding mask bit is
/// not set).
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \operation
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endoperation
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)        \
  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256((vindex)),    \
                           (base_addr), (scale))
9633
/// Loads 8 64-bit integer elements from memory starting at location
/// \a base_addr at packed 32-bit integer indices stored in the lower half of
/// \a vindex scaled by \a scale and stores them in dst.
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \operation
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endoperation
#define _mm512_i32logather_epi64(vindex, base_addr, scale)                     \
  _mm512_i32gather_epi64(_mm512_castsi512_si256((vindex)), (base_addr),        \
                         (scale))
9651
/// Loads 8 64-bit integer elements from memory starting at location
/// \a base_addr at packed 32-bit integer indices stored in the lower half of
/// \a vindex scaled by \a scale and stores them in dst using writemask \a mask
/// (elements are copied from \a src when the corresponding mask bit is not
/// set).
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \operation
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endoperation
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale)     \
  _mm512_mask_i32gather_epi64((src), (mask),                                   \
                              _mm512_castsi512_si256((vindex)), (base_addr),   \
                              (scale))
9675
/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in the lower half of \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \operation
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endoperation
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale)                   \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256((vindex)), (v1),    \
                       (scale))
9692
/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in the lower half of \a vindex scaled by \a scale.
/// Only those elements whose corresponding mask bit is set in writemask
/// \a mask are written to memory.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \operation
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endoperation
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale)        \
  _mm512_mask_i32scatter_pd((base_addr), (mask),                               \
                            _mm512_castsi512_si256((vindex)), (v1), (scale))
9714
/// Stores 8 packed 64-bit integer elements located in \a v1 to memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in the lower half of \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \operation
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endoperation
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale)                \
  _mm512_i32scatter_epi64((base_addr),                                         \
                          _mm512_castsi512_si256((vindex)), (v1), (scale))
9732
/// Stores 8 packed 64-bit integer elements located in \a v1 to memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in the lower half of \a vindex scaled by \a scale using
/// writemask \a mask (elements whose corresponding mask bit is not set are not
/// written to memory).
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \operation
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endoperation
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale)     \
  _mm512_mask_i32scatter_epi64((base_addr), (mask),                            \
                               _mm512_castsi512_si256((vindex)), (v1), (scale))
9753
Logan Chien55afb0a2018-10-15 10:42:14 +08009754#undef __DEFAULT_FN_ATTRS512
9755#undef __DEFAULT_FN_ATTRS128
Logan Chienb0c84022018-11-09 16:19:54 +08009756#undef __DEFAULT_FN_ATTRS
Logan Chien55afb0a2018-10-15 10:42:14 +08009757
9758#endif /* __AVX512FINTRIN_H */