blob: 0519dba59081a506ebcf9971857375cde74bcf81 [file] [log] [blame]
/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLINTRIN_H
15#define __AVX512VLINTRIN_H
16
/* Default attribute sets applied to the 128-bit and 256-bit intrinsics
 * defined in this header. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
Logan Chien2833ffb2018-10-09 10:03:24 +080019
/* Narrow vector types: 4 bytes of short, 4 bytes of char, 2 bytes of char. */
typedef short __v2hi __attribute__((__vector_size__(4)));
typedef char __v4qi __attribute__((__vector_size__(4)));
typedef char __v2qi __attribute__((__vector_size__(2)));
Logan Chien2833ffb2018-10-09 10:03:24 +080023
/* Integer compare */
25
/* 128-bit epi32 compares: each name fixes the predicate constant that is
 * forwarded to _mm_cmp_epi32_mask / _mm_mask_cmp_epi32_mask. */
#define _mm_cmpeq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +080050
/* 256-bit epi32 compares: each name fixes the predicate constant that is
 * forwarded to _mm256_cmp_epi32_mask / _mm256_mask_cmp_epi32_mask. */
#define _mm256_cmpeq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +080075
/* 128-bit epu32 compares: each name fixes the predicate constant that is
 * forwarded to _mm_cmp_epu32_mask / _mm_mask_cmp_epu32_mask. */
#define _mm_cmpeq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +0800100
/* 256-bit epu32 compares: each name fixes the predicate constant that is
 * forwarded to _mm256_cmp_epu32_mask / _mm256_mask_cmp_epu32_mask. */
#define _mm256_cmpeq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +0800125
/* 128-bit epi64 compares: each name fixes the predicate constant that is
 * forwarded to _mm_cmp_epi64_mask / _mm_mask_cmp_epi64_mask. */
#define _mm_cmpeq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +0800150
/* 256-bit epi64 compares: each name fixes the predicate constant that is
 * forwarded to _mm256_cmp_epi64_mask / _mm256_mask_cmp_epi64_mask. */
#define _mm256_cmpeq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +0800175
/* 128-bit epu64 compares: each name fixes the predicate constant that is
 * forwarded to _mm_cmp_epu64_mask / _mm_mask_cmp_epu64_mask. */
#define _mm_cmpeq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +0800200
/* 256-bit epu64 compares: each name fixes the predicate constant that is
 * forwarded to _mm256_cmp_epu64_mask / _mm256_mask_cmp_epu64_mask. */
#define _mm256_cmpeq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +0800225
Logan Chien55afb0a2018-10-15 10:42:14 +0800226static __inline__ __m256i __DEFAULT_FN_ATTRS256
227_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800228{
Logan Chien55afb0a2018-10-15 10:42:14 +0800229 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
230 (__v8si)_mm256_add_epi32(__A, __B),
231 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800232}
233
Logan Chien55afb0a2018-10-15 10:42:14 +0800234static __inline__ __m256i __DEFAULT_FN_ATTRS256
235_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800236{
Logan Chien55afb0a2018-10-15 10:42:14 +0800237 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
238 (__v8si)_mm256_add_epi32(__A, __B),
239 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800240}
241
Logan Chien55afb0a2018-10-15 10:42:14 +0800242static __inline__ __m256i __DEFAULT_FN_ATTRS256
243_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800244{
Logan Chien55afb0a2018-10-15 10:42:14 +0800245 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
246 (__v4di)_mm256_add_epi64(__A, __B),
247 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800248}
249
Logan Chien55afb0a2018-10-15 10:42:14 +0800250static __inline__ __m256i __DEFAULT_FN_ATTRS256
251_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800252{
Logan Chien55afb0a2018-10-15 10:42:14 +0800253 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
254 (__v4di)_mm256_add_epi64(__A, __B),
255 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800256}
257
Logan Chien55afb0a2018-10-15 10:42:14 +0800258static __inline__ __m256i __DEFAULT_FN_ATTRS256
259_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800260{
Logan Chien55afb0a2018-10-15 10:42:14 +0800261 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
262 (__v8si)_mm256_sub_epi32(__A, __B),
263 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800264}
265
Logan Chien55afb0a2018-10-15 10:42:14 +0800266static __inline__ __m256i __DEFAULT_FN_ATTRS256
267_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800268{
Logan Chien55afb0a2018-10-15 10:42:14 +0800269 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
270 (__v8si)_mm256_sub_epi32(__A, __B),
271 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800272}
273
Logan Chien55afb0a2018-10-15 10:42:14 +0800274static __inline__ __m256i __DEFAULT_FN_ATTRS256
275_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800276{
Logan Chien55afb0a2018-10-15 10:42:14 +0800277 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
278 (__v4di)_mm256_sub_epi64(__A, __B),
279 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800280}
281
Logan Chien55afb0a2018-10-15 10:42:14 +0800282static __inline__ __m256i __DEFAULT_FN_ATTRS256
283_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800284{
Logan Chien55afb0a2018-10-15 10:42:14 +0800285 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
286 (__v4di)_mm256_sub_epi64(__A, __B),
287 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800288}
289
Logan Chien55afb0a2018-10-15 10:42:14 +0800290static __inline__ __m128i __DEFAULT_FN_ATTRS128
291_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800292{
Logan Chien55afb0a2018-10-15 10:42:14 +0800293 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
294 (__v4si)_mm_add_epi32(__A, __B),
295 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800296}
297
Logan Chien55afb0a2018-10-15 10:42:14 +0800298static __inline__ __m128i __DEFAULT_FN_ATTRS128
299_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800300{
Logan Chien55afb0a2018-10-15 10:42:14 +0800301 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
302 (__v4si)_mm_add_epi32(__A, __B),
303 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800304}
305
Logan Chien55afb0a2018-10-15 10:42:14 +0800306static __inline__ __m128i __DEFAULT_FN_ATTRS128
307_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800308{
Logan Chien55afb0a2018-10-15 10:42:14 +0800309 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
310 (__v2di)_mm_add_epi64(__A, __B),
311 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800312}
313
Logan Chien55afb0a2018-10-15 10:42:14 +0800314static __inline__ __m128i __DEFAULT_FN_ATTRS128
315_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800316{
Logan Chien55afb0a2018-10-15 10:42:14 +0800317 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
318 (__v2di)_mm_add_epi64(__A, __B),
319 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800320}
321
Logan Chien55afb0a2018-10-15 10:42:14 +0800322static __inline__ __m128i __DEFAULT_FN_ATTRS128
323_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800324{
Logan Chien55afb0a2018-10-15 10:42:14 +0800325 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
326 (__v4si)_mm_sub_epi32(__A, __B),
327 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800328}
329
Logan Chien55afb0a2018-10-15 10:42:14 +0800330static __inline__ __m128i __DEFAULT_FN_ATTRS128
331_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800332{
Logan Chien55afb0a2018-10-15 10:42:14 +0800333 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
334 (__v4si)_mm_sub_epi32(__A, __B),
335 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800336}
337
Logan Chien55afb0a2018-10-15 10:42:14 +0800338static __inline__ __m128i __DEFAULT_FN_ATTRS128
339_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800340{
Logan Chien55afb0a2018-10-15 10:42:14 +0800341 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
342 (__v2di)_mm_sub_epi64(__A, __B),
343 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800344}
345
Logan Chien55afb0a2018-10-15 10:42:14 +0800346static __inline__ __m128i __DEFAULT_FN_ATTRS128
347_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800348{
Logan Chien55afb0a2018-10-15 10:42:14 +0800349 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
350 (__v2di)_mm_sub_epi64(__A, __B),
351 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800352}
353
Logan Chien55afb0a2018-10-15 10:42:14 +0800354static __inline__ __m256i __DEFAULT_FN_ATTRS256
355_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +0800356{
Logan Chien55afb0a2018-10-15 10:42:14 +0800357 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
358 (__v4di)_mm256_mul_epi32(__X, __Y),
359 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800360}
361
Logan Chien55afb0a2018-10-15 10:42:14 +0800362static __inline__ __m256i __DEFAULT_FN_ATTRS256
363_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +0800364{
Logan Chien55afb0a2018-10-15 10:42:14 +0800365 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
366 (__v4di)_mm256_mul_epi32(__X, __Y),
367 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800368}
369
Logan Chien55afb0a2018-10-15 10:42:14 +0800370static __inline__ __m128i __DEFAULT_FN_ATTRS128
371_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +0800372{
Logan Chien55afb0a2018-10-15 10:42:14 +0800373 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
374 (__v2di)_mm_mul_epi32(__X, __Y),
375 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800376}
377
Logan Chien55afb0a2018-10-15 10:42:14 +0800378static __inline__ __m128i __DEFAULT_FN_ATTRS128
379_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +0800380{
Logan Chien55afb0a2018-10-15 10:42:14 +0800381 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
382 (__v2di)_mm_mul_epi32(__X, __Y),
383 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800384}
385
Logan Chien55afb0a2018-10-15 10:42:14 +0800386static __inline__ __m256i __DEFAULT_FN_ATTRS256
387_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +0800388{
Logan Chien55afb0a2018-10-15 10:42:14 +0800389 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
390 (__v4di)_mm256_mul_epu32(__X, __Y),
391 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800392}
393
Logan Chien55afb0a2018-10-15 10:42:14 +0800394static __inline__ __m256i __DEFAULT_FN_ATTRS256
395_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +0800396{
Logan Chien55afb0a2018-10-15 10:42:14 +0800397 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
398 (__v4di)_mm256_mul_epu32(__X, __Y),
399 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800400}
401
Logan Chien55afb0a2018-10-15 10:42:14 +0800402static __inline__ __m128i __DEFAULT_FN_ATTRS128
403_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +0800404{
Logan Chien55afb0a2018-10-15 10:42:14 +0800405 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
406 (__v2di)_mm_mul_epu32(__X, __Y),
407 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800408}
409
Logan Chien55afb0a2018-10-15 10:42:14 +0800410static __inline__ __m128i __DEFAULT_FN_ATTRS128
411_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +0800412{
Logan Chien55afb0a2018-10-15 10:42:14 +0800413 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
414 (__v2di)_mm_mul_epu32(__X, __Y),
415 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800416}
417
Logan Chien55afb0a2018-10-15 10:42:14 +0800418static __inline__ __m256i __DEFAULT_FN_ATTRS256
419_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800420{
Logan Chien55afb0a2018-10-15 10:42:14 +0800421 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
422 (__v8si)_mm256_mullo_epi32(__A, __B),
423 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800424}
425
Logan Chien55afb0a2018-10-15 10:42:14 +0800426static __inline__ __m256i __DEFAULT_FN_ATTRS256
427_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800428{
Logan Chien55afb0a2018-10-15 10:42:14 +0800429 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
430 (__v8si)_mm256_mullo_epi32(__A, __B),
431 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800432}
433
Logan Chien55afb0a2018-10-15 10:42:14 +0800434static __inline__ __m128i __DEFAULT_FN_ATTRS128
435_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800436{
Logan Chien55afb0a2018-10-15 10:42:14 +0800437 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
438 (__v4si)_mm_mullo_epi32(__A, __B),
439 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800440}
441
Logan Chien55afb0a2018-10-15 10:42:14 +0800442static __inline__ __m128i __DEFAULT_FN_ATTRS128
443_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800444{
Logan Chien55afb0a2018-10-15 10:42:14 +0800445 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
446 (__v4si)_mm_mullo_epi32(__A, __B),
447 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800448}
449
Logan Chien55afb0a2018-10-15 10:42:14 +0800450static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien969aea62018-12-05 18:40:57 +0800451_mm256_and_epi32(__m256i __a, __m256i __b)
452{
453 return (__m256i)((__v8su)__a & (__v8su)__b);
454}
455
456static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800457_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
458{
459 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800460 (__v8si)_mm256_and_epi32(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800461 (__v8si)__W);
462}
463
Logan Chien55afb0a2018-10-15 10:42:14 +0800464static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800465_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
466{
467 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
468}
469
Logan Chien55afb0a2018-10-15 10:42:14 +0800470static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien969aea62018-12-05 18:40:57 +0800471_mm_and_epi32(__m128i __a, __m128i __b)
472{
473 return (__m128i)((__v4su)__a & (__v4su)__b);
474}
475
476static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800477_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
478{
479 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800480 (__v4si)_mm_and_epi32(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800481 (__v4si)__W);
482}
483
Logan Chien55afb0a2018-10-15 10:42:14 +0800484static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800485_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
486{
487 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
488}
489
Logan Chien55afb0a2018-10-15 10:42:14 +0800490static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien969aea62018-12-05 18:40:57 +0800491_mm256_andnot_epi32(__m256i __A, __m256i __B)
492{
493 return (__m256i)(~(__v8su)__A & (__v8su)__B);
494}
495
496static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800497_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
498{
499 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800500 (__v8si)_mm256_andnot_epi32(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800501 (__v8si)__W);
502}
503
Logan Chien55afb0a2018-10-15 10:42:14 +0800504static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800505_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
506{
507 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
508 __U, __A, __B);
509}
510
Logan Chien55afb0a2018-10-15 10:42:14 +0800511static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien969aea62018-12-05 18:40:57 +0800512_mm_andnot_epi32(__m128i __A, __m128i __B)
513{
514 return (__m128i)(~(__v4su)__A & (__v4su)__B);
515}
516
517static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800518_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
519{
520 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800521 (__v4si)_mm_andnot_epi32(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800522 (__v4si)__W);
523}
524
Logan Chien55afb0a2018-10-15 10:42:14 +0800525static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien969aea62018-12-05 18:40:57 +0800526_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800527{
528 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
529}
530
Logan Chien55afb0a2018-10-15 10:42:14 +0800531static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien969aea62018-12-05 18:40:57 +0800532_mm256_or_epi32(__m256i __a, __m256i __b)
533{
534 return (__m256i)((__v8su)__a | (__v8su)__b);
535}
536
537static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800538_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
539{
540 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800541 (__v8si)_mm256_or_epi32(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800542 (__v8si)__W);
543}
544
Logan Chien55afb0a2018-10-15 10:42:14 +0800545static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800546_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
547{
548 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
549}
550
Logan Chien55afb0a2018-10-15 10:42:14 +0800551static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien969aea62018-12-05 18:40:57 +0800552_mm_or_epi32(__m128i __a, __m128i __b)
553{
554 return (__m128i)((__v4su)__a | (__v4su)__b);
555}
556
557static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800558_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
559{
560 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800561 (__v4si)_mm_or_epi32(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800562 (__v4si)__W);
563}
564
Logan Chien55afb0a2018-10-15 10:42:14 +0800565static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800566_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
567{
568 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
569}
570
Logan Chien55afb0a2018-10-15 10:42:14 +0800571static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien969aea62018-12-05 18:40:57 +0800572_mm256_xor_epi32(__m256i __a, __m256i __b)
573{
574 return (__m256i)((__v8su)__a ^ (__v8su)__b);
575}
576
577static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800578_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
579{
580 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800581 (__v8si)_mm256_xor_epi32(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800582 (__v8si)__W);
583}
584
Logan Chien55afb0a2018-10-15 10:42:14 +0800585static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800586_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
587{
588 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
589}
590
Logan Chien55afb0a2018-10-15 10:42:14 +0800591static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien969aea62018-12-05 18:40:57 +0800592_mm_xor_epi32(__m128i __a, __m128i __b)
593{
594 return (__m128i)((__v4su)__a ^ (__v4su)__b);
595}
596
597static __inline__ __m128i __DEFAULT_FN_ATTRS128
598_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800599{
600 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800601 (__v4si)_mm_xor_epi32(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800602 (__v4si)__W);
603}
604
Logan Chien55afb0a2018-10-15 10:42:14 +0800605static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800606_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
607{
608 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
609}
610
Logan Chien55afb0a2018-10-15 10:42:14 +0800611static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien969aea62018-12-05 18:40:57 +0800612_mm256_and_epi64(__m256i __a, __m256i __b)
613{
614 return (__m256i)((__v4du)__a & (__v4du)__b);
615}
616
617static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800618_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
619{
620 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800621 (__v4di)_mm256_and_epi64(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800622 (__v4di)__W);
623}
624
Logan Chien55afb0a2018-10-15 10:42:14 +0800625static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800626_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
627{
628 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
629}
630
Logan Chien55afb0a2018-10-15 10:42:14 +0800631static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien969aea62018-12-05 18:40:57 +0800632_mm_and_epi64(__m128i __a, __m128i __b)
633{
634 return (__m128i)((__v2du)__a & (__v2du)__b);
635}
636
637static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800638_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
639{
640 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800641 (__v2di)_mm_and_epi64(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800642 (__v2di)__W);
643}
644
Logan Chien55afb0a2018-10-15 10:42:14 +0800645static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800646_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
647{
648 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
649}
650
Logan Chien55afb0a2018-10-15 10:42:14 +0800651static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien969aea62018-12-05 18:40:57 +0800652_mm256_andnot_epi64(__m256i __A, __m256i __B)
653{
654 return (__m256i)(~(__v4du)__A & (__v4du)__B);
655}
656
657static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800658_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
659{
660 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800661 (__v4di)_mm256_andnot_epi64(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800662 (__v4di)__W);
663}
664
Logan Chien55afb0a2018-10-15 10:42:14 +0800665static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800666_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
667{
668 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
669 __U, __A, __B);
670}
671
Logan Chien55afb0a2018-10-15 10:42:14 +0800672static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien969aea62018-12-05 18:40:57 +0800673_mm_andnot_epi64(__m128i __A, __m128i __B)
674{
675 return (__m128i)(~(__v2du)__A & (__v2du)__B);
676}
677
678static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800679_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
680{
681 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800682 (__v2di)_mm_andnot_epi64(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800683 (__v2di)__W);
684}
685
Logan Chien55afb0a2018-10-15 10:42:14 +0800686static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800687_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
688{
689 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
690}
691
Logan Chien55afb0a2018-10-15 10:42:14 +0800692static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien969aea62018-12-05 18:40:57 +0800693_mm256_or_epi64(__m256i __a, __m256i __b)
694{
695 return (__m256i)((__v4du)__a | (__v4du)__b);
696}
697
698static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800699_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
700{
701 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800702 (__v4di)_mm256_or_epi64(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800703 (__v4di)__W);
704}
705
Logan Chien55afb0a2018-10-15 10:42:14 +0800706static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800707_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
708{
709 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
710}
711
Logan Chien55afb0a2018-10-15 10:42:14 +0800712static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien969aea62018-12-05 18:40:57 +0800713_mm_or_epi64(__m128i __a, __m128i __b)
714{
715 return (__m128i)((__v2du)__a | (__v2du)__b);
716}
717
718static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800719_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
720{
721 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800722 (__v2di)_mm_or_epi64(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800723 (__v2di)__W);
724}
725
Logan Chien55afb0a2018-10-15 10:42:14 +0800726static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800727_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
728{
729 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
730}
731
Logan Chien55afb0a2018-10-15 10:42:14 +0800732static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien969aea62018-12-05 18:40:57 +0800733_mm256_xor_epi64(__m256i __a, __m256i __b)
734{
735 return (__m256i)((__v4du)__a ^ (__v4du)__b);
736}
737
738static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800739_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
740{
741 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800742 (__v4di)_mm256_xor_epi64(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800743 (__v4di)__W);
744}
745
Logan Chien55afb0a2018-10-15 10:42:14 +0800746static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800747_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
748{
749 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
750}
751
Logan Chien55afb0a2018-10-15 10:42:14 +0800752static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien969aea62018-12-05 18:40:57 +0800753_mm_xor_epi64(__m128i __a, __m128i __b)
754{
755 return (__m128i)((__v2du)__a ^ (__v2du)__b);
756}
757
758static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800759_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
760 __m128i __B)
761{
762 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
Logan Chien969aea62018-12-05 18:40:57 +0800763 (__v2di)_mm_xor_epi64(__A, __B),
Logan Chien2833ffb2018-10-09 10:03:24 +0800764 (__v2di)__W);
765}
766
Logan Chien55afb0a2018-10-15 10:42:14 +0800767static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800768_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
769{
770 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
771}
772
/* Compare packed 32-bit integers in two 128-bit vectors (signed builtin,
 * __builtin_ia32_cmpd128_mask) and yield the result as an __mmask8.
 *   a, b  operands, converted to __m128i and viewed as __v4si
 *   p     comparison predicate (e.g. _MM_CMPINT_EQ), forwarded as int
 *   m     (masked form only) incoming mask handed to the builtin
 * The unmasked form passes an all-ones mask, (__mmask8)-1.
 * These stay macros rather than functions so that p reaches the builtin
 * as written by the caller. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800782
/* Compare packed 32-bit integers in two 128-bit vectors using the unsigned
 * builtin (__builtin_ia32_ucmpd128_mask) and yield an __mmask8.
 *   a, b  operands, converted to __m128i and viewed as __v4si (the builtin
 *         takes the signed vector type even for the unsigned compare)
 *   p     comparison predicate, forwarded as int
 *   m     (masked form only) incoming mask handed to the builtin
 * The unmasked form passes an all-ones mask, (__mmask8)-1. */
#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800792
/* Compare packed 32-bit integers in two 256-bit vectors (signed builtin,
 * __builtin_ia32_cmpd256_mask) and yield the result as an __mmask8.
 *   a, b  operands, converted to __m256i and viewed as __v8si
 *   p     comparison predicate, forwarded as int
 *   m     (masked form only) incoming mask handed to the builtin
 * The unmasked form passes an all-ones mask, (__mmask8)-1. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800802
/* Compare packed 32-bit integers in two 256-bit vectors using the unsigned
 * builtin (__builtin_ia32_ucmpd256_mask) and yield an __mmask8.
 *   a, b  operands, converted to __m256i and viewed as __v8si (the builtin
 *         takes the signed vector type even for the unsigned compare)
 *   p     comparison predicate, forwarded as int
 *   m     (masked form only) incoming mask handed to the builtin
 * The unmasked form passes an all-ones mask, (__mmask8)-1. */
#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800812
/* Compare packed 64-bit integers in two 128-bit vectors (signed builtin,
 * __builtin_ia32_cmpq128_mask) and yield the result as an __mmask8.
 *   a, b  operands, converted to __m128i and viewed as __v2di
 *   p     comparison predicate, forwarded as int
 *   m     (masked form only) incoming mask handed to the builtin
 * The unmasked form passes an all-ones mask, (__mmask8)-1. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800822
/* Compare packed 64-bit integers in two 128-bit vectors using the unsigned
 * builtin (__builtin_ia32_ucmpq128_mask) and yield an __mmask8.
 *   a, b  operands, converted to __m128i and viewed as __v2di (the builtin
 *         takes the signed vector type even for the unsigned compare)
 *   p     comparison predicate, forwarded as int
 *   m     (masked form only) incoming mask handed to the builtin
 * The unmasked form passes an all-ones mask, (__mmask8)-1. */
#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800832
/* Compare packed 64-bit integers in two 256-bit vectors (signed builtin,
 * __builtin_ia32_cmpq256_mask) and yield the result as an __mmask8.
 *   a, b  operands, converted to __m256i and viewed as __v4di
 *   p     comparison predicate, forwarded as int
 *   m     (masked form only) incoming mask handed to the builtin
 * The unmasked form passes an all-ones mask, (__mmask8)-1. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800842
/* Compare packed 64-bit integers in two 256-bit vectors using the unsigned
 * builtin (__builtin_ia32_ucmpq256_mask) and yield an __mmask8.
 *   a, b  operands, converted to __m256i and viewed as __v4di (the builtin
 *         takes the signed vector type even for the unsigned compare)
 *   p     comparison predicate, forwarded as int
 *   m     (masked form only) incoming mask handed to the builtin
 * The unmasked form passes an all-ones mask, (__mmask8)-1. */
#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800852
/* Compare packed single-precision values in two 256-bit vectors via
 * __builtin_ia32_cmpps256_mask and yield the result as an __mmask8.
 *   a, b  operands, converted to __m256 and viewed as __v8sf
 *   p     comparison predicate, forwarded as int
 *   m     (masked form only) incoming mask handed to the builtin
 * The unmasked form passes an all-ones mask, (__mmask8)-1. */
#define _mm256_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800862
/* Compare packed double-precision values in two 256-bit vectors via
 * __builtin_ia32_cmppd256_mask and yield the result as an __mmask8.
 *   a, b  operands, converted to __m256d and viewed as __v4df
 *   p     comparison predicate, forwarded as int
 *   m     (masked form only) incoming mask handed to the builtin
 * The unmasked form passes an all-ones mask, (__mmask8)-1. */
#define _mm256_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800872
/* Compare packed single-precision values in two 128-bit vectors via
 * __builtin_ia32_cmpps128_mask and yield the result as an __mmask8.
 *   a, b  operands, converted to __m128 and viewed as __v4sf
 *   p     comparison predicate, forwarded as int
 *   m     (masked form only) incoming mask handed to the builtin
 * The unmasked form passes an all-ones mask, (__mmask8)-1. */
#define _mm_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800882
/* Compare packed double-precision values in two 128-bit vectors via
 * __builtin_ia32_cmppd128_mask and yield the result as an __mmask8.
 *   a, b  operands, converted to __m128d and viewed as __v2df
 *   p     comparison predicate, forwarded as int
 *   m     (masked form only) incoming mask handed to the builtin
 * The unmasked form passes an all-ones mask, (__mmask8)-1. */
#define _mm_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800892
Logan Chien55afb0a2018-10-15 10:42:14 +0800893static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800894_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
895{
Logan Chien55afb0a2018-10-15 10:42:14 +0800896 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
897 __builtin_ia32_vfmaddpd ((__v2df) __A,
898 (__v2df) __B,
899 (__v2df) __C),
900 (__v2df) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +0800901}
902
Logan Chien55afb0a2018-10-15 10:42:14 +0800903static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800904_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
905{
Logan Chien55afb0a2018-10-15 10:42:14 +0800906 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
907 __builtin_ia32_vfmaddpd ((__v2df) __A,
908 (__v2df) __B,
909 (__v2df) __C),
910 (__v2df) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800911}
912
Logan Chien55afb0a2018-10-15 10:42:14 +0800913static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800914_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
915{
Logan Chien55afb0a2018-10-15 10:42:14 +0800916 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
917 __builtin_ia32_vfmaddpd ((__v2df) __A,
918 (__v2df) __B,
919 (__v2df) __C),
920 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800921}
922
Logan Chien55afb0a2018-10-15 10:42:14 +0800923static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800924_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
925{
Logan Chien55afb0a2018-10-15 10:42:14 +0800926 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
927 __builtin_ia32_vfmaddpd ((__v2df) __A,
928 (__v2df) __B,
929 -(__v2df) __C),
930 (__v2df) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +0800931}
932
Logan Chien55afb0a2018-10-15 10:42:14 +0800933static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800934_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
935{
Logan Chien55afb0a2018-10-15 10:42:14 +0800936 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
937 __builtin_ia32_vfmaddpd ((__v2df) __A,
938 (__v2df) __B,
939 -(__v2df) __C),
940 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800941}
942
Logan Chien55afb0a2018-10-15 10:42:14 +0800943static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800944_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
945{
Logan Chien55afb0a2018-10-15 10:42:14 +0800946 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
947 __builtin_ia32_vfmaddpd (-(__v2df) __A,
948 (__v2df) __B,
949 (__v2df) __C),
950 (__v2df) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800951}
952
Logan Chien55afb0a2018-10-15 10:42:14 +0800953static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800954_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
955{
Logan Chien55afb0a2018-10-15 10:42:14 +0800956 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
957 __builtin_ia32_vfmaddpd (-(__v2df) __A,
958 (__v2df) __B,
959 (__v2df) __C),
960 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800961}
962
Logan Chien55afb0a2018-10-15 10:42:14 +0800963static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800964_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
965{
Logan Chien55afb0a2018-10-15 10:42:14 +0800966 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
967 __builtin_ia32_vfmaddpd (-(__v2df) __A,
968 (__v2df) __B,
969 -(__v2df) __C),
970 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800971}
972
Logan Chien55afb0a2018-10-15 10:42:14 +0800973static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800974_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
975{
Logan Chien55afb0a2018-10-15 10:42:14 +0800976 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
977 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
978 (__v4df) __B,
979 (__v4df) __C),
980 (__v4df) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +0800981}
982
Logan Chien55afb0a2018-10-15 10:42:14 +0800983static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800984_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
985{
Logan Chien55afb0a2018-10-15 10:42:14 +0800986 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
987 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
988 (__v4df) __B,
989 (__v4df) __C),
990 (__v4df) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800991}
992
Logan Chien55afb0a2018-10-15 10:42:14 +0800993static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800994_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
995{
Logan Chien55afb0a2018-10-15 10:42:14 +0800996 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
997 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
998 (__v4df) __B,
999 (__v4df) __C),
1000 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001001}
1002
Logan Chien55afb0a2018-10-15 10:42:14 +08001003static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001004_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1005{
Logan Chien55afb0a2018-10-15 10:42:14 +08001006 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1007 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1008 (__v4df) __B,
1009 -(__v4df) __C),
1010 (__v4df) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001011}
1012
Logan Chien55afb0a2018-10-15 10:42:14 +08001013static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001014_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1015{
Logan Chien55afb0a2018-10-15 10:42:14 +08001016 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1017 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1018 (__v4df) __B,
1019 -(__v4df) __C),
1020 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001021}
1022
Logan Chien55afb0a2018-10-15 10:42:14 +08001023static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001024_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1025{
Logan Chien55afb0a2018-10-15 10:42:14 +08001026 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1027 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1028 (__v4df) __B,
1029 (__v4df) __C),
1030 (__v4df) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001031}
1032
Logan Chien55afb0a2018-10-15 10:42:14 +08001033static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001034_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1035{
Logan Chien55afb0a2018-10-15 10:42:14 +08001036 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1037 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1038 (__v4df) __B,
1039 (__v4df) __C),
1040 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001041}
1042
Logan Chien55afb0a2018-10-15 10:42:14 +08001043static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001044_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1045{
Logan Chien55afb0a2018-10-15 10:42:14 +08001046 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1047 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1048 (__v4df) __B,
1049 -(__v4df) __C),
1050 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001051}
1052
Logan Chien55afb0a2018-10-15 10:42:14 +08001053static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001054_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1055{
Logan Chien55afb0a2018-10-15 10:42:14 +08001056 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1057 __builtin_ia32_vfmaddps ((__v4sf) __A,
1058 (__v4sf) __B,
1059 (__v4sf) __C),
1060 (__v4sf) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001061}
1062
Logan Chien55afb0a2018-10-15 10:42:14 +08001063static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001064_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1065{
Logan Chien55afb0a2018-10-15 10:42:14 +08001066 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1067 __builtin_ia32_vfmaddps ((__v4sf) __A,
1068 (__v4sf) __B,
1069 (__v4sf) __C),
1070 (__v4sf) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001071}
1072
Logan Chien55afb0a2018-10-15 10:42:14 +08001073static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001074_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1075{
Logan Chien55afb0a2018-10-15 10:42:14 +08001076 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1077 __builtin_ia32_vfmaddps ((__v4sf) __A,
1078 (__v4sf) __B,
1079 (__v4sf) __C),
1080 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001081}
1082
Logan Chien55afb0a2018-10-15 10:42:14 +08001083static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001084_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1085{
Logan Chien55afb0a2018-10-15 10:42:14 +08001086 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1087 __builtin_ia32_vfmaddps ((__v4sf) __A,
1088 (__v4sf) __B,
1089 -(__v4sf) __C),
1090 (__v4sf) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001091}
1092
Logan Chien55afb0a2018-10-15 10:42:14 +08001093static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001094_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1095{
Logan Chien55afb0a2018-10-15 10:42:14 +08001096 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1097 __builtin_ia32_vfmaddps ((__v4sf) __A,
1098 (__v4sf) __B,
1099 -(__v4sf) __C),
1100 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001101}
1102
Logan Chien55afb0a2018-10-15 10:42:14 +08001103static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001104_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1105{
Logan Chien55afb0a2018-10-15 10:42:14 +08001106 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1107 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1108 (__v4sf) __B,
1109 (__v4sf) __C),
1110 (__v4sf) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001111}
1112
Logan Chien55afb0a2018-10-15 10:42:14 +08001113static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001114_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1115{
Logan Chien55afb0a2018-10-15 10:42:14 +08001116 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1117 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1118 (__v4sf) __B,
1119 (__v4sf) __C),
1120 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001121}
1122
Logan Chien55afb0a2018-10-15 10:42:14 +08001123static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001124_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1125{
Logan Chien55afb0a2018-10-15 10:42:14 +08001126 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1127 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1128 (__v4sf) __B,
1129 -(__v4sf) __C),
1130 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001131}
1132
Logan Chien55afb0a2018-10-15 10:42:14 +08001133static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001134_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1135{
Logan Chien55afb0a2018-10-15 10:42:14 +08001136 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1137 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1138 (__v8sf) __B,
1139 (__v8sf) __C),
1140 (__v8sf) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001141}
1142
Logan Chien55afb0a2018-10-15 10:42:14 +08001143static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001144_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1145{
Logan Chien55afb0a2018-10-15 10:42:14 +08001146 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1147 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1148 (__v8sf) __B,
1149 (__v8sf) __C),
1150 (__v8sf) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001151}
1152
Logan Chien55afb0a2018-10-15 10:42:14 +08001153static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001154_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1155{
Logan Chien55afb0a2018-10-15 10:42:14 +08001156 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1157 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1158 (__v8sf) __B,
1159 (__v8sf) __C),
1160 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001161}
1162
Logan Chien55afb0a2018-10-15 10:42:14 +08001163static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001164_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1165{
Logan Chien55afb0a2018-10-15 10:42:14 +08001166 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1167 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1168 (__v8sf) __B,
1169 -(__v8sf) __C),
1170 (__v8sf) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001171}
1172
Logan Chien55afb0a2018-10-15 10:42:14 +08001173static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001174_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1175{
Logan Chien55afb0a2018-10-15 10:42:14 +08001176 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1177 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1178 (__v8sf) __B,
1179 -(__v8sf) __C),
1180 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001181}
1182
Logan Chien55afb0a2018-10-15 10:42:14 +08001183static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001184_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1185{
Logan Chien55afb0a2018-10-15 10:42:14 +08001186 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1187 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1188 (__v8sf) __B,
1189 (__v8sf) __C),
1190 (__v8sf) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001191}
1192
Logan Chien55afb0a2018-10-15 10:42:14 +08001193static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001194_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1195{
Logan Chien55afb0a2018-10-15 10:42:14 +08001196 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1197 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1198 (__v8sf) __B,
1199 (__v8sf) __C),
1200 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001201}
1202
Logan Chien55afb0a2018-10-15 10:42:14 +08001203static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001204_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1205{
Logan Chien55afb0a2018-10-15 10:42:14 +08001206 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1207 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1208 (__v8sf) __B,
1209 -(__v8sf) __C),
1210 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001211}
1212
Logan Chien55afb0a2018-10-15 10:42:14 +08001213static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001214_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1215{
Logan Chien55afb0a2018-10-15 10:42:14 +08001216 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1217 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1218 (__v2df) __B,
1219 (__v2df) __C),
1220 (__v2df) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001221}
1222
Logan Chien55afb0a2018-10-15 10:42:14 +08001223static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001224_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1225{
Logan Chien55afb0a2018-10-15 10:42:14 +08001226 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1227 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1228 (__v2df) __B,
1229 (__v2df) __C),
1230 (__v2df) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001231}
1232
Logan Chien55afb0a2018-10-15 10:42:14 +08001233static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001234_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1235{
Logan Chien55afb0a2018-10-15 10:42:14 +08001236 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1237 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1238 (__v2df) __B,
1239 (__v2df) __C),
1240 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001241}
1242
Logan Chien55afb0a2018-10-15 10:42:14 +08001243static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001244_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1245{
Logan Chien55afb0a2018-10-15 10:42:14 +08001246 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1247 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1248 (__v2df) __B,
1249 -(__v2df) __C),
1250 (__v2df) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001251}
1252
Logan Chien55afb0a2018-10-15 10:42:14 +08001253static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001254_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1255{
Logan Chien55afb0a2018-10-15 10:42:14 +08001256 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1257 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1258 (__v2df) __B,
1259 -(__v2df) __C),
1260 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001261}
1262
Logan Chien55afb0a2018-10-15 10:42:14 +08001263static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001264_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1265{
Logan Chien55afb0a2018-10-15 10:42:14 +08001266 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1267 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1268 (__v4df) __B,
1269 (__v4df) __C),
1270 (__v4df) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001271}
1272
Logan Chien55afb0a2018-10-15 10:42:14 +08001273static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001274_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1275{
Logan Chien55afb0a2018-10-15 10:42:14 +08001276 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1277 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1278 (__v4df) __B,
1279 (__v4df) __C),
1280 (__v4df) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001281}
1282
Logan Chien55afb0a2018-10-15 10:42:14 +08001283static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001284_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1285{
Logan Chien55afb0a2018-10-15 10:42:14 +08001286 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1287 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1288 (__v4df) __B,
1289 (__v4df) __C),
1290 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001291}
1292
Logan Chien55afb0a2018-10-15 10:42:14 +08001293static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001294_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1295{
Logan Chien55afb0a2018-10-15 10:42:14 +08001296 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1297 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1298 (__v4df) __B,
1299 -(__v4df) __C),
1300 (__v4df) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001301}
1302
Logan Chien55afb0a2018-10-15 10:42:14 +08001303static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001304_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1305{
Logan Chien55afb0a2018-10-15 10:42:14 +08001306 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1307 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1308 (__v4df) __B,
1309 -(__v4df) __C),
1310 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001311}
1312
Logan Chien55afb0a2018-10-15 10:42:14 +08001313static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001314_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1315{
Logan Chien55afb0a2018-10-15 10:42:14 +08001316 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1317 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1318 (__v4sf) __B,
1319 (__v4sf) __C),
1320 (__v4sf) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001321}
1322
Logan Chien55afb0a2018-10-15 10:42:14 +08001323static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001324_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1325{
Logan Chien55afb0a2018-10-15 10:42:14 +08001326 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1327 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1328 (__v4sf) __B,
1329 (__v4sf) __C),
1330 (__v4sf) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001331}
1332
Logan Chien55afb0a2018-10-15 10:42:14 +08001333static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001334_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1335{
Logan Chien55afb0a2018-10-15 10:42:14 +08001336 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1337 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1338 (__v4sf) __B,
1339 (__v4sf) __C),
1340 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001341}
1342
Logan Chien55afb0a2018-10-15 10:42:14 +08001343static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001344_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1345{
Logan Chien55afb0a2018-10-15 10:42:14 +08001346 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1347 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1348 (__v4sf) __B,
1349 -(__v4sf) __C),
1350 (__v4sf) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001351}
1352
Logan Chien55afb0a2018-10-15 10:42:14 +08001353static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001354_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1355{
Logan Chien55afb0a2018-10-15 10:42:14 +08001356 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1357 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1358 (__v4sf) __B,
1359 -(__v4sf) __C),
1360 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001361}
1362
Logan Chien55afb0a2018-10-15 10:42:14 +08001363static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001364_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1365 __m256 __C)
1366{
Logan Chien55afb0a2018-10-15 10:42:14 +08001367 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1368 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1369 (__v8sf) __B,
1370 (__v8sf) __C),
1371 (__v8sf) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001372}
1373
Logan Chien55afb0a2018-10-15 10:42:14 +08001374static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001375_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1376{
Logan Chien55afb0a2018-10-15 10:42:14 +08001377 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1378 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1379 (__v8sf) __B,
1380 (__v8sf) __C),
1381 (__v8sf) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001382}
1383
Logan Chien55afb0a2018-10-15 10:42:14 +08001384static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001385_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1386{
Logan Chien55afb0a2018-10-15 10:42:14 +08001387 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1388 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1389 (__v8sf) __B,
1390 (__v8sf) __C),
1391 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001392}
1393
Logan Chien55afb0a2018-10-15 10:42:14 +08001394static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001395_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1396{
Logan Chien55afb0a2018-10-15 10:42:14 +08001397 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1398 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1399 (__v8sf) __B,
1400 -(__v8sf) __C),
1401 (__v8sf) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001402}
1403
Logan Chien55afb0a2018-10-15 10:42:14 +08001404static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001405_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1406{
Logan Chien55afb0a2018-10-15 10:42:14 +08001407 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1408 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1409 (__v8sf) __B,
1410 -(__v8sf) __C),
1411 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001412}
1413
Logan Chien55afb0a2018-10-15 10:42:14 +08001414static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001415_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1416{
Logan Chien55afb0a2018-10-15 10:42:14 +08001417 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1418 __builtin_ia32_vfmaddpd ((__v2df) __A,
1419 (__v2df) __B,
1420 -(__v2df) __C),
1421 (__v2df) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001422}
1423
Logan Chien55afb0a2018-10-15 10:42:14 +08001424static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001425_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1426{
Logan Chien55afb0a2018-10-15 10:42:14 +08001427 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1428 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1429 (__v4df) __B,
1430 -(__v4df) __C),
1431 (__v4df) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001432}
1433
Logan Chien55afb0a2018-10-15 10:42:14 +08001434static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001435_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1436{
Logan Chien55afb0a2018-10-15 10:42:14 +08001437 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1438 __builtin_ia32_vfmaddps ((__v4sf) __A,
1439 (__v4sf) __B,
1440 -(__v4sf) __C),
1441 (__v4sf) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001442}
1443
Logan Chien55afb0a2018-10-15 10:42:14 +08001444static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001445_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1446{
Logan Chien55afb0a2018-10-15 10:42:14 +08001447 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1448 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1449 (__v8sf) __B,
1450 -(__v8sf) __C),
1451 (__v8sf) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001452}
1453
Logan Chien55afb0a2018-10-15 10:42:14 +08001454static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001455_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1456{
Logan Chien55afb0a2018-10-15 10:42:14 +08001457 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1458 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1459 (__v2df) __B,
1460 -(__v2df) __C),
1461 (__v2df) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001462}
1463
Logan Chien55afb0a2018-10-15 10:42:14 +08001464static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001465_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1466{
Logan Chien55afb0a2018-10-15 10:42:14 +08001467 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1468 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1469 (__v4df) __B,
1470 -(__v4df) __C),
1471 (__v4df) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001472}
1473
Logan Chien55afb0a2018-10-15 10:42:14 +08001474static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001475_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1476{
Logan Chien55afb0a2018-10-15 10:42:14 +08001477 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1478 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1479 (__v4sf) __B,
1480 -(__v4sf) __C),
1481 (__v4sf) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001482}
1483
Logan Chien55afb0a2018-10-15 10:42:14 +08001484static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001485_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1486{
Logan Chien55afb0a2018-10-15 10:42:14 +08001487 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1488 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1489 (__v8sf) __B,
1490 -(__v8sf) __C),
1491 (__v8sf) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001492}
1493
Logan Chien55afb0a2018-10-15 10:42:14 +08001494static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001495_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1496{
Logan Chien55afb0a2018-10-15 10:42:14 +08001497 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1498 __builtin_ia32_vfmaddpd ((__v2df) __A,
1499 -(__v2df) __B,
1500 (__v2df) __C),
1501 (__v2df) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001502}
1503
Logan Chien55afb0a2018-10-15 10:42:14 +08001504static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001505_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1506{
Logan Chien55afb0a2018-10-15 10:42:14 +08001507 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1508 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1509 -(__v4df) __B,
1510 (__v4df) __C),
1511 (__v4df) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001512}
1513
Logan Chien55afb0a2018-10-15 10:42:14 +08001514static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001515_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1516{
Logan Chien55afb0a2018-10-15 10:42:14 +08001517 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1518 __builtin_ia32_vfmaddps ((__v4sf) __A,
1519 -(__v4sf) __B,
1520 (__v4sf) __C),
1521 (__v4sf) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001522}
1523
Logan Chien55afb0a2018-10-15 10:42:14 +08001524static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001525_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1526{
Logan Chien55afb0a2018-10-15 10:42:14 +08001527 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1528 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1529 -(__v8sf) __B,
1530 (__v8sf) __C),
1531 (__v8sf) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001532}
1533
Logan Chien55afb0a2018-10-15 10:42:14 +08001534static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001535_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1536{
Logan Chien55afb0a2018-10-15 10:42:14 +08001537 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1538 __builtin_ia32_vfmaddpd ((__v2df) __A,
1539 -(__v2df) __B,
1540 -(__v2df) __C),
1541 (__v2df) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001542}
1543
Logan Chien55afb0a2018-10-15 10:42:14 +08001544static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001545_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1546{
Logan Chien55afb0a2018-10-15 10:42:14 +08001547 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1548 __builtin_ia32_vfmaddpd ((__v2df) __A,
1549 -(__v2df) __B,
1550 -(__v2df) __C),
1551 (__v2df) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001552}
1553
Logan Chien55afb0a2018-10-15 10:42:14 +08001554static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001555_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1556{
Logan Chien55afb0a2018-10-15 10:42:14 +08001557 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1558 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1559 -(__v4df) __B,
1560 -(__v4df) __C),
1561 (__v4df) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001562}
1563
Logan Chien55afb0a2018-10-15 10:42:14 +08001564static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001565_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1566{
Logan Chien55afb0a2018-10-15 10:42:14 +08001567 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1568 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1569 -(__v4df) __B,
1570 -(__v4df) __C),
1571 (__v4df) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001572}
1573
Logan Chien55afb0a2018-10-15 10:42:14 +08001574static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001575_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1576{
Logan Chien55afb0a2018-10-15 10:42:14 +08001577 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1578 __builtin_ia32_vfmaddps ((__v4sf) __A,
1579 -(__v4sf) __B,
1580 -(__v4sf) __C),
1581 (__v4sf) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001582}
1583
Logan Chien55afb0a2018-10-15 10:42:14 +08001584static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001585_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1586{
Logan Chien55afb0a2018-10-15 10:42:14 +08001587 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1588 __builtin_ia32_vfmaddps ((__v4sf) __A,
1589 -(__v4sf) __B,
1590 -(__v4sf) __C),
1591 (__v4sf) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001592}
1593
Logan Chien55afb0a2018-10-15 10:42:14 +08001594static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001595_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1596{
Logan Chien55afb0a2018-10-15 10:42:14 +08001597 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1598 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1599 -(__v8sf) __B,
1600 -(__v8sf) __C),
1601 (__v8sf) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001602}
1603
Logan Chien55afb0a2018-10-15 10:42:14 +08001604static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001605_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1606{
Logan Chien55afb0a2018-10-15 10:42:14 +08001607 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1608 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1609 -(__v8sf) __B,
1610 -(__v8sf) __C),
1611 (__v8sf) __C);
Logan Chien2833ffb2018-10-09 10:03:24 +08001612}
1613
Logan Chien55afb0a2018-10-15 10:42:14 +08001614static __inline__ __m128d __DEFAULT_FN_ATTRS128
1615_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1616 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1617 (__v2df)_mm_add_pd(__A, __B),
1618 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001619}
1620
Logan Chien55afb0a2018-10-15 10:42:14 +08001621static __inline__ __m128d __DEFAULT_FN_ATTRS128
1622_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1623 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1624 (__v2df)_mm_add_pd(__A, __B),
1625 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001626}
1627
Logan Chien55afb0a2018-10-15 10:42:14 +08001628static __inline__ __m256d __DEFAULT_FN_ATTRS256
1629_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1630 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1631 (__v4df)_mm256_add_pd(__A, __B),
1632 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001633}
1634
Logan Chien55afb0a2018-10-15 10:42:14 +08001635static __inline__ __m256d __DEFAULT_FN_ATTRS256
1636_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1637 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1638 (__v4df)_mm256_add_pd(__A, __B),
1639 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001640}
1641
Logan Chien55afb0a2018-10-15 10:42:14 +08001642static __inline__ __m128 __DEFAULT_FN_ATTRS128
1643_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1644 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1645 (__v4sf)_mm_add_ps(__A, __B),
1646 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001647}
1648
Logan Chien55afb0a2018-10-15 10:42:14 +08001649static __inline__ __m128 __DEFAULT_FN_ATTRS128
1650_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1651 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1652 (__v4sf)_mm_add_ps(__A, __B),
1653 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001654}
1655
Logan Chien55afb0a2018-10-15 10:42:14 +08001656static __inline__ __m256 __DEFAULT_FN_ATTRS256
1657_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1658 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1659 (__v8sf)_mm256_add_ps(__A, __B),
1660 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001661}
1662
Logan Chien55afb0a2018-10-15 10:42:14 +08001663static __inline__ __m256 __DEFAULT_FN_ATTRS256
1664_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1665 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1666 (__v8sf)_mm256_add_ps(__A, __B),
1667 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001668}
1669
Logan Chien55afb0a2018-10-15 10:42:14 +08001670static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001671_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1672 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1673 (__v4si) __W,
1674 (__v4si) __A);
1675}
1676
Logan Chien55afb0a2018-10-15 10:42:14 +08001677static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001678_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1679 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1680 (__v8si) __W,
1681 (__v8si) __A);
1682}
1683
Logan Chien55afb0a2018-10-15 10:42:14 +08001684static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001685_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1686 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1687 (__v2df) __W,
1688 (__v2df) __A);
1689}
1690
Logan Chien55afb0a2018-10-15 10:42:14 +08001691static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001692_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1693 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1694 (__v4df) __W,
1695 (__v4df) __A);
1696}
1697
Logan Chien55afb0a2018-10-15 10:42:14 +08001698static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001699_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1700 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1701 (__v4sf) __W,
1702 (__v4sf) __A);
1703}
1704
Logan Chien55afb0a2018-10-15 10:42:14 +08001705static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001706_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1707 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1708 (__v8sf) __W,
1709 (__v8sf) __A);
1710}
1711
Logan Chien55afb0a2018-10-15 10:42:14 +08001712static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001713_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1714 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1715 (__v2di) __W,
1716 (__v2di) __A);
1717}
1718
Logan Chien55afb0a2018-10-15 10:42:14 +08001719static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001720_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1721 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1722 (__v4di) __W,
1723 (__v4di) __A);
1724}
1725
Logan Chien55afb0a2018-10-15 10:42:14 +08001726static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001727_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1728 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1729 (__v2df) __W,
1730 (__mmask8) __U);
1731}
1732
Logan Chien55afb0a2018-10-15 10:42:14 +08001733static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001734_mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
1735 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1736 (__v2df)
1737 _mm_setzero_pd (),
1738 (__mmask8) __U);
1739}
1740
Logan Chien55afb0a2018-10-15 10:42:14 +08001741static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001742_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1743 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1744 (__v4df) __W,
1745 (__mmask8) __U);
1746}
1747
Logan Chien55afb0a2018-10-15 10:42:14 +08001748static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001749_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
1750 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1751 (__v4df)
1752 _mm256_setzero_pd (),
1753 (__mmask8) __U);
1754}
1755
Logan Chien55afb0a2018-10-15 10:42:14 +08001756static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001757_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1758 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1759 (__v2di) __W,
1760 (__mmask8) __U);
1761}
1762
Logan Chien55afb0a2018-10-15 10:42:14 +08001763static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001764_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
1765 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1766 (__v2di)
1767 _mm_setzero_si128 (),
1768 (__mmask8) __U);
1769}
1770
Logan Chien55afb0a2018-10-15 10:42:14 +08001771static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001772_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1773 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1774 (__v4di) __W,
1775 (__mmask8) __U);
1776}
1777
Logan Chien55afb0a2018-10-15 10:42:14 +08001778static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001779_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
1780 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1781 (__v4di)
1782 _mm256_setzero_si256 (),
1783 (__mmask8) __U);
1784}
1785
Logan Chien55afb0a2018-10-15 10:42:14 +08001786static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001787_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1788 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1789 (__v4sf) __W,
1790 (__mmask8) __U);
1791}
1792
Logan Chien55afb0a2018-10-15 10:42:14 +08001793static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001794_mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
1795 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1796 (__v4sf)
1797 _mm_setzero_ps (),
1798 (__mmask8) __U);
1799}
1800
Logan Chien55afb0a2018-10-15 10:42:14 +08001801static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001802_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1803 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1804 (__v8sf) __W,
1805 (__mmask8) __U);
1806}
1807
Logan Chien55afb0a2018-10-15 10:42:14 +08001808static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001809_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
1810 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1811 (__v8sf)
1812 _mm256_setzero_ps (),
1813 (__mmask8) __U);
1814}
1815
Logan Chien55afb0a2018-10-15 10:42:14 +08001816static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001817_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1818 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1819 (__v4si) __W,
1820 (__mmask8) __U);
1821}
1822
Logan Chien55afb0a2018-10-15 10:42:14 +08001823static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001824_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
1825 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1826 (__v4si)
1827 _mm_setzero_si128 (),
1828 (__mmask8) __U);
1829}
1830
Logan Chien55afb0a2018-10-15 10:42:14 +08001831static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001832_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1833 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1834 (__v8si) __W,
1835 (__mmask8) __U);
1836}
1837
Logan Chien55afb0a2018-10-15 10:42:14 +08001838static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001839_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
1840 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1841 (__v8si)
1842 _mm256_setzero_si256 (),
1843 (__mmask8) __U);
1844}
1845
Logan Chien55afb0a2018-10-15 10:42:14 +08001846static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001847_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1848 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1849 (__v2df) __A,
1850 (__mmask8) __U);
1851}
1852
Logan Chien55afb0a2018-10-15 10:42:14 +08001853static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001854_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1855 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1856 (__v4df) __A,
1857 (__mmask8) __U);
1858}
1859
Logan Chien55afb0a2018-10-15 10:42:14 +08001860static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001861_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1862 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1863 (__v2di) __A,
1864 (__mmask8) __U);
1865}
1866
Logan Chien55afb0a2018-10-15 10:42:14 +08001867static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001868_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
1869 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1870 (__v4di) __A,
1871 (__mmask8) __U);
1872}
1873
Logan Chien55afb0a2018-10-15 10:42:14 +08001874static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001875_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1876 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1877 (__v4sf) __A,
1878 (__mmask8) __U);
1879}
1880
Logan Chien55afb0a2018-10-15 10:42:14 +08001881static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001882_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
1883 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1884 (__v8sf) __A,
1885 (__mmask8) __U);
1886}
1887
Logan Chien55afb0a2018-10-15 10:42:14 +08001888static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001889_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1890 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1891 (__v4si) __A,
1892 (__mmask8) __U);
1893}
1894
Logan Chien55afb0a2018-10-15 10:42:14 +08001895static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001896_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
1897 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1898 (__v8si) __A,
1899 (__mmask8) __U);
1900}
1901
Logan Chien55afb0a2018-10-15 10:42:14 +08001902static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001903_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001904 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1905 (__v2df)_mm_cvtepi32_pd(__A),
1906 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001907}
1908
Logan Chien55afb0a2018-10-15 10:42:14 +08001909static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001910_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001911 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1912 (__v2df)_mm_cvtepi32_pd(__A),
1913 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001914}
1915
Logan Chien55afb0a2018-10-15 10:42:14 +08001916static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001917_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001918 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1919 (__v4df)_mm256_cvtepi32_pd(__A),
1920 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001921}
1922
Logan Chien55afb0a2018-10-15 10:42:14 +08001923static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001924_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001925 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1926 (__v4df)_mm256_cvtepi32_pd(__A),
1927 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001928}
1929
Logan Chien55afb0a2018-10-15 10:42:14 +08001930static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001931_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001932 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1933 (__v4sf)_mm_cvtepi32_ps(__A),
1934 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001935}
1936
Logan Chien55afb0a2018-10-15 10:42:14 +08001937static __inline__ __m128 __DEFAULT_FN_ATTRS128
1938_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
1939 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1940 (__v4sf)_mm_cvtepi32_ps(__A),
1941 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001942}
1943
Logan Chien55afb0a2018-10-15 10:42:14 +08001944static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001945_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001946 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1947 (__v8sf)_mm256_cvtepi32_ps(__A),
1948 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001949}
1950
Logan Chien55afb0a2018-10-15 10:42:14 +08001951static __inline__ __m256 __DEFAULT_FN_ATTRS256
1952_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
1953 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1954 (__v8sf)_mm256_cvtepi32_ps(__A),
1955 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08001956}
1957
Logan Chien55afb0a2018-10-15 10:42:14 +08001958static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001959_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1960 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1961 (__v4si) __W,
1962 (__mmask8) __U);
1963}
1964
Logan Chien55afb0a2018-10-15 10:42:14 +08001965static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001966_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
1967 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1968 (__v4si)
1969 _mm_setzero_si128 (),
1970 (__mmask8) __U);
1971}
1972
Logan Chien55afb0a2018-10-15 10:42:14 +08001973static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001974_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1976 (__v4si)_mm256_cvtpd_epi32(__A),
1977 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001978}
1979
Logan Chien55afb0a2018-10-15 10:42:14 +08001980static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001981_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001982 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1983 (__v4si)_mm256_cvtpd_epi32(__A),
1984 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001985}
1986
Logan Chien55afb0a2018-10-15 10:42:14 +08001987static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001988_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
1989 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1990 (__v4sf) __W,
1991 (__mmask8) __U);
1992}
1993
Logan Chien55afb0a2018-10-15 10:42:14 +08001994static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001995_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
1996 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1997 (__v4sf)
1998 _mm_setzero_ps (),
1999 (__mmask8) __U);
2000}
2001
Logan Chien55afb0a2018-10-15 10:42:14 +08002002static __inline__ __m128 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002003_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002004 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2005 (__v4sf)_mm256_cvtpd_ps(__A),
2006 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002007}
2008
Logan Chien55afb0a2018-10-15 10:42:14 +08002009static __inline__ __m128 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002010_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002011 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2012 (__v4sf)_mm256_cvtpd_ps(__A),
2013 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002014}
2015
Logan Chien55afb0a2018-10-15 10:42:14 +08002016static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002017_mm_cvtpd_epu32 (__m128d __A) {
2018 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2019 (__v4si)
2020 _mm_setzero_si128 (),
2021 (__mmask8) -1);
2022}
2023
Logan Chien55afb0a2018-10-15 10:42:14 +08002024static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002025_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2026 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2027 (__v4si) __W,
2028 (__mmask8) __U);
2029}
2030
Logan Chien55afb0a2018-10-15 10:42:14 +08002031static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002032_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
2033 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2034 (__v4si)
2035 _mm_setzero_si128 (),
2036 (__mmask8) __U);
2037}
2038
Logan Chien55afb0a2018-10-15 10:42:14 +08002039static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002040_mm256_cvtpd_epu32 (__m256d __A) {
2041 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2042 (__v4si)
2043 _mm_setzero_si128 (),
2044 (__mmask8) -1);
2045}
2046
Logan Chien55afb0a2018-10-15 10:42:14 +08002047static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002048_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2049 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2050 (__v4si) __W,
2051 (__mmask8) __U);
2052}
2053
Logan Chien55afb0a2018-10-15 10:42:14 +08002054static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002055_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
2056 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2057 (__v4si)
2058 _mm_setzero_si128 (),
2059 (__mmask8) __U);
2060}
2061
Logan Chien55afb0a2018-10-15 10:42:14 +08002062static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002063_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002064 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2065 (__v4si)_mm_cvtps_epi32(__A),
2066 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002067}
2068
Logan Chien55afb0a2018-10-15 10:42:14 +08002069static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002070_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002071 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2072 (__v4si)_mm_cvtps_epi32(__A),
2073 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002074}
2075
Logan Chien55afb0a2018-10-15 10:42:14 +08002076static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002077_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002078 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2079 (__v8si)_mm256_cvtps_epi32(__A),
2080 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002081}
2082
Logan Chien55afb0a2018-10-15 10:42:14 +08002083static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002084_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002085 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2086 (__v8si)_mm256_cvtps_epi32(__A),
2087 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002088}
2089
Logan Chien55afb0a2018-10-15 10:42:14 +08002090static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002091_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002092 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2093 (__v2df)_mm_cvtps_pd(__A),
2094 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002095}
2096
Logan Chien55afb0a2018-10-15 10:42:14 +08002097static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002098_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002099 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2100 (__v2df)_mm_cvtps_pd(__A),
2101 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002102}
2103
Logan Chien55afb0a2018-10-15 10:42:14 +08002104static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002105_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002106 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2107 (__v4df)_mm256_cvtps_pd(__A),
2108 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002109}
2110
Logan Chien55afb0a2018-10-15 10:42:14 +08002111static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002112_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002113 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2114 (__v4df)_mm256_cvtps_pd(__A),
2115 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002116}
2117
Logan Chien55afb0a2018-10-15 10:42:14 +08002118static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002119_mm_cvtps_epu32 (__m128 __A) {
2120 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2121 (__v4si)
2122 _mm_setzero_si128 (),
2123 (__mmask8) -1);
2124}
2125
Logan Chien55afb0a2018-10-15 10:42:14 +08002126static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002127_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2128 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2129 (__v4si) __W,
2130 (__mmask8) __U);
2131}
2132
Logan Chien55afb0a2018-10-15 10:42:14 +08002133static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002134_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
2135 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2136 (__v4si)
2137 _mm_setzero_si128 (),
2138 (__mmask8) __U);
2139}
2140
Logan Chien55afb0a2018-10-15 10:42:14 +08002141static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002142_mm256_cvtps_epu32 (__m256 __A) {
2143 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2144 (__v8si)
2145 _mm256_setzero_si256 (),
2146 (__mmask8) -1);
2147}
2148
Logan Chien55afb0a2018-10-15 10:42:14 +08002149static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002150_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2151 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2152 (__v8si) __W,
2153 (__mmask8) __U);
2154}
2155
Logan Chien55afb0a2018-10-15 10:42:14 +08002156static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002157_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
2158 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2159 (__v8si)
2160 _mm256_setzero_si256 (),
2161 (__mmask8) __U);
2162}
2163
Logan Chien55afb0a2018-10-15 10:42:14 +08002164static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002165_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2166 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2167 (__v4si) __W,
2168 (__mmask8) __U);
2169}
2170
Logan Chien55afb0a2018-10-15 10:42:14 +08002171static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002172_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
2173 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2174 (__v4si)
2175 _mm_setzero_si128 (),
2176 (__mmask8) __U);
2177}
2178
Logan Chien55afb0a2018-10-15 10:42:14 +08002179static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002180_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002181 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2182 (__v4si)_mm256_cvttpd_epi32(__A),
2183 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002184}
2185
Logan Chien55afb0a2018-10-15 10:42:14 +08002186static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002187_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002188 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2189 (__v4si)_mm256_cvttpd_epi32(__A),
2190 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002191}
2192
Logan Chien55afb0a2018-10-15 10:42:14 +08002193static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002194_mm_cvttpd_epu32 (__m128d __A) {
2195 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2196 (__v4si)
2197 _mm_setzero_si128 (),
2198 (__mmask8) -1);
2199}
2200
Logan Chien55afb0a2018-10-15 10:42:14 +08002201static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002202_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2203 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2204 (__v4si) __W,
2205 (__mmask8) __U);
2206}
2207
Logan Chien55afb0a2018-10-15 10:42:14 +08002208static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002209_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
2210 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2211 (__v4si)
2212 _mm_setzero_si128 (),
2213 (__mmask8) __U);
2214}
2215
Logan Chien55afb0a2018-10-15 10:42:14 +08002216static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002217_mm256_cvttpd_epu32 (__m256d __A) {
2218 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2219 (__v4si)
2220 _mm_setzero_si128 (),
2221 (__mmask8) -1);
2222}
2223
Logan Chien55afb0a2018-10-15 10:42:14 +08002224static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002225_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2226 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2227 (__v4si) __W,
2228 (__mmask8) __U);
2229}
2230
Logan Chien55afb0a2018-10-15 10:42:14 +08002231static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002232_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
2233 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2234 (__v4si)
2235 _mm_setzero_si128 (),
2236 (__mmask8) __U);
2237}
2238
Logan Chien55afb0a2018-10-15 10:42:14 +08002239static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002240_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002241 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2242 (__v4si)_mm_cvttps_epi32(__A),
2243 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002244}
2245
Logan Chien55afb0a2018-10-15 10:42:14 +08002246static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002247_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002248 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2249 (__v4si)_mm_cvttps_epi32(__A),
2250 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002251}
2252
Logan Chien55afb0a2018-10-15 10:42:14 +08002253static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002254_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002255 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2256 (__v8si)_mm256_cvttps_epi32(__A),
2257 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002258}
2259
Logan Chien55afb0a2018-10-15 10:42:14 +08002260static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002261_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2263 (__v8si)_mm256_cvttps_epi32(__A),
2264 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002265}
2266
Logan Chien55afb0a2018-10-15 10:42:14 +08002267static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002268_mm_cvttps_epu32 (__m128 __A) {
2269 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2270 (__v4si)
2271 _mm_setzero_si128 (),
2272 (__mmask8) -1);
2273}
2274
Logan Chien55afb0a2018-10-15 10:42:14 +08002275static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002276_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2277 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2278 (__v4si) __W,
2279 (__mmask8) __U);
2280}
2281
Logan Chien55afb0a2018-10-15 10:42:14 +08002282static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002283_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
2284 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2285 (__v4si)
2286 _mm_setzero_si128 (),
2287 (__mmask8) __U);
2288}
2289
Logan Chien55afb0a2018-10-15 10:42:14 +08002290static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002291_mm256_cvttps_epu32 (__m256 __A) {
2292 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2293 (__v8si)
2294 _mm256_setzero_si256 (),
2295 (__mmask8) -1);
2296}
2297
Logan Chien55afb0a2018-10-15 10:42:14 +08002298static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002299_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2300 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2301 (__v8si) __W,
2302 (__mmask8) __U);
2303}
2304
Logan Chien55afb0a2018-10-15 10:42:14 +08002305static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002306_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
2307 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2308 (__v8si)
2309 _mm256_setzero_si256 (),
2310 (__mmask8) __U);
2311}
2312
Logan Chien55afb0a2018-10-15 10:42:14 +08002313static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002314_mm_cvtepu32_pd (__m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002315 return (__m128d) __builtin_convertvector(
2316 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
Logan Chien2833ffb2018-10-09 10:03:24 +08002317}
2318
Logan Chien55afb0a2018-10-15 10:42:14 +08002319static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002320_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002321 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2322 (__v2df)_mm_cvtepu32_pd(__A),
2323 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002324}
2325
Logan Chien55afb0a2018-10-15 10:42:14 +08002326static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002327_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002328 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2329 (__v2df)_mm_cvtepu32_pd(__A),
2330 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002331}
2332
Logan Chien55afb0a2018-10-15 10:42:14 +08002333static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002334_mm256_cvtepu32_pd (__m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002335 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
Logan Chien2833ffb2018-10-09 10:03:24 +08002336}
2337
Logan Chien55afb0a2018-10-15 10:42:14 +08002338static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002339_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002340 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2341 (__v4df)_mm256_cvtepu32_pd(__A),
2342 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002343}
2344
Logan Chien55afb0a2018-10-15 10:42:14 +08002345static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002346_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002347 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2348 (__v4df)_mm256_cvtepu32_pd(__A),
2349 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002350}
2351
Logan Chien55afb0a2018-10-15 10:42:14 +08002352static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002353_mm_cvtepu32_ps (__m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002354 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
Logan Chien2833ffb2018-10-09 10:03:24 +08002355}
2356
Logan Chien55afb0a2018-10-15 10:42:14 +08002357static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002358_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002359 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2360 (__v4sf)_mm_cvtepu32_ps(__A),
2361 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002362}
2363
Logan Chien55afb0a2018-10-15 10:42:14 +08002364static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002365_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002366 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2367 (__v4sf)_mm_cvtepu32_ps(__A),
2368 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002369}
2370
Logan Chien55afb0a2018-10-15 10:42:14 +08002371static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002372_mm256_cvtepu32_ps (__m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002373 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
Logan Chien2833ffb2018-10-09 10:03:24 +08002374}
2375
Logan Chien55afb0a2018-10-15 10:42:14 +08002376static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002377_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002378 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2379 (__v8sf)_mm256_cvtepu32_ps(__A),
2380 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002381}
2382
Logan Chien55afb0a2018-10-15 10:42:14 +08002383static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002384_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002385 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2386 (__v8sf)_mm256_cvtepu32_ps(__A),
2387 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002388}
2389
Logan Chien55afb0a2018-10-15 10:42:14 +08002390static __inline__ __m128d __DEFAULT_FN_ATTRS128
2391_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2392 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2393 (__v2df)_mm_div_pd(__A, __B),
2394 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002395}
2396
Logan Chien55afb0a2018-10-15 10:42:14 +08002397static __inline__ __m128d __DEFAULT_FN_ATTRS128
2398_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2399 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2400 (__v2df)_mm_div_pd(__A, __B),
2401 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002402}
2403
Logan Chien55afb0a2018-10-15 10:42:14 +08002404static __inline__ __m256d __DEFAULT_FN_ATTRS256
2405_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2406 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2407 (__v4df)_mm256_div_pd(__A, __B),
2408 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002409}
2410
Logan Chien55afb0a2018-10-15 10:42:14 +08002411static __inline__ __m256d __DEFAULT_FN_ATTRS256
2412_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2413 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2414 (__v4df)_mm256_div_pd(__A, __B),
2415 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002416}
2417
Logan Chien55afb0a2018-10-15 10:42:14 +08002418static __inline__ __m128 __DEFAULT_FN_ATTRS128
2419_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2420 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2421 (__v4sf)_mm_div_ps(__A, __B),
2422 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002423}
2424
Logan Chien55afb0a2018-10-15 10:42:14 +08002425static __inline__ __m128 __DEFAULT_FN_ATTRS128
2426_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2427 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2428 (__v4sf)_mm_div_ps(__A, __B),
2429 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002430}
2431
Logan Chien55afb0a2018-10-15 10:42:14 +08002432static __inline__ __m256 __DEFAULT_FN_ATTRS256
2433_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2434 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2435 (__v8sf)_mm256_div_ps(__A, __B),
2436 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002437}
2438
Logan Chien55afb0a2018-10-15 10:42:14 +08002439static __inline__ __m256 __DEFAULT_FN_ATTRS256
2440_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2441 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2442 (__v8sf)_mm256_div_ps(__A, __B),
2443 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002444}
2445
Logan Chien55afb0a2018-10-15 10:42:14 +08002446static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002447_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2448 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2449 (__v2df) __W,
2450 (__mmask8) __U);
2451}
2452
Logan Chien55afb0a2018-10-15 10:42:14 +08002453static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002454_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2455 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2456 (__v2df)
2457 _mm_setzero_pd (),
2458 (__mmask8) __U);
2459}
2460
Logan Chien55afb0a2018-10-15 10:42:14 +08002461static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002462_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2463 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2464 (__v4df) __W,
2465 (__mmask8) __U);
2466}
2467
Logan Chien55afb0a2018-10-15 10:42:14 +08002468static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002469_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
2470 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2471 (__v4df)
2472 _mm256_setzero_pd (),
2473 (__mmask8) __U);
2474}
2475
Logan Chien55afb0a2018-10-15 10:42:14 +08002476static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002477_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2478 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2479 (__v2di) __W,
2480 (__mmask8) __U);
2481}
2482
Logan Chien55afb0a2018-10-15 10:42:14 +08002483static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002484_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
2485 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2486 (__v2di)
2487 _mm_setzero_si128 (),
2488 (__mmask8) __U);
2489}
2490
Logan Chien55afb0a2018-10-15 10:42:14 +08002491static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002492_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2493 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2494 (__v4di) __W,
2495 (__mmask8) __U);
2496}
2497
Logan Chien55afb0a2018-10-15 10:42:14 +08002498static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002499_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
2500 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2501 (__v4di)
2502 _mm256_setzero_si256 (),
2503 (__mmask8) __U);
2504}
2505
Logan Chien55afb0a2018-10-15 10:42:14 +08002506static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002507_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002508 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002509 (__v2df) __W,
2510 (__mmask8)
2511 __U);
2512}
2513
Logan Chien55afb0a2018-10-15 10:42:14 +08002514static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002515_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002516 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002517 (__v2df)
2518 _mm_setzero_pd (),
2519 (__mmask8)
2520 __U);
2521}
2522
Logan Chien55afb0a2018-10-15 10:42:14 +08002523static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002524_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002525 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002526 (__v4df) __W,
2527 (__mmask8)
2528 __U);
2529}
2530
Logan Chien55afb0a2018-10-15 10:42:14 +08002531static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002532_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002533 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002534 (__v4df)
2535 _mm256_setzero_pd (),
2536 (__mmask8)
2537 __U);
2538}
2539
Logan Chien55afb0a2018-10-15 10:42:14 +08002540static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002541_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002542 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002543 (__v2di) __W,
2544 (__mmask8)
2545 __U);
2546}
2547
Logan Chien55afb0a2018-10-15 10:42:14 +08002548static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002549_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002550 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002551 (__v2di)
2552 _mm_setzero_si128 (),
2553 (__mmask8)
2554 __U);
2555}
2556
Logan Chien55afb0a2018-10-15 10:42:14 +08002557static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002558_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
2559 void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002560 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002561 (__v4di) __W,
2562 (__mmask8)
2563 __U);
2564}
2565
Logan Chien55afb0a2018-10-15 10:42:14 +08002566static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002567_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002568 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002569 (__v4di)
2570 _mm256_setzero_si256 (),
2571 (__mmask8)
2572 __U);
2573}
2574
Logan Chien55afb0a2018-10-15 10:42:14 +08002575static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002576_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002577 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002578 (__v4sf) __W,
2579 (__mmask8) __U);
2580}
2581
Logan Chien55afb0a2018-10-15 10:42:14 +08002582static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002583_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002584 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002585 (__v4sf)
2586 _mm_setzero_ps (),
2587 (__mmask8)
2588 __U);
2589}
2590
Logan Chien55afb0a2018-10-15 10:42:14 +08002591static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002592_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002593 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002594 (__v8sf) __W,
2595 (__mmask8) __U);
2596}
2597
Logan Chien55afb0a2018-10-15 10:42:14 +08002598static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002599_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002600 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002601 (__v8sf)
2602 _mm256_setzero_ps (),
2603 (__mmask8)
2604 __U);
2605}
2606
Logan Chien55afb0a2018-10-15 10:42:14 +08002607static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002608_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002609 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002610 (__v4si) __W,
2611 (__mmask8)
2612 __U);
2613}
2614
Logan Chien55afb0a2018-10-15 10:42:14 +08002615static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002616_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002617 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002618 (__v4si)
2619 _mm_setzero_si128 (),
2620 (__mmask8) __U);
2621}
2622
Logan Chien55afb0a2018-10-15 10:42:14 +08002623static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002624_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
2625 void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002626 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002627 (__v8si) __W,
2628 (__mmask8)
2629 __U);
2630}
2631
Logan Chien55afb0a2018-10-15 10:42:14 +08002632static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002633_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002634 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002635 (__v8si)
2636 _mm256_setzero_si256 (),
2637 (__mmask8)
2638 __U);
2639}
2640
Logan Chien55afb0a2018-10-15 10:42:14 +08002641static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002642_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2643 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2644 (__v4sf) __W,
2645 (__mmask8) __U);
2646}
2647
Logan Chien55afb0a2018-10-15 10:42:14 +08002648static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002649_mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
2650 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2651 (__v4sf)
2652 _mm_setzero_ps (),
2653 (__mmask8) __U);
2654}
2655
Logan Chien55afb0a2018-10-15 10:42:14 +08002656static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002657_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2658 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2659 (__v8sf) __W,
2660 (__mmask8) __U);
2661}
2662
Logan Chien55afb0a2018-10-15 10:42:14 +08002663static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002664_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
2665 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2666 (__v8sf)
2667 _mm256_setzero_ps (),
2668 (__mmask8) __U);
2669}
2670
Logan Chien55afb0a2018-10-15 10:42:14 +08002671static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002672_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2673 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2674 (__v4si) __W,
2675 (__mmask8) __U);
2676}
2677
Logan Chien55afb0a2018-10-15 10:42:14 +08002678static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002679_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
2680 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2681 (__v4si)
2682 _mm_setzero_si128 (),
2683 (__mmask8) __U);
2684}
2685
Logan Chien55afb0a2018-10-15 10:42:14 +08002686static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002687_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2688 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2689 (__v8si) __W,
2690 (__mmask8) __U);
2691}
2692
Logan Chien55afb0a2018-10-15 10:42:14 +08002693static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002694_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
2695 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2696 (__v8si)
2697 _mm256_setzero_si256 (),
2698 (__mmask8) __U);
2699}
2700
Logan Chien55afb0a2018-10-15 10:42:14 +08002701static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002702_mm_getexp_pd (__m128d __A) {
2703 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2704 (__v2df)
2705 _mm_setzero_pd (),
2706 (__mmask8) -1);
2707}
2708
Logan Chien55afb0a2018-10-15 10:42:14 +08002709static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002710_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2711 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2712 (__v2df) __W,
2713 (__mmask8) __U);
2714}
2715
Logan Chien55afb0a2018-10-15 10:42:14 +08002716static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002717_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2718 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2719 (__v2df)
2720 _mm_setzero_pd (),
2721 (__mmask8) __U);
2722}
2723
Logan Chien55afb0a2018-10-15 10:42:14 +08002724static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002725_mm256_getexp_pd (__m256d __A) {
2726 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2727 (__v4df)
2728 _mm256_setzero_pd (),
2729 (__mmask8) -1);
2730}
2731
Logan Chien55afb0a2018-10-15 10:42:14 +08002732static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002733_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2734 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2735 (__v4df) __W,
2736 (__mmask8) __U);
2737}
2738
Logan Chien55afb0a2018-10-15 10:42:14 +08002739static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002740_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
2741 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2742 (__v4df)
2743 _mm256_setzero_pd (),
2744 (__mmask8) __U);
2745}
2746
Logan Chien55afb0a2018-10-15 10:42:14 +08002747static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002748_mm_getexp_ps (__m128 __A) {
2749 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2750 (__v4sf)
2751 _mm_setzero_ps (),
2752 (__mmask8) -1);
2753}
2754
Logan Chien55afb0a2018-10-15 10:42:14 +08002755static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002756_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2757 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2758 (__v4sf) __W,
2759 (__mmask8) __U);
2760}
2761
Logan Chien55afb0a2018-10-15 10:42:14 +08002762static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002763_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
2764 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2765 (__v4sf)
2766 _mm_setzero_ps (),
2767 (__mmask8) __U);
2768}
2769
Logan Chien55afb0a2018-10-15 10:42:14 +08002770static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002771_mm256_getexp_ps (__m256 __A) {
2772 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2773 (__v8sf)
2774 _mm256_setzero_ps (),
2775 (__mmask8) -1);
2776}
2777
Logan Chien55afb0a2018-10-15 10:42:14 +08002778static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002779_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2780 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2781 (__v8sf) __W,
2782 (__mmask8) __U);
2783}
2784
Logan Chien55afb0a2018-10-15 10:42:14 +08002785static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002786_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
2787 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2788 (__v8sf)
2789 _mm256_setzero_ps (),
2790 (__mmask8) __U);
2791}
2792
Logan Chien55afb0a2018-10-15 10:42:14 +08002793static __inline__ __m128d __DEFAULT_FN_ATTRS128
2794_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2795 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2796 (__v2df)_mm_max_pd(__A, __B),
2797 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002798}
2799
Logan Chien55afb0a2018-10-15 10:42:14 +08002800static __inline__ __m128d __DEFAULT_FN_ATTRS128
2801_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2802 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2803 (__v2df)_mm_max_pd(__A, __B),
2804 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002805}
2806
Logan Chien55afb0a2018-10-15 10:42:14 +08002807static __inline__ __m256d __DEFAULT_FN_ATTRS256
2808_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2809 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2810 (__v4df)_mm256_max_pd(__A, __B),
2811 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002812}
2813
Logan Chien55afb0a2018-10-15 10:42:14 +08002814static __inline__ __m256d __DEFAULT_FN_ATTRS256
2815_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2816 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2817 (__v4df)_mm256_max_pd(__A, __B),
2818 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002819}
2820
Logan Chien55afb0a2018-10-15 10:42:14 +08002821static __inline__ __m128 __DEFAULT_FN_ATTRS128
2822_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2823 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2824 (__v4sf)_mm_max_ps(__A, __B),
2825 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002826}
2827
Logan Chien55afb0a2018-10-15 10:42:14 +08002828static __inline__ __m128 __DEFAULT_FN_ATTRS128
2829_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2830 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2831 (__v4sf)_mm_max_ps(__A, __B),
2832 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002833}
2834
Logan Chien55afb0a2018-10-15 10:42:14 +08002835static __inline__ __m256 __DEFAULT_FN_ATTRS256
2836_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2837 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2838 (__v8sf)_mm256_max_ps(__A, __B),
2839 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002840}
2841
Logan Chien55afb0a2018-10-15 10:42:14 +08002842static __inline__ __m256 __DEFAULT_FN_ATTRS256
2843_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2844 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2845 (__v8sf)_mm256_max_ps(__A, __B),
2846 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002847}
2848
Logan Chien55afb0a2018-10-15 10:42:14 +08002849static __inline__ __m128d __DEFAULT_FN_ATTRS128
2850_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2851 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2852 (__v2df)_mm_min_pd(__A, __B),
2853 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002854}
2855
Logan Chien55afb0a2018-10-15 10:42:14 +08002856static __inline__ __m128d __DEFAULT_FN_ATTRS128
2857_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2858 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2859 (__v2df)_mm_min_pd(__A, __B),
2860 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002861}
2862
Logan Chien55afb0a2018-10-15 10:42:14 +08002863static __inline__ __m256d __DEFAULT_FN_ATTRS256
2864_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2865 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2866 (__v4df)_mm256_min_pd(__A, __B),
2867 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002868}
2869
Logan Chien55afb0a2018-10-15 10:42:14 +08002870static __inline__ __m256d __DEFAULT_FN_ATTRS256
2871_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2872 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2873 (__v4df)_mm256_min_pd(__A, __B),
2874 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002875}
2876
Logan Chien55afb0a2018-10-15 10:42:14 +08002877static __inline__ __m128 __DEFAULT_FN_ATTRS128
2878_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2879 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2880 (__v4sf)_mm_min_ps(__A, __B),
2881 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002882}
2883
Logan Chien55afb0a2018-10-15 10:42:14 +08002884static __inline__ __m128 __DEFAULT_FN_ATTRS128
2885_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2886 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2887 (__v4sf)_mm_min_ps(__A, __B),
2888 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002889}
2890
Logan Chien55afb0a2018-10-15 10:42:14 +08002891static __inline__ __m256 __DEFAULT_FN_ATTRS256
2892_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2893 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2894 (__v8sf)_mm256_min_ps(__A, __B),
2895 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002896}
2897
Logan Chien55afb0a2018-10-15 10:42:14 +08002898static __inline__ __m256 __DEFAULT_FN_ATTRS256
2899_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2900 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2901 (__v8sf)_mm256_min_ps(__A, __B),
2902 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002903}
2904
Logan Chien55afb0a2018-10-15 10:42:14 +08002905static __inline__ __m128d __DEFAULT_FN_ATTRS128
2906_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2907 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2908 (__v2df)_mm_mul_pd(__A, __B),
2909 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002910}
2911
Logan Chien55afb0a2018-10-15 10:42:14 +08002912static __inline__ __m128d __DEFAULT_FN_ATTRS128
2913_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2914 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2915 (__v2df)_mm_mul_pd(__A, __B),
2916 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002917}
2918
Logan Chien55afb0a2018-10-15 10:42:14 +08002919static __inline__ __m256d __DEFAULT_FN_ATTRS256
2920_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2921 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2922 (__v4df)_mm256_mul_pd(__A, __B),
2923 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002924}
2925
Logan Chien55afb0a2018-10-15 10:42:14 +08002926static __inline__ __m256d __DEFAULT_FN_ATTRS256
2927_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2928 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2929 (__v4df)_mm256_mul_pd(__A, __B),
2930 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08002931}
2932
Logan Chien55afb0a2018-10-15 10:42:14 +08002933static __inline__ __m128 __DEFAULT_FN_ATTRS128
2934_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2935 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2936 (__v4sf)_mm_mul_ps(__A, __B),
2937 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002938}
2939
Logan Chien55afb0a2018-10-15 10:42:14 +08002940static __inline__ __m128 __DEFAULT_FN_ATTRS128
2941_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2942 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2943 (__v4sf)_mm_mul_ps(__A, __B),
2944 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002945}
2946
Logan Chien55afb0a2018-10-15 10:42:14 +08002947static __inline__ __m256 __DEFAULT_FN_ATTRS256
2948_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2949 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2950 (__v8sf)_mm256_mul_ps(__A, __B),
2951 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002952}
2953
Logan Chien55afb0a2018-10-15 10:42:14 +08002954static __inline__ __m256 __DEFAULT_FN_ATTRS256
2955_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2956 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2957 (__v8sf)_mm256_mul_ps(__A, __B),
2958 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08002959}
2960
Logan Chien55afb0a2018-10-15 10:42:14 +08002961static __inline__ __m128i __DEFAULT_FN_ATTRS128
2962_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2963 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2964 (__v4si)_mm_abs_epi32(__A),
2965 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002966}
2967
Logan Chien55afb0a2018-10-15 10:42:14 +08002968static __inline__ __m128i __DEFAULT_FN_ATTRS128
2969_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
2970 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2971 (__v4si)_mm_abs_epi32(__A),
2972 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002973}
2974
Logan Chien55afb0a2018-10-15 10:42:14 +08002975static __inline__ __m256i __DEFAULT_FN_ATTRS256
2976_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2977 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2978 (__v8si)_mm256_abs_epi32(__A),
2979 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002980}
2981
Logan Chien55afb0a2018-10-15 10:42:14 +08002982static __inline__ __m256i __DEFAULT_FN_ATTRS256
2983_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
2984 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2985 (__v8si)_mm256_abs_epi32(__A),
2986 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002987}
2988
Logan Chien55afb0a2018-10-15 10:42:14 +08002989static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002990_mm_abs_epi64 (__m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002991 return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +08002992}
2993
Logan Chien55afb0a2018-10-15 10:42:14 +08002994static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002995_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08002996 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2997 (__v2di)_mm_abs_epi64(__A),
2998 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002999}
3000
Logan Chien55afb0a2018-10-15 10:42:14 +08003001static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003002_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003003 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3004 (__v2di)_mm_abs_epi64(__A),
3005 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08003006}
3007
Logan Chien55afb0a2018-10-15 10:42:14 +08003008static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003009_mm256_abs_epi64 (__m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003010 return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +08003011}
3012
Logan Chien55afb0a2018-10-15 10:42:14 +08003013static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003014_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003015 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3016 (__v4di)_mm256_abs_epi64(__A),
3017 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003018}
3019
Logan Chien55afb0a2018-10-15 10:42:14 +08003020static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003021_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003022 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3023 (__v4di)_mm256_abs_epi64(__A),
3024 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08003025}
3026
Logan Chien55afb0a2018-10-15 10:42:14 +08003027static __inline__ __m128i __DEFAULT_FN_ATTRS128
3028_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3029 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3030 (__v4si)_mm_max_epi32(__A, __B),
3031 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08003032}
3033
Logan Chien55afb0a2018-10-15 10:42:14 +08003034static __inline__ __m128i __DEFAULT_FN_ATTRS128
3035_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3036 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3037 (__v4si)_mm_max_epi32(__A, __B),
3038 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003039}
3040
Logan Chien55afb0a2018-10-15 10:42:14 +08003041static __inline__ __m256i __DEFAULT_FN_ATTRS256
3042_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3043 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3044 (__v8si)_mm256_max_epi32(__A, __B),
3045 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08003046}
3047
Logan Chien55afb0a2018-10-15 10:42:14 +08003048static __inline__ __m256i __DEFAULT_FN_ATTRS256
3049_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3050 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3051 (__v8si)_mm256_max_epi32(__A, __B),
3052 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003053}
3054
Logan Chien55afb0a2018-10-15 10:42:14 +08003055static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003056_mm_max_epi64 (__m128i __A, __m128i __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003057 return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08003058}
3059
Logan Chien55afb0a2018-10-15 10:42:14 +08003060static __inline__ __m128i __DEFAULT_FN_ATTRS128
3061_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3062 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3063 (__v2di)_mm_max_epi64(__A, __B),
3064 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08003065}
3066
Logan Chien55afb0a2018-10-15 10:42:14 +08003067static __inline__ __m128i __DEFAULT_FN_ATTRS128
3068_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3069 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3070 (__v2di)_mm_max_epi64(__A, __B),
3071 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003072}
3073
Logan Chien55afb0a2018-10-15 10:42:14 +08003074static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003075_mm256_max_epi64 (__m256i __A, __m256i __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003076 return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08003077}
3078
Logan Chien55afb0a2018-10-15 10:42:14 +08003079static __inline__ __m256i __DEFAULT_FN_ATTRS256
3080_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3081 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3082 (__v4di)_mm256_max_epi64(__A, __B),
3083 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08003084}
3085
Logan Chien55afb0a2018-10-15 10:42:14 +08003086static __inline__ __m256i __DEFAULT_FN_ATTRS256
3087_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3088 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3089 (__v4di)_mm256_max_epi64(__A, __B),
3090 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003091}
3092
Logan Chien55afb0a2018-10-15 10:42:14 +08003093static __inline__ __m128i __DEFAULT_FN_ATTRS128
3094_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3095 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3096 (__v4si)_mm_max_epu32(__A, __B),
3097 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08003098}
3099
Logan Chien55afb0a2018-10-15 10:42:14 +08003100static __inline__ __m128i __DEFAULT_FN_ATTRS128
3101_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3102 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3103 (__v4si)_mm_max_epu32(__A, __B),
3104 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003105}
3106
Logan Chien55afb0a2018-10-15 10:42:14 +08003107static __inline__ __m256i __DEFAULT_FN_ATTRS256
3108_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3109 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3110 (__v8si)_mm256_max_epu32(__A, __B),
3111 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08003112}
3113
Logan Chien55afb0a2018-10-15 10:42:14 +08003114static __inline__ __m256i __DEFAULT_FN_ATTRS256
3115_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3116 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3117 (__v8si)_mm256_max_epu32(__A, __B),
3118 (__v8si)__W);
3119}
3120
3121static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003122_mm_max_epu64 (__m128i __A, __m128i __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003123 return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08003124}
3125
Logan Chien55afb0a2018-10-15 10:42:14 +08003126static __inline__ __m128i __DEFAULT_FN_ATTRS128
3127_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3128 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3129 (__v2di)_mm_max_epu64(__A, __B),
3130 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08003131}
3132
Logan Chien55afb0a2018-10-15 10:42:14 +08003133static __inline__ __m128i __DEFAULT_FN_ATTRS128
3134_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3135 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3136 (__v2di)_mm_max_epu64(__A, __B),
3137 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003138}
3139
Logan Chien55afb0a2018-10-15 10:42:14 +08003140static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003141_mm256_max_epu64 (__m256i __A, __m256i __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003142 return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08003143}
3144
Logan Chien55afb0a2018-10-15 10:42:14 +08003145static __inline__ __m256i __DEFAULT_FN_ATTRS256
3146_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3147 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3148 (__v4di)_mm256_max_epu64(__A, __B),
3149 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08003150}
3151
Logan Chien55afb0a2018-10-15 10:42:14 +08003152static __inline__ __m256i __DEFAULT_FN_ATTRS256
3153_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3154 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3155 (__v4di)_mm256_max_epu64(__A, __B),
3156 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003157}
3158
Logan Chien55afb0a2018-10-15 10:42:14 +08003159static __inline__ __m128i __DEFAULT_FN_ATTRS128
3160_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3161 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3162 (__v4si)_mm_min_epi32(__A, __B),
3163 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08003164}
3165
Logan Chien55afb0a2018-10-15 10:42:14 +08003166static __inline__ __m128i __DEFAULT_FN_ATTRS128
3167_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3168 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3169 (__v4si)_mm_min_epi32(__A, __B),
3170 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003171}
3172
Logan Chien55afb0a2018-10-15 10:42:14 +08003173static __inline__ __m256i __DEFAULT_FN_ATTRS256
3174_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3175 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3176 (__v8si)_mm256_min_epi32(__A, __B),
3177 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08003178}
3179
Logan Chien55afb0a2018-10-15 10:42:14 +08003180static __inline__ __m256i __DEFAULT_FN_ATTRS256
3181_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3182 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3183 (__v8si)_mm256_min_epi32(__A, __B),
3184 (__v8si)__W);
3185}
3186
3187static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003188_mm_min_epi64 (__m128i __A, __m128i __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003189 return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08003190}
3191
Logan Chien55afb0a2018-10-15 10:42:14 +08003192static __inline__ __m128i __DEFAULT_FN_ATTRS128
3193_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3194 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3195 (__v2di)_mm_min_epi64(__A, __B),
3196 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003197}
3198
Logan Chien55afb0a2018-10-15 10:42:14 +08003199static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003200_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003201 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3202 (__v2di)_mm_min_epi64(__A, __B),
3203 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08003204}
3205
Logan Chien55afb0a2018-10-15 10:42:14 +08003206static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003207_mm256_min_epi64 (__m256i __A, __m256i __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003208 return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08003209}
3210
Logan Chien55afb0a2018-10-15 10:42:14 +08003211static __inline__ __m256i __DEFAULT_FN_ATTRS256
3212_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3213 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3214 (__v4di)_mm256_min_epi64(__A, __B),
3215 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003216}
3217
Logan Chien55afb0a2018-10-15 10:42:14 +08003218static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003219_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003220 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3221 (__v4di)_mm256_min_epi64(__A, __B),
3222 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08003223}
3224
Logan Chien55afb0a2018-10-15 10:42:14 +08003225static __inline__ __m128i __DEFAULT_FN_ATTRS128
3226_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3227 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3228 (__v4si)_mm_min_epu32(__A, __B),
3229 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08003230}
3231
Logan Chien55afb0a2018-10-15 10:42:14 +08003232static __inline__ __m128i __DEFAULT_FN_ATTRS128
3233_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3234 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3235 (__v4si)_mm_min_epu32(__A, __B),
3236 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003237}
3238
Logan Chien55afb0a2018-10-15 10:42:14 +08003239static __inline__ __m256i __DEFAULT_FN_ATTRS256
3240_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3241 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3242 (__v8si)_mm256_min_epu32(__A, __B),
3243 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08003244}
3245
Logan Chien55afb0a2018-10-15 10:42:14 +08003246static __inline__ __m256i __DEFAULT_FN_ATTRS256
3247_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3248 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3249 (__v8si)_mm256_min_epu32(__A, __B),
3250 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003251}
3252
Logan Chien55afb0a2018-10-15 10:42:14 +08003253static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003254_mm_min_epu64 (__m128i __A, __m128i __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003255 return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08003256}
3257
Logan Chien55afb0a2018-10-15 10:42:14 +08003258static __inline__ __m128i __DEFAULT_FN_ATTRS128
3259_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3260 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3261 (__v2di)_mm_min_epu64(__A, __B),
3262 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003263}
3264
Logan Chien55afb0a2018-10-15 10:42:14 +08003265static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003266_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003267 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3268 (__v2di)_mm_min_epu64(__A, __B),
3269 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08003270}
3271
Logan Chien55afb0a2018-10-15 10:42:14 +08003272static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003273_mm256_min_epu64 (__m256i __A, __m256i __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003274 return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08003275}
3276
Logan Chien55afb0a2018-10-15 10:42:14 +08003277static __inline__ __m256i __DEFAULT_FN_ATTRS256
3278_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3279 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3280 (__v4di)_mm256_min_epu64(__A, __B),
3281 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08003282}
3283
Logan Chien55afb0a2018-10-15 10:42:14 +08003284static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003285_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +08003286 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3287 (__v4di)_mm256_min_epu64(__A, __B),
3288 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08003289}
3290
/* Unmasked form: expands to __builtin_ia32_rndscalepd_128_mask on A with
   control value imm, a zero source vector and the all-ones mask. */
#define _mm_roundscale_pd(A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08003296
3297
/* Merge-masking form: expands to __builtin_ia32_rndscalepd_128_mask on A with
   control value imm, W as the source vector and U as the mask. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003303
3304
/* Zero-masking form: expands to __builtin_ia32_rndscalepd_128_mask on A with
   control value imm, a zero source vector and U as the mask. */
#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003310
3311
/* Unmasked form: expands to __builtin_ia32_rndscalepd_256_mask on A with
   control value imm, a zero source vector and the all-ones mask. */
#define _mm256_roundscale_pd(A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08003317
3318
/* Merge-masking form: expands to __builtin_ia32_rndscalepd_256_mask on A with
   control value imm, W as the source vector and U as the mask. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)(__m256d)(W), \
                                               (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003324
3325
/* Zero-masking form: expands to __builtin_ia32_rndscalepd_256_mask on A with
   control value imm, a zero source vector and U as the mask. */
#define _mm256_maskz_roundscale_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003331
/* Unmasked form: expands to __builtin_ia32_rndscaleps_128_mask on A with
   control value imm, a zero source vector and the all-ones mask. */
#define _mm_roundscale_ps(A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08003336
3337
/* Merge-masking form: expands to __builtin_ia32_rndscaleps_128_mask on A with
   control value imm, W as the source vector and U as the mask. */
#define _mm_mask_roundscale_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003342
3343
/* Zero-masking form: expands to __builtin_ia32_rndscaleps_128_mask on A with
   control value imm, a zero source vector and U as the mask. */
#define _mm_maskz_roundscale_ps(U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003348
/* Unmasked form: expands to __builtin_ia32_rndscaleps_256_mask on A with
   control value imm, a zero source vector and the all-ones mask. */
#define _mm256_roundscale_ps(A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08003353
/* Merge-masking form: expands to __builtin_ia32_rndscaleps_256_mask on A with
   control value imm, W as the source vector and U as the mask. */
#define _mm256_mask_roundscale_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)(__m256)(W), \
                                              (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003358
3359
/* Zero-masking form: expands to __builtin_ia32_rndscaleps_256_mask on A with
   control value imm, a zero source vector and U as the mask. */
#define _mm256_maskz_roundscale_ps(U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08003364
Logan Chien55afb0a2018-10-15 10:42:14 +08003365static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003366_mm_scalef_pd (__m128d __A, __m128d __B) {
3367 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3368 (__v2df) __B,
3369 (__v2df)
3370 _mm_setzero_pd (),
3371 (__mmask8) -1);
3372}
3373
Logan Chien55afb0a2018-10-15 10:42:14 +08003374static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003375_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3376 __m128d __B) {
3377 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3378 (__v2df) __B,
3379 (__v2df) __W,
3380 (__mmask8) __U);
3381}
3382
Logan Chien55afb0a2018-10-15 10:42:14 +08003383static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003384_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3385 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3386 (__v2df) __B,
3387 (__v2df)
3388 _mm_setzero_pd (),
3389 (__mmask8) __U);
3390}
3391
Logan Chien55afb0a2018-10-15 10:42:14 +08003392static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003393_mm256_scalef_pd (__m256d __A, __m256d __B) {
3394 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3395 (__v4df) __B,
3396 (__v4df)
3397 _mm256_setzero_pd (),
3398 (__mmask8) -1);
3399}
3400
Logan Chien55afb0a2018-10-15 10:42:14 +08003401static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003402_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3403 __m256d __B) {
3404 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3405 (__v4df) __B,
3406 (__v4df) __W,
3407 (__mmask8) __U);
3408}
3409
Logan Chien55afb0a2018-10-15 10:42:14 +08003410static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003411_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3412 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3413 (__v4df) __B,
3414 (__v4df)
3415 _mm256_setzero_pd (),
3416 (__mmask8) __U);
3417}
3418
Logan Chien55afb0a2018-10-15 10:42:14 +08003419static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003420_mm_scalef_ps (__m128 __A, __m128 __B) {
3421 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3422 (__v4sf) __B,
3423 (__v4sf)
3424 _mm_setzero_ps (),
3425 (__mmask8) -1);
3426}
3427
Logan Chien55afb0a2018-10-15 10:42:14 +08003428static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003429_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3430 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3431 (__v4sf) __B,
3432 (__v4sf) __W,
3433 (__mmask8) __U);
3434}
3435
Logan Chien55afb0a2018-10-15 10:42:14 +08003436static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08003437_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3438 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3439 (__v4sf) __B,
3440 (__v4sf)
3441 _mm_setzero_ps (),
3442 (__mmask8) __U);
3443}
3444
Logan Chien55afb0a2018-10-15 10:42:14 +08003445static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003446_mm256_scalef_ps (__m256 __A, __m256 __B) {
3447 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3448 (__v8sf) __B,
3449 (__v8sf)
3450 _mm256_setzero_ps (),
3451 (__mmask8) -1);
3452}
3453
Logan Chien55afb0a2018-10-15 10:42:14 +08003454static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003455_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3456 __m256 __B) {
3457 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3458 (__v8sf) __B,
3459 (__v8sf) __W,
3460 (__mmask8) __U);
3461}
3462
Logan Chien55afb0a2018-10-15 10:42:14 +08003463static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08003464_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3465 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3466 (__v8sf) __B,
3467 (__v8sf)
3468 _mm256_setzero_ps (),
3469 (__mmask8) __U);
3470}
3471
/* Expands to __builtin_ia32_scatterdiv2df with base addr, the all-ones mask,
   index passed as __v2di, v1 passed as __v2df, and scale as int. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003476
/* Expands to __builtin_ia32_scatterdiv2df with base addr, caller mask,
   index passed as __v2di, v1 passed as __v2df, and scale as int. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003481
/* Expands to __builtin_ia32_scatterdiv2di with base addr, the all-ones mask,
   index passed as __v2di, v1 passed as __v2di, and scale as int. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003486
/* Expands to __builtin_ia32_scatterdiv2di with base addr, caller mask,
   index passed as __v2di, v1 passed as __v2di, and scale as int. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003491
/* Expands to __builtin_ia32_scatterdiv4df with base addr, the all-ones mask,
   index passed as __v4di, v1 passed as __v4df, and scale as int. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003496
/* Expands to __builtin_ia32_scatterdiv4df with base addr, caller mask,
   index passed as __v4di, v1 passed as __v4df, and scale as int. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003501
/* Expands to __builtin_ia32_scatterdiv4di with base addr, the all-ones mask,
   index passed as __v4di, v1 passed as __v4di, and scale as int. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003506
/* Expands to __builtin_ia32_scatterdiv4di with base addr, caller mask,
   index passed as __v4di, v1 passed as __v4di, and scale as int. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003511
/* Expands to __builtin_ia32_scatterdiv4sf with base addr, the all-ones mask,
   index passed as __v2di, v1 passed as __v4sf, and scale as int. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003516
/* Expands to __builtin_ia32_scatterdiv4sf with base addr, caller mask,
   index passed as __v2di, v1 passed as __v4sf, and scale as int. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003521
/* Expands to __builtin_ia32_scatterdiv4si with base addr, the all-ones mask,
   index passed as __v2di, v1 passed as __v4si, and scale as int. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003526
/* Expands to __builtin_ia32_scatterdiv4si with base addr, caller mask,
   index passed as __v2di, v1 passed as __v4si, and scale as int. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003531
/* Expands to __builtin_ia32_scatterdiv8sf with base addr, the all-ones mask,
   index passed as __v4di, v1 passed as the 128-bit __v4sf, and scale as int. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003536
/* Expands to __builtin_ia32_scatterdiv8sf with base addr, caller mask,
   index passed as __v4di, v1 passed as the 128-bit __v4sf, and scale as int. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003541
/* Expands to __builtin_ia32_scatterdiv8si with base addr, the all-ones mask,
   index passed as __v4di, v1 passed as the 128-bit __v4si, and scale as int. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003546
/* Expands to __builtin_ia32_scatterdiv8si with base addr, caller mask,
   index passed as __v4di, v1 passed as the 128-bit __v4si, and scale as int. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003551
/* Expands to __builtin_ia32_scattersiv2df with base addr, the all-ones mask,
   index passed as __v4si, v1 passed as __v2df, and scale as int. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003556
/* Expands to __builtin_ia32_scattersiv2df with base addr, caller mask,
   index passed as __v4si, v1 passed as __v2df, and scale as int. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003561
/* Expands to __builtin_ia32_scattersiv2di with base addr, the all-ones mask,
   index passed as __v4si, v1 passed as __v2di, and scale as int. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003566
/* Expands to __builtin_ia32_scattersiv2di with base addr, caller mask,
   index passed as __v4si, v1 passed as __v2di, and scale as int. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003571
/* Expands to __builtin_ia32_scattersiv4df with base addr, the all-ones mask,
   index passed as the 128-bit __v4si, v1 passed as __v4df, and scale as int. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003576
/* Expands to __builtin_ia32_scattersiv4df with base addr, caller mask,
   index passed as the 128-bit __v4si, v1 passed as __v4df, and scale as int. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003581
/* Expands to __builtin_ia32_scattersiv4di with base addr, the all-ones mask,
   index passed as the 128-bit __v4si, v1 passed as __v4di, and scale as int. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003586
/* Expands to __builtin_ia32_scattersiv4di with base addr, caller mask,
   index passed as the 128-bit __v4si, v1 passed as __v4di, and scale as int. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003591
/* Expands to __builtin_ia32_scattersiv4sf with base addr, the all-ones mask,
   index passed as __v4si, v1 passed as __v4sf, and scale as int. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003596
/* Expands to __builtin_ia32_scattersiv4sf with base addr, caller mask,
   index passed as __v4si, v1 passed as __v4sf, and scale as int. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003601
/* Expands to __builtin_ia32_scattersiv4si with base addr, the all-ones mask,
   index passed as __v4si, v1 passed as __v4si, and scale as int. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003606
/* Expands to __builtin_ia32_scattersiv4si with base addr, caller mask,
   index passed as __v4si, v1 passed as __v4si, and scale as int. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003611
/* Expands to __builtin_ia32_scattersiv8sf with base addr, the all-ones mask,
   index passed as __v8si, v1 passed as __v8sf, and scale as int. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
                               (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003616
/* Expands to __builtin_ia32_scattersiv8sf with base addr, caller mask,
   index passed as __v8si, v1 passed as __v8sf, and scale as int. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
                               (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003621
/* Expands to __builtin_ia32_scattersiv8si with base addr, the all-ones mask,
   index passed as __v8si, v1 passed as __v8si, and scale as int. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003626
/* Expands to __builtin_ia32_scattersiv8si with base addr, caller mask,
   index passed as __v8si, v1 passed as __v8si, and scale as int. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale))
Logan Chien2833ffb2018-10-09 10:03:24 +08003631
Logan Chien55afb0a2018-10-15 10:42:14 +08003632 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3633 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3634 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3635 (__v2df)_mm_sqrt_pd(__A),
3636 (__v2df)__W);
3637 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003638
Logan Chien55afb0a2018-10-15 10:42:14 +08003639 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3640 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3641 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3642 (__v2df)_mm_sqrt_pd(__A),
3643 (__v2df)_mm_setzero_pd());
3644 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003645
Logan Chien55afb0a2018-10-15 10:42:14 +08003646 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3647 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3648 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3649 (__v4df)_mm256_sqrt_pd(__A),
3650 (__v4df)__W);
3651 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003652
Logan Chien55afb0a2018-10-15 10:42:14 +08003653 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3654 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3655 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3656 (__v4df)_mm256_sqrt_pd(__A),
3657 (__v4df)_mm256_setzero_pd());
3658 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003659
Logan Chien55afb0a2018-10-15 10:42:14 +08003660 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3661 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3662 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3663 (__v4sf)_mm_sqrt_ps(__A),
3664 (__v4sf)__W);
3665 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003666
Logan Chien55afb0a2018-10-15 10:42:14 +08003667 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3668 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3669 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3670 (__v4sf)_mm_sqrt_ps(__A),
3671 (__v4sf)_mm_setzero_ps());
3672 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003673
Logan Chien55afb0a2018-10-15 10:42:14 +08003674 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3675 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3676 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3677 (__v8sf)_mm256_sqrt_ps(__A),
3678 (__v8sf)__W);
3679 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003680
Logan Chien55afb0a2018-10-15 10:42:14 +08003681 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3682 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
3683 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3684 (__v8sf)_mm256_sqrt_ps(__A),
3685 (__v8sf)_mm256_setzero_ps());
3686 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003687
Logan Chien55afb0a2018-10-15 10:42:14 +08003688 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3689 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3690 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3691 (__v2df)_mm_sub_pd(__A, __B),
3692 (__v2df)__W);
3693 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003694
Logan Chien55afb0a2018-10-15 10:42:14 +08003695 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3696 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3697 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3698 (__v2df)_mm_sub_pd(__A, __B),
3699 (__v2df)_mm_setzero_pd());
3700 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003701
Logan Chien55afb0a2018-10-15 10:42:14 +08003702 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3703 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3704 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3705 (__v4df)_mm256_sub_pd(__A, __B),
3706 (__v4df)__W);
3707 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003708
Logan Chien55afb0a2018-10-15 10:42:14 +08003709 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3710 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3711 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3712 (__v4df)_mm256_sub_pd(__A, __B),
3713 (__v4df)_mm256_setzero_pd());
3714 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003715
Logan Chien55afb0a2018-10-15 10:42:14 +08003716 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3717 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3718 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3719 (__v4sf)_mm_sub_ps(__A, __B),
3720 (__v4sf)__W);
3721 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003722
Logan Chien55afb0a2018-10-15 10:42:14 +08003723 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3724 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3725 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3726 (__v4sf)_mm_sub_ps(__A, __B),
3727 (__v4sf)_mm_setzero_ps());
3728 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003729
Logan Chien55afb0a2018-10-15 10:42:14 +08003730 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3731 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3732 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3733 (__v8sf)_mm256_sub_ps(__A, __B),
3734 (__v8sf)__W);
3735 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003736
Logan Chien55afb0a2018-10-15 10:42:14 +08003737 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3738 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3739 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3740 (__v8sf)_mm256_sub_ps(__A, __B),
3741 (__v8sf)_mm256_setzero_ps());
3742 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003743
Logan Chien55afb0a2018-10-15 10:42:14 +08003744 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3745 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3746 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3747 (__v4si)__B);
3748 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003749
Logan Chien55afb0a2018-10-15 10:42:14 +08003750 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3751 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3752 __m128i __B) {
3753 return (__m128i)__builtin_ia32_selectd_128(__U,
3754 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3755 (__v4si)__A);
3756 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003757
Logan Chien55afb0a2018-10-15 10:42:14 +08003758 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3759 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3760 __m128i __B) {
3761 return (__m128i)__builtin_ia32_selectd_128(__U,
3762 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3763 (__v4si)__I);
3764 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003765
Logan Chien55afb0a2018-10-15 10:42:14 +08003766 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3767 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3768 __m128i __B) {
3769 return (__m128i)__builtin_ia32_selectd_128(__U,
3770 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3771 (__v4si)_mm_setzero_si128());
3772 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003773
Logan Chien55afb0a2018-10-15 10:42:14 +08003774 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3775 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3776 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3777 (__v8si) __B);
3778 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003779
Logan Chien55afb0a2018-10-15 10:42:14 +08003780 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3781 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3782 __m256i __B) {
3783 return (__m256i)__builtin_ia32_selectd_256(__U,
3784 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3785 (__v8si)__A);
3786 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003787
Logan Chien55afb0a2018-10-15 10:42:14 +08003788 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3789 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3790 __m256i __B) {
3791 return (__m256i)__builtin_ia32_selectd_256(__U,
3792 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3793 (__v8si)__I);
3794 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003795
Logan Chien55afb0a2018-10-15 10:42:14 +08003796 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3797 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3798 __m256i __B) {
3799 return (__m256i)__builtin_ia32_selectd_256(__U,
3800 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3801 (__v8si)_mm256_setzero_si256());
3802 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003803
Logan Chien55afb0a2018-10-15 10:42:14 +08003804 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3805 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3806 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3807 (__v2df)__B);
3808 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003809
Logan Chien55afb0a2018-10-15 10:42:14 +08003810 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3811 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
3812 return (__m128d)__builtin_ia32_selectpd_128(__U,
3813 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3814 (__v2df)__A);
3815 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003816
Logan Chien55afb0a2018-10-15 10:42:14 +08003817 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3818 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
3819 return (__m128d)__builtin_ia32_selectpd_128(__U,
3820 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3821 (__v2df)(__m128d)__I);
3822 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003823
Logan Chien55afb0a2018-10-15 10:42:14 +08003824 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3825 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
3826 return (__m128d)__builtin_ia32_selectpd_128(__U,
3827 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3828 (__v2df)_mm_setzero_pd());
3829 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003830
Logan Chien55afb0a2018-10-15 10:42:14 +08003831 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3832 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3833 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3834 (__v4df)__B);
3835 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003836
Logan Chien55afb0a2018-10-15 10:42:14 +08003837 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3838 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3839 __m256d __B) {
3840 return (__m256d)__builtin_ia32_selectpd_256(__U,
3841 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3842 (__v4df)__A);
3843 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003844
Logan Chien55afb0a2018-10-15 10:42:14 +08003845 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3846 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3847 __m256d __B) {
3848 return (__m256d)__builtin_ia32_selectpd_256(__U,
3849 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3850 (__v4df)(__m256d)__I);
3851 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003852
Logan Chien55afb0a2018-10-15 10:42:14 +08003853 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3854 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3855 __m256d __B) {
3856 return (__m256d)__builtin_ia32_selectpd_256(__U,
3857 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3858 (__v4df)_mm256_setzero_pd());
3859 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003860
Logan Chien55afb0a2018-10-15 10:42:14 +08003861 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3862 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3863 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3864 (__v4sf)__B);
3865 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003866
Logan Chien55afb0a2018-10-15 10:42:14 +08003867 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3868 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3869 return (__m128)__builtin_ia32_selectps_128(__U,
3870 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3871 (__v4sf)__A);
3872 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003873
Logan Chien55afb0a2018-10-15 10:42:14 +08003874 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3875 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3876 return (__m128)__builtin_ia32_selectps_128(__U,
3877 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3878 (__v4sf)(__m128)__I);
3879 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003880
Logan Chien55afb0a2018-10-15 10:42:14 +08003881 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3882 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3883 return (__m128)__builtin_ia32_selectps_128(__U,
3884 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3885 (__v4sf)_mm_setzero_ps());
3886 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003887
Logan Chien55afb0a2018-10-15 10:42:14 +08003888 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3889 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3890 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3891 (__v8sf) __B);
3892 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003893
Logan Chien55afb0a2018-10-15 10:42:14 +08003894 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3895 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
3896 return (__m256)__builtin_ia32_selectps_256(__U,
3897 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3898 (__v8sf)__A);
3899 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003900
Logan Chien55afb0a2018-10-15 10:42:14 +08003901 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3902 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3903 __m256 __B) {
3904 return (__m256)__builtin_ia32_selectps_256(__U,
3905 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3906 (__v8sf)(__m256)__I);
3907 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003908
Logan Chien55afb0a2018-10-15 10:42:14 +08003909 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3910 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3911 __m256 __B) {
3912 return (__m256)__builtin_ia32_selectps_256(__U,
3913 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3914 (__v8sf)_mm256_setzero_ps());
3915 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003916
Logan Chien55afb0a2018-10-15 10:42:14 +08003917 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3918 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3919 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3920 (__v2di)__B);
3921 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003922
Logan Chien55afb0a2018-10-15 10:42:14 +08003923 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3924 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3925 __m128i __B) {
3926 return (__m128i)__builtin_ia32_selectq_128(__U,
3927 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3928 (__v2di)__A);
3929 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003930
Logan Chien55afb0a2018-10-15 10:42:14 +08003931 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3932 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3933 __m128i __B) {
3934 return (__m128i)__builtin_ia32_selectq_128(__U,
3935 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3936 (__v2di)__I);
3937 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003938
Logan Chien55afb0a2018-10-15 10:42:14 +08003939 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3940 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3941 __m128i __B) {
3942 return (__m128i)__builtin_ia32_selectq_128(__U,
3943 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3944 (__v2di)_mm_setzero_si128());
3945 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003946
Logan Chien2833ffb2018-10-09 10:03:24 +08003947
Logan Chien55afb0a2018-10-15 10:42:14 +08003948 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3949 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3950 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3951 (__v4di) __B);
3952 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003953
Logan Chien55afb0a2018-10-15 10:42:14 +08003954 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3955 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3956 __m256i __B) {
3957 return (__m256i)__builtin_ia32_selectq_256(__U,
3958 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3959 (__v4di)__A);
3960 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003961
Logan Chien55afb0a2018-10-15 10:42:14 +08003962 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3963 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3964 __m256i __B) {
3965 return (__m256i)__builtin_ia32_selectq_256(__U,
3966 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3967 (__v4di)__I);
3968 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003969
Logan Chien55afb0a2018-10-15 10:42:14 +08003970 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3971 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3972 __m256i __B) {
3973 return (__m256i)__builtin_ia32_selectq_256(__U,
3974 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3975 (__v4di)_mm256_setzero_si256());
3976 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003977
Logan Chien55afb0a2018-10-15 10:42:14 +08003978 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3979 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3980 {
3981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3982 (__v4si)_mm_cvtepi8_epi32(__A),
3983 (__v4si)__W);
3984 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003985
Logan Chien55afb0a2018-10-15 10:42:14 +08003986 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3987 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
3988 {
3989 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3990 (__v4si)_mm_cvtepi8_epi32(__A),
3991 (__v4si)_mm_setzero_si128());
3992 }
Logan Chien2833ffb2018-10-09 10:03:24 +08003993
Logan Chien55afb0a2018-10-15 10:42:14 +08003994 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3995 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3996 {
3997 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3998 (__v8si)_mm256_cvtepi8_epi32(__A),
3999 (__v8si)__W);
4000 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004001
Logan Chien55afb0a2018-10-15 10:42:14 +08004002 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4003 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
4004 {
4005 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4006 (__v8si)_mm256_cvtepi8_epi32(__A),
4007 (__v8si)_mm256_setzero_si256());
4008 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004009
Logan Chien55afb0a2018-10-15 10:42:14 +08004010 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4011 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4012 {
4013 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4014 (__v2di)_mm_cvtepi8_epi64(__A),
4015 (__v2di)__W);
4016 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004017
Logan Chien55afb0a2018-10-15 10:42:14 +08004018 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4019 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4020 {
4021 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4022 (__v2di)_mm_cvtepi8_epi64(__A),
4023 (__v2di)_mm_setzero_si128());
4024 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004025
Logan Chien55afb0a2018-10-15 10:42:14 +08004026 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4027 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4028 {
4029 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4030 (__v4di)_mm256_cvtepi8_epi64(__A),
4031 (__v4di)__W);
4032 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004033
Logan Chien55afb0a2018-10-15 10:42:14 +08004034 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4035 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4036 {
4037 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4038 (__v4di)_mm256_cvtepi8_epi64(__A),
4039 (__v4di)_mm256_setzero_si256());
4040 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004041
Logan Chien55afb0a2018-10-15 10:42:14 +08004042 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4043 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4044 {
4045 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4046 (__v2di)_mm_cvtepi32_epi64(__X),
4047 (__v2di)__W);
4048 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004049
Logan Chien55afb0a2018-10-15 10:42:14 +08004050 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4051 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4052 {
4053 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4054 (__v2di)_mm_cvtepi32_epi64(__X),
4055 (__v2di)_mm_setzero_si128());
4056 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004057
Logan Chien55afb0a2018-10-15 10:42:14 +08004058 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4059 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4060 {
4061 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4062 (__v4di)_mm256_cvtepi32_epi64(__X),
4063 (__v4di)__W);
4064 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004065
Logan Chien55afb0a2018-10-15 10:42:14 +08004066 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4067 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4068 {
4069 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4070 (__v4di)_mm256_cvtepi32_epi64(__X),
4071 (__v4di)_mm256_setzero_si256());
4072 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004073
Logan Chien55afb0a2018-10-15 10:42:14 +08004074 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4075 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4076 {
4077 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4078 (__v4si)_mm_cvtepi16_epi32(__A),
4079 (__v4si)__W);
4080 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004081
Logan Chien55afb0a2018-10-15 10:42:14 +08004082 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4083 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
4084 {
4085 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4086 (__v4si)_mm_cvtepi16_epi32(__A),
4087 (__v4si)_mm_setzero_si128());
4088 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004089
Logan Chien55afb0a2018-10-15 10:42:14 +08004090 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4091 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4092 {
4093 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4094 (__v8si)_mm256_cvtepi16_epi32(__A),
4095 (__v8si)__W);
4096 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004097
Logan Chien55afb0a2018-10-15 10:42:14 +08004098 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4099 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
4100 {
4101 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4102 (__v8si)_mm256_cvtepi16_epi32(__A),
4103 (__v8si)_mm256_setzero_si256());
4104 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004105
Logan Chien55afb0a2018-10-15 10:42:14 +08004106 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4107 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4108 {
4109 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4110 (__v2di)_mm_cvtepi16_epi64(__A),
4111 (__v2di)__W);
4112 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004113
Logan Chien55afb0a2018-10-15 10:42:14 +08004114 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4115 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4116 {
4117 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4118 (__v2di)_mm_cvtepi16_epi64(__A),
4119 (__v2di)_mm_setzero_si128());
4120 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004121
Logan Chien55afb0a2018-10-15 10:42:14 +08004122 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4123 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4124 {
4125 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4126 (__v4di)_mm256_cvtepi16_epi64(__A),
4127 (__v4di)__W);
4128 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004129
Logan Chien55afb0a2018-10-15 10:42:14 +08004130 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4131 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4132 {
4133 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4134 (__v4di)_mm256_cvtepi16_epi64(__A),
4135 (__v4di)_mm256_setzero_si256());
4136 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004137
4138
Logan Chien55afb0a2018-10-15 10:42:14 +08004139 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4140 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4141 {
4142 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4143 (__v4si)_mm_cvtepu8_epi32(__A),
4144 (__v4si)__W);
4145 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004146
Logan Chien55afb0a2018-10-15 10:42:14 +08004147 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4148 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4149 {
4150 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4151 (__v4si)_mm_cvtepu8_epi32(__A),
4152 (__v4si)_mm_setzero_si128());
4153 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004154
Logan Chien55afb0a2018-10-15 10:42:14 +08004155 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4156 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4157 {
4158 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4159 (__v8si)_mm256_cvtepu8_epi32(__A),
4160 (__v8si)__W);
4161 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004162
Logan Chien55afb0a2018-10-15 10:42:14 +08004163 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4164 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4165 {
4166 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4167 (__v8si)_mm256_cvtepu8_epi32(__A),
4168 (__v8si)_mm256_setzero_si256());
4169 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004170
Logan Chien55afb0a2018-10-15 10:42:14 +08004171 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4172 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4173 {
4174 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4175 (__v2di)_mm_cvtepu8_epi64(__A),
4176 (__v2di)__W);
4177 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004178
Logan Chien55afb0a2018-10-15 10:42:14 +08004179 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4180 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4181 {
4182 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4183 (__v2di)_mm_cvtepu8_epi64(__A),
4184 (__v2di)_mm_setzero_si128());
4185 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004186
Logan Chien55afb0a2018-10-15 10:42:14 +08004187 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4188 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4189 {
4190 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4191 (__v4di)_mm256_cvtepu8_epi64(__A),
4192 (__v4di)__W);
4193 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004194
Logan Chien55afb0a2018-10-15 10:42:14 +08004195 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4196 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
4197 {
4198 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4199 (__v4di)_mm256_cvtepu8_epi64(__A),
4200 (__v4di)_mm256_setzero_si256());
4201 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004202
Logan Chien55afb0a2018-10-15 10:42:14 +08004203 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4204 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4205 {
4206 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4207 (__v2di)_mm_cvtepu32_epi64(__X),
4208 (__v2di)__W);
4209 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004210
Logan Chien55afb0a2018-10-15 10:42:14 +08004211 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4212 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4213 {
4214 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4215 (__v2di)_mm_cvtepu32_epi64(__X),
4216 (__v2di)_mm_setzero_si128());
4217 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004218
Logan Chien55afb0a2018-10-15 10:42:14 +08004219 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4220 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4221 {
4222 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4223 (__v4di)_mm256_cvtepu32_epi64(__X),
4224 (__v4di)__W);
4225 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004226
Logan Chien55afb0a2018-10-15 10:42:14 +08004227 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4228 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4229 {
4230 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4231 (__v4di)_mm256_cvtepu32_epi64(__X),
4232 (__v4di)_mm256_setzero_si256());
4233 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004234
Logan Chien55afb0a2018-10-15 10:42:14 +08004235 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4236 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4237 {
4238 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4239 (__v4si)_mm_cvtepu16_epi32(__A),
4240 (__v4si)__W);
4241 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004242
Logan Chien55afb0a2018-10-15 10:42:14 +08004243 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4244 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4245 {
4246 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4247 (__v4si)_mm_cvtepu16_epi32(__A),
4248 (__v4si)_mm_setzero_si128());
4249 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004250
Logan Chien55afb0a2018-10-15 10:42:14 +08004251 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4252 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4253 {
4254 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4255 (__v8si)_mm256_cvtepu16_epi32(__A),
4256 (__v8si)__W);
4257 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004258
Logan Chien55afb0a2018-10-15 10:42:14 +08004259 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4260 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4261 {
4262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4263 (__v8si)_mm256_cvtepu16_epi32(__A),
4264 (__v8si)_mm256_setzero_si256());
4265 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004266
Logan Chien55afb0a2018-10-15 10:42:14 +08004267 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4268 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4269 {
4270 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4271 (__v2di)_mm_cvtepu16_epi64(__A),
4272 (__v2di)__W);
4273 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004274
Logan Chien55afb0a2018-10-15 10:42:14 +08004275 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4276 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4277 {
4278 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4279 (__v2di)_mm_cvtepu16_epi64(__A),
4280 (__v2di)_mm_setzero_si128());
4281 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004282
Logan Chien55afb0a2018-10-15 10:42:14 +08004283 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4284 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4285 {
4286 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4287 (__v4di)_mm256_cvtepu16_epi64(__A),
4288 (__v4di)__W);
4289 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004290
Logan Chien55afb0a2018-10-15 10:42:14 +08004291 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4292 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4293 {
4294 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4295 (__v4di)_mm256_cvtepu16_epi64(__A),
4296 (__v4di)_mm256_setzero_si256());
4297 }
Logan Chien2833ffb2018-10-09 10:03:24 +08004298
4299
/* Rotates each 32-bit lane of a left by the count b (cast to int and handed
 * to __builtin_ia32_prold128). */
#define _mm_rol_epi32(a, b) \
  ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004302
/* Merge-masked 32-bit rotate-left: lanes whose bit in u is set take
 * _mm_rol_epi32(a, b); the remaining lanes are copied from w. */
#define _mm_mask_rol_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004307
/* Zero-masked 32-bit rotate-left: lanes whose bit in u is set take
 * _mm_rol_epi32(a, b); the remaining lanes are zeroed. */
#define _mm_maskz_rol_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
Logan Chien2833ffb2018-10-09 10:03:24 +08004312
/* Rotates each 32-bit lane of a left by the count b (cast to int and handed
 * to __builtin_ia32_prold256). */
#define _mm256_rol_epi32(a, b) \
  ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004315
/* Merge-masked 32-bit rotate-left: lanes whose bit in u is set take
 * _mm256_rol_epi32(a, b); the remaining lanes are copied from w. */
#define _mm256_mask_rol_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004320
/* Zero-masked 32-bit rotate-left: lanes whose bit in u is set take
 * _mm256_rol_epi32(a, b); the remaining lanes are zeroed. */
#define _mm256_maskz_rol_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
Logan Chien2833ffb2018-10-09 10:03:24 +08004325
/* Rotates each 64-bit lane of a left by the count b (cast to int and handed
 * to __builtin_ia32_prolq128). */
#define _mm_rol_epi64(a, b) \
  ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004328
/* Merge-masked 64-bit rotate-left: lanes whose bit in u is set take
 * _mm_rol_epi64(a, b); the remaining lanes are copied from w. */
#define _mm_mask_rol_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004333
/* Zero-masked 64-bit rotate-left: lanes whose bit in u is set take
 * _mm_rol_epi64(a, b); the remaining lanes are zeroed. */
#define _mm_maskz_rol_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
Logan Chien2833ffb2018-10-09 10:03:24 +08004338
/* Rotates each 64-bit lane of a left by the count b (cast to int and handed
 * to __builtin_ia32_prolq256). */
#define _mm256_rol_epi64(a, b) \
  ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004341
/* Merge-masked 64-bit rotate-left: lanes whose bit in u is set take
 * _mm256_rol_epi64(a, b); the remaining lanes are copied from w. */
#define _mm256_mask_rol_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004346
/* Zero-masked 64-bit rotate-left: lanes whose bit in u is set take
 * _mm256_rol_epi64(a, b); the remaining lanes are zeroed. */
#define _mm256_maskz_rol_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
Logan Chien2833ffb2018-10-09 10:03:24 +08004351
Logan Chien55afb0a2018-10-15 10:42:14 +08004352static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08004353_mm_rolv_epi32 (__m128i __A, __m128i __B)
4354{
Logan Chien55afb0a2018-10-15 10:42:14 +08004355 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08004356}
4357
Logan Chien55afb0a2018-10-15 10:42:14 +08004358static __inline__ __m128i __DEFAULT_FN_ATTRS128
4359_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004360{
Logan Chien55afb0a2018-10-15 10:42:14 +08004361 return (__m128i)__builtin_ia32_selectd_128(__U,
4362 (__v4si)_mm_rolv_epi32(__A, __B),
4363 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004364}
4365
Logan Chien55afb0a2018-10-15 10:42:14 +08004366static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08004367_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4368{
Logan Chien55afb0a2018-10-15 10:42:14 +08004369 return (__m128i)__builtin_ia32_selectd_128(__U,
4370 (__v4si)_mm_rolv_epi32(__A, __B),
4371 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004372}
4373
Logan Chien55afb0a2018-10-15 10:42:14 +08004374static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08004375_mm256_rolv_epi32 (__m256i __A, __m256i __B)
4376{
Logan Chien55afb0a2018-10-15 10:42:14 +08004377 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08004378}
4379
Logan Chien55afb0a2018-10-15 10:42:14 +08004380static __inline__ __m256i __DEFAULT_FN_ATTRS256
4381_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004382{
Logan Chien55afb0a2018-10-15 10:42:14 +08004383 return (__m256i)__builtin_ia32_selectd_256(__U,
4384 (__v8si)_mm256_rolv_epi32(__A, __B),
4385 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004386}
4387
Logan Chien55afb0a2018-10-15 10:42:14 +08004388static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08004389_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4390{
Logan Chien55afb0a2018-10-15 10:42:14 +08004391 return (__m256i)__builtin_ia32_selectd_256(__U,
4392 (__v8si)_mm256_rolv_epi32(__A, __B),
4393 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004394}
4395
Logan Chien55afb0a2018-10-15 10:42:14 +08004396static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08004397_mm_rolv_epi64 (__m128i __A, __m128i __B)
4398{
Logan Chien55afb0a2018-10-15 10:42:14 +08004399 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08004400}
4401
Logan Chien55afb0a2018-10-15 10:42:14 +08004402static __inline__ __m128i __DEFAULT_FN_ATTRS128
4403_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004404{
Logan Chien55afb0a2018-10-15 10:42:14 +08004405 return (__m128i)__builtin_ia32_selectq_128(__U,
4406 (__v2di)_mm_rolv_epi64(__A, __B),
4407 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004408}
4409
Logan Chien55afb0a2018-10-15 10:42:14 +08004410static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08004411_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4412{
Logan Chien55afb0a2018-10-15 10:42:14 +08004413 return (__m128i)__builtin_ia32_selectq_128(__U,
4414 (__v2di)_mm_rolv_epi64(__A, __B),
4415 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004416}
4417
Logan Chien55afb0a2018-10-15 10:42:14 +08004418static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08004419_mm256_rolv_epi64 (__m256i __A, __m256i __B)
4420{
Logan Chien55afb0a2018-10-15 10:42:14 +08004421 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08004422}
4423
Logan Chien55afb0a2018-10-15 10:42:14 +08004424static __inline__ __m256i __DEFAULT_FN_ATTRS256
4425_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004426{
Logan Chien55afb0a2018-10-15 10:42:14 +08004427 return (__m256i)__builtin_ia32_selectq_256(__U,
4428 (__v4di)_mm256_rolv_epi64(__A, __B),
4429 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004430}
4431
Logan Chien55afb0a2018-10-15 10:42:14 +08004432static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08004433_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4434{
Logan Chien55afb0a2018-10-15 10:42:14 +08004435 return (__m256i)__builtin_ia32_selectq_256(__U,
4436 (__v4di)_mm256_rolv_epi64(__A, __B),
4437 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004438}
4439
/* Expands to __builtin_ia32_prord128 applied to (A) viewed as __v4si and
 * (B) converted to int; the result is cast to __m128i. */
#define _mm_ror_epi32(A, B) \
  ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(A), (int)(B)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004442
/* Masked _mm_ror_epi32: selects between _mm_ror_epi32((A), (B)) and (W)
 * via __builtin_ia32_selectd_128 with (U) converted to __mmask8. */
#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_ror_epi32((A), (B)), \
                                       (__v4si)(__m128i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004447
/* Zero-masked _mm_ror_epi32: selects between _mm_ror_epi32((A), (B)) and
 * _mm_setzero_si128() via __builtin_ia32_selectd_128 with mask (U). */
#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_ror_epi32((A), (B)), \
                                       (__v4si)_mm_setzero_si128()))
Logan Chien2833ffb2018-10-09 10:03:24 +08004452
/* Expands to __builtin_ia32_prord256 applied to (A) viewed as __v8si and
 * (B) converted to int; the result is cast to __m256i. */
#define _mm256_ror_epi32(A, B) \
  ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(A), (int)(B)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004455
/* Masked _mm256_ror_epi32: selects between _mm256_ror_epi32((A), (B)) and
 * (W) via __builtin_ia32_selectd_256 with (U) converted to __mmask8. */
#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_ror_epi32((A), (B)), \
                                       (__v8si)(__m256i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004460
/* Zero-masked _mm256_ror_epi32: selects between _mm256_ror_epi32((A), (B))
 * and _mm256_setzero_si256() via __builtin_ia32_selectd_256 with mask (U). */
#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_ror_epi32((A), (B)), \
                                       (__v8si)_mm256_setzero_si256()))
Logan Chien2833ffb2018-10-09 10:03:24 +08004465
/* Expands to __builtin_ia32_prorq128 applied to (A) viewed as __v2di and
 * (B) converted to int; the result is cast to __m128i. */
#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(A), (int)(B)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004468
/* Masked _mm_ror_epi64: selects between _mm_ror_epi64((A), (B)) and (W)
 * via __builtin_ia32_selectq_128 with (U) converted to __mmask8. */
#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                       (__v2di)_mm_ror_epi64((A), (B)), \
                                       (__v2di)(__m128i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004473
/* Zero-masked _mm_ror_epi64: selects between _mm_ror_epi64((A), (B)) and
 * _mm_setzero_si128() via __builtin_ia32_selectq_128 with mask (U). */
#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                       (__v2di)_mm_ror_epi64((A), (B)), \
                                       (__v2di)_mm_setzero_si128()))
Logan Chien2833ffb2018-10-09 10:03:24 +08004478
/* Expands to __builtin_ia32_prorq256 applied to (A) viewed as __v4di and
 * (B) converted to int; the result is cast to __m256i. */
#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(A), (int)(B)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004481
/* Masked _mm256_ror_epi64: selects between _mm256_ror_epi64((A), (B)) and
 * (W) via __builtin_ia32_selectq_256 with (U) converted to __mmask8. */
#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_ror_epi64((A), (B)), \
                                       (__v4di)(__m256i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08004486
/* Zero-masked _mm256_ror_epi64: selects between _mm256_ror_epi64((A), (B))
 * and _mm256_setzero_si256() via __builtin_ia32_selectq_256 with mask (U). */
#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_ror_epi64((A), (B)), \
                                       (__v4di)_mm256_setzero_si256()))
Logan Chien2833ffb2018-10-09 10:03:24 +08004491
Logan Chien55afb0a2018-10-15 10:42:14 +08004492static __inline__ __m128i __DEFAULT_FN_ATTRS128
4493_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004494{
Logan Chien55afb0a2018-10-15 10:42:14 +08004495 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4496 (__v4si)_mm_sll_epi32(__A, __B),
4497 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004498}
4499
Logan Chien55afb0a2018-10-15 10:42:14 +08004500static __inline__ __m128i __DEFAULT_FN_ATTRS128
4501_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004502{
Logan Chien55afb0a2018-10-15 10:42:14 +08004503 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4504 (__v4si)_mm_sll_epi32(__A, __B),
4505 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004506}
4507
Logan Chien55afb0a2018-10-15 10:42:14 +08004508static __inline__ __m256i __DEFAULT_FN_ATTRS256
4509_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004510{
Logan Chien55afb0a2018-10-15 10:42:14 +08004511 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4512 (__v8si)_mm256_sll_epi32(__A, __B),
4513 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004514}
4515
Logan Chien55afb0a2018-10-15 10:42:14 +08004516static __inline__ __m256i __DEFAULT_FN_ATTRS256
4517_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004518{
Logan Chien55afb0a2018-10-15 10:42:14 +08004519 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4520 (__v8si)_mm256_sll_epi32(__A, __B),
4521 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004522}
4523
Logan Chien55afb0a2018-10-15 10:42:14 +08004524static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004525_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004526{
Logan Chien55afb0a2018-10-15 10:42:14 +08004527 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4528 (__v4si)_mm_slli_epi32(__A, __B),
4529 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004530}
4531
Logan Chien55afb0a2018-10-15 10:42:14 +08004532static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004533_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004534{
Logan Chien55afb0a2018-10-15 10:42:14 +08004535 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4536 (__v4si)_mm_slli_epi32(__A, __B),
4537 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004538}
4539
Logan Chien55afb0a2018-10-15 10:42:14 +08004540static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004541_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004542{
Logan Chien55afb0a2018-10-15 10:42:14 +08004543 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4544 (__v8si)_mm256_slli_epi32(__A, __B),
4545 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004546}
4547
Logan Chien55afb0a2018-10-15 10:42:14 +08004548static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004549_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004550{
Logan Chien55afb0a2018-10-15 10:42:14 +08004551 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4552 (__v8si)_mm256_slli_epi32(__A, __B),
4553 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004554}
4555
Logan Chien55afb0a2018-10-15 10:42:14 +08004556static __inline__ __m128i __DEFAULT_FN_ATTRS128
4557_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4558{
4559 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4560 (__v2di)_mm_sll_epi64(__A, __B),
4561 (__v2di)__W);
4562}
Logan Chien2833ffb2018-10-09 10:03:24 +08004563
Logan Chien55afb0a2018-10-15 10:42:14 +08004564static __inline__ __m128i __DEFAULT_FN_ATTRS128
4565_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4566{
4567 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4568 (__v2di)_mm_sll_epi64(__A, __B),
4569 (__v2di)_mm_setzero_si128());
4570}
Logan Chien2833ffb2018-10-09 10:03:24 +08004571
Logan Chien55afb0a2018-10-15 10:42:14 +08004572static __inline__ __m256i __DEFAULT_FN_ATTRS256
4573_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4574{
4575 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4576 (__v4di)_mm256_sll_epi64(__A, __B),
4577 (__v4di)__W);
4578}
Logan Chien2833ffb2018-10-09 10:03:24 +08004579
Logan Chien55afb0a2018-10-15 10:42:14 +08004580static __inline__ __m256i __DEFAULT_FN_ATTRS256
4581_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4582{
4583 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4584 (__v4di)_mm256_sll_epi64(__A, __B),
4585 (__v4di)_mm256_setzero_si256());
4586}
Logan Chien2833ffb2018-10-09 10:03:24 +08004587
Logan Chien55afb0a2018-10-15 10:42:14 +08004588static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004589_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08004590{
4591 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4592 (__v2di)_mm_slli_epi64(__A, __B),
4593 (__v2di)__W);
4594}
Logan Chien2833ffb2018-10-09 10:03:24 +08004595
Logan Chien55afb0a2018-10-15 10:42:14 +08004596static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004597_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08004598{
4599 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4600 (__v2di)_mm_slli_epi64(__A, __B),
4601 (__v2di)_mm_setzero_si128());
4602}
4603
4604static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004605_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08004606{
4607 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4608 (__v4di)_mm256_slli_epi64(__A, __B),
4609 (__v4di)__W);
4610}
4611
4612static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004613_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08004614{
4615 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4616 (__v4di)_mm256_slli_epi64(__A, __B),
4617 (__v4di)_mm256_setzero_si256());
4618}
4619
4620static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08004621_mm_rorv_epi32 (__m128i __A, __m128i __B)
4622{
Logan Chien55afb0a2018-10-15 10:42:14 +08004623 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08004624}
4625
Logan Chien55afb0a2018-10-15 10:42:14 +08004626static __inline__ __m128i __DEFAULT_FN_ATTRS128
4627_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004628{
Logan Chien55afb0a2018-10-15 10:42:14 +08004629 return (__m128i)__builtin_ia32_selectd_128(__U,
4630 (__v4si)_mm_rorv_epi32(__A, __B),
4631 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004632}
4633
Logan Chien55afb0a2018-10-15 10:42:14 +08004634static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08004635_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4636{
Logan Chien55afb0a2018-10-15 10:42:14 +08004637 return (__m128i)__builtin_ia32_selectd_128(__U,
4638 (__v4si)_mm_rorv_epi32(__A, __B),
4639 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004640}
4641
Logan Chien55afb0a2018-10-15 10:42:14 +08004642static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08004643_mm256_rorv_epi32 (__m256i __A, __m256i __B)
4644{
Logan Chien55afb0a2018-10-15 10:42:14 +08004645 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08004646}
4647
Logan Chien55afb0a2018-10-15 10:42:14 +08004648static __inline__ __m256i __DEFAULT_FN_ATTRS256
4649_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004650{
Logan Chien55afb0a2018-10-15 10:42:14 +08004651 return (__m256i)__builtin_ia32_selectd_256(__U,
4652 (__v8si)_mm256_rorv_epi32(__A, __B),
4653 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004654}
4655
Logan Chien55afb0a2018-10-15 10:42:14 +08004656static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08004657_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4658{
Logan Chien55afb0a2018-10-15 10:42:14 +08004659 return (__m256i)__builtin_ia32_selectd_256(__U,
4660 (__v8si)_mm256_rorv_epi32(__A, __B),
4661 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004662}
4663
Logan Chien55afb0a2018-10-15 10:42:14 +08004664static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08004665_mm_rorv_epi64 (__m128i __A, __m128i __B)
4666{
Logan Chien55afb0a2018-10-15 10:42:14 +08004667 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08004668}
4669
Logan Chien55afb0a2018-10-15 10:42:14 +08004670static __inline__ __m128i __DEFAULT_FN_ATTRS128
4671_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004672{
Logan Chien55afb0a2018-10-15 10:42:14 +08004673 return (__m128i)__builtin_ia32_selectq_128(__U,
4674 (__v2di)_mm_rorv_epi64(__A, __B),
4675 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004676}
4677
Logan Chien55afb0a2018-10-15 10:42:14 +08004678static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08004679_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4680{
Logan Chien55afb0a2018-10-15 10:42:14 +08004681 return (__m128i)__builtin_ia32_selectq_128(__U,
4682 (__v2di)_mm_rorv_epi64(__A, __B),
4683 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004684}
4685
Logan Chien55afb0a2018-10-15 10:42:14 +08004686static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08004687_mm256_rorv_epi64 (__m256i __A, __m256i __B)
4688{
Logan Chien55afb0a2018-10-15 10:42:14 +08004689 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08004690}
4691
Logan Chien55afb0a2018-10-15 10:42:14 +08004692static __inline__ __m256i __DEFAULT_FN_ATTRS256
4693_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004694{
Logan Chien55afb0a2018-10-15 10:42:14 +08004695 return (__m256i)__builtin_ia32_selectq_256(__U,
4696 (__v4di)_mm256_rorv_epi64(__A, __B),
4697 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004698}
4699
Logan Chien55afb0a2018-10-15 10:42:14 +08004700static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08004701_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4702{
Logan Chien55afb0a2018-10-15 10:42:14 +08004703 return (__m256i)__builtin_ia32_selectq_256(__U,
4704 (__v4di)_mm256_rorv_epi64(__A, __B),
4705 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004706}
4707
Logan Chien55afb0a2018-10-15 10:42:14 +08004708static __inline__ __m128i __DEFAULT_FN_ATTRS128
4709_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004710{
Logan Chien55afb0a2018-10-15 10:42:14 +08004711 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4712 (__v2di)_mm_sllv_epi64(__X, __Y),
4713 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004714}
4715
Logan Chien55afb0a2018-10-15 10:42:14 +08004716static __inline__ __m128i __DEFAULT_FN_ATTRS128
4717_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004718{
Logan Chien55afb0a2018-10-15 10:42:14 +08004719 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4720 (__v2di)_mm_sllv_epi64(__X, __Y),
4721 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004722}
4723
Logan Chien55afb0a2018-10-15 10:42:14 +08004724static __inline__ __m256i __DEFAULT_FN_ATTRS256
4725_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004726{
Logan Chien55afb0a2018-10-15 10:42:14 +08004727 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4728 (__v4di)_mm256_sllv_epi64(__X, __Y),
4729 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004730}
4731
Logan Chien55afb0a2018-10-15 10:42:14 +08004732static __inline__ __m256i __DEFAULT_FN_ATTRS256
4733_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004734{
Logan Chien55afb0a2018-10-15 10:42:14 +08004735 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4736 (__v4di)_mm256_sllv_epi64(__X, __Y),
4737 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004738}
4739
Logan Chien55afb0a2018-10-15 10:42:14 +08004740static __inline__ __m128i __DEFAULT_FN_ATTRS128
4741_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004742{
Logan Chien55afb0a2018-10-15 10:42:14 +08004743 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4744 (__v4si)_mm_sllv_epi32(__X, __Y),
4745 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004746}
4747
Logan Chien55afb0a2018-10-15 10:42:14 +08004748static __inline__ __m128i __DEFAULT_FN_ATTRS128
4749_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004750{
Logan Chien55afb0a2018-10-15 10:42:14 +08004751 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4752 (__v4si)_mm_sllv_epi32(__X, __Y),
4753 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004754}
4755
Logan Chien55afb0a2018-10-15 10:42:14 +08004756static __inline__ __m256i __DEFAULT_FN_ATTRS256
4757_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004758{
Logan Chien55afb0a2018-10-15 10:42:14 +08004759 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4760 (__v8si)_mm256_sllv_epi32(__X, __Y),
4761 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004762}
4763
Logan Chien55afb0a2018-10-15 10:42:14 +08004764static __inline__ __m256i __DEFAULT_FN_ATTRS256
4765_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004766{
Logan Chien55afb0a2018-10-15 10:42:14 +08004767 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4768 (__v8si)_mm256_sllv_epi32(__X, __Y),
4769 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004770}
4771
Logan Chien55afb0a2018-10-15 10:42:14 +08004772static __inline__ __m128i __DEFAULT_FN_ATTRS128
4773_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004774{
Logan Chien55afb0a2018-10-15 10:42:14 +08004775 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4776 (__v2di)_mm_srlv_epi64(__X, __Y),
4777 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004778}
4779
Logan Chien55afb0a2018-10-15 10:42:14 +08004780static __inline__ __m128i __DEFAULT_FN_ATTRS128
4781_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004782{
Logan Chien55afb0a2018-10-15 10:42:14 +08004783 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4784 (__v2di)_mm_srlv_epi64(__X, __Y),
4785 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004786}
4787
Logan Chien55afb0a2018-10-15 10:42:14 +08004788static __inline__ __m256i __DEFAULT_FN_ATTRS256
4789_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004790{
Logan Chien55afb0a2018-10-15 10:42:14 +08004791 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4792 (__v4di)_mm256_srlv_epi64(__X, __Y),
4793 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004794}
4795
Logan Chien55afb0a2018-10-15 10:42:14 +08004796static __inline__ __m256i __DEFAULT_FN_ATTRS256
4797_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004798{
Logan Chien55afb0a2018-10-15 10:42:14 +08004799 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4800 (__v4di)_mm256_srlv_epi64(__X, __Y),
4801 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004802}
4803
Logan Chien55afb0a2018-10-15 10:42:14 +08004804static __inline__ __m128i __DEFAULT_FN_ATTRS128
4805_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004806{
Logan Chien55afb0a2018-10-15 10:42:14 +08004807 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4808 (__v4si)_mm_srlv_epi32(__X, __Y),
4809 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004810}
4811
Logan Chien55afb0a2018-10-15 10:42:14 +08004812static __inline__ __m128i __DEFAULT_FN_ATTRS128
4813_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004814{
Logan Chien55afb0a2018-10-15 10:42:14 +08004815 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4816 (__v4si)_mm_srlv_epi32(__X, __Y),
4817 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004818}
4819
Logan Chien55afb0a2018-10-15 10:42:14 +08004820static __inline__ __m256i __DEFAULT_FN_ATTRS256
4821_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004822{
Logan Chien55afb0a2018-10-15 10:42:14 +08004823 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4824 (__v8si)_mm256_srlv_epi32(__X, __Y),
4825 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004826}
4827
Logan Chien55afb0a2018-10-15 10:42:14 +08004828static __inline__ __m256i __DEFAULT_FN_ATTRS256
4829_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004830{
Logan Chien55afb0a2018-10-15 10:42:14 +08004831 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4832 (__v8si)_mm256_srlv_epi32(__X, __Y),
4833 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004834}
4835
Logan Chien55afb0a2018-10-15 10:42:14 +08004836static __inline__ __m128i __DEFAULT_FN_ATTRS128
4837_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004838{
Logan Chien55afb0a2018-10-15 10:42:14 +08004839 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4840 (__v4si)_mm_srl_epi32(__A, __B),
4841 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004842}
4843
Logan Chien55afb0a2018-10-15 10:42:14 +08004844static __inline__ __m128i __DEFAULT_FN_ATTRS128
4845_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004846{
Logan Chien55afb0a2018-10-15 10:42:14 +08004847 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4848 (__v4si)_mm_srl_epi32(__A, __B),
4849 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004850}
4851
Logan Chien55afb0a2018-10-15 10:42:14 +08004852static __inline__ __m256i __DEFAULT_FN_ATTRS256
4853_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004854{
Logan Chien55afb0a2018-10-15 10:42:14 +08004855 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4856 (__v8si)_mm256_srl_epi32(__A, __B),
4857 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004858}
4859
Logan Chien55afb0a2018-10-15 10:42:14 +08004860static __inline__ __m256i __DEFAULT_FN_ATTRS256
4861_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004862{
Logan Chien55afb0a2018-10-15 10:42:14 +08004863 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4864 (__v8si)_mm256_srl_epi32(__A, __B),
4865 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004866}
4867
Logan Chien55afb0a2018-10-15 10:42:14 +08004868static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004869_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004870{
Logan Chien55afb0a2018-10-15 10:42:14 +08004871 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4872 (__v4si)_mm_srli_epi32(__A, __B),
4873 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004874}
4875
Logan Chien55afb0a2018-10-15 10:42:14 +08004876static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004877_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004878{
Logan Chien55afb0a2018-10-15 10:42:14 +08004879 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4880 (__v4si)_mm_srli_epi32(__A, __B),
4881 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004882}
4883
Logan Chien55afb0a2018-10-15 10:42:14 +08004884static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004885_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004886{
Logan Chien55afb0a2018-10-15 10:42:14 +08004887 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4888 (__v8si)_mm256_srli_epi32(__A, __B),
4889 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004890}
4891
Logan Chien55afb0a2018-10-15 10:42:14 +08004892static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004893_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004894{
Logan Chien55afb0a2018-10-15 10:42:14 +08004895 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4896 (__v8si)_mm256_srli_epi32(__A, __B),
4897 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004898}
4899
Logan Chien55afb0a2018-10-15 10:42:14 +08004900static __inline__ __m128i __DEFAULT_FN_ATTRS128
4901_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004902{
Logan Chien55afb0a2018-10-15 10:42:14 +08004903 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4904 (__v2di)_mm_srl_epi64(__A, __B),
4905 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004906}
4907
Logan Chien55afb0a2018-10-15 10:42:14 +08004908static __inline__ __m128i __DEFAULT_FN_ATTRS128
4909_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004910{
Logan Chien55afb0a2018-10-15 10:42:14 +08004911 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4912 (__v2di)_mm_srl_epi64(__A, __B),
4913 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004914}
4915
Logan Chien55afb0a2018-10-15 10:42:14 +08004916static __inline__ __m256i __DEFAULT_FN_ATTRS256
4917_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004918{
Logan Chien55afb0a2018-10-15 10:42:14 +08004919 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4920 (__v4di)_mm256_srl_epi64(__A, __B),
4921 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004922}
4923
Logan Chien55afb0a2018-10-15 10:42:14 +08004924static __inline__ __m256i __DEFAULT_FN_ATTRS256
4925_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004926{
Logan Chien55afb0a2018-10-15 10:42:14 +08004927 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4928 (__v4di)_mm256_srl_epi64(__A, __B),
4929 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004930}
4931
Logan Chien55afb0a2018-10-15 10:42:14 +08004932static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004933_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004934{
Logan Chien55afb0a2018-10-15 10:42:14 +08004935 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4936 (__v2di)_mm_srli_epi64(__A, __B),
4937 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004938}
4939
Logan Chien55afb0a2018-10-15 10:42:14 +08004940static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004941_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004942{
Logan Chien55afb0a2018-10-15 10:42:14 +08004943 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4944 (__v2di)_mm_srli_epi64(__A, __B),
4945 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08004946}
4947
Logan Chien55afb0a2018-10-15 10:42:14 +08004948static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004949_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004950{
Logan Chien55afb0a2018-10-15 10:42:14 +08004951 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4952 (__v4di)_mm256_srli_epi64(__A, __B),
4953 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004954}
4955
Logan Chien55afb0a2018-10-15 10:42:14 +08004956static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07004957_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08004958{
Logan Chien55afb0a2018-10-15 10:42:14 +08004959 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4960 (__v4di)_mm256_srli_epi64(__A, __B),
4961 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08004962}
4963
Logan Chien55afb0a2018-10-15 10:42:14 +08004964static __inline__ __m128i __DEFAULT_FN_ATTRS128
4965_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08004966{
Logan Chien55afb0a2018-10-15 10:42:14 +08004967 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4968 (__v4si)_mm_srav_epi32(__X, __Y),
4969 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08004970}
4971
Logan Chien55afb0a2018-10-15 10:42:14 +08004972static __inline__ __m128i __DEFAULT_FN_ATTRS128
4973_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4974{
4975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4976 (__v4si)_mm_srav_epi32(__X, __Y),
4977 (__v4si)_mm_setzero_si128());
4978}
4979
4980static __inline__ __m256i __DEFAULT_FN_ATTRS256
4981_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4982{
4983 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4984 (__v8si)_mm256_srav_epi32(__X, __Y),
4985 (__v8si)__W);
4986}
4987
4988static __inline__ __m256i __DEFAULT_FN_ATTRS256
4989_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4990{
4991 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4992 (__v8si)_mm256_srav_epi32(__X, __Y),
4993 (__v8si)_mm256_setzero_si256());
4994}
4995
4996static __inline__ __m128i __DEFAULT_FN_ATTRS128
4997_mm_srav_epi64(__m128i __X, __m128i __Y)
4998{
4999 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5000}
5001
5002static __inline__ __m128i __DEFAULT_FN_ATTRS128
5003_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5004{
5005 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5006 (__v2di)_mm_srav_epi64(__X, __Y),
5007 (__v2di)__W);
5008}
5009
5010static __inline__ __m128i __DEFAULT_FN_ATTRS128
5011_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5012{
5013 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5014 (__v2di)_mm_srav_epi64(__X, __Y),
5015 (__v2di)_mm_setzero_si128());
5016}
5017
5018static __inline__ __m256i __DEFAULT_FN_ATTRS256
5019_mm256_srav_epi64(__m256i __X, __m256i __Y)
5020{
5021 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5022}
5023
5024static __inline__ __m256i __DEFAULT_FN_ATTRS256
5025_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5026{
5027 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5028 (__v4di)_mm256_srav_epi64(__X, __Y),
5029 (__v4di)__W);
5030}
5031
5032static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005033_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5034{
Logan Chien55afb0a2018-10-15 10:42:14 +08005035 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5036 (__v4di)_mm256_srav_epi64(__X, __Y),
5037 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08005038}
5039
Logan Chien55afb0a2018-10-15 10:42:14 +08005040static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005041_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5042{
5043 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5044 (__v4si) __A,
5045 (__v4si) __W);
5046}
5047
Logan Chien55afb0a2018-10-15 10:42:14 +08005048static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005049_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
5050{
5051 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5052 (__v4si) __A,
5053 (__v4si) _mm_setzero_si128 ());
5054}
5055
5056
Logan Chien55afb0a2018-10-15 10:42:14 +08005057static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005058_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5059{
5060 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5061 (__v8si) __A,
5062 (__v8si) __W);
5063}
5064
Logan Chien55afb0a2018-10-15 10:42:14 +08005065static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005066_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
5067{
5068 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5069 (__v8si) __A,
5070 (__v8si) _mm256_setzero_si256 ());
5071}
5072
Logan Chien969aea62018-12-05 18:40:57 +08005073static __inline __m128i __DEFAULT_FN_ATTRS128
5074_mm_load_epi32 (void const *__P)
5075{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005076 return *(const __m128i *) __P;
Logan Chien969aea62018-12-05 18:40:57 +08005077}
5078
Logan Chien55afb0a2018-10-15 10:42:14 +08005079static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005080_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5081{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005082 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005083 (__v4si) __W,
5084 (__mmask8)
5085 __U);
5086}
5087
Logan Chien55afb0a2018-10-15 10:42:14 +08005088static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005089_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5090{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005091 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005092 (__v4si)
5093 _mm_setzero_si128 (),
5094 (__mmask8)
5095 __U);
5096}
5097
Logan Chien969aea62018-12-05 18:40:57 +08005098static __inline __m256i __DEFAULT_FN_ATTRS256
5099_mm256_load_epi32 (void const *__P)
5100{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005101 return *(const __m256i *) __P;
Logan Chien969aea62018-12-05 18:40:57 +08005102}
5103
Logan Chien55afb0a2018-10-15 10:42:14 +08005104static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005105_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5106{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005107 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005108 (__v8si) __W,
5109 (__mmask8)
5110 __U);
5111}
5112
Logan Chien55afb0a2018-10-15 10:42:14 +08005113static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005114_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5115{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005116 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005117 (__v8si)
5118 _mm256_setzero_si256 (),
5119 (__mmask8)
5120 __U);
5121}
5122
Logan Chien969aea62018-12-05 18:40:57 +08005123static __inline void __DEFAULT_FN_ATTRS128
5124_mm_store_epi32 (void *__P, __m128i __A)
5125{
5126 *(__m128i *) __P = __A;
5127}
5128
Logan Chien55afb0a2018-10-15 10:42:14 +08005129static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005130_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5131{
5132 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5133 (__v4si) __A,
5134 (__mmask8) __U);
5135}
5136
Logan Chien969aea62018-12-05 18:40:57 +08005137static __inline void __DEFAULT_FN_ATTRS256
5138_mm256_store_epi32 (void *__P, __m256i __A)
5139{
5140 *(__m256i *) __P = __A;
5141}
5142
Logan Chien55afb0a2018-10-15 10:42:14 +08005143static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005144_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5145{
5146 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5147 (__v8si) __A,
5148 (__mmask8) __U);
5149}
5150
Logan Chien55afb0a2018-10-15 10:42:14 +08005151static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005152_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5153{
5154 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5155 (__v2di) __A,
5156 (__v2di) __W);
5157}
5158
Logan Chien55afb0a2018-10-15 10:42:14 +08005159static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005160_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
5161{
5162 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5163 (__v2di) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08005164 (__v2di) _mm_setzero_si128 ());
Logan Chien2833ffb2018-10-09 10:03:24 +08005165}
5166
Logan Chien55afb0a2018-10-15 10:42:14 +08005167static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005168_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5169{
5170 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5171 (__v4di) __A,
5172 (__v4di) __W);
5173}
5174
Logan Chien55afb0a2018-10-15 10:42:14 +08005175static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005176_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
5177{
5178 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5179 (__v4di) __A,
5180 (__v4di) _mm256_setzero_si256 ());
5181}
5182
Logan Chien969aea62018-12-05 18:40:57 +08005183static __inline __m128i __DEFAULT_FN_ATTRS128
5184_mm_load_epi64 (void const *__P)
5185{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005186 return *(const __m128i *) __P;
Logan Chien969aea62018-12-05 18:40:57 +08005187}
5188
Logan Chien55afb0a2018-10-15 10:42:14 +08005189static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005190_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5191{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005192 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005193 (__v2di) __W,
5194 (__mmask8)
5195 __U);
5196}
5197
Logan Chien55afb0a2018-10-15 10:42:14 +08005198static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005199_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5200{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005201 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005202 (__v2di)
Logan Chien55afb0a2018-10-15 10:42:14 +08005203 _mm_setzero_si128 (),
Logan Chien2833ffb2018-10-09 10:03:24 +08005204 (__mmask8)
5205 __U);
5206}
5207
Logan Chien969aea62018-12-05 18:40:57 +08005208static __inline __m256i __DEFAULT_FN_ATTRS256
5209_mm256_load_epi64 (void const *__P)
5210{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005211 return *(const __m256i *) __P;
Logan Chien969aea62018-12-05 18:40:57 +08005212}
5213
Logan Chien55afb0a2018-10-15 10:42:14 +08005214static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005215_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5216{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005217 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005218 (__v4di) __W,
5219 (__mmask8)
5220 __U);
5221}
5222
Logan Chien55afb0a2018-10-15 10:42:14 +08005223static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005224_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5225{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005226 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005227 (__v4di)
5228 _mm256_setzero_si256 (),
5229 (__mmask8)
5230 __U);
5231}
5232
Logan Chien969aea62018-12-05 18:40:57 +08005233static __inline void __DEFAULT_FN_ATTRS128
5234_mm_store_epi64 (void *__P, __m128i __A)
5235{
5236 *(__m128i *) __P = __A;
5237}
5238
Logan Chien55afb0a2018-10-15 10:42:14 +08005239static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005240_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5241{
5242 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5243 (__v2di) __A,
5244 (__mmask8) __U);
5245}
5246
Logan Chien969aea62018-12-05 18:40:57 +08005247static __inline void __DEFAULT_FN_ATTRS256
5248_mm256_store_epi64 (void *__P, __m256i __A)
5249{
5250 *(__m256i *) __P = __A;
5251}
5252
Logan Chien55afb0a2018-10-15 10:42:14 +08005253static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005254_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5255{
5256 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5257 (__v4di) __A,
5258 (__mmask8) __U);
5259}
5260
Logan Chien55afb0a2018-10-15 10:42:14 +08005261static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005262_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5263{
5264 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5265 (__v2df)_mm_movedup_pd(__A),
5266 (__v2df)__W);
5267}
5268
Logan Chien55afb0a2018-10-15 10:42:14 +08005269static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005270_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5271{
5272 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5273 (__v2df)_mm_movedup_pd(__A),
5274 (__v2df)_mm_setzero_pd());
5275}
5276
Logan Chien55afb0a2018-10-15 10:42:14 +08005277static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005278_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5279{
5280 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5281 (__v4df)_mm256_movedup_pd(__A),
5282 (__v4df)__W);
5283}
5284
Logan Chien55afb0a2018-10-15 10:42:14 +08005285static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005286_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5287{
5288 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5289 (__v4df)_mm256_movedup_pd(__A),
5290 (__v4df)_mm256_setzero_pd());
5291}
5292
Logan Chien55afb0a2018-10-15 10:42:14 +08005293static __inline__ __m128i __DEFAULT_FN_ATTRS128
5294_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
5295{
5296 return (__m128i)__builtin_ia32_selectd_128(__M,
5297 (__v4si) _mm_set1_epi32(__A),
5298 (__v4si)__O);
5299}
Logan Chien2833ffb2018-10-09 10:03:24 +08005300
Logan Chien55afb0a2018-10-15 10:42:14 +08005301static __inline__ __m128i __DEFAULT_FN_ATTRS128
5302_mm_maskz_set1_epi32( __mmask8 __M, int __A)
5303{
5304 return (__m128i)__builtin_ia32_selectd_128(__M,
5305 (__v4si) _mm_set1_epi32(__A),
5306 (__v4si)_mm_setzero_si128());
5307}
Logan Chien2833ffb2018-10-09 10:03:24 +08005308
Logan Chien55afb0a2018-10-15 10:42:14 +08005309static __inline__ __m256i __DEFAULT_FN_ATTRS256
5310_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
5311{
5312 return (__m256i)__builtin_ia32_selectd_256(__M,
5313 (__v8si) _mm256_set1_epi32(__A),
5314 (__v8si)__O);
5315}
Logan Chien2833ffb2018-10-09 10:03:24 +08005316
Logan Chien55afb0a2018-10-15 10:42:14 +08005317static __inline__ __m256i __DEFAULT_FN_ATTRS256
5318_mm256_maskz_set1_epi32( __mmask8 __M, int __A)
5319{
5320 return (__m256i)__builtin_ia32_selectd_256(__M,
5321 (__v8si) _mm256_set1_epi32(__A),
5322 (__v8si)_mm256_setzero_si256());
5323}
Logan Chien2833ffb2018-10-09 10:03:24 +08005324
Logan Chien2833ffb2018-10-09 10:03:24 +08005325
Logan Chien55afb0a2018-10-15 10:42:14 +08005326static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005327_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5328{
Logan Chien55afb0a2018-10-15 10:42:14 +08005329 return (__m128i) __builtin_ia32_selectq_128(__M,
5330 (__v2di) _mm_set1_epi64x(__A),
5331 (__v2di) __O);
Logan Chien2833ffb2018-10-09 10:03:24 +08005332}
5333
Logan Chien55afb0a2018-10-15 10:42:14 +08005334static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005335_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5336{
Logan Chien55afb0a2018-10-15 10:42:14 +08005337 return (__m128i) __builtin_ia32_selectq_128(__M,
5338 (__v2di) _mm_set1_epi64x(__A),
5339 (__v2di) _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08005340}
5341
Logan Chien55afb0a2018-10-15 10:42:14 +08005342static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005343_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5344{
Logan Chien55afb0a2018-10-15 10:42:14 +08005345 return (__m256i) __builtin_ia32_selectq_256(__M,
5346 (__v4di) _mm256_set1_epi64x(__A),
5347 (__v4di) __O) ;
Logan Chien2833ffb2018-10-09 10:03:24 +08005348}
5349
Logan Chien55afb0a2018-10-15 10:42:14 +08005350static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005351_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
5352{
Logan Chien55afb0a2018-10-15 10:42:14 +08005353 return (__m256i) __builtin_ia32_selectq_256(__M,
5354 (__v4di) _mm256_set1_epi64x(__A),
5355 (__v4di) _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08005356}
5357
/* Unmasked 128-bit double fixupimm: calls the _mask builtin with an all-ones
   mask. A macro so that imm reaches the builtin as a constant expression. */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08005363
/* Masked 128-bit double fixupimm: forwards A, B, C, imm and mask U to the
   _mask builtin. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005369
/* Zero-masked 128-bit double fixupimm: forwards A, B, C, imm and mask U to
   the _maskz builtin. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005375
/* Unmasked 256-bit double fixupimm: calls the _mask builtin with an all-ones
   mask. */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08005381
/* Masked 256-bit double fixupimm: forwards A, B, C, imm and mask U to the
   _mask builtin. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005387
/* Zero-masked 256-bit double fixupimm: forwards A, B, C, imm and mask U to
   the _maskz builtin. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005393
/* Unmasked 128-bit float fixupimm: calls the _mask builtin with an all-ones
   mask. */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1))
5399
/* Masked 128-bit float fixupimm: forwards A, B, C, imm and mask U to the
   _mask builtin. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5405
/* Zero-masked 128-bit float fixupimm: forwards A, B, C, imm and mask U to
   the _maskz builtin. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005411
/* Unmasked 256-bit float fixupimm: calls the _mask builtin with an all-ones
   mask. */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (__v8si)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)-1))
5417
/* Masked 256-bit float fixupimm: forwards A, B, C, imm and mask U to the
   _mask builtin. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (__v8si)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5423
/* Zero-masked 256-bit float fixupimm: forwards A, B, C, imm and mask U to
   the _maskz builtin. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (__v8si)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08005429
Logan Chien55afb0a2018-10-15 10:42:14 +08005430static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005431_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5432{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005433 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005434 (__v2df) __W,
5435 (__mmask8) __U);
5436}
5437
Logan Chien55afb0a2018-10-15 10:42:14 +08005438static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005439_mm_maskz_load_pd (__mmask8 __U, void const *__P)
5440{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005441 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005442 (__v2df)
5443 _mm_setzero_pd (),
5444 (__mmask8) __U);
5445}
5446
Logan Chien55afb0a2018-10-15 10:42:14 +08005447static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005448_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5449{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005450 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005451 (__v4df) __W,
5452 (__mmask8) __U);
5453}
5454
Logan Chien55afb0a2018-10-15 10:42:14 +08005455static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005456_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
5457{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005458 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005459 (__v4df)
5460 _mm256_setzero_pd (),
5461 (__mmask8) __U);
5462}
5463
Logan Chien55afb0a2018-10-15 10:42:14 +08005464static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005465_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5466{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005467 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005468 (__v4sf) __W,
5469 (__mmask8) __U);
5470}
5471
Logan Chien55afb0a2018-10-15 10:42:14 +08005472static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005473_mm_maskz_load_ps (__mmask8 __U, void const *__P)
5474{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005475 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005476 (__v4sf)
5477 _mm_setzero_ps (),
5478 (__mmask8) __U);
5479}
5480
Logan Chien55afb0a2018-10-15 10:42:14 +08005481static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005482_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5483{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005484 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005485 (__v8sf) __W,
5486 (__mmask8) __U);
5487}
5488
Logan Chien55afb0a2018-10-15 10:42:14 +08005489static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005490_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
5491{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005492 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005493 (__v8sf)
5494 _mm256_setzero_ps (),
5495 (__mmask8) __U);
5496}
5497
Logan Chien969aea62018-12-05 18:40:57 +08005498static __inline __m128i __DEFAULT_FN_ATTRS128
5499_mm_loadu_epi64 (void const *__P)
5500{
5501 struct __loadu_epi64 {
Logan Chiendbcf4122019-03-21 10:50:25 +08005502 __m128i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08005503 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005504 return ((const struct __loadu_epi64*)__P)->__v;
Logan Chien969aea62018-12-05 18:40:57 +08005505}
5506
Logan Chien55afb0a2018-10-15 10:42:14 +08005507static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005508_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5509{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005510 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005511 (__v2di) __W,
5512 (__mmask8) __U);
5513}
5514
Logan Chien55afb0a2018-10-15 10:42:14 +08005515static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005516_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5517{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005518 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005519 (__v2di)
5520 _mm_setzero_si128 (),
5521 (__mmask8) __U);
5522}
5523
Logan Chien969aea62018-12-05 18:40:57 +08005524static __inline __m256i __DEFAULT_FN_ATTRS256
5525_mm256_loadu_epi64 (void const *__P)
5526{
5527 struct __loadu_epi64 {
Logan Chiendbcf4122019-03-21 10:50:25 +08005528 __m256i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08005529 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005530 return ((const struct __loadu_epi64*)__P)->__v;
Logan Chien969aea62018-12-05 18:40:57 +08005531}
5532
Logan Chien55afb0a2018-10-15 10:42:14 +08005533static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005534_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5535{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005536 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005537 (__v4di) __W,
5538 (__mmask8) __U);
5539}
5540
Logan Chien55afb0a2018-10-15 10:42:14 +08005541static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005542_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5543{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005544 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005545 (__v4di)
5546 _mm256_setzero_si256 (),
5547 (__mmask8) __U);
5548}
5549
Logan Chien969aea62018-12-05 18:40:57 +08005550static __inline __m128i __DEFAULT_FN_ATTRS128
5551_mm_loadu_epi32 (void const *__P)
5552{
5553 struct __loadu_epi32 {
Logan Chiendbcf4122019-03-21 10:50:25 +08005554 __m128i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08005555 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005556 return ((const struct __loadu_epi32*)__P)->__v;
Logan Chien969aea62018-12-05 18:40:57 +08005557}
5558
Logan Chien55afb0a2018-10-15 10:42:14 +08005559static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005560_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5561{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005562 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005563 (__v4si) __W,
5564 (__mmask8) __U);
5565}
5566
Logan Chien55afb0a2018-10-15 10:42:14 +08005567static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005568_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5569{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005570 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005571 (__v4si)
5572 _mm_setzero_si128 (),
5573 (__mmask8) __U);
5574}
5575
Logan Chien969aea62018-12-05 18:40:57 +08005576static __inline __m256i __DEFAULT_FN_ATTRS256
5577_mm256_loadu_epi32 (void const *__P)
5578{
5579 struct __loadu_epi32 {
Logan Chiendbcf4122019-03-21 10:50:25 +08005580 __m256i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08005581 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005582 return ((const struct __loadu_epi32*)__P)->__v;
Logan Chien969aea62018-12-05 18:40:57 +08005583}
5584
Logan Chien55afb0a2018-10-15 10:42:14 +08005585static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005586_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5587{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005588 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005589 (__v8si) __W,
5590 (__mmask8) __U);
5591}
5592
Logan Chien55afb0a2018-10-15 10:42:14 +08005593static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005594_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5595{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005596 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005597 (__v8si)
5598 _mm256_setzero_si256 (),
5599 (__mmask8) __U);
5600}
5601
Logan Chien55afb0a2018-10-15 10:42:14 +08005602static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005603_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5604{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005605 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005606 (__v2df) __W,
5607 (__mmask8) __U);
5608}
5609
Logan Chien55afb0a2018-10-15 10:42:14 +08005610static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005611_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
5612{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005613 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005614 (__v2df)
5615 _mm_setzero_pd (),
5616 (__mmask8) __U);
5617}
5618
Logan Chien55afb0a2018-10-15 10:42:14 +08005619static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005620_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5621{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005622 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005623 (__v4df) __W,
5624 (__mmask8) __U);
5625}
5626
Logan Chien55afb0a2018-10-15 10:42:14 +08005627static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005628_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
5629{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005630 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005631 (__v4df)
5632 _mm256_setzero_pd (),
5633 (__mmask8) __U);
5634}
5635
Logan Chien55afb0a2018-10-15 10:42:14 +08005636static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005637_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5638{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005639 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005640 (__v4sf) __W,
5641 (__mmask8) __U);
5642}
5643
Logan Chien55afb0a2018-10-15 10:42:14 +08005644static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005645_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
5646{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005647 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005648 (__v4sf)
5649 _mm_setzero_ps (),
5650 (__mmask8) __U);
5651}
5652
Logan Chien55afb0a2018-10-15 10:42:14 +08005653static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005654_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5655{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005656 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005657 (__v8sf) __W,
5658 (__mmask8) __U);
5659}
5660
Logan Chien55afb0a2018-10-15 10:42:14 +08005661static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005662_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
5663{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07005664 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08005665 (__v8sf)
5666 _mm256_setzero_ps (),
5667 (__mmask8) __U);
5668}
5669
Logan Chien55afb0a2018-10-15 10:42:14 +08005670static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005671_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5672{
5673 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5674 (__v2df) __A,
5675 (__mmask8) __U);
5676}
5677
Logan Chien55afb0a2018-10-15 10:42:14 +08005678static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005679_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5680{
5681 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5682 (__v4df) __A,
5683 (__mmask8) __U);
5684}
5685
Logan Chien55afb0a2018-10-15 10:42:14 +08005686static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005687_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5688{
5689 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5690 (__v4sf) __A,
5691 (__mmask8) __U);
5692}
5693
Logan Chien55afb0a2018-10-15 10:42:14 +08005694static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005695_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5696{
5697 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5698 (__v8sf) __A,
5699 (__mmask8) __U);
5700}
5701
Logan Chien969aea62018-12-05 18:40:57 +08005702static __inline void __DEFAULT_FN_ATTRS128
5703_mm_storeu_epi64 (void *__P, __m128i __A)
5704{
5705 struct __storeu_epi64 {
Logan Chiendbcf4122019-03-21 10:50:25 +08005706 __m128i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08005707 } __attribute__((__packed__, __may_alias__));
5708 ((struct __storeu_epi64*)__P)->__v = __A;
5709}
5710
Logan Chien55afb0a2018-10-15 10:42:14 +08005711static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005712_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5713{
5714 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5715 (__v2di) __A,
5716 (__mmask8) __U);
5717}
5718
Logan Chien969aea62018-12-05 18:40:57 +08005719static __inline void __DEFAULT_FN_ATTRS256
5720_mm256_storeu_epi64 (void *__P, __m256i __A)
5721{
5722 struct __storeu_epi64 {
Logan Chiendbcf4122019-03-21 10:50:25 +08005723 __m256i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08005724 } __attribute__((__packed__, __may_alias__));
5725 ((struct __storeu_epi64*)__P)->__v = __A;
5726}
5727
Logan Chien55afb0a2018-10-15 10:42:14 +08005728static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005729_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5730{
5731 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5732 (__v4di) __A,
5733 (__mmask8) __U);
5734}
5735
Logan Chien969aea62018-12-05 18:40:57 +08005736static __inline void __DEFAULT_FN_ATTRS128
5737_mm_storeu_epi32 (void *__P, __m128i __A)
5738{
5739 struct __storeu_epi32 {
Logan Chiendbcf4122019-03-21 10:50:25 +08005740 __m128i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08005741 } __attribute__((__packed__, __may_alias__));
5742 ((struct __storeu_epi32*)__P)->__v = __A;
5743}
5744
Logan Chien55afb0a2018-10-15 10:42:14 +08005745static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005746_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5747{
5748 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5749 (__v4si) __A,
5750 (__mmask8) __U);
5751}
5752
Logan Chien969aea62018-12-05 18:40:57 +08005753static __inline void __DEFAULT_FN_ATTRS256
5754_mm256_storeu_epi32 (void *__P, __m256i __A)
5755{
5756 struct __storeu_epi32 {
Logan Chiendbcf4122019-03-21 10:50:25 +08005757 __m256i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08005758 } __attribute__((__packed__, __may_alias__));
5759 ((struct __storeu_epi32*)__P)->__v = __A;
5760}
5761
Logan Chien55afb0a2018-10-15 10:42:14 +08005762static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005763_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5764{
5765 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5766 (__v8si) __A,
5767 (__mmask8) __U);
5768}
5769
Logan Chien55afb0a2018-10-15 10:42:14 +08005770static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005771_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5772{
5773 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5774 (__v2df) __A,
5775 (__mmask8) __U);
5776}
5777
Logan Chien55afb0a2018-10-15 10:42:14 +08005778static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005779_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5780{
5781 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5782 (__v4df) __A,
5783 (__mmask8) __U);
5784}
5785
Logan Chien55afb0a2018-10-15 10:42:14 +08005786static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005787_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5788{
5789 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5790 (__v4sf) __A,
5791 (__mmask8) __U);
5792}
5793
Logan Chien55afb0a2018-10-15 10:42:14 +08005794static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005795_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5796{
5797 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5798 (__v8sf) __A,
5799 (__mmask8) __U);
5800}
5801
5802
Logan Chien55afb0a2018-10-15 10:42:14 +08005803static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005804_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5805{
5806 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5807 (__v2df)_mm_unpackhi_pd(__A, __B),
5808 (__v2df)__W);
5809}
5810
Logan Chien55afb0a2018-10-15 10:42:14 +08005811static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005812_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
5813{
5814 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5815 (__v2df)_mm_unpackhi_pd(__A, __B),
5816 (__v2df)_mm_setzero_pd());
5817}
5818
Logan Chien55afb0a2018-10-15 10:42:14 +08005819static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005820_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5821{
5822 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5823 (__v4df)_mm256_unpackhi_pd(__A, __B),
5824 (__v4df)__W);
5825}
5826
Logan Chien55afb0a2018-10-15 10:42:14 +08005827static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005828_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
5829{
5830 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5831 (__v4df)_mm256_unpackhi_pd(__A, __B),
5832 (__v4df)_mm256_setzero_pd());
5833}
5834
Logan Chien55afb0a2018-10-15 10:42:14 +08005835static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005836_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5837{
5838 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5839 (__v4sf)_mm_unpackhi_ps(__A, __B),
5840 (__v4sf)__W);
5841}
5842
Logan Chien55afb0a2018-10-15 10:42:14 +08005843static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005844_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
5845{
5846 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5847 (__v4sf)_mm_unpackhi_ps(__A, __B),
5848 (__v4sf)_mm_setzero_ps());
5849}
5850
Logan Chien55afb0a2018-10-15 10:42:14 +08005851static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005852_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5853{
5854 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5855 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5856 (__v8sf)__W);
5857}
5858
Logan Chien55afb0a2018-10-15 10:42:14 +08005859static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005860_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
5861{
5862 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5863 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5864 (__v8sf)_mm256_setzero_ps());
5865}
5866
Logan Chien55afb0a2018-10-15 10:42:14 +08005867static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005868_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5869{
5870 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5871 (__v2df)_mm_unpacklo_pd(__A, __B),
5872 (__v2df)__W);
5873}
5874
Logan Chien55afb0a2018-10-15 10:42:14 +08005875static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005876_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
5877{
5878 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5879 (__v2df)_mm_unpacklo_pd(__A, __B),
5880 (__v2df)_mm_setzero_pd());
5881}
5882
Logan Chien55afb0a2018-10-15 10:42:14 +08005883static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005884_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5885{
5886 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5887 (__v4df)_mm256_unpacklo_pd(__A, __B),
5888 (__v4df)__W);
5889}
5890
Logan Chien55afb0a2018-10-15 10:42:14 +08005891static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005892_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
5893{
5894 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5895 (__v4df)_mm256_unpacklo_pd(__A, __B),
5896 (__v4df)_mm256_setzero_pd());
5897}
5898
Logan Chien55afb0a2018-10-15 10:42:14 +08005899static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005900_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5901{
5902 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5903 (__v4sf)_mm_unpacklo_ps(__A, __B),
5904 (__v4sf)__W);
5905}
5906
Logan Chien55afb0a2018-10-15 10:42:14 +08005907static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005908_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
5909{
5910 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5911 (__v4sf)_mm_unpacklo_ps(__A, __B),
5912 (__v4sf)_mm_setzero_ps());
5913}
5914
Logan Chien55afb0a2018-10-15 10:42:14 +08005915static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005916_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5917{
5918 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5919 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5920 (__v8sf)__W);
5921}
5922
Logan Chien55afb0a2018-10-15 10:42:14 +08005923static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005924_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
5925{
5926 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5927 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5928 (__v8sf)_mm256_setzero_ps());
5929}
5930
Logan Chien55afb0a2018-10-15 10:42:14 +08005931static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005932_mm_rcp14_pd (__m128d __A)
5933{
5934 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5935 (__v2df)
5936 _mm_setzero_pd (),
5937 (__mmask8) -1);
5938}
5939
Logan Chien55afb0a2018-10-15 10:42:14 +08005940static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005941_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5942{
5943 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5944 (__v2df) __W,
5945 (__mmask8) __U);
5946}
5947
Logan Chien55afb0a2018-10-15 10:42:14 +08005948static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005949_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
5950{
5951 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5952 (__v2df)
5953 _mm_setzero_pd (),
5954 (__mmask8) __U);
5955}
5956
Logan Chien55afb0a2018-10-15 10:42:14 +08005957static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005958_mm256_rcp14_pd (__m256d __A)
5959{
5960 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5961 (__v4df)
5962 _mm256_setzero_pd (),
5963 (__mmask8) -1);
5964}
5965
Logan Chien55afb0a2018-10-15 10:42:14 +08005966static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005967_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5968{
5969 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5970 (__v4df) __W,
5971 (__mmask8) __U);
5972}
5973
Logan Chien55afb0a2018-10-15 10:42:14 +08005974static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08005975_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
5976{
5977 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5978 (__v4df)
5979 _mm256_setzero_pd (),
5980 (__mmask8) __U);
5981}
5982
Logan Chien55afb0a2018-10-15 10:42:14 +08005983static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005984_mm_rcp14_ps (__m128 __A)
5985{
5986 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5987 (__v4sf)
5988 _mm_setzero_ps (),
5989 (__mmask8) -1);
5990}
5991
Logan Chien55afb0a2018-10-15 10:42:14 +08005992static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08005993_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
5994{
5995 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5996 (__v4sf) __W,
5997 (__mmask8) __U);
5998}
5999
Logan Chien55afb0a2018-10-15 10:42:14 +08006000static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006001_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
6002{
6003 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6004 (__v4sf)
6005 _mm_setzero_ps (),
6006 (__mmask8) __U);
6007}
6008
Logan Chien55afb0a2018-10-15 10:42:14 +08006009static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006010_mm256_rcp14_ps (__m256 __A)
6011{
6012 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6013 (__v8sf)
6014 _mm256_setzero_ps (),
6015 (__mmask8) -1);
6016}
6017
Logan Chien55afb0a2018-10-15 10:42:14 +08006018static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006019_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6020{
6021 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6022 (__v8sf) __W,
6023 (__mmask8) __U);
6024}
6025
Logan Chien55afb0a2018-10-15 10:42:14 +08006026static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006027_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
6028{
6029 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6030 (__v8sf)
6031 _mm256_setzero_ps (),
6032 (__mmask8) __U);
6033}
6034
/* Masked wrappers around the _mm*_permute_pd / _mm*_permute_ps macros.
   Each expands to the matching __builtin_ia32_select* call over mask U,
   the permute of X by C, and either W (mask forms) or a setzero vector
   (maskz forms).  They are macros, like the permutes they wrap, so C is
   forwarded textually.  */

#define _mm_mask_permute_pd(W, U, X, C)                                    \
  ((__m128d)__builtin_ia32_selectpd_128(                                   \
      (__mmask8)(U),                                                       \
      (__v2df)_mm_permute_pd((X), (C)),                                    \
      (__v2df)(__m128d)(W)))

#define _mm_maskz_permute_pd(U, X, C)                                      \
  ((__m128d)__builtin_ia32_selectpd_128(                                   \
      (__mmask8)(U),                                                       \
      (__v2df)_mm_permute_pd((X), (C)),                                    \
      (__v2df)_mm_setzero_pd()))

#define _mm256_mask_permute_pd(W, U, X, C)                                 \
  ((__m256d)__builtin_ia32_selectpd_256(                                   \
      (__mmask8)(U),                                                       \
      (__v4df)_mm256_permute_pd((X), (C)),                                 \
      (__v4df)(__m256d)(W)))

#define _mm256_maskz_permute_pd(U, X, C)                                   \
  ((__m256d)__builtin_ia32_selectpd_256(                                   \
      (__mmask8)(U),                                                       \
      (__v4df)_mm256_permute_pd((X), (C)),                                 \
      (__v4df)_mm256_setzero_pd()))

#define _mm_mask_permute_ps(W, U, X, C)                                    \
  ((__m128)__builtin_ia32_selectps_128(                                    \
      (__mmask8)(U),                                                       \
      (__v4sf)_mm_permute_ps((X), (C)),                                    \
      (__v4sf)(__m128)(W)))

#define _mm_maskz_permute_ps(U, X, C)                                      \
  ((__m128)__builtin_ia32_selectps_128(                                    \
      (__mmask8)(U),                                                       \
      (__v4sf)_mm_permute_ps((X), (C)),                                    \
      (__v4sf)_mm_setzero_ps()))

#define _mm256_mask_permute_ps(W, U, X, C)                                 \
  ((__m256)__builtin_ia32_selectps_256(                                    \
      (__mmask8)(U),                                                       \
      (__v8sf)_mm256_permute_ps((X), (C)),                                 \
      (__v8sf)(__m256)(W)))

#define _mm256_maskz_permute_ps(U, X, C)                                   \
  ((__m256)__builtin_ia32_selectps_256(                                    \
      (__mmask8)(U),                                                       \
      (__v8sf)_mm256_permute_ps((X), (C)),                                 \
      (__v8sf)_mm256_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006074
Logan Chien55afb0a2018-10-15 10:42:14 +08006075static __inline__ __m128d __DEFAULT_FN_ATTRS128
6076_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006077{
Logan Chien55afb0a2018-10-15 10:42:14 +08006078 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6079 (__v2df)_mm_permutevar_pd(__A, __C),
6080 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08006081}
6082
Logan Chien55afb0a2018-10-15 10:42:14 +08006083static __inline__ __m128d __DEFAULT_FN_ATTRS128
6084_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006085{
Logan Chien55afb0a2018-10-15 10:42:14 +08006086 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6087 (__v2df)_mm_permutevar_pd(__A, __C),
6088 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08006089}
6090
Logan Chien55afb0a2018-10-15 10:42:14 +08006091static __inline__ __m256d __DEFAULT_FN_ATTRS256
6092_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006093{
Logan Chien55afb0a2018-10-15 10:42:14 +08006094 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6095 (__v4df)_mm256_permutevar_pd(__A, __C),
6096 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08006097}
6098
Logan Chien55afb0a2018-10-15 10:42:14 +08006099static __inline__ __m256d __DEFAULT_FN_ATTRS256
6100_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006101{
Logan Chien55afb0a2018-10-15 10:42:14 +08006102 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6103 (__v4df)_mm256_permutevar_pd(__A, __C),
6104 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08006105}
6106
Logan Chien55afb0a2018-10-15 10:42:14 +08006107static __inline__ __m128 __DEFAULT_FN_ATTRS128
6108_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006109{
Logan Chien55afb0a2018-10-15 10:42:14 +08006110 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6111 (__v4sf)_mm_permutevar_ps(__A, __C),
6112 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08006113}
6114
Logan Chien55afb0a2018-10-15 10:42:14 +08006115static __inline__ __m128 __DEFAULT_FN_ATTRS128
6116_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006117{
Logan Chien55afb0a2018-10-15 10:42:14 +08006118 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6119 (__v4sf)_mm_permutevar_ps(__A, __C),
6120 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08006121}
6122
Logan Chien55afb0a2018-10-15 10:42:14 +08006123static __inline__ __m256 __DEFAULT_FN_ATTRS256
6124_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006125{
Logan Chien55afb0a2018-10-15 10:42:14 +08006126 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6127 (__v8sf)_mm256_permutevar_ps(__A, __C),
6128 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08006129}
6130
Logan Chien55afb0a2018-10-15 10:42:14 +08006131static __inline__ __m256 __DEFAULT_FN_ATTRS256
6132_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
Logan Chien2833ffb2018-10-09 10:03:24 +08006133{
Logan Chien55afb0a2018-10-15 10:42:14 +08006134 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6135 (__v8sf)_mm256_permutevar_ps(__A, __C),
6136 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08006137}
6138
Logan Chien55afb0a2018-10-15 10:42:14 +08006139static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006140_mm_test_epi32_mask (__m128i __A, __m128i __B)
6141{
Logan Chien55afb0a2018-10-15 10:42:14 +08006142 return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08006143}
6144
Logan Chien55afb0a2018-10-15 10:42:14 +08006145static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006146_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6147{
Logan Chien55afb0a2018-10-15 10:42:14 +08006148 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6149 _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08006150}
6151
Logan Chien55afb0a2018-10-15 10:42:14 +08006152static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006153_mm256_test_epi32_mask (__m256i __A, __m256i __B)
6154{
Logan Chien55afb0a2018-10-15 10:42:14 +08006155 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
6156 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08006157}
6158
Logan Chien55afb0a2018-10-15 10:42:14 +08006159static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006160_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6161{
Logan Chien55afb0a2018-10-15 10:42:14 +08006162 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6163 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08006164}
6165
Logan Chien55afb0a2018-10-15 10:42:14 +08006166static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006167_mm_test_epi64_mask (__m128i __A, __m128i __B)
6168{
Logan Chien55afb0a2018-10-15 10:42:14 +08006169 return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08006170}
6171
Logan Chien55afb0a2018-10-15 10:42:14 +08006172static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006173_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6174{
Logan Chien55afb0a2018-10-15 10:42:14 +08006175 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6176 _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08006177}
6178
Logan Chien55afb0a2018-10-15 10:42:14 +08006179static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006180_mm256_test_epi64_mask (__m256i __A, __m256i __B)
6181{
Logan Chien55afb0a2018-10-15 10:42:14 +08006182 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
6183 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08006184}
6185
Logan Chien55afb0a2018-10-15 10:42:14 +08006186static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006187_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6188{
Logan Chien55afb0a2018-10-15 10:42:14 +08006189 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6190 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08006191}
6192
Logan Chien55afb0a2018-10-15 10:42:14 +08006193static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006194_mm_testn_epi32_mask (__m128i __A, __m128i __B)
6195{
Logan Chien55afb0a2018-10-15 10:42:14 +08006196 return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08006197}
6198
Logan Chien55afb0a2018-10-15 10:42:14 +08006199static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006200_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6201{
Logan Chien55afb0a2018-10-15 10:42:14 +08006202 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6203 _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08006204}
6205
Logan Chien55afb0a2018-10-15 10:42:14 +08006206static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006207_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6208{
Logan Chien55afb0a2018-10-15 10:42:14 +08006209 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6210 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08006211}
6212
Logan Chien55afb0a2018-10-15 10:42:14 +08006213static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006214_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6215{
Logan Chien55afb0a2018-10-15 10:42:14 +08006216 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6217 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08006218}
6219
Logan Chien55afb0a2018-10-15 10:42:14 +08006220static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006221_mm_testn_epi64_mask (__m128i __A, __m128i __B)
6222{
Logan Chien55afb0a2018-10-15 10:42:14 +08006223 return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08006224}
6225
Logan Chien55afb0a2018-10-15 10:42:14 +08006226static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006227_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6228{
Logan Chien55afb0a2018-10-15 10:42:14 +08006229 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6230 _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08006231}
6232
Logan Chien55afb0a2018-10-15 10:42:14 +08006233static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006234_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6235{
Logan Chien55afb0a2018-10-15 10:42:14 +08006236 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6237 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08006238}
6239
Logan Chien55afb0a2018-10-15 10:42:14 +08006240static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006241_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6242{
Logan Chien55afb0a2018-10-15 10:42:14 +08006243 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6244 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08006245}
6246
Logan Chien55afb0a2018-10-15 10:42:14 +08006247static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006248_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6249{
6250 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6251 (__v4si)_mm_unpackhi_epi32(__A, __B),
6252 (__v4si)__W);
6253}
6254
Logan Chien55afb0a2018-10-15 10:42:14 +08006255static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006256_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6257{
6258 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6259 (__v4si)_mm_unpackhi_epi32(__A, __B),
6260 (__v4si)_mm_setzero_si128());
6261}
6262
Logan Chien55afb0a2018-10-15 10:42:14 +08006263static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006264_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6265{
6266 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6267 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6268 (__v8si)__W);
6269}
6270
Logan Chien55afb0a2018-10-15 10:42:14 +08006271static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006272_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6273{
6274 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6275 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6276 (__v8si)_mm256_setzero_si256());
6277}
6278
Logan Chien55afb0a2018-10-15 10:42:14 +08006279static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006280_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6281{
6282 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6283 (__v2di)_mm_unpackhi_epi64(__A, __B),
6284 (__v2di)__W);
6285}
6286
Logan Chien55afb0a2018-10-15 10:42:14 +08006287static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006288_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6289{
6290 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6291 (__v2di)_mm_unpackhi_epi64(__A, __B),
Logan Chien55afb0a2018-10-15 10:42:14 +08006292 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08006293}
6294
Logan Chien55afb0a2018-10-15 10:42:14 +08006295static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006296_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6297{
6298 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6299 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6300 (__v4di)__W);
6301}
6302
Logan Chien55afb0a2018-10-15 10:42:14 +08006303static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006304_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6305{
6306 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6307 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6308 (__v4di)_mm256_setzero_si256());
6309}
6310
Logan Chien55afb0a2018-10-15 10:42:14 +08006311static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006312_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6313{
6314 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6315 (__v4si)_mm_unpacklo_epi32(__A, __B),
6316 (__v4si)__W);
6317}
6318
Logan Chien55afb0a2018-10-15 10:42:14 +08006319static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006320_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6321{
6322 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6323 (__v4si)_mm_unpacklo_epi32(__A, __B),
6324 (__v4si)_mm_setzero_si128());
6325}
6326
Logan Chien55afb0a2018-10-15 10:42:14 +08006327static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006328_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6329{
6330 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6331 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6332 (__v8si)__W);
6333}
6334
Logan Chien55afb0a2018-10-15 10:42:14 +08006335static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006336_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6337{
6338 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6339 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6340 (__v8si)_mm256_setzero_si256());
6341}
6342
Logan Chien55afb0a2018-10-15 10:42:14 +08006343static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006344_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6345{
6346 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6347 (__v2di)_mm_unpacklo_epi64(__A, __B),
6348 (__v2di)__W);
6349}
6350
Logan Chien55afb0a2018-10-15 10:42:14 +08006351static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006352_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6353{
6354 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6355 (__v2di)_mm_unpacklo_epi64(__A, __B),
Logan Chien55afb0a2018-10-15 10:42:14 +08006356 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08006357}
6358
Logan Chien55afb0a2018-10-15 10:42:14 +08006359static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006360_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6361{
6362 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6363 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6364 (__v4di)__W);
6365}
6366
Logan Chien55afb0a2018-10-15 10:42:14 +08006367static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006368_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6369{
6370 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6371 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6372 (__v4di)_mm256_setzero_si256());
6373}
6374
Logan Chien55afb0a2018-10-15 10:42:14 +08006375static __inline__ __m128i __DEFAULT_FN_ATTRS128
6376_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006377{
Logan Chien55afb0a2018-10-15 10:42:14 +08006378 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6379 (__v4si)_mm_sra_epi32(__A, __B),
6380 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08006381}
6382
Logan Chien55afb0a2018-10-15 10:42:14 +08006383static __inline__ __m128i __DEFAULT_FN_ATTRS128
6384_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006385{
Logan Chien55afb0a2018-10-15 10:42:14 +08006386 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6387 (__v4si)_mm_sra_epi32(__A, __B),
6388 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08006389}
6390
Logan Chien55afb0a2018-10-15 10:42:14 +08006391static __inline__ __m256i __DEFAULT_FN_ATTRS256
6392_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006393{
Logan Chien55afb0a2018-10-15 10:42:14 +08006394 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6395 (__v8si)_mm256_sra_epi32(__A, __B),
6396 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08006397}
6398
Logan Chien55afb0a2018-10-15 10:42:14 +08006399static __inline__ __m256i __DEFAULT_FN_ATTRS256
6400_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006401{
Logan Chien55afb0a2018-10-15 10:42:14 +08006402 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6403 (__v8si)_mm256_sra_epi32(__A, __B),
6404 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08006405}
6406
Logan Chien55afb0a2018-10-15 10:42:14 +08006407static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006408_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006409{
Logan Chien55afb0a2018-10-15 10:42:14 +08006410 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6411 (__v4si)_mm_srai_epi32(__A, __B),
6412 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08006413}
6414
Logan Chien55afb0a2018-10-15 10:42:14 +08006415static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006416_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006417{
Logan Chien55afb0a2018-10-15 10:42:14 +08006418 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6419 (__v4si)_mm_srai_epi32(__A, __B),
6420 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08006421}
6422
Logan Chien55afb0a2018-10-15 10:42:14 +08006423static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006424_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006425{
Logan Chien55afb0a2018-10-15 10:42:14 +08006426 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6427 (__v8si)_mm256_srai_epi32(__A, __B),
6428 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08006429}
6430
Logan Chien55afb0a2018-10-15 10:42:14 +08006431static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006432_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006433{
Logan Chien55afb0a2018-10-15 10:42:14 +08006434 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6435 (__v8si)_mm256_srai_epi32(__A, __B),
6436 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08006437}
6438
Logan Chien55afb0a2018-10-15 10:42:14 +08006439static __inline__ __m128i __DEFAULT_FN_ATTRS128
6440_mm_sra_epi64(__m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006441{
Logan Chien55afb0a2018-10-15 10:42:14 +08006442 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08006443}
6444
Logan Chien55afb0a2018-10-15 10:42:14 +08006445static __inline__ __m128i __DEFAULT_FN_ATTRS128
6446_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08006447{
Logan Chien55afb0a2018-10-15 10:42:14 +08006448 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6449 (__v2di)_mm_sra_epi64(__A, __B), \
6450 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08006451}
6452
Logan Chien55afb0a2018-10-15 10:42:14 +08006453static __inline__ __m128i __DEFAULT_FN_ATTRS128
6454_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6455{
6456 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6457 (__v2di)_mm_sra_epi64(__A, __B), \
6458 (__v2di)_mm_setzero_si128());
6459}
Logan Chien2833ffb2018-10-09 10:03:24 +08006460
Logan Chien55afb0a2018-10-15 10:42:14 +08006461static __inline__ __m256i __DEFAULT_FN_ATTRS256
6462_mm256_sra_epi64(__m256i __A, __m128i __B)
6463{
6464 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6465}
Logan Chien2833ffb2018-10-09 10:03:24 +08006466
Logan Chien55afb0a2018-10-15 10:42:14 +08006467static __inline__ __m256i __DEFAULT_FN_ATTRS256
6468_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6469{
6470 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6471 (__v4di)_mm256_sra_epi64(__A, __B), \
6472 (__v4di)__W);
6473}
Logan Chien2833ffb2018-10-09 10:03:24 +08006474
Logan Chien55afb0a2018-10-15 10:42:14 +08006475static __inline__ __m256i __DEFAULT_FN_ATTRS256
6476_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6477{
6478 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6479 (__v4di)_mm256_sra_epi64(__A, __B), \
6480 (__v4di)_mm256_setzero_si256());
6481}
Logan Chien2833ffb2018-10-09 10:03:24 +08006482
Logan Chien55afb0a2018-10-15 10:42:14 +08006483static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006484_mm_srai_epi64(__m128i __A, unsigned int __imm)
Logan Chien55afb0a2018-10-15 10:42:14 +08006485{
6486 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
6487}
Logan Chien2833ffb2018-10-09 10:03:24 +08006488
Logan Chien55afb0a2018-10-15 10:42:14 +08006489static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006490_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
Logan Chien55afb0a2018-10-15 10:42:14 +08006491{
6492 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6493 (__v2di)_mm_srai_epi64(__A, __imm), \
6494 (__v2di)__W);
6495}
Logan Chien2833ffb2018-10-09 10:03:24 +08006496
Logan Chien55afb0a2018-10-15 10:42:14 +08006497static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006498_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
Logan Chien55afb0a2018-10-15 10:42:14 +08006499{
6500 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6501 (__v2di)_mm_srai_epi64(__A, __imm), \
6502 (__v2di)_mm_setzero_si128());
6503}
6504
6505static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006506_mm256_srai_epi64(__m256i __A, unsigned int __imm)
Logan Chien55afb0a2018-10-15 10:42:14 +08006507{
6508 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
6509}
6510
6511static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006512_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A,
6513 unsigned int __imm)
Logan Chien55afb0a2018-10-15 10:42:14 +08006514{
6515 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6516 (__v4di)_mm256_srai_epi64(__A, __imm), \
6517 (__v4di)__W);
6518}
6519
6520static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07006521_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
Logan Chien55afb0a2018-10-15 10:42:14 +08006522{
6523 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6524 (__v4di)_mm256_srai_epi64(__A, __imm), \
6525 (__v4di)_mm256_setzero_si256());
6526}
6527
/* Unmasked form: expands to __builtin_ia32_pternlogd128_mask with A, B, C
   viewed as __v4si, imm cast to int, and a mask of (__mmask8)-1. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
6533
/* Masked form: same builtin as the unmasked macro, but with the caller's mask
   U as the last operand. Note the argument order (A, U, B, C, imm). */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6539
/* Zero-masked form: expands to __builtin_ia32_pternlogd128_maskz with A, B, C
   viewed as __v4si, imm cast to int, and mask U. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                              (__v4si)(__m128i)(B), \
                                              (__v4si)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006545
/* Unmasked form: expands to __builtin_ia32_pternlogd256_mask with A, B, C
   viewed as __v8si, imm cast to int, and a mask of (__mmask8)-1. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
6551
/* Masked form: __builtin_ia32_pternlogd256_mask with the caller's mask U as
   the last operand. Note the argument order (A, U, B, C, imm). */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6557
/* Zero-masked form: expands to __builtin_ia32_pternlogd256_maskz with A, B, C
   viewed as __v8si, imm cast to int, and mask U. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (__v8si)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006563
/* Unmasked form: expands to __builtin_ia32_pternlogq128_mask with A, B, C
   viewed as __v2di, imm cast to int, and a mask of (__mmask8)-1. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
6569
/* Masked form: __builtin_ia32_pternlogq128_mask with the caller's mask U as
   the last operand. Note the argument order (A, U, B, C, imm). */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6575
/* Zero-masked form: expands to __builtin_ia32_pternlogq128_maskz with A, B, C
   viewed as __v2di, imm cast to int, and mask U. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                              (__v2di)(__m128i)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006581
/* Unmasked form: expands to __builtin_ia32_pternlogq256_mask with A, B, C
   viewed as __v4di, imm cast to int, and a mask of (__mmask8)-1. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
6587
/* Masked form: __builtin_ia32_pternlogq256_mask with the caller's mask U as
   the last operand. Note the argument order (A, U, B, C, imm). */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6593
/* Zero-masked form: expands to __builtin_ia32_pternlogq256_maskz with A, B, C
   viewed as __v4di, imm cast to int, and mask U. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006599
6600
6601
/* Expands to __builtin_ia32_shuf_f32x4_256 on A and B (viewed as __v8sf) with
   imm cast to int. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006605
/* Masked _mm256_shuffle_f32x4: forwards U, the shuffle result, and W to
   __builtin_ia32_selectps_256. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                   (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                   (__v8sf)(__m256)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006610
/* Zero-masked _mm256_shuffle_f32x4: forwards U, the shuffle result, and the
   result of _mm256_setzero_ps() to __builtin_ia32_selectps_256. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                   (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                   (__v8sf)_mm256_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006615
/* Expands to __builtin_ia32_shuf_f64x2_256 on A and B (viewed as __v4df) with
   imm cast to int. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006619
/* Masked _mm256_shuffle_f64x2: forwards U, the shuffle result, and W to
   __builtin_ia32_selectpd_256. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                   (__v4df)(__m256d)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006624
/* Zero-masked _mm256_shuffle_f64x2: forwards U, the shuffle result, and the
   result of _mm256_setzero_pd() to __builtin_ia32_selectpd_256. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                   (__v4df)_mm256_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006629
/* Expands to __builtin_ia32_shuf_i32x4_256 on A and B (viewed as __v8si) with
   imm cast to int. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
                                          (__v8si)(__m256i)(B), (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006633
/* Masked _mm256_shuffle_i32x4: forwards U, the shuffle result, and W to
   __builtin_ia32_selectd_256. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                   (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                   (__v8si)(__m256i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006638
/* Zero-masked _mm256_shuffle_i32x4: forwards U, the shuffle result, and the
   result of _mm256_setzero_si256() to __builtin_ia32_selectd_256. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                   (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                   (__v8si)_mm256_setzero_si256()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006643
/* Expands to __builtin_ia32_shuf_i64x2_256 on A and B (viewed as __v4di) with
   imm cast to int. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
                                          (__v4di)(__m256i)(B), (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006647
/* Masked _mm256_shuffle_i64x2: forwards U, the shuffle result, and W to
   __builtin_ia32_selectq_256. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                   (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                   (__v4di)(__m256i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006652
Logan Chien2833ffb2018-10-09 10:03:24 +08006653
/* Zero-masked _mm256_shuffle_i64x2: forwards U, the shuffle result, and the
   result of _mm256_setzero_si256() to __builtin_ia32_selectq_256. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                   (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                   (__v4di)_mm256_setzero_si256()))
Logan Chien55afb0a2018-10-15 10:42:14 +08006658
/* Masked _mm_shuffle_pd: forwards U, _mm_shuffle_pd((A), (B), (M)), and W to
   __builtin_ia32_selectpd_128. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)(__m128d)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006663
/* Zero-masked _mm_shuffle_pd: forwards U, _mm_shuffle_pd((A), (B), (M)), and
   the result of _mm_setzero_pd() to __builtin_ia32_selectpd_128. */
#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)_mm_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006668
/* Masked _mm256_shuffle_pd: forwards U, _mm256_shuffle_pd((A), (B), (M)), and
   W to __builtin_ia32_selectpd_256. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)(__m256d)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006673
/* Zero-masked _mm256_shuffle_pd: forwards U, _mm256_shuffle_pd((A), (B), (M)),
   and the result of _mm256_setzero_pd() to __builtin_ia32_selectpd_256. */
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)_mm256_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006678
/* Masked _mm_shuffle_ps: forwards U, _mm_shuffle_ps((A), (B), (M)), and W to
   __builtin_ia32_selectps_128. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)(__m128)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006683
/* Zero-masked _mm_shuffle_ps: forwards U, _mm_shuffle_ps((A), (B), (M)), and
   the result of _mm_setzero_ps() to __builtin_ia32_selectps_128. */
#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)_mm_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006688
/* Masked _mm256_shuffle_ps: forwards U, _mm256_shuffle_ps((A), (B), (M)), and
   W to __builtin_ia32_selectps_256. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)(__m256)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08006693
/* Zero-masked _mm256_shuffle_ps: forwards U, _mm256_shuffle_ps((A), (B), (M)),
   and the result of _mm256_setzero_ps() to __builtin_ia32_selectps_256. */
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)_mm256_setzero_ps()))
Logan Chien2833ffb2018-10-09 10:03:24 +08006698
Logan Chien55afb0a2018-10-15 10:42:14 +08006699static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006700_mm_rsqrt14_pd (__m128d __A)
6701{
6702 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6703 (__v2df)
6704 _mm_setzero_pd (),
6705 (__mmask8) -1);
6706}
6707
Logan Chien55afb0a2018-10-15 10:42:14 +08006708static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006709_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6710{
6711 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6712 (__v2df) __W,
6713 (__mmask8) __U);
6714}
6715
Logan Chien55afb0a2018-10-15 10:42:14 +08006716static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006717_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
6718{
6719 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6720 (__v2df)
6721 _mm_setzero_pd (),
6722 (__mmask8) __U);
6723}
6724
Logan Chien55afb0a2018-10-15 10:42:14 +08006725static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006726_mm256_rsqrt14_pd (__m256d __A)
6727{
6728 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6729 (__v4df)
6730 _mm256_setzero_pd (),
6731 (__mmask8) -1);
6732}
6733
Logan Chien55afb0a2018-10-15 10:42:14 +08006734static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006735_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6736{
6737 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6738 (__v4df) __W,
6739 (__mmask8) __U);
6740}
6741
Logan Chien55afb0a2018-10-15 10:42:14 +08006742static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006743_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
6744{
6745 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6746 (__v4df)
6747 _mm256_setzero_pd (),
6748 (__mmask8) __U);
6749}
6750
Logan Chien55afb0a2018-10-15 10:42:14 +08006751static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006752_mm_rsqrt14_ps (__m128 __A)
6753{
6754 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6755 (__v4sf)
6756 _mm_setzero_ps (),
6757 (__mmask8) -1);
6758}
6759
Logan Chien55afb0a2018-10-15 10:42:14 +08006760static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006761_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6762{
6763 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6764 (__v4sf) __W,
6765 (__mmask8) __U);
6766}
6767
Logan Chien55afb0a2018-10-15 10:42:14 +08006768static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006769_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
6770{
6771 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6772 (__v4sf)
6773 _mm_setzero_ps (),
6774 (__mmask8) __U);
6775}
6776
Logan Chien55afb0a2018-10-15 10:42:14 +08006777static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006778_mm256_rsqrt14_ps (__m256 __A)
6779{
6780 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6781 (__v8sf)
6782 _mm256_setzero_ps (),
6783 (__mmask8) -1);
6784}
6785
Logan Chien55afb0a2018-10-15 10:42:14 +08006786static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006787_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6788{
6789 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6790 (__v8sf) __W,
6791 (__mmask8) __U);
6792}
6793
Logan Chien55afb0a2018-10-15 10:42:14 +08006794static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006795_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
6796{
6797 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6798 (__v8sf)
6799 _mm256_setzero_ps (),
6800 (__mmask8) __U);
6801}
6802
Logan Chien55afb0a2018-10-15 10:42:14 +08006803static __inline__ __m256 __DEFAULT_FN_ATTRS256
6804_mm256_broadcast_f32x4(__m128 __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006805{
Logan Chien55afb0a2018-10-15 10:42:14 +08006806 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6807 0, 1, 2, 3, 0, 1, 2, 3);
Logan Chien2833ffb2018-10-09 10:03:24 +08006808}
6809
Logan Chien55afb0a2018-10-15 10:42:14 +08006810static __inline__ __m256 __DEFAULT_FN_ATTRS256
6811_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006812{
Logan Chien55afb0a2018-10-15 10:42:14 +08006813 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6814 (__v8sf)_mm256_broadcast_f32x4(__A),
6815 (__v8sf)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +08006816}
6817
Logan Chien55afb0a2018-10-15 10:42:14 +08006818static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006819_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
6820{
Logan Chien55afb0a2018-10-15 10:42:14 +08006821 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6822 (__v8sf)_mm256_broadcast_f32x4(__A),
6823 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08006824}
6825
Logan Chien55afb0a2018-10-15 10:42:14 +08006826static __inline__ __m256i __DEFAULT_FN_ATTRS256
6827_mm256_broadcast_i32x4(__m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006828{
Logan Chien55afb0a2018-10-15 10:42:14 +08006829 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6830 0, 1, 2, 3, 0, 1, 2, 3);
Logan Chien2833ffb2018-10-09 10:03:24 +08006831}
6832
Logan Chien55afb0a2018-10-15 10:42:14 +08006833static __inline__ __m256i __DEFAULT_FN_ATTRS256
6834_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006835{
Logan Chien55afb0a2018-10-15 10:42:14 +08006836 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6837 (__v8si)_mm256_broadcast_i32x4(__A),
6838 (__v8si)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +08006839}
6840
Logan Chien55afb0a2018-10-15 10:42:14 +08006841static __inline__ __m256i __DEFAULT_FN_ATTRS256
6842_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08006843{
Logan Chien55afb0a2018-10-15 10:42:14 +08006844 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6845 (__v8si)_mm256_broadcast_i32x4(__A),
6846 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08006847}
6848
Logan Chien55afb0a2018-10-15 10:42:14 +08006849static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006850_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
6851{
6852 return (__m256d)__builtin_ia32_selectpd_256(__M,
6853 (__v4df) _mm256_broadcastsd_pd(__A),
6854 (__v4df) __O);
6855}
6856
Logan Chien55afb0a2018-10-15 10:42:14 +08006857static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006858_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6859{
6860 return (__m256d)__builtin_ia32_selectpd_256(__M,
6861 (__v4df) _mm256_broadcastsd_pd(__A),
6862 (__v4df) _mm256_setzero_pd());
6863}
6864
Logan Chien55afb0a2018-10-15 10:42:14 +08006865static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006866_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
6867{
6868 return (__m128)__builtin_ia32_selectps_128(__M,
6869 (__v4sf) _mm_broadcastss_ps(__A),
6870 (__v4sf) __O);
6871}
6872
Logan Chien55afb0a2018-10-15 10:42:14 +08006873static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006874_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6875{
6876 return (__m128)__builtin_ia32_selectps_128(__M,
6877 (__v4sf) _mm_broadcastss_ps(__A),
6878 (__v4sf) _mm_setzero_ps());
6879}
6880
Logan Chien55afb0a2018-10-15 10:42:14 +08006881static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006882_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
6883{
6884 return (__m256)__builtin_ia32_selectps_256(__M,
6885 (__v8sf) _mm256_broadcastss_ps(__A),
6886 (__v8sf) __O);
6887}
6888
Logan Chien55afb0a2018-10-15 10:42:14 +08006889static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006890_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6891{
6892 return (__m256)__builtin_ia32_selectps_256(__M,
6893 (__v8sf) _mm256_broadcastss_ps(__A),
6894 (__v8sf) _mm256_setzero_ps());
6895}
6896
Logan Chien55afb0a2018-10-15 10:42:14 +08006897static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006898_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6899{
6900 return (__m128i)__builtin_ia32_selectd_128(__M,
6901 (__v4si) _mm_broadcastd_epi32(__A),
6902 (__v4si) __O);
6903}
6904
Logan Chien55afb0a2018-10-15 10:42:14 +08006905static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006906_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6907{
6908 return (__m128i)__builtin_ia32_selectd_128(__M,
6909 (__v4si) _mm_broadcastd_epi32(__A),
6910 (__v4si) _mm_setzero_si128());
6911}
6912
Logan Chien55afb0a2018-10-15 10:42:14 +08006913static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006914_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
6915{
6916 return (__m256i)__builtin_ia32_selectd_256(__M,
6917 (__v8si) _mm256_broadcastd_epi32(__A),
6918 (__v8si) __O);
6919}
6920
Logan Chien55afb0a2018-10-15 10:42:14 +08006921static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006922_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6923{
6924 return (__m256i)__builtin_ia32_selectd_256(__M,
6925 (__v8si) _mm256_broadcastd_epi32(__A),
6926 (__v8si) _mm256_setzero_si256());
6927}
6928
Logan Chien55afb0a2018-10-15 10:42:14 +08006929static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006930_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
6931{
6932 return (__m128i)__builtin_ia32_selectq_128(__M,
6933 (__v2di) _mm_broadcastq_epi64(__A),
6934 (__v2di) __O);
6935}
6936
Logan Chien55afb0a2018-10-15 10:42:14 +08006937static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006938_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6939{
6940 return (__m128i)__builtin_ia32_selectq_128(__M,
6941 (__v2di) _mm_broadcastq_epi64(__A),
6942 (__v2di) _mm_setzero_si128());
6943}
6944
Logan Chien55afb0a2018-10-15 10:42:14 +08006945static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006946_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
6947{
6948 return (__m256i)__builtin_ia32_selectq_256(__M,
6949 (__v4di) _mm256_broadcastq_epi64(__A),
6950 (__v4di) __O);
6951}
6952
Logan Chien55afb0a2018-10-15 10:42:14 +08006953static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006954_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6955{
6956 return (__m256i)__builtin_ia32_selectq_256(__M,
6957 (__v4di) _mm256_broadcastq_epi64(__A),
6958 (__v4di) _mm256_setzero_si256());
6959}
6960
Logan Chien55afb0a2018-10-15 10:42:14 +08006961static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006962_mm_cvtsepi32_epi8 (__m128i __A)
6963{
6964 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6965 (__v16qi)_mm_undefined_si128(),
6966 (__mmask8) -1);
6967}
6968
Logan Chien55afb0a2018-10-15 10:42:14 +08006969static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006970_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6971{
6972 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6973 (__v16qi) __O, __M);
6974}
6975
Logan Chien55afb0a2018-10-15 10:42:14 +08006976static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006977_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
6978{
6979 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6980 (__v16qi) _mm_setzero_si128 (),
6981 __M);
6982}
6983
Logan Chien55afb0a2018-10-15 10:42:14 +08006984static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08006985_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
6986{
6987 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6988}
6989
Logan Chiendf4f7662019-09-04 16:45:23 -07006990static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006991_mm256_cvtsepi32_epi8 (__m256i __A)
6992{
6993 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6994 (__v16qi)_mm_undefined_si128(),
6995 (__mmask8) -1);
6996}
6997
Logan Chien55afb0a2018-10-15 10:42:14 +08006998static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08006999_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7000{
7001 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7002 (__v16qi) __O, __M);
7003}
7004
Logan Chien55afb0a2018-10-15 10:42:14 +08007005static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007006_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
7007{
7008 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7009 (__v16qi) _mm_setzero_si128 (),
7010 __M);
7011}
7012
Logan Chiendf4f7662019-09-04 16:45:23 -07007013static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007014_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7015{
7016 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7017}
7018
Logan Chien55afb0a2018-10-15 10:42:14 +08007019static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007020_mm_cvtsepi32_epi16 (__m128i __A)
7021{
7022 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7023 (__v8hi)_mm_setzero_si128 (),
7024 (__mmask8) -1);
7025}
7026
Logan Chien55afb0a2018-10-15 10:42:14 +08007027static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007028_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7029{
7030 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7031 (__v8hi)__O,
7032 __M);
7033}
7034
Logan Chien55afb0a2018-10-15 10:42:14 +08007035static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007036_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
7037{
7038 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7039 (__v8hi) _mm_setzero_si128 (),
7040 __M);
7041}
7042
Logan Chien55afb0a2018-10-15 10:42:14 +08007043static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007044_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7045{
7046 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7047}
7048
Logan Chien55afb0a2018-10-15 10:42:14 +08007049static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007050_mm256_cvtsepi32_epi16 (__m256i __A)
7051{
7052 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7053 (__v8hi)_mm_undefined_si128(),
7054 (__mmask8) -1);
7055}
7056
Logan Chien55afb0a2018-10-15 10:42:14 +08007057static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007058_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7059{
7060 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7061 (__v8hi) __O, __M);
7062}
7063
Logan Chien55afb0a2018-10-15 10:42:14 +08007064static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007065_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
7066{
7067 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7068 (__v8hi) _mm_setzero_si128 (),
7069 __M);
7070}
7071
Logan Chien55afb0a2018-10-15 10:42:14 +08007072static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007073_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7074{
7075 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7076}
7077
Logan Chien55afb0a2018-10-15 10:42:14 +08007078static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007079_mm_cvtsepi64_epi8 (__m128i __A)
7080{
7081 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7082 (__v16qi)_mm_undefined_si128(),
7083 (__mmask8) -1);
7084}
7085
Logan Chien55afb0a2018-10-15 10:42:14 +08007086static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007087_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7088{
7089 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7090 (__v16qi) __O, __M);
7091}
7092
Logan Chien55afb0a2018-10-15 10:42:14 +08007093static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007094_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
7095{
7096 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7097 (__v16qi) _mm_setzero_si128 (),
7098 __M);
7099}
7100
Logan Chien55afb0a2018-10-15 10:42:14 +08007101static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007102_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7103{
7104 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7105}
7106
Logan Chien55afb0a2018-10-15 10:42:14 +08007107static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007108_mm256_cvtsepi64_epi8 (__m256i __A)
7109{
7110 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7111 (__v16qi)_mm_undefined_si128(),
7112 (__mmask8) -1);
7113}
7114
Logan Chien55afb0a2018-10-15 10:42:14 +08007115static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007116_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7117{
7118 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7119 (__v16qi) __O, __M);
7120}
7121
Logan Chien55afb0a2018-10-15 10:42:14 +08007122static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007123_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
7124{
7125 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7126 (__v16qi) _mm_setzero_si128 (),
7127 __M);
7128}
7129
Logan Chien55afb0a2018-10-15 10:42:14 +08007130static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007131_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7132{
7133 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7134}
7135
Logan Chien55afb0a2018-10-15 10:42:14 +08007136static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007137_mm_cvtsepi64_epi32 (__m128i __A)
7138{
7139 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7140 (__v4si)_mm_undefined_si128(),
7141 (__mmask8) -1);
7142}
7143
Logan Chien55afb0a2018-10-15 10:42:14 +08007144static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007145_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7146{
7147 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7148 (__v4si) __O, __M);
7149}
7150
Logan Chien55afb0a2018-10-15 10:42:14 +08007151static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007152_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
7153{
7154 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7155 (__v4si) _mm_setzero_si128 (),
7156 __M);
7157}
7158
Logan Chien55afb0a2018-10-15 10:42:14 +08007159static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007160_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7161{
7162 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7163}
7164
Logan Chien55afb0a2018-10-15 10:42:14 +08007165static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007166_mm256_cvtsepi64_epi32 (__m256i __A)
7167{
7168 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7169 (__v4si)_mm_undefined_si128(),
7170 (__mmask8) -1);
7171}
7172
Logan Chien55afb0a2018-10-15 10:42:14 +08007173static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007174_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7175{
7176 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7177 (__v4si)__O,
7178 __M);
7179}
7180
Logan Chien55afb0a2018-10-15 10:42:14 +08007181static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007182_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
7183{
7184 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7185 (__v4si) _mm_setzero_si128 (),
7186 __M);
7187}
7188
Logan Chien55afb0a2018-10-15 10:42:14 +08007189static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007190_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7191{
7192 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7193}
7194
Logan Chien55afb0a2018-10-15 10:42:14 +08007195static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007196_mm_cvtsepi64_epi16 (__m128i __A)
7197{
7198 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7199 (__v8hi)_mm_undefined_si128(),
7200 (__mmask8) -1);
7201}
7202
Logan Chien55afb0a2018-10-15 10:42:14 +08007203static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007204_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7205{
7206 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7207 (__v8hi) __O, __M);
7208}
7209
Logan Chien55afb0a2018-10-15 10:42:14 +08007210static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007211_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
7212{
7213 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7214 (__v8hi) _mm_setzero_si128 (),
7215 __M);
7216}
7217
Logan Chien55afb0a2018-10-15 10:42:14 +08007218static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007219_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7220{
7221 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7222}
7223
Logan Chien55afb0a2018-10-15 10:42:14 +08007224static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007225_mm256_cvtsepi64_epi16 (__m256i __A)
7226{
7227 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7228 (__v8hi)_mm_undefined_si128(),
7229 (__mmask8) -1);
7230}
7231
Logan Chien55afb0a2018-10-15 10:42:14 +08007232static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007233_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7234{
7235 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7236 (__v8hi) __O, __M);
7237}
7238
Logan Chien55afb0a2018-10-15 10:42:14 +08007239static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007240_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
7241{
7242 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7243 (__v8hi) _mm_setzero_si128 (),
7244 __M);
7245}
7246
Logan Chien55afb0a2018-10-15 10:42:14 +08007247static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007248_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7249{
7250 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7251}
7252
Logan Chien55afb0a2018-10-15 10:42:14 +08007253static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007254_mm_cvtusepi32_epi8 (__m128i __A)
7255{
7256 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7257 (__v16qi)_mm_undefined_si128(),
7258 (__mmask8) -1);
7259}
7260
Logan Chien55afb0a2018-10-15 10:42:14 +08007261static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007262_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7263{
7264 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7265 (__v16qi) __O,
7266 __M);
7267}
7268
Logan Chien55afb0a2018-10-15 10:42:14 +08007269static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007270_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
7271{
7272 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7273 (__v16qi) _mm_setzero_si128 (),
7274 __M);
7275}
7276
Logan Chien55afb0a2018-10-15 10:42:14 +08007277static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007278_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7279{
7280 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7281}
7282
Logan Chien55afb0a2018-10-15 10:42:14 +08007283static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007284_mm256_cvtusepi32_epi8 (__m256i __A)
7285{
7286 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7287 (__v16qi)_mm_undefined_si128(),
7288 (__mmask8) -1);
7289}
7290
Logan Chien55afb0a2018-10-15 10:42:14 +08007291static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007292_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7293{
7294 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7295 (__v16qi) __O,
7296 __M);
7297}
7298
Logan Chien55afb0a2018-10-15 10:42:14 +08007299static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007300_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
7301{
7302 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7303 (__v16qi) _mm_setzero_si128 (),
7304 __M);
7305}
7306
Logan Chien55afb0a2018-10-15 10:42:14 +08007307static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007308_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7309{
7310 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7311}
7312
Logan Chien55afb0a2018-10-15 10:42:14 +08007313static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007314_mm_cvtusepi32_epi16 (__m128i __A)
7315{
7316 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7317 (__v8hi)_mm_undefined_si128(),
7318 (__mmask8) -1);
7319}
7320
Logan Chien55afb0a2018-10-15 10:42:14 +08007321static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007322_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7323{
7324 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7325 (__v8hi) __O, __M);
7326}
7327
Logan Chien55afb0a2018-10-15 10:42:14 +08007328static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007329_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
7330{
7331 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7332 (__v8hi) _mm_setzero_si128 (),
7333 __M);
7334}
7335
Logan Chien55afb0a2018-10-15 10:42:14 +08007336static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007337_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7338{
7339 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7340}
7341
Logan Chien55afb0a2018-10-15 10:42:14 +08007342static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007343_mm256_cvtusepi32_epi16 (__m256i __A)
7344{
7345 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7346 (__v8hi) _mm_undefined_si128(),
7347 (__mmask8) -1);
7348}
7349
Logan Chien55afb0a2018-10-15 10:42:14 +08007350static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007351_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7352{
7353 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7354 (__v8hi) __O, __M);
7355}
7356
Logan Chien55afb0a2018-10-15 10:42:14 +08007357static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007358_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
7359{
7360 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7361 (__v8hi) _mm_setzero_si128 (),
7362 __M);
7363}
7364
Logan Chien55afb0a2018-10-15 10:42:14 +08007365static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007366_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7367{
7368 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7369}
7370
Logan Chien55afb0a2018-10-15 10:42:14 +08007371static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007372_mm_cvtusepi64_epi8 (__m128i __A)
7373{
7374 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7375 (__v16qi)_mm_undefined_si128(),
7376 (__mmask8) -1);
7377}
7378
Logan Chien55afb0a2018-10-15 10:42:14 +08007379static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007380_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7381{
7382 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7383 (__v16qi) __O,
7384 __M);
7385}
7386
Logan Chien55afb0a2018-10-15 10:42:14 +08007387static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007388_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
7389{
7390 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7391 (__v16qi) _mm_setzero_si128 (),
7392 __M);
7393}
7394
Logan Chien55afb0a2018-10-15 10:42:14 +08007395static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007396_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7397{
7398 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7399}
7400
Logan Chien55afb0a2018-10-15 10:42:14 +08007401static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007402_mm256_cvtusepi64_epi8 (__m256i __A)
7403{
7404 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7405 (__v16qi)_mm_undefined_si128(),
7406 (__mmask8) -1);
7407}
7408
Logan Chien55afb0a2018-10-15 10:42:14 +08007409static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007410_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7411{
7412 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7413 (__v16qi) __O,
7414 __M);
7415}
7416
Logan Chien55afb0a2018-10-15 10:42:14 +08007417static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007418_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
7419{
7420 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7421 (__v16qi) _mm_setzero_si128 (),
7422 __M);
7423}
7424
Logan Chien55afb0a2018-10-15 10:42:14 +08007425static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007426_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7427{
7428 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7429}
7430
Logan Chien55afb0a2018-10-15 10:42:14 +08007431static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007432_mm_cvtusepi64_epi32 (__m128i __A)
7433{
7434 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7435 (__v4si)_mm_undefined_si128(),
7436 (__mmask8) -1);
7437}
7438
Logan Chien55afb0a2018-10-15 10:42:14 +08007439static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007440_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7441{
7442 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7443 (__v4si) __O, __M);
7444}
7445
Logan Chien55afb0a2018-10-15 10:42:14 +08007446static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007447_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
7448{
7449 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7450 (__v4si) _mm_setzero_si128 (),
7451 __M);
7452}
7453
Logan Chien55afb0a2018-10-15 10:42:14 +08007454static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007455_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7456{
7457 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7458}
7459
Logan Chien55afb0a2018-10-15 10:42:14 +08007460static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007461_mm256_cvtusepi64_epi32 (__m256i __A)
7462{
7463 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7464 (__v4si)_mm_undefined_si128(),
7465 (__mmask8) -1);
7466}
7467
Logan Chien55afb0a2018-10-15 10:42:14 +08007468static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007469_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7470{
7471 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7472 (__v4si) __O, __M);
7473}
7474
Logan Chien55afb0a2018-10-15 10:42:14 +08007475static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007476_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
7477{
7478 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7479 (__v4si) _mm_setzero_si128 (),
7480 __M);
7481}
7482
Logan Chien55afb0a2018-10-15 10:42:14 +08007483static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007484_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7485{
7486 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7487}
7488
Logan Chien55afb0a2018-10-15 10:42:14 +08007489static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007490_mm_cvtusepi64_epi16 (__m128i __A)
7491{
7492 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7493 (__v8hi)_mm_undefined_si128(),
7494 (__mmask8) -1);
7495}
7496
Logan Chien55afb0a2018-10-15 10:42:14 +08007497static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007498_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7499{
7500 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7501 (__v8hi) __O, __M);
7502}
7503
Logan Chien55afb0a2018-10-15 10:42:14 +08007504static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007505_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
7506{
7507 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7508 (__v8hi) _mm_setzero_si128 (),
7509 __M);
7510}
7511
Logan Chien55afb0a2018-10-15 10:42:14 +08007512static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007513_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7514{
7515 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7516}
7517
Logan Chien55afb0a2018-10-15 10:42:14 +08007518static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007519_mm256_cvtusepi64_epi16 (__m256i __A)
7520{
7521 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7522 (__v8hi)_mm_undefined_si128(),
7523 (__mmask8) -1);
7524}
7525
Logan Chien55afb0a2018-10-15 10:42:14 +08007526static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007527_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7528{
7529 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7530 (__v8hi) __O, __M);
7531}
7532
Logan Chien55afb0a2018-10-15 10:42:14 +08007533static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007534_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
7535{
7536 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7537 (__v8hi) _mm_setzero_si128 (),
7538 __M);
7539}
7540
Logan Chien55afb0a2018-10-15 10:42:14 +08007541static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007542_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7543{
Logan Chien55afb0a2018-10-15 10:42:14 +08007544 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
Logan Chien2833ffb2018-10-09 10:03:24 +08007545}
7546
Logan Chien55afb0a2018-10-15 10:42:14 +08007547static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007548_mm_cvtepi32_epi8 (__m128i __A)
7549{
Logan Chien55afb0a2018-10-15 10:42:14 +08007550 return (__m128i)__builtin_shufflevector(
7551 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7552 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
Logan Chien2833ffb2018-10-09 10:03:24 +08007553}
7554
Logan Chien55afb0a2018-10-15 10:42:14 +08007555static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007556_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7557{
7558 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7559 (__v16qi) __O, __M);
7560}
7561
Logan Chien55afb0a2018-10-15 10:42:14 +08007562static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007563_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
7564{
7565 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7566 (__v16qi)
7567 _mm_setzero_si128 (),
7568 __M);
7569}
7570
Logan Chiendf4f7662019-09-04 16:45:23 -07007571static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007572_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7573{
7574 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7575}
7576
Logan Chien55afb0a2018-10-15 10:42:14 +08007577static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007578_mm256_cvtepi32_epi8 (__m256i __A)
7579{
Logan Chien55afb0a2018-10-15 10:42:14 +08007580 return (__m128i)__builtin_shufflevector(
7581 __builtin_convertvector((__v8si)__A, __v8qi),
7582 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7583 12, 13, 14, 15);
Logan Chien2833ffb2018-10-09 10:03:24 +08007584}
7585
Logan Chien55afb0a2018-10-15 10:42:14 +08007586static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007587_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7588{
7589 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7590 (__v16qi) __O, __M);
7591}
7592
Logan Chien55afb0a2018-10-15 10:42:14 +08007593static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007594_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
7595{
7596 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7597 (__v16qi) _mm_setzero_si128 (),
7598 __M);
7599}
7600
Logan Chien55afb0a2018-10-15 10:42:14 +08007601static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007602_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7603{
7604 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7605}
7606
Logan Chien55afb0a2018-10-15 10:42:14 +08007607static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007608_mm_cvtepi32_epi16 (__m128i __A)
7609{
Logan Chien55afb0a2018-10-15 10:42:14 +08007610 return (__m128i)__builtin_shufflevector(
7611 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7612 2, 3, 4, 5, 6, 7);
Logan Chien2833ffb2018-10-09 10:03:24 +08007613}
7614
Logan Chien55afb0a2018-10-15 10:42:14 +08007615static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007616_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7617{
7618 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7619 (__v8hi) __O, __M);
7620}
7621
Logan Chien55afb0a2018-10-15 10:42:14 +08007622static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007623_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
7624{
7625 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7626 (__v8hi) _mm_setzero_si128 (),
7627 __M);
7628}
7629
Logan Chien55afb0a2018-10-15 10:42:14 +08007630static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007631_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7632{
7633 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7634}
7635
Logan Chien55afb0a2018-10-15 10:42:14 +08007636static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007637_mm256_cvtepi32_epi16 (__m256i __A)
7638{
Logan Chien55afb0a2018-10-15 10:42:14 +08007639 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
Logan Chien2833ffb2018-10-09 10:03:24 +08007640}
7641
Logan Chien55afb0a2018-10-15 10:42:14 +08007642static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007643_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7644{
7645 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7646 (__v8hi) __O, __M);
7647}
7648
Logan Chien55afb0a2018-10-15 10:42:14 +08007649static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007650_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
7651{
7652 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7653 (__v8hi) _mm_setzero_si128 (),
7654 __M);
7655}
7656
Logan Chien55afb0a2018-10-15 10:42:14 +08007657static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007658_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7659{
7660 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7661}
7662
Logan Chien55afb0a2018-10-15 10:42:14 +08007663static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007664_mm_cvtepi64_epi8 (__m128i __A)
7665{
Logan Chien55afb0a2018-10-15 10:42:14 +08007666 return (__m128i)__builtin_shufflevector(
7667 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7668 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
Logan Chien2833ffb2018-10-09 10:03:24 +08007669}
7670
Logan Chien55afb0a2018-10-15 10:42:14 +08007671static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007672_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7673{
7674 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7675 (__v16qi) __O, __M);
7676}
7677
Logan Chien55afb0a2018-10-15 10:42:14 +08007678static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007679_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
7680{
7681 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7682 (__v16qi) _mm_setzero_si128 (),
7683 __M);
7684}
7685
Logan Chien55afb0a2018-10-15 10:42:14 +08007686static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007687_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7688{
7689 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7690}
7691
Logan Chien55afb0a2018-10-15 10:42:14 +08007692static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007693_mm256_cvtepi64_epi8 (__m256i __A)
7694{
Logan Chien55afb0a2018-10-15 10:42:14 +08007695 return (__m128i)__builtin_shufflevector(
7696 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7697 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
Logan Chien2833ffb2018-10-09 10:03:24 +08007698}
7699
Logan Chien55afb0a2018-10-15 10:42:14 +08007700static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007701_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7702{
7703 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7704 (__v16qi) __O, __M);
7705}
7706
Logan Chien55afb0a2018-10-15 10:42:14 +08007707static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007708_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
7709{
7710 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7711 (__v16qi) _mm_setzero_si128 (),
7712 __M);
7713}
7714
Logan Chien55afb0a2018-10-15 10:42:14 +08007715static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007716_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7717{
7718 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7719}
7720
Logan Chien55afb0a2018-10-15 10:42:14 +08007721static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007722_mm_cvtepi64_epi32 (__m128i __A)
7723{
Logan Chien55afb0a2018-10-15 10:42:14 +08007724 return (__m128i)__builtin_shufflevector(
7725 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
Logan Chien2833ffb2018-10-09 10:03:24 +08007726}
7727
Logan Chien55afb0a2018-10-15 10:42:14 +08007728static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007729_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7730{
7731 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7732 (__v4si) __O, __M);
7733}
7734
Logan Chien55afb0a2018-10-15 10:42:14 +08007735static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007736_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
7737{
7738 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7739 (__v4si) _mm_setzero_si128 (),
7740 __M);
7741}
7742
Logan Chien55afb0a2018-10-15 10:42:14 +08007743static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007744_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7745{
7746 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7747}
7748
Logan Chien55afb0a2018-10-15 10:42:14 +08007749static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007750_mm256_cvtepi64_epi32 (__m256i __A)
7751{
Logan Chien55afb0a2018-10-15 10:42:14 +08007752 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
Logan Chien2833ffb2018-10-09 10:03:24 +08007753}
7754
Logan Chien55afb0a2018-10-15 10:42:14 +08007755static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007756_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7757{
Logan Chien55afb0a2018-10-15 10:42:14 +08007758 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7759 (__v4si)_mm256_cvtepi64_epi32(__A),
7760 (__v4si)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +08007761}
7762
Logan Chien55afb0a2018-10-15 10:42:14 +08007763static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007764_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
7765{
Logan Chien55afb0a2018-10-15 10:42:14 +08007766 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7767 (__v4si)_mm256_cvtepi64_epi32(__A),
7768 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08007769}
7770
Logan Chien55afb0a2018-10-15 10:42:14 +08007771static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007772_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7773{
7774 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7775}
7776
Logan Chien55afb0a2018-10-15 10:42:14 +08007777static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007778_mm_cvtepi64_epi16 (__m128i __A)
7779{
Logan Chien55afb0a2018-10-15 10:42:14 +08007780 return (__m128i)__builtin_shufflevector(
7781 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7782 3, 3, 3, 3);
Logan Chien2833ffb2018-10-09 10:03:24 +08007783}
7784
Logan Chien55afb0a2018-10-15 10:42:14 +08007785static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007786_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7787{
7788 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7789 (__v8hi)__O,
7790 __M);
7791}
7792
Logan Chien55afb0a2018-10-15 10:42:14 +08007793static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007794_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
7795{
7796 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7797 (__v8hi) _mm_setzero_si128 (),
7798 __M);
7799}
7800
Logan Chien55afb0a2018-10-15 10:42:14 +08007801static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08007802_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7803{
7804 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7805}
7806
Logan Chien55afb0a2018-10-15 10:42:14 +08007807static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007808_mm256_cvtepi64_epi16 (__m256i __A)
7809{
Logan Chien55afb0a2018-10-15 10:42:14 +08007810 return (__m128i)__builtin_shufflevector(
7811 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7812 2, 3, 4, 5, 6, 7);
Logan Chien2833ffb2018-10-09 10:03:24 +08007813}
7814
Logan Chien55afb0a2018-10-15 10:42:14 +08007815static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007816_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7817{
7818 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7819 (__v8hi) __O, __M);
7820}
7821
Logan Chien55afb0a2018-10-15 10:42:14 +08007822static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007823_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
7824{
7825 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7826 (__v8hi) _mm_setzero_si128 (),
7827 __M);
7828}
7829
Logan Chien55afb0a2018-10-15 10:42:14 +08007830static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08007831_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7832{
7833 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7834}
7835
/* Unmasked extract: calls __builtin_ia32_extractf32x4_256_mask on A (as
 * __v8sf) with selector imm, an undefined pass-through vector and an
 * all-ones mask ((__mmask8)-1); the result is cast to __m128. */
#define _mm256_extractf32x4_ps(A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_undefined_ps(), \
                                                (__mmask8)-1))

/* Merge-masked extract: same builtin as the unmasked form, with W (as
 * __v4sf) as the pass-through vector and U as the mask. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U)))

/* Zero-masked extract: same builtin as the unmasked form, with an all-zero
 * pass-through from _mm_setzero_ps() and U as the mask. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U)))

/* Unmasked integer extract: calls __builtin_ia32_extracti32x4_256_mask on A
 * (as __v8si) with selector imm, an undefined pass-through vector and an
 * all-ones mask ((__mmask8)-1); the result is cast to __m128i. */
#define _mm256_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_undefined_si128(), \
                                                 (__mmask8)-1))

/* Merge-masked integer extract: same builtin as the unmasked form, with W
 * (as __v4si) as the pass-through vector and U as the mask. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)(__m128i)(W), \
                                                 (__mmask8)(U)))

/* Zero-masked integer extract: same builtin as the unmasked form, with an
 * all-zero pass-through from _mm_setzero_si128() and U as the mask. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))

/* Unmasked insert: calls __builtin_ia32_insertf32x4_256 with A (as __v8sf),
 * B (as __v4sf) and the position immediate imm; result is cast to __m256. */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
                                          (__v4sf)(__m128)(B), (int)(imm)))

/* Merge-masked insert: the result of _mm256_insertf32x4(A, B, imm) and W are
 * combined by __builtin_ia32_selectps_256 under mask U (insert result is the
 * first vector operand, W the second). */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)(__m256)(W)))

/* Zero-masked insert: the result of _mm256_insertf32x4(A, B, imm) and an
 * all-zero vector are combined by __builtin_ia32_selectps_256 under mask U
 * (insert result is the first vector operand, zero the second). */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)_mm256_setzero_ps()))

/* Unmasked integer insert: calls __builtin_ia32_inserti32x4_256 with A (as
 * __v8si), B (as __v4si) and the position immediate imm; result is cast to
 * __m256i. */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
                                           (__v4si)(__m128i)(B), (int)(imm)))

/* Merge-masked integer insert: the result of _mm256_inserti32x4(A, B, imm)
 * and W are combined by __builtin_ia32_selectd_256 under mask U (insert
 * result is the first vector operand, W the second). */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)(__m256i)(W)))

/* Zero-masked integer insert: the result of _mm256_inserti32x4(A, B, imm)
 * and an all-zero vector are combined by __builtin_ia32_selectd_256 under
 * mask U (insert result is the first vector operand, zero the second). */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)_mm256_setzero_si256()))

/* Unmasked getmant on a __v2df: calls __builtin_ia32_getmantpd128_mask with
 * the immediate ((C) << 2) | (B), a zero pass-through vector and an all-ones
 * mask ((__mmask8)-1). */
#define _mm_getmant_pd(A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1))

/* Merge-masked getmant on a __v2df: immediate is ((C) << 2) | (B), W is the
 * pass-through vector and U the mask passed to
 * __builtin_ia32_getmantpd128_mask. */
#define _mm_mask_getmant_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U)))

/* Zero-masked getmant on a __v2df: immediate is ((C) << 2) | (B), the
 * pass-through is _mm_setzero_pd() and U the mask passed to
 * __builtin_ia32_getmantpd128_mask. */
#define _mm_maskz_getmant_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U)))

/* Unmasked getmant on a __v4df: calls __builtin_ia32_getmantpd256_mask with
 * the immediate ((C) << 2) | (B), a zero pass-through vector and an all-ones
 * mask ((__mmask8)-1). */
#define _mm256_getmant_pd(A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)-1))

/* Merge-masked getmant on a __v4df: immediate is ((C) << 2) | (B), W is the
 * pass-through vector and U the mask passed to
 * __builtin_ia32_getmantpd256_mask. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

/* Zero-masked getmant on a __v4df: immediate is ((C) << 2) | (B), the
 * pass-through is _mm256_setzero_pd() and U the mask passed to
 * __builtin_ia32_getmantpd256_mask. */
#define _mm256_maskz_getmant_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

/* Unmasked getmant on a __v4sf: calls __builtin_ia32_getmantps128_mask with
 * the immediate ((C) << 2) | (B), a zero pass-through vector and an all-ones
 * mask ((__mmask8)-1). */
#define _mm_getmant_ps(A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1))

/* Merge-masked getmant on a __v4sf: immediate is ((C) << 2) | (B), W is the
 * pass-through vector and U the mask passed to
 * __builtin_ia32_getmantps128_mask. */
#define _mm_mask_getmant_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

/* Zero-masked getmant on a __v4sf: immediate is ((C) << 2) | (B), the
 * pass-through is _mm_setzero_ps() and U the mask passed to
 * __builtin_ia32_getmantps128_mask. */
#define _mm_maskz_getmant_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))

/* Unmasked getmant on a __v8sf: calls __builtin_ia32_getmantps256_mask with
 * the immediate ((C) << 2) | (B), a zero pass-through vector and an all-ones
 * mask ((__mmask8)-1). */
#define _mm256_getmant_ps(A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)-1))

/* Merge-masked getmant on a __v8sf: immediate is ((C) << 2) | (B), W is the
 * pass-through vector and U the mask passed to
 * __builtin_ia32_getmantps256_mask. */
#define _mm256_mask_getmant_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)(__m256)(W), \
                                            (__mmask8)(U)))

/* Zero-masked getmant on a __v8sf: immediate is ((C) << 2) | (B), the
 * pass-through is _mm256_setzero_ps() and U the mask passed to
 * __builtin_ia32_getmantps256_mask. */
#define _mm256_maskz_getmant_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U)))

/* Masked gather via __builtin_ia32_gather3div2df. Operands, in order:
 * v1_old as __v2df, addr as void const *, index as __v2di (64-bit indices),
 * mask as __mmask8, scale as int. Result is cast to __m128d. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3div2di. Operands, in order:
 * v1_old as __v2di, addr as void const *, index as __v2di (64-bit indices),
 * mask as __mmask8, scale as int. Result is cast to __m128i. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3div4df. Operands, in order:
 * v1_old as __v4df, addr as void const *, index as __v4di (64-bit indices),
 * mask as __mmask8, scale as int. Result is cast to __m256d. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3div4di. Operands, in order:
 * v1_old as __v4di, addr as void const *, index as __v4di (64-bit indices),
 * mask as __mmask8, scale as int. Result is cast to __m256i. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3div4sf. Operands, in order:
 * v1_old as __v4sf, addr as void const *, index as __v2di (two 64-bit
 * indices), mask as __mmask8, scale as int. Result is cast to __m128. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3div4si. Operands, in order:
 * v1_old as __v4si, addr as void const *, index as __v2di (two 64-bit
 * indices), mask as __mmask8, scale as int. Result is cast to __m128i. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3div8sf. Operands, in order:
 * v1_old as __v4sf, addr as void const *, index as __v4di (four 64-bit
 * indices), mask as __mmask8, scale as int. Note the result and v1_old are
 * 128-bit (__m128) even though the index vector is 256-bit. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3div8si. Operands, in order:
 * v1_old as __v4si, addr as void const *, index as __v4di (four 64-bit
 * indices), mask as __mmask8, scale as int. Note the result and v1_old are
 * 128-bit (__m128i) even though the index vector is 256-bit. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3siv2df. Operands, in order:
 * v1_old as __v2df, addr as void const *, index as __v4si (32-bit indices),
 * mask as __mmask8, scale as int. Result is cast to __m128d. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3siv2di. Operands, in order:
 * v1_old as __v2di, addr as void const *, index as __v4si (32-bit indices),
 * mask as __mmask8, scale as int. Result is cast to __m128i. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3siv4df. Operands, in order:
 * v1_old as __v4df, addr as void const *, index as __v4si (four 32-bit
 * indices in a 128-bit vector), mask as __mmask8, scale as int. Result is
 * cast to __m256d. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3siv4di. Operands, in order:
 * v1_old as __v4di, addr as void const *, index as __v4si (four 32-bit
 * indices in a 128-bit vector), mask as __mmask8, scale as int. Result is
 * cast to __m256i. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3siv4sf. Operands, in order:
 * v1_old as __v4sf, addr as void const *, index as __v4si (32-bit indices),
 * mask as __mmask8, scale as int. Result is cast to __m128. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3siv4si. Operands, in order:
 * v1_old as __v4si, addr as void const *, index as __v4si (32-bit indices),
 * mask as __mmask8, scale as int. Result is cast to __m128i. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3siv8sf. Operands, in order:
 * v1_old as __v8sf, addr as void const *, index as __v8si (32-bit indices),
 * mask as __mmask8, scale as int. Result is cast to __m256. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* Masked gather via __builtin_ia32_gather3siv8si. Operands, in order:
 * v1_old as __v8si, addr as void const *, index as __v8si (32-bit indices),
 * mask as __mmask8, scale as int. Result is cast to __m256i. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8si)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* Immediate-controlled permute of a __v4df: calls __builtin_ia32_permdf256
 * with X and the control immediate C; result is cast to __m256d. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))

/* Merge-masked permute: the result of _mm256_permutex_pd(X, C) and W are
 * combined by __builtin_ia32_selectpd_256 under mask U (permute result is
 * the first vector operand, W the second). */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_permutex_pd((X), (C)), \
                                       (__v4df)(__m256d)(W)))

/* Zero-masked permute: the result of _mm256_permutex_pd(X, C) and an
 * all-zero vector are combined by __builtin_ia32_selectpd_256 under mask U
 * (permute result is the first vector operand, zero the second). */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))

/* Immediate-controlled permute of a __v4di: calls __builtin_ia32_permdi256
 * with X and the control immediate C; result is cast to __m256i. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))

/* Merge-masked permute: the result of _mm256_permutex_epi64(X, C) and W are
 * combined by __builtin_ia32_selectq_256 under mask U (permute result is the
 * first vector operand, W the second). */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_permutex_epi64((X), (C)), \
                                      (__v4di)(__m256i)(W)))

/* Zero-masked permute: the result of _mm256_permutex_epi64(X, C) and an
 * all-zero vector are combined by __builtin_ia32_selectq_256 under mask U
 * (permute result is the first vector operand, zero the second). */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_permutex_epi64((X), (C)), \
                                      (__v4di)_mm256_setzero_si256()))

Logan Chien55afb0a2018-10-15 10:42:14 +08008094static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008095_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8096{
Logan Chien55afb0a2018-10-15 10:42:14 +08008097 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
Logan Chien2833ffb2018-10-09 10:03:24 +08008098}
8099
Logan Chien55afb0a2018-10-15 10:42:14 +08008100static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008101_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8102 __m256d __Y)
8103{
Logan Chien55afb0a2018-10-15 10:42:14 +08008104 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8105 (__v4df)_mm256_permutexvar_pd(__X, __Y),
8106 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08008107}
8108
Logan Chien55afb0a2018-10-15 10:42:14 +08008109static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008110_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8111{
Logan Chien55afb0a2018-10-15 10:42:14 +08008112 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8113 (__v4df)_mm256_permutexvar_pd(__X, __Y),
8114 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08008115}
8116
Logan Chien55afb0a2018-10-15 10:42:14 +08008117static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008118_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8119{
Logan Chien55afb0a2018-10-15 10:42:14 +08008120 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
Logan Chien2833ffb2018-10-09 10:03:24 +08008121}
8122
Logan Chien55afb0a2018-10-15 10:42:14 +08008123static __inline__ __m256i __DEFAULT_FN_ATTRS256
8124_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
8125{
8126 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8127 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8128 (__v4di)_mm256_setzero_si256());
8129}
8130
8131static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008132_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8133 __m256i __Y)
8134{
Logan Chien55afb0a2018-10-15 10:42:14 +08008135 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8136 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8137 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08008138}
8139
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped: A is
 * passed as the second argument and B as the first. */
#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))

8142static __inline__ __m256 __DEFAULT_FN_ATTRS256
8143_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08008144{
Logan Chien55afb0a2018-10-15 10:42:14 +08008145 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8146 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8147 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08008148}
8149
Logan Chien55afb0a2018-10-15 10:42:14 +08008150static __inline__ __m256 __DEFAULT_FN_ATTRS256
8151_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08008152{
Logan Chien55afb0a2018-10-15 10:42:14 +08008153 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8154 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8155 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +08008156}
8157
/* Alias for _mm256_permutevar8x32_epi32 with the two arguments swapped: A is
 * passed as the second argument and B as the first. */
#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))

8160static __inline__ __m256i __DEFAULT_FN_ATTRS256
8161_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
8162 __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08008163{
Logan Chien55afb0a2018-10-15 10:42:14 +08008164 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8165 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8166 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08008167}
8168
Logan Chien55afb0a2018-10-15 10:42:14 +08008169static __inline__ __m256i __DEFAULT_FN_ATTRS256
8170_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +08008171{
Logan Chien55afb0a2018-10-15 10:42:14 +08008172 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8173 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8174 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08008175}
8176
/* Unmasked 32-bit align: calls __builtin_ia32_alignd128 with A and B (both
 * as __v4si) and the immediate imm; result is cast to __m128i. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
                                     (__v4si)(__m128i)(B), (int)(imm)))

/* Merge-masked 32-bit align: the result of _mm_alignr_epi32(A, B, imm) and W
 * are combined by __builtin_ia32_selectd_128 under mask U (align result is
 * the first vector operand, W the second). */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                    (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                    (__v4si)(__m128i)(W)))

/* Zero-masked 32-bit align: the result of _mm_alignr_epi32(A, B, imm) and an
 * all-zero vector are combined by __builtin_ia32_selectd_128 under mask U
 * (align result is the first vector operand, zero the second). */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                    (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                    (__v4si)_mm_setzero_si128()))

/* Unmasked 32-bit align: calls __builtin_ia32_alignd256 with A and B (both
 * as __v8si) and the immediate imm; result is cast to __m256i. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
                                     (__v8si)(__m256i)(B), (int)(imm)))

/* Merge-masked 32-bit align: the result of _mm256_alignr_epi32(A, B, imm)
 * and W are combined by __builtin_ia32_selectd_256 under mask U (align
 * result is the first vector operand, W the second). */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)(__m256i)(W)))

/* Zero-masked 32-bit align: the result of _mm256_alignr_epi32(A, B, imm) and
 * an all-zero vector are combined by __builtin_ia32_selectd_256 under mask U
 * (align result is the first vector operand, zero the second). */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)_mm256_setzero_si256()))

/* Unmasked 64-bit align: calls __builtin_ia32_alignq128 with A and B (both
 * as __v2di) and the immediate imm; result is cast to __m128i. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
                                     (__v2di)(__m128i)(B), (int)(imm)))

/* Merge-masked form of _mm_alignr_epi64: computes
 * _mm_alignr_epi64(A, B, imm) and, for each of the two 64-bit lanes, keeps
 * the computed value when the matching bit of U is set and otherwise passes
 * through the corresponding lane of W. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                 (__v2di)(__m128i)(W)))

/* Zero-masked form of _mm_alignr_epi64: computes
 * _mm_alignr_epi64(A, B, imm) and, for each of the two 64-bit lanes, keeps
 * the computed value when the matching bit of U is set and yields zero
 * otherwise. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                 (__v2di)_mm_setzero_si128()))

/* Forwards A and B (viewed as four 64-bit lanes each) and the immediate imm
 * to __builtin_ia32_alignq256 and returns the result as __m256i.
 * Per Intel's VALIGNQ description this concatenates A (high) with B (low),
 * shifts right by imm 64-bit elements and keeps the low 256 bits.
 * imm is cast to int and must be usable as a constant by the builtin. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
                                     (__v4di)(__m256i)(B), (int)(imm)))

/* Merge-masked form of _mm256_alignr_epi64: computes
 * _mm256_alignr_epi64(A, B, imm) and, for each of the four 64-bit lanes,
 * keeps the computed value when the matching bit of U is set and otherwise
 * passes through the corresponding lane of W. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)(__m256i)(W)))

/* Zero-masked form of _mm256_alignr_epi64: computes
 * _mm256_alignr_epi64(A, B, imm) and, for each of the four 64-bit lanes,
 * keeps the computed value when the matching bit of U is set and yields zero
 * otherwise. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)_mm256_setzero_si256()))

Logan Chien55afb0a2018-10-15 10:42:14 +08008233static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008234_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8235{
8236 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8237 (__v4sf)_mm_movehdup_ps(__A),
8238 (__v4sf)__W);
8239}
8240
Logan Chien55afb0a2018-10-15 10:42:14 +08008241static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008242_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
8243{
8244 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8245 (__v4sf)_mm_movehdup_ps(__A),
8246 (__v4sf)_mm_setzero_ps());
8247}
8248
Logan Chien55afb0a2018-10-15 10:42:14 +08008249static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008250_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8251{
8252 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8253 (__v8sf)_mm256_movehdup_ps(__A),
8254 (__v8sf)__W);
8255}
8256
Logan Chien55afb0a2018-10-15 10:42:14 +08008257static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008258_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
8259{
8260 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8261 (__v8sf)_mm256_movehdup_ps(__A),
8262 (__v8sf)_mm256_setzero_ps());
8263}
8264
Logan Chien55afb0a2018-10-15 10:42:14 +08008265static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008266_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8267{
8268 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8269 (__v4sf)_mm_moveldup_ps(__A),
8270 (__v4sf)__W);
8271}
8272
Logan Chien55afb0a2018-10-15 10:42:14 +08008273static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008274_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
8275{
8276 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8277 (__v4sf)_mm_moveldup_ps(__A),
8278 (__v4sf)_mm_setzero_ps());
8279}
8280
Logan Chien55afb0a2018-10-15 10:42:14 +08008281static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008282_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8283{
8284 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8285 (__v8sf)_mm256_moveldup_ps(__A),
8286 (__v8sf)__W);
8287}
8288
Logan Chien55afb0a2018-10-15 10:42:14 +08008289static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008290_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
8291{
8292 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8293 (__v8sf)_mm256_moveldup_ps(__A),
8294 (__v8sf)_mm256_setzero_ps());
8295}
8296
/* Merge-masked _mm256_shuffle_epi32: computes _mm256_shuffle_epi32(A, I) and,
 * for each of the eight 32-bit lanes, keeps the shuffled value when the
 * matching bit of U is set and otherwise passes through the lane of W.
 * Kept as a macro because I is forwarded to a macro/builtin that expects an
 * immediate. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)(__m256i)(W)))

/* Zero-masked _mm256_shuffle_epi32: computes _mm256_shuffle_epi32(A, I) and,
 * for each of the eight 32-bit lanes, keeps the shuffled value when the
 * matching bit of U is set and yields zero otherwise. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)_mm256_setzero_si256()))

/* Merge-masked _mm_shuffle_epi32: computes _mm_shuffle_epi32(A, I) and, for
 * each of the four 32-bit lanes, keeps the shuffled value when the matching
 * bit of U is set and otherwise passes through the lane of W. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)(__m128i)(W)))

/* Zero-masked _mm_shuffle_epi32: computes _mm_shuffle_epi32(A, I) and, for
 * each of the four 32-bit lanes, keeps the shuffled value when the matching
 * bit of U is set and yields zero otherwise. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)_mm_setzero_si128()))

Logan Chien55afb0a2018-10-15 10:42:14 +08008317static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008318_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8319{
8320 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8321 (__v2df) __A,
8322 (__v2df) __W);
8323}
8324
Logan Chien55afb0a2018-10-15 10:42:14 +08008325static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008326_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8327{
8328 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8329 (__v2df) __A,
8330 (__v2df) _mm_setzero_pd ());
8331}
8332
Logan Chien55afb0a2018-10-15 10:42:14 +08008333static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008334_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8335{
8336 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8337 (__v4df) __A,
8338 (__v4df) __W);
8339}
8340
Logan Chien55afb0a2018-10-15 10:42:14 +08008341static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008342_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
8343{
8344 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8345 (__v4df) __A,
8346 (__v4df) _mm256_setzero_pd ());
8347}
8348
Logan Chien55afb0a2018-10-15 10:42:14 +08008349static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008350_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8351{
8352 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8353 (__v4sf) __A,
8354 (__v4sf) __W);
8355}
8356
Logan Chien55afb0a2018-10-15 10:42:14 +08008357static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008358_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
8359{
8360 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8361 (__v4sf) __A,
8362 (__v4sf) _mm_setzero_ps ());
8363}
8364
Logan Chien55afb0a2018-10-15 10:42:14 +08008365static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008366_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8367{
8368 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8369 (__v8sf) __A,
8370 (__v8sf) __W);
8371}
8372
Logan Chien55afb0a2018-10-15 10:42:14 +08008373static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008374_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
8375{
8376 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8377 (__v8sf) __A,
8378 (__v8sf) _mm256_setzero_ps ());
8379}
8380
Logan Chien55afb0a2018-10-15 10:42:14 +08008381static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008382_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8383{
8384 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8385 (__v4sf) __W,
8386 (__mmask8) __U);
8387}
8388
Logan Chien55afb0a2018-10-15 10:42:14 +08008389static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08008390_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8391{
8392 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8393 (__v4sf)
8394 _mm_setzero_ps (),
8395 (__mmask8) __U);
8396}
8397
Logan Chien55afb0a2018-10-15 10:42:14 +08008398static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008399_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8400{
8401 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8402 (__v8sf) __W,
8403 (__mmask8) __U);
8404}
8405
Logan Chien55afb0a2018-10-15 10:42:14 +08008406static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008407_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8408{
8409 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8410 (__v8sf)
8411 _mm256_setzero_ps (),
8412 (__mmask8) __U);
8413}
8414
/* Merge-masked single-precision to half-precision conversion of the four
 * floats in A. I is the immediate control passed to the builtin as int
 * (the rounding selection for VCVTPS2PH), W is the pass-through vector viewed
 * as eight 16-bit elements, and U is the write mask. Expands to
 * __builtin_ia32_vcvtps2ph_mask and yields an __m128i. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)(__m128i)(W), \
                                          (__mmask8)(U)))

/* Zero-masked single-precision to half-precision conversion of the four
 * floats in A. I is the immediate control passed to the builtin as int, U is
 * the write mask, and the pass-through vector is all zeros. Expands to
 * __builtin_ia32_vcvtps2ph_mask and yields an __m128i. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)_mm_setzero_si128(), \
                                          (__mmask8)(U)))

/* Alternate spellings: the cvtps_ph names expand directly to the
 * corresponding cvt_roundps_ph macros and take the same arguments. */
#define _mm_mask_cvtps_ph  _mm_mask_cvt_roundps_ph
#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph

/* Merge-masked single-precision to half-precision conversion of the eight
 * floats in A. I is the immediate control passed to the builtin as int
 * (the rounding selection for VCVTPS2PH), W is the 128-bit pass-through
 * vector viewed as eight 16-bit elements, and U is the write mask. Expands to
 * __builtin_ia32_vcvtps2ph256_mask and yields an __m128i. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)(__m128i)(W), \
                                             (__mmask8)(U)))

/* Zero-masked single-precision to half-precision conversion of the eight
 * floats in A. I is the immediate control passed to the builtin as int, U is
 * the write mask, and the pass-through vector is all zeros. Expands to
 * __builtin_ia32_vcvtps2ph256_mask and yields an __m128i. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

/* Alternate spellings: the cvtps_ph names expand directly to the
 * corresponding cvt_roundps_ph macros and take the same arguments. */
#define _mm256_mask_cvtps_ph  _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph


Logan Chien55afb0a2018-10-15 10:42:14 +08008442#undef __DEFAULT_FN_ATTRS128
8443#undef __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08008444
8445#endif /* __AVX512VLINTRIN_H */