blob: 4f68157c2814c4807b9f9afb771a962d8cabd86c [file] [log] [blame]
/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VLDQINTRIN_H
29#define __AVX512VLDQINTRIN_H
30
/* Attribute sets attached to the intrinsics defined in this header: one
 * variant tagged with a minimum vector width of 128, one with 256. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
Logan Chien2833ffb2018-10-09 10:03:24 +080034
Logan Chien55afb0a2018-10-15 10:42:14 +080035static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +080036_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
37 return (__m256i) ((__v4du) __A * (__v4du) __B);
38}
39
Logan Chien55afb0a2018-10-15 10:42:14 +080040static __inline__ __m256i __DEFAULT_FN_ATTRS256
41_mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
42 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
43 (__v4di)_mm256_mullo_epi64(__A, __B),
44 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +080045}
46
Logan Chien55afb0a2018-10-15 10:42:14 +080047static __inline__ __m256i __DEFAULT_FN_ATTRS256
48_mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
49 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
50 (__v4di)_mm256_mullo_epi64(__A, __B),
51 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +080052}
53
Logan Chien55afb0a2018-10-15 10:42:14 +080054static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080055_mm_mullo_epi64 (__m128i __A, __m128i __B) {
56 return (__m128i) ((__v2du) __A * (__v2du) __B);
57}
58
Logan Chien55afb0a2018-10-15 10:42:14 +080059static __inline__ __m128i __DEFAULT_FN_ATTRS128
60_mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
61 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
62 (__v2di)_mm_mullo_epi64(__A, __B),
63 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +080064}
65
Logan Chien55afb0a2018-10-15 10:42:14 +080066static __inline__ __m128i __DEFAULT_FN_ATTRS128
67_mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
68 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
69 (__v2di)_mm_mullo_epi64(__A, __B),
70 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +080071}
72
Logan Chien55afb0a2018-10-15 10:42:14 +080073static __inline__ __m256d __DEFAULT_FN_ATTRS256
74_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
75 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
76 (__v4df)_mm256_andnot_pd(__A, __B),
77 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +080078}
79
Logan Chien55afb0a2018-10-15 10:42:14 +080080static __inline__ __m256d __DEFAULT_FN_ATTRS256
81_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
82 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
83 (__v4df)_mm256_andnot_pd(__A, __B),
84 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +080085}
86
Logan Chien55afb0a2018-10-15 10:42:14 +080087static __inline__ __m128d __DEFAULT_FN_ATTRS128
88_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
89 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
90 (__v2df)_mm_andnot_pd(__A, __B),
91 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +080092}
93
Logan Chien55afb0a2018-10-15 10:42:14 +080094static __inline__ __m128d __DEFAULT_FN_ATTRS128
95_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
96 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
97 (__v2df)_mm_andnot_pd(__A, __B),
98 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +080099}
100
Logan Chien55afb0a2018-10-15 10:42:14 +0800101static __inline__ __m256 __DEFAULT_FN_ATTRS256
102_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
103 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
104 (__v8sf)_mm256_andnot_ps(__A, __B),
105 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800106}
107
Logan Chien55afb0a2018-10-15 10:42:14 +0800108static __inline__ __m256 __DEFAULT_FN_ATTRS256
109_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
110 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
111 (__v8sf)_mm256_andnot_ps(__A, __B),
112 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +0800113}
114
Logan Chien55afb0a2018-10-15 10:42:14 +0800115static __inline__ __m128 __DEFAULT_FN_ATTRS128
116_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
117 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
118 (__v4sf)_mm_andnot_ps(__A, __B),
119 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800120}
121
Logan Chien55afb0a2018-10-15 10:42:14 +0800122static __inline__ __m128 __DEFAULT_FN_ATTRS128
123_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
124 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
125 (__v4sf)_mm_andnot_ps(__A, __B),
126 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +0800127}
128
Logan Chien55afb0a2018-10-15 10:42:14 +0800129static __inline__ __m256d __DEFAULT_FN_ATTRS256
130_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
131 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
132 (__v4df)_mm256_and_pd(__A, __B),
133 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800134}
135
Logan Chien55afb0a2018-10-15 10:42:14 +0800136static __inline__ __m256d __DEFAULT_FN_ATTRS256
137_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
138 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
139 (__v4df)_mm256_and_pd(__A, __B),
140 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800141}
142
Logan Chien55afb0a2018-10-15 10:42:14 +0800143static __inline__ __m128d __DEFAULT_FN_ATTRS128
144_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
145 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
146 (__v2df)_mm_and_pd(__A, __B),
147 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800148}
149
Logan Chien55afb0a2018-10-15 10:42:14 +0800150static __inline__ __m128d __DEFAULT_FN_ATTRS128
151_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
152 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
153 (__v2df)_mm_and_pd(__A, __B),
154 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800155}
156
Logan Chien55afb0a2018-10-15 10:42:14 +0800157static __inline__ __m256 __DEFAULT_FN_ATTRS256
158_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
159 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
160 (__v8sf)_mm256_and_ps(__A, __B),
161 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800162}
163
Logan Chien55afb0a2018-10-15 10:42:14 +0800164static __inline__ __m256 __DEFAULT_FN_ATTRS256
165_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
166 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
167 (__v8sf)_mm256_and_ps(__A, __B),
168 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +0800169}
170
Logan Chien55afb0a2018-10-15 10:42:14 +0800171static __inline__ __m128 __DEFAULT_FN_ATTRS128
172_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
173 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
174 (__v4sf)_mm_and_ps(__A, __B),
175 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800176}
177
Logan Chien55afb0a2018-10-15 10:42:14 +0800178static __inline__ __m128 __DEFAULT_FN_ATTRS128
179_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
180 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
181 (__v4sf)_mm_and_ps(__A, __B),
182 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +0800183}
184
Logan Chien55afb0a2018-10-15 10:42:14 +0800185static __inline__ __m256d __DEFAULT_FN_ATTRS256
186_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
187 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
188 (__v4df)_mm256_xor_pd(__A, __B),
189 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800190}
191
Logan Chien55afb0a2018-10-15 10:42:14 +0800192static __inline__ __m256d __DEFAULT_FN_ATTRS256
193_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
194 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
195 (__v4df)_mm256_xor_pd(__A, __B),
196 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800197}
198
Logan Chien55afb0a2018-10-15 10:42:14 +0800199static __inline__ __m128d __DEFAULT_FN_ATTRS128
200_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
201 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
202 (__v2df)_mm_xor_pd(__A, __B),
203 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800204}
205
Logan Chien55afb0a2018-10-15 10:42:14 +0800206static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800207_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800208 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
209 (__v2df)_mm_xor_pd(__A, __B),
210 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800211}
212
Logan Chien55afb0a2018-10-15 10:42:14 +0800213static __inline__ __m256 __DEFAULT_FN_ATTRS256
214_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
215 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
216 (__v8sf)_mm256_xor_ps(__A, __B),
217 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800218}
219
Logan Chien55afb0a2018-10-15 10:42:14 +0800220static __inline__ __m256 __DEFAULT_FN_ATTRS256
221_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
222 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
223 (__v8sf)_mm256_xor_ps(__A, __B),
224 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +0800225}
226
Logan Chien55afb0a2018-10-15 10:42:14 +0800227static __inline__ __m128 __DEFAULT_FN_ATTRS128
228_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
229 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
230 (__v4sf)_mm_xor_ps(__A, __B),
231 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800232}
233
Logan Chien55afb0a2018-10-15 10:42:14 +0800234static __inline__ __m128 __DEFAULT_FN_ATTRS128
235_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
236 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
237 (__v4sf)_mm_xor_ps(__A, __B),
238 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +0800239}
240
Logan Chien55afb0a2018-10-15 10:42:14 +0800241static __inline__ __m256d __DEFAULT_FN_ATTRS256
242_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
243 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
244 (__v4df)_mm256_or_pd(__A, __B),
245 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800246}
247
Logan Chien55afb0a2018-10-15 10:42:14 +0800248static __inline__ __m256d __DEFAULT_FN_ATTRS256
249_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
250 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
251 (__v4df)_mm256_or_pd(__A, __B),
252 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800253}
254
Logan Chien55afb0a2018-10-15 10:42:14 +0800255static __inline__ __m128d __DEFAULT_FN_ATTRS128
256_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
257 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
258 (__v2df)_mm_or_pd(__A, __B),
259 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800260}
261
Logan Chien55afb0a2018-10-15 10:42:14 +0800262static __inline__ __m128d __DEFAULT_FN_ATTRS128
263_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
264 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
265 (__v2df)_mm_or_pd(__A, __B),
266 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800267}
268
Logan Chien55afb0a2018-10-15 10:42:14 +0800269static __inline__ __m256 __DEFAULT_FN_ATTRS256
270_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
271 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
272 (__v8sf)_mm256_or_ps(__A, __B),
273 (__v8sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800274}
275
Logan Chien55afb0a2018-10-15 10:42:14 +0800276static __inline__ __m256 __DEFAULT_FN_ATTRS256
277_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
278 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
279 (__v8sf)_mm256_or_ps(__A, __B),
280 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +0800281}
282
Logan Chien55afb0a2018-10-15 10:42:14 +0800283static __inline__ __m128 __DEFAULT_FN_ATTRS128
284_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
285 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
286 (__v4sf)_mm_or_ps(__A, __B),
287 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800288}
289
Logan Chien55afb0a2018-10-15 10:42:14 +0800290static __inline__ __m128 __DEFAULT_FN_ATTRS128
291_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
292 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
293 (__v4sf)_mm_or_ps(__A, __B),
294 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +0800295}
296
Logan Chien55afb0a2018-10-15 10:42:14 +0800297static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800298_mm_cvtpd_epi64 (__m128d __A) {
299 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
300 (__v2di) _mm_setzero_si128(),
301 (__mmask8) -1);
302}
303
Logan Chien55afb0a2018-10-15 10:42:14 +0800304static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800305_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
306 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
307 (__v2di) __W,
308 (__mmask8) __U);
309}
310
Logan Chien55afb0a2018-10-15 10:42:14 +0800311static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800312_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
313 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
314 (__v2di) _mm_setzero_si128(),
315 (__mmask8) __U);
316}
317
Logan Chien55afb0a2018-10-15 10:42:14 +0800318static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800319_mm256_cvtpd_epi64 (__m256d __A) {
320 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
321 (__v4di) _mm256_setzero_si256(),
322 (__mmask8) -1);
323}
324
Logan Chien55afb0a2018-10-15 10:42:14 +0800325static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800326_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
327 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
328 (__v4di) __W,
329 (__mmask8) __U);
330}
331
Logan Chien55afb0a2018-10-15 10:42:14 +0800332static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800333_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
334 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
335 (__v4di) _mm256_setzero_si256(),
336 (__mmask8) __U);
337}
338
Logan Chien55afb0a2018-10-15 10:42:14 +0800339static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800340_mm_cvtpd_epu64 (__m128d __A) {
341 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
342 (__v2di) _mm_setzero_si128(),
343 (__mmask8) -1);
344}
345
Logan Chien55afb0a2018-10-15 10:42:14 +0800346static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800347_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
348 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
349 (__v2di) __W,
350 (__mmask8) __U);
351}
352
Logan Chien55afb0a2018-10-15 10:42:14 +0800353static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800354_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
355 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
356 (__v2di) _mm_setzero_si128(),
357 (__mmask8) __U);
358}
359
Logan Chien55afb0a2018-10-15 10:42:14 +0800360static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800361_mm256_cvtpd_epu64 (__m256d __A) {
362 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
363 (__v4di) _mm256_setzero_si256(),
364 (__mmask8) -1);
365}
366
Logan Chien55afb0a2018-10-15 10:42:14 +0800367static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800368_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
369 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
370 (__v4di) __W,
371 (__mmask8) __U);
372}
373
Logan Chien55afb0a2018-10-15 10:42:14 +0800374static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800375_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
376 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
377 (__v4di) _mm256_setzero_si256(),
378 (__mmask8) __U);
379}
380
Logan Chien55afb0a2018-10-15 10:42:14 +0800381static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800382_mm_cvtps_epi64 (__m128 __A) {
383 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
384 (__v2di) _mm_setzero_si128(),
385 (__mmask8) -1);
386}
387
Logan Chien55afb0a2018-10-15 10:42:14 +0800388static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800389_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
390 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
391 (__v2di) __W,
392 (__mmask8) __U);
393}
394
Logan Chien55afb0a2018-10-15 10:42:14 +0800395static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800396_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
397 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
398 (__v2di) _mm_setzero_si128(),
399 (__mmask8) __U);
400}
401
Logan Chien55afb0a2018-10-15 10:42:14 +0800402static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800403_mm256_cvtps_epi64 (__m128 __A) {
404 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
405 (__v4di) _mm256_setzero_si256(),
406 (__mmask8) -1);
407}
408
Logan Chien55afb0a2018-10-15 10:42:14 +0800409static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800410_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
411 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
412 (__v4di) __W,
413 (__mmask8) __U);
414}
415
Logan Chien55afb0a2018-10-15 10:42:14 +0800416static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800417_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
418 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
419 (__v4di) _mm256_setzero_si256(),
420 (__mmask8) __U);
421}
422
Logan Chien55afb0a2018-10-15 10:42:14 +0800423static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800424_mm_cvtps_epu64 (__m128 __A) {
425 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
426 (__v2di) _mm_setzero_si128(),
427 (__mmask8) -1);
428}
429
Logan Chien55afb0a2018-10-15 10:42:14 +0800430static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800431_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
432 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
433 (__v2di) __W,
434 (__mmask8) __U);
435}
436
Logan Chien55afb0a2018-10-15 10:42:14 +0800437static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800438_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
439 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
440 (__v2di) _mm_setzero_si128(),
441 (__mmask8) __U);
442}
443
Logan Chien55afb0a2018-10-15 10:42:14 +0800444static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800445_mm256_cvtps_epu64 (__m128 __A) {
446 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
447 (__v4di) _mm256_setzero_si256(),
448 (__mmask8) -1);
449}
450
Logan Chien55afb0a2018-10-15 10:42:14 +0800451static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800452_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
453 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
454 (__v4di) __W,
455 (__mmask8) __U);
456}
457
Logan Chien55afb0a2018-10-15 10:42:14 +0800458static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800459_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
460 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
461 (__v4di) _mm256_setzero_si256(),
462 (__mmask8) __U);
463}
464
Logan Chien55afb0a2018-10-15 10:42:14 +0800465static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800466_mm_cvtepi64_pd (__m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800467 return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
Logan Chien2833ffb2018-10-09 10:03:24 +0800468}
469
Logan Chien55afb0a2018-10-15 10:42:14 +0800470static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800471_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800472 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
473 (__v2df)_mm_cvtepi64_pd(__A),
474 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800475}
476
Logan Chien55afb0a2018-10-15 10:42:14 +0800477static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800478_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800479 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
480 (__v2df)_mm_cvtepi64_pd(__A),
481 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800482}
483
Logan Chien55afb0a2018-10-15 10:42:14 +0800484static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800485_mm256_cvtepi64_pd (__m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800486 return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
Logan Chien2833ffb2018-10-09 10:03:24 +0800487}
488
Logan Chien55afb0a2018-10-15 10:42:14 +0800489static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800490_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800491 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
492 (__v4df)_mm256_cvtepi64_pd(__A),
493 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800494}
495
Logan Chien55afb0a2018-10-15 10:42:14 +0800496static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800497_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800498 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
499 (__v4df)_mm256_cvtepi64_pd(__A),
500 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800501}
502
Logan Chien55afb0a2018-10-15 10:42:14 +0800503static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800504_mm_cvtepi64_ps (__m128i __A) {
505 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
506 (__v4sf) _mm_setzero_ps(),
507 (__mmask8) -1);
508}
509
Logan Chien55afb0a2018-10-15 10:42:14 +0800510static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800511_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
512 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
513 (__v4sf) __W,
514 (__mmask8) __U);
515}
516
Logan Chien55afb0a2018-10-15 10:42:14 +0800517static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800518_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
519 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
520 (__v4sf) _mm_setzero_ps(),
521 (__mmask8) __U);
522}
523
Logan Chien55afb0a2018-10-15 10:42:14 +0800524static __inline__ __m128 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800525_mm256_cvtepi64_ps (__m256i __A) {
Logan Chiendbcf4122019-03-21 10:50:25 +0800526 return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
Logan Chien2833ffb2018-10-09 10:03:24 +0800527}
528
Logan Chien55afb0a2018-10-15 10:42:14 +0800529static __inline__ __m128 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800530_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
Logan Chiendbcf4122019-03-21 10:50:25 +0800531 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
532 (__v4sf)_mm256_cvtepi64_ps(__A),
533 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800534}
535
Logan Chien55afb0a2018-10-15 10:42:14 +0800536static __inline__ __m128 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800537_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
Logan Chiendbcf4122019-03-21 10:50:25 +0800538 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
539 (__v4sf)_mm256_cvtepi64_ps(__A),
540 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +0800541}
542
Logan Chien55afb0a2018-10-15 10:42:14 +0800543static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800544_mm_cvttpd_epi64 (__m128d __A) {
545 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
546 (__v2di) _mm_setzero_si128(),
547 (__mmask8) -1);
548}
549
Logan Chien55afb0a2018-10-15 10:42:14 +0800550static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800551_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
552 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
553 (__v2di) __W,
554 (__mmask8) __U);
555}
556
Logan Chien55afb0a2018-10-15 10:42:14 +0800557static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800558_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
559 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
560 (__v2di) _mm_setzero_si128(),
561 (__mmask8) __U);
562}
563
Logan Chien55afb0a2018-10-15 10:42:14 +0800564static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800565_mm256_cvttpd_epi64 (__m256d __A) {
566 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
567 (__v4di) _mm256_setzero_si256(),
568 (__mmask8) -1);
569}
570
Logan Chien55afb0a2018-10-15 10:42:14 +0800571static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800572_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
573 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
574 (__v4di) __W,
575 (__mmask8) __U);
576}
577
Logan Chien55afb0a2018-10-15 10:42:14 +0800578static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800579_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
580 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
581 (__v4di) _mm256_setzero_si256(),
582 (__mmask8) __U);
583}
584
Logan Chien55afb0a2018-10-15 10:42:14 +0800585static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800586_mm_cvttpd_epu64 (__m128d __A) {
587 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
588 (__v2di) _mm_setzero_si128(),
589 (__mmask8) -1);
590}
591
Logan Chien55afb0a2018-10-15 10:42:14 +0800592static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800593_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
594 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
595 (__v2di) __W,
596 (__mmask8) __U);
597}
598
Logan Chien55afb0a2018-10-15 10:42:14 +0800599static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800600_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
601 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
602 (__v2di) _mm_setzero_si128(),
603 (__mmask8) __U);
604}
605
Logan Chien55afb0a2018-10-15 10:42:14 +0800606static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800607_mm256_cvttpd_epu64 (__m256d __A) {
608 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
609 (__v4di) _mm256_setzero_si256(),
610 (__mmask8) -1);
611}
612
Logan Chien55afb0a2018-10-15 10:42:14 +0800613static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800614_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
615 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
616 (__v4di) __W,
617 (__mmask8) __U);
618}
619
Logan Chien55afb0a2018-10-15 10:42:14 +0800620static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800621_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
622 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
623 (__v4di) _mm256_setzero_si256(),
624 (__mmask8) __U);
625}
626
Logan Chien55afb0a2018-10-15 10:42:14 +0800627static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800628_mm_cvttps_epi64 (__m128 __A) {
629 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
630 (__v2di) _mm_setzero_si128(),
631 (__mmask8) -1);
632}
633
Logan Chien55afb0a2018-10-15 10:42:14 +0800634static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800635_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
636 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
637 (__v2di) __W,
638 (__mmask8) __U);
639}
640
Logan Chien55afb0a2018-10-15 10:42:14 +0800641static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800642_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
643 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
644 (__v2di) _mm_setzero_si128(),
645 (__mmask8) __U);
646}
647
Logan Chien55afb0a2018-10-15 10:42:14 +0800648static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800649_mm256_cvttps_epi64 (__m128 __A) {
650 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
651 (__v4di) _mm256_setzero_si256(),
652 (__mmask8) -1);
653}
654
Logan Chien55afb0a2018-10-15 10:42:14 +0800655static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800656_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
657 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
658 (__v4di) __W,
659 (__mmask8) __U);
660}
661
Logan Chien55afb0a2018-10-15 10:42:14 +0800662static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800663_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
664 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
665 (__v4di) _mm256_setzero_si256(),
666 (__mmask8) __U);
667}
668
Logan Chien55afb0a2018-10-15 10:42:14 +0800669static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800670_mm_cvttps_epu64 (__m128 __A) {
671 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
672 (__v2di) _mm_setzero_si128(),
673 (__mmask8) -1);
674}
675
Logan Chien55afb0a2018-10-15 10:42:14 +0800676static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800677_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
678 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
679 (__v2di) __W,
680 (__mmask8) __U);
681}
682
Logan Chien55afb0a2018-10-15 10:42:14 +0800683static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800684_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
685 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
686 (__v2di) _mm_setzero_si128(),
687 (__mmask8) __U);
688}
689
Logan Chien55afb0a2018-10-15 10:42:14 +0800690static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800691_mm256_cvttps_epu64 (__m128 __A) {
692 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
693 (__v4di) _mm256_setzero_si256(),
694 (__mmask8) -1);
695}
696
Logan Chien55afb0a2018-10-15 10:42:14 +0800697static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800698_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
699 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
700 (__v4di) __W,
701 (__mmask8) __U);
702}
703
Logan Chien55afb0a2018-10-15 10:42:14 +0800704static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800705_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
706 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
707 (__v4di) _mm256_setzero_si256(),
708 (__mmask8) __U);
709}
710
Logan Chien55afb0a2018-10-15 10:42:14 +0800711static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800712_mm_cvtepu64_pd (__m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800713 return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
Logan Chien2833ffb2018-10-09 10:03:24 +0800714}
715
Logan Chien55afb0a2018-10-15 10:42:14 +0800716static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800717_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800718 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
719 (__v2df)_mm_cvtepu64_pd(__A),
720 (__v2df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800721}
722
Logan Chien55afb0a2018-10-15 10:42:14 +0800723static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800724_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800725 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
726 (__v2df)_mm_cvtepu64_pd(__A),
727 (__v2df)_mm_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800728}
729
Logan Chien55afb0a2018-10-15 10:42:14 +0800730static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800731_mm256_cvtepu64_pd (__m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800732 return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
Logan Chien2833ffb2018-10-09 10:03:24 +0800733}
734
Logan Chien55afb0a2018-10-15 10:42:14 +0800735static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800736_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800737 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
738 (__v4df)_mm256_cvtepu64_pd(__A),
739 (__v4df)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800740}
741
Logan Chien55afb0a2018-10-15 10:42:14 +0800742static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800743_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +0800744 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
745 (__v4df)_mm256_cvtepu64_pd(__A),
746 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +0800747}
748
Logan Chien55afb0a2018-10-15 10:42:14 +0800749static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800750_mm_cvtepu64_ps (__m128i __A) {
751 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
752 (__v4sf) _mm_setzero_ps(),
753 (__mmask8) -1);
754}
755
Logan Chien55afb0a2018-10-15 10:42:14 +0800756static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800757_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
758 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
759 (__v4sf) __W,
760 (__mmask8) __U);
761}
762
Logan Chien55afb0a2018-10-15 10:42:14 +0800763static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800764_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
765 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
766 (__v4sf) _mm_setzero_ps(),
767 (__mmask8) __U);
768}
769
Logan Chien55afb0a2018-10-15 10:42:14 +0800770static __inline__ __m128 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800771_mm256_cvtepu64_ps (__m256i __A) {
Logan Chiendbcf4122019-03-21 10:50:25 +0800772 return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
Logan Chien2833ffb2018-10-09 10:03:24 +0800773}
774
Logan Chien55afb0a2018-10-15 10:42:14 +0800775static __inline__ __m128 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800776_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
Logan Chiendbcf4122019-03-21 10:50:25 +0800777 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
778 (__v4sf)_mm256_cvtepu64_ps(__A),
779 (__v4sf)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800780}
781
Logan Chien55afb0a2018-10-15 10:42:14 +0800782static __inline__ __m128 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800783_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
Logan Chiendbcf4122019-03-21 10:50:25 +0800784 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
785 (__v4sf)_mm256_cvtepu64_ps(__A),
786 (__v4sf)_mm_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +0800787}
788
/* 128-bit double-precision wrappers around __builtin_ia32_rangepd128_mask.
 * A and B are the vector operands and C is forwarded as the (int) immediate.
 * The plain form passes a zero vector and mask (__mmask8)-1, the mask form
 * passes W and U, the maskz form passes a zero vector and U.
 * Each expansion is wrapped in parentheses: without them a postfix operator
 * applied to the macro result (e.g. `_mm_range_pd(a, b, c)[0]`) would bind to
 * the builtin call before the leading cast. */
#define _mm_range_pd(A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1))

#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U)))

#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800806
/* 256-bit double-precision wrappers around __builtin_ia32_rangepd256_mask.
 * A and B are the vector operands and C is forwarded as the (int) immediate.
 * The plain form passes a zero vector and mask (__mmask8)-1, the mask form
 * passes W and U, the maskz form passes a zero vector and U.
 * Each expansion is wrapped in parentheses so that a postfix operator applied
 * to the macro result cannot bind to the builtin call before the cast. */
#define _mm256_range_pd(A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)-1))

#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)(__m256d)(W), \
                                           (__mmask8)(U)))

#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800824
/* 128-bit single-precision wrappers around __builtin_ia32_rangeps128_mask.
 * A and B are the vector operands and C is forwarded as the (int) immediate.
 * The plain form passes a zero vector and mask (__mmask8)-1, the mask form
 * passes W and U, the maskz form passes a zero vector and U.
 * Each expansion is wrapped in parentheses so that a postfix operator applied
 * to the macro result cannot bind to the builtin call before the cast. */
#define _mm_range_ps(A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1))

#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800841
/* 256-bit single-precision wrappers around __builtin_ia32_rangeps256_mask.
 * A and B are the vector operands and C is forwarded as the (int) immediate.
 * The plain form passes a zero vector and mask (__mmask8)-1, the mask form
 * passes W and U, the maskz form passes a zero vector and U.
 * Each expansion is wrapped in parentheses so that a postfix operator applied
 * to the macro result cannot bind to the builtin call before the cast. */
#define _mm256_range_ps(A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1))

#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800858
/* 128-bit double-precision wrappers around __builtin_ia32_reducepd128_mask.
 * A is the vector operand and B is forwarded as the (int) immediate.
 * The plain form passes a zero vector and mask (__mmask8)-1, the mask form
 * passes W and U, the maskz form passes a zero vector and U.
 * Each expansion is wrapped in parentheses so that a postfix operator applied
 * to the macro result cannot bind to the builtin call before the cast. */
#define _mm_reduce_pd(A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1))

#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U)))

#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800873
/* 256-bit double-precision wrappers around __builtin_ia32_reducepd256_mask.
 * A is the vector operand and B is forwarded as the (int) immediate.
 * The plain form passes a zero vector and mask (__mmask8)-1, the mask form
 * passes W and U, the maskz form passes a zero vector and U.
 * Each expansion is wrapped in parentheses so that a postfix operator applied
 * to the macro result cannot bind to the builtin call before the cast. */
#define _mm256_reduce_pd(A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1))

#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U)))

#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800888
/* 128-bit single-precision wrappers around __builtin_ia32_reduceps128_mask.
 * A is the vector operand and B is forwarded as the (int) immediate.
 * The plain form passes a zero vector and mask (__mmask8)-1, the mask form
 * passes W and U, the maskz form passes a zero vector and U.
 * Each expansion is wrapped in parentheses so that a postfix operator applied
 * to the macro result cannot bind to the builtin call before the cast. */
#define _mm_reduce_ps(A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1))

#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U)))

#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800903
/* 256-bit single-precision wrappers around __builtin_ia32_reduceps256_mask.
 * A is the vector operand and B is forwarded as the (int) immediate.
 * The plain form passes a zero vector and mask (__mmask8)-1, the mask form
 * passes W and U, the maskz form passes a zero vector and U.
 * Each expansion is wrapped in parentheses so that a postfix operator applied
 * to the macro result cannot bind to the builtin call before the cast. */
#define _mm256_reduce_ps(A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1))

#define _mm256_mask_reduce_ps(W, U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U)))

#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800918
Logan Chien55afb0a2018-10-15 10:42:14 +0800919static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800920_mm_movepi32_mask (__m128i __A)
921{
922 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
923}
924
Logan Chien55afb0a2018-10-15 10:42:14 +0800925static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800926_mm256_movepi32_mask (__m256i __A)
927{
928 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
929}
930
Logan Chien55afb0a2018-10-15 10:42:14 +0800931static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800932_mm_movm_epi32 (__mmask8 __A)
933{
934 return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
935}
936
Logan Chien55afb0a2018-10-15 10:42:14 +0800937static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800938_mm256_movm_epi32 (__mmask8 __A)
939{
940 return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
941}
942
Logan Chien55afb0a2018-10-15 10:42:14 +0800943static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800944_mm_movm_epi64 (__mmask8 __A)
945{
946 return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
947}
948
Logan Chien55afb0a2018-10-15 10:42:14 +0800949static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800950_mm256_movm_epi64 (__mmask8 __A)
951{
952 return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
953}
954
Logan Chien55afb0a2018-10-15 10:42:14 +0800955static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800956_mm_movepi64_mask (__m128i __A)
957{
958 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
959}
960
Logan Chien55afb0a2018-10-15 10:42:14 +0800961static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800962_mm256_movepi64_mask (__m256i __A)
963{
964 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
965}
966
Logan Chien55afb0a2018-10-15 10:42:14 +0800967static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800968_mm256_broadcast_f32x2 (__m128 __A)
969{
Logan Chien55afb0a2018-10-15 10:42:14 +0800970 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
971 0, 1, 0, 1, 0, 1, 0, 1);
Logan Chien2833ffb2018-10-09 10:03:24 +0800972}
973
Logan Chien55afb0a2018-10-15 10:42:14 +0800974static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800975_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
976{
Logan Chien55afb0a2018-10-15 10:42:14 +0800977 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
978 (__v8sf)_mm256_broadcast_f32x2(__A),
979 (__v8sf)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +0800980}
981
Logan Chien55afb0a2018-10-15 10:42:14 +0800982static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800983_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
984{
Logan Chien55afb0a2018-10-15 10:42:14 +0800985 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
986 (__v8sf)_mm256_broadcast_f32x2(__A),
987 (__v8sf)_mm256_setzero_ps());
Logan Chien2833ffb2018-10-09 10:03:24 +0800988}
989
Logan Chien55afb0a2018-10-15 10:42:14 +0800990static __inline__ __m256d __DEFAULT_FN_ATTRS256
991_mm256_broadcast_f64x2(__m128d __A)
Logan Chien2833ffb2018-10-09 10:03:24 +0800992{
Logan Chien55afb0a2018-10-15 10:42:14 +0800993 return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
994 0, 1, 0, 1);
Logan Chien2833ffb2018-10-09 10:03:24 +0800995}
996
Logan Chien55afb0a2018-10-15 10:42:14 +0800997static __inline__ __m256d __DEFAULT_FN_ATTRS256
998_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
Logan Chien2833ffb2018-10-09 10:03:24 +0800999{
Logan Chien55afb0a2018-10-15 10:42:14 +08001000 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
1001 (__v4df)_mm256_broadcast_f64x2(__A),
1002 (__v4df)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +08001003}
1004
Logan Chien55afb0a2018-10-15 10:42:14 +08001005static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001006_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
1007{
Logan Chien55afb0a2018-10-15 10:42:14 +08001008 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
1009 (__v4df)_mm256_broadcast_f64x2(__A),
1010 (__v4df)_mm256_setzero_pd());
Logan Chien2833ffb2018-10-09 10:03:24 +08001011}
1012
Logan Chien55afb0a2018-10-15 10:42:14 +08001013static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001014_mm_broadcast_i32x2 (__m128i __A)
1015{
Logan Chien55afb0a2018-10-15 10:42:14 +08001016 return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1017 0, 1, 0, 1);
Logan Chien2833ffb2018-10-09 10:03:24 +08001018}
1019
Logan Chien55afb0a2018-10-15 10:42:14 +08001020static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001021_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
1022{
Logan Chien55afb0a2018-10-15 10:42:14 +08001023 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1024 (__v4si)_mm_broadcast_i32x2(__A),
1025 (__v4si)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +08001026}
1027
Logan Chien55afb0a2018-10-15 10:42:14 +08001028static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001029_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1030{
Logan Chien55afb0a2018-10-15 10:42:14 +08001031 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1032 (__v4si)_mm_broadcast_i32x2(__A),
1033 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001034}
1035
Logan Chien55afb0a2018-10-15 10:42:14 +08001036static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001037_mm256_broadcast_i32x2 (__m128i __A)
1038{
Logan Chien55afb0a2018-10-15 10:42:14 +08001039 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1040 0, 1, 0, 1, 0, 1, 0, 1);
Logan Chien2833ffb2018-10-09 10:03:24 +08001041}
1042
Logan Chien55afb0a2018-10-15 10:42:14 +08001043static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001044_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
1045{
Logan Chien55afb0a2018-10-15 10:42:14 +08001046 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1047 (__v8si)_mm256_broadcast_i32x2(__A),
1048 (__v8si)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +08001049}
1050
Logan Chien55afb0a2018-10-15 10:42:14 +08001051static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001052_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1053{
Logan Chien55afb0a2018-10-15 10:42:14 +08001054 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1055 (__v8si)_mm256_broadcast_i32x2(__A),
1056 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001057}
1058
Logan Chien55afb0a2018-10-15 10:42:14 +08001059static __inline__ __m256i __DEFAULT_FN_ATTRS256
1060_mm256_broadcast_i64x2(__m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001061{
Logan Chien55afb0a2018-10-15 10:42:14 +08001062 return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1063 0, 1, 0, 1);
Logan Chien2833ffb2018-10-09 10:03:24 +08001064}
1065
Logan Chien55afb0a2018-10-15 10:42:14 +08001066static __inline__ __m256i __DEFAULT_FN_ATTRS256
1067_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001068{
Logan Chien55afb0a2018-10-15 10:42:14 +08001069 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1070 (__v4di)_mm256_broadcast_i64x2(__A),
1071 (__v4di)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +08001072}
1073
Logan Chien55afb0a2018-10-15 10:42:14 +08001074static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001075_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1076{
Logan Chien55afb0a2018-10-15 10:42:14 +08001077 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1078 (__v4di)_mm256_broadcast_i64x2(__A),
1079 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001080}
1081
/* Wrappers around __builtin_ia32_extractf64x2_256_mask. A is the 256-bit
 * source and imm is forwarded as the (int) immediate.
 * The plain form passes _mm_undefined_pd() and mask (__mmask8)-1, the mask
 * form passes W and U, the maskz form passes a zero vector and U.
 * Each expansion is wrapped in parentheses so that a postfix operator applied
 * to the macro result cannot bind to the builtin call before the cast. */
#define _mm256_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_undefined_pd(), \
                                                 (__mmask8)-1))

#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U)))

#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001099
/* Wrappers around __builtin_ia32_extracti64x2_256_mask. A is the 256-bit
 * source and imm is forwarded as the (int) immediate.
 * The plain form passes _mm_undefined_si128() and mask (__mmask8)-1, the mask
 * form passes W and U, the maskz form passes a zero vector and U.
 * Each expansion is wrapped in parentheses so that a postfix operator applied
 * to the macro result cannot bind to the builtin call before the cast. */
#define _mm256_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_undefined_si128(), \
                                                 (__mmask8)-1))

#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)(__m128i)(W), \
                                                 (__mmask8)(U)))

#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001117
/* _mm256_insertf64x2 forwards A, B and the (int) immediate imm to
 * __builtin_ia32_insertf64x2_256. The mask/maskz forms feed that result to
 * __builtin_ia32_selectpd_256 together with mask U and either W or a zero
 * vector.
 * Each expansion is wrapped in parentheses so that a postfix operator applied
 * to the macro result cannot bind to the builtin call before the cast. */
#define _mm256_insertf64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
                                           (__v2df)(__m128d)(B), (int)(imm)))

#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                  (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                  (__v4df)(__m256d)(W)))

#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                  (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                  (__v4df)_mm256_setzero_pd()))
Logan Chien2833ffb2018-10-09 10:03:24 +08001131
/* _mm256_inserti64x2 forwards A, B and the (int) immediate imm to
 * __builtin_ia32_inserti64x2_256. The mask/maskz forms feed that result to
 * __builtin_ia32_selectq_256 together with mask U and either W or a zero
 * vector.
 * Each expansion is wrapped in parentheses so that a postfix operator applied
 * to the macro result cannot bind to the builtin call before the cast. */
#define _mm256_inserti64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
                                           (__v2di)(__m128i)(B), (int)(imm)))

#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                   (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                   (__v4di)(__m256i)(W)))

#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                   (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                   (__v4di)_mm256_setzero_si256()))
Logan Chien2833ffb2018-10-09 10:03:24 +08001145
/* Wrappers around __builtin_ia32_fpclasspd128_mask. A is the vector operand
 * and imm is forwarded as the (int) immediate; the mask form passes U, the
 * plain form passes (__mmask8)-1. The result is cast to __mmask8.
 * Each expansion is wrapped in parentheses so the leading cast cannot be
 * split from the call by surrounding operators (e.g. `sizeof`). */
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), \
                                              (int)(imm), (__mmask8)(U)))

#define _mm_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), \
                                              (int)(imm), (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08001153
/* Wrappers around __builtin_ia32_fpclasspd256_mask. A is the vector operand
 * and imm is forwarded as the (int) immediate; the mask form passes U, the
 * plain form passes (__mmask8)-1. The result is cast to __mmask8.
 * Each expansion is wrapped in parentheses so the leading cast cannot be
 * split from the call by surrounding operators (e.g. `sizeof`). */
#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), \
                                              (int)(imm), (__mmask8)(U)))

#define _mm256_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), \
                                              (int)(imm), (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08001161
/* Wrappers around __builtin_ia32_fpclassps128_mask. A is the vector operand
 * and imm is forwarded as the (int) immediate; the mask form passes U, the
 * plain form passes (__mmask8)-1. The result is cast to __mmask8.
 * Each expansion is wrapped in parentheses so the leading cast cannot be
 * split from the call by surrounding operators (e.g. `sizeof`). */
#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), \
                                              (int)(imm), (__mmask8)(U)))

#define _mm_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), \
                                              (int)(imm), (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08001169
/* Wrappers around __builtin_ia32_fpclassps256_mask. A is the vector operand
 * and imm is forwarded as the (int) immediate; the mask form passes U, the
 * plain form passes (__mmask8)-1. The result is cast to __mmask8.
 * Each expansion is wrapped in parentheses so the leading cast cannot be
 * split from the call by surrounding operators (e.g. `sizeof`). */
#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), \
                                              (int)(imm), (__mmask8)(U)))

#define _mm256_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), \
                                              (int)(imm), (__mmask8)-1))
Logan Chien2833ffb2018-10-09 10:03:24 +08001177
/* Remove the attribute helper macros defined at the top of this header. */
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001180
1181#endif