blob: 6aee8aed8487177d7afbd71b50d8568bf848bd52 [file] [log] [blame]
/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512BWINTRIN_H
15#define __AVX512BWINTRIN_H
16
17typedef unsigned int __mmask32;
18typedef unsigned long long __mmask64;
19
20/* Define the default attributes for the functions in this file. */
Logan Chienb0c84022018-11-09 16:19:54 +080021#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
22#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
23
24static __inline __mmask32 __DEFAULT_FN_ATTRS
25_knot_mask32(__mmask32 __M)
26{
27 return __builtin_ia32_knotsi(__M);
28}
29
30static __inline __mmask64 __DEFAULT_FN_ATTRS
31_knot_mask64(__mmask64 __M)
32{
33 return __builtin_ia32_knotdi(__M);
34}
35
36static __inline__ __mmask32 __DEFAULT_FN_ATTRS
37_kand_mask32(__mmask32 __A, __mmask32 __B)
38{
39 return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
40}
41
42static __inline__ __mmask64 __DEFAULT_FN_ATTRS
43_kand_mask64(__mmask64 __A, __mmask64 __B)
44{
45 return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
46}
47
48static __inline__ __mmask32 __DEFAULT_FN_ATTRS
49_kandn_mask32(__mmask32 __A, __mmask32 __B)
50{
51 return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
52}
53
54static __inline__ __mmask64 __DEFAULT_FN_ATTRS
55_kandn_mask64(__mmask64 __A, __mmask64 __B)
56{
57 return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
58}
59
60static __inline__ __mmask32 __DEFAULT_FN_ATTRS
61_kor_mask32(__mmask32 __A, __mmask32 __B)
62{
63 return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
64}
65
66static __inline__ __mmask64 __DEFAULT_FN_ATTRS
67_kor_mask64(__mmask64 __A, __mmask64 __B)
68{
69 return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
70}
71
72static __inline__ __mmask32 __DEFAULT_FN_ATTRS
73_kxnor_mask32(__mmask32 __A, __mmask32 __B)
74{
75 return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
76}
77
78static __inline__ __mmask64 __DEFAULT_FN_ATTRS
79_kxnor_mask64(__mmask64 __A, __mmask64 __B)
80{
81 return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
82}
83
84static __inline__ __mmask32 __DEFAULT_FN_ATTRS
85_kxor_mask32(__mmask32 __A, __mmask32 __B)
86{
87 return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
88}
89
90static __inline__ __mmask64 __DEFAULT_FN_ATTRS
91_kxor_mask64(__mmask64 __A, __mmask64 __B)
92{
93 return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
94}
95
96static __inline__ unsigned char __DEFAULT_FN_ATTRS
97_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
98{
99 return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
100}
101
102static __inline__ unsigned char __DEFAULT_FN_ATTRS
103_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
104{
105 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
106}
107
108static __inline__ unsigned char __DEFAULT_FN_ATTRS
109_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
110 *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
111 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
112}
113
114static __inline__ unsigned char __DEFAULT_FN_ATTRS
115_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
116{
117 return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
118}
119
120static __inline__ unsigned char __DEFAULT_FN_ATTRS
121_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
122{
123 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
124}
125
126static __inline__ unsigned char __DEFAULT_FN_ATTRS
127_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
128 *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
129 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
130}
131
132static __inline__ unsigned char __DEFAULT_FN_ATTRS
133_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
134{
135 return (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
136}
137
138static __inline__ unsigned char __DEFAULT_FN_ATTRS
139_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B)
140{
141 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
142}
143
144static __inline__ unsigned char __DEFAULT_FN_ATTRS
145_ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
146 *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
147 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
148}
149
150static __inline__ unsigned char __DEFAULT_FN_ATTRS
151_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
152{
153 return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
154}
155
156static __inline__ unsigned char __DEFAULT_FN_ATTRS
157_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
158{
159 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
160}
161
162static __inline__ unsigned char __DEFAULT_FN_ATTRS
163_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
164 *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
165 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
166}
167
168static __inline__ __mmask32 __DEFAULT_FN_ATTRS
169_kadd_mask32(__mmask32 __A, __mmask32 __B)
170{
171 return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
172}
173
174static __inline__ __mmask64 __DEFAULT_FN_ATTRS
175_kadd_mask64(__mmask64 __A, __mmask64 __B)
176{
177 return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
178}
179
/* Mask shifts. A is the mask to shift and I the shift count (converted to
 * unsigned int). These are macros rather than functions, presumably because
 * the underlying builtins need I to be a compile-time constant -- confirm
 * against the builtin definitions before passing a run-time value. */

/* Shift the 32-bit mask A left by I bits. */
#define _kshiftli_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)))

/* Shift the 32-bit mask A right by I bits. */
#define _kshiftri_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)))

/* Shift the 64-bit mask A left by I bits. */
#define _kshiftli_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)))

/* Shift the 64-bit mask A right by I bits. */
#define _kshiftri_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)))
Logan Chienb0c84022018-11-09 16:19:54 +0800191
192static __inline__ unsigned int __DEFAULT_FN_ATTRS
193_cvtmask32_u32(__mmask32 __A) {
194 return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
195}
196
197static __inline__ unsigned long long __DEFAULT_FN_ATTRS
198_cvtmask64_u64(__mmask64 __A) {
199 return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
200}
201
202static __inline__ __mmask32 __DEFAULT_FN_ATTRS
203_cvtu32_mask32(unsigned int __A) {
204 return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
205}
206
207static __inline__ __mmask64 __DEFAULT_FN_ATTRS
208_cvtu64_mask64(unsigned long long __A) {
209 return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
210}
211
212static __inline__ __mmask32 __DEFAULT_FN_ATTRS
213_load_mask32(__mmask32 *__A) {
214 return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
215}
216
217static __inline__ __mmask64 __DEFAULT_FN_ATTRS
218_load_mask64(__mmask64 *__A) {
219 return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
220}
221
222static __inline__ void __DEFAULT_FN_ATTRS
223_store_mask32(__mmask32 *__A, __mmask32 __B) {
224 *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
225}
226
227static __inline__ void __DEFAULT_FN_ATTRS
228_store_mask64(__mmask64 *__A, __mmask64 __B) {
229 *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
230}
Logan Chien2833ffb2018-10-09 10:03:24 +0800231
/* Integer compare */

/* Generic element-wise comparisons of two 512-bit integer vectors.
 *   a, b : operands, converted to __m512i
 *   p    : comparison predicate, converted to int (the convenience macros
 *          in this file pass the _MM_CMPINT_* constants)
 *   m    : (mask variants only) mask forwarded to the builtin; the unmasked
 *          variants forward an all-ones mask instead
 * The result is a mask with one bit per element: __mmask64 for the 8-bit
 * forms and __mmask32 for the 16-bit forms. The epi* forms use the signed
 * builtins (cmpb/cmpw), the epu* forms the unsigned ones (ucmpb/ucmpw). */

#define _mm512_cmp_epi8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)-1))

#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)(m)))

#define _mm512_cmp_epu8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)-1))

#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)(m)))

#define _mm512_cmp_epi16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)-1))

#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)(m)))

#define _mm512_cmp_epu16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)-1))

#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)(m)))
Logan Chien2833ffb2018-10-09 10:03:24 +0800273
/* Signed 8-bit comparisons with a fixed predicate (EQ, GE, GT, LE, LT, NE).
 * Each macro forwards to _mm512_cmp_epi8_mask / _mm512_mask_cmp_epi8_mask
 * with the matching _MM_CMPINT_* constant; k is the mask for the mask_ forms. */
#define _mm512_cmpeq_epi8_mask(A, B) \
    _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi8_mask(k, A, B) \
    _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi8_mask(A, B) \
    _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi8_mask(k, A, B) \
    _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi8_mask(A, B) \
    _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi8_mask(k, A, B) \
    _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi8_mask(A, B) \
    _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi8_mask(k, A, B) \
    _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi8_mask(A, B) \
    _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi8_mask(k, A, B) \
    _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi8_mask(A, B) \
    _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi8_mask(k, A, B) \
    _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +0800298
/* Unsigned 8-bit comparisons with a fixed predicate (EQ, GE, GT, LE, LT, NE).
 * Each macro forwards to _mm512_cmp_epu8_mask / _mm512_mask_cmp_epu8_mask
 * with the matching _MM_CMPINT_* constant; k is the mask for the mask_ forms. */
#define _mm512_cmpeq_epu8_mask(A, B) \
    _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu8_mask(k, A, B) \
    _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu8_mask(A, B) \
    _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu8_mask(k, A, B) \
    _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu8_mask(A, B) \
    _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu8_mask(k, A, B) \
    _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu8_mask(A, B) \
    _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu8_mask(k, A, B) \
    _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu8_mask(A, B) \
    _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu8_mask(k, A, B) \
    _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu8_mask(A, B) \
    _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu8_mask(k, A, B) \
    _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +0800323
/* Signed 16-bit comparisons with a fixed predicate (EQ, GE, GT, LE, LT, NE).
 * Each macro forwards to _mm512_cmp_epi16_mask / _mm512_mask_cmp_epi16_mask
 * with the matching _MM_CMPINT_* constant; k is the mask for the mask_ forms. */
#define _mm512_cmpeq_epi16_mask(A, B) \
    _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi16_mask(k, A, B) \
    _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi16_mask(A, B) \
    _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi16_mask(k, A, B) \
    _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi16_mask(A, B) \
    _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi16_mask(k, A, B) \
    _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi16_mask(A, B) \
    _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi16_mask(k, A, B) \
    _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi16_mask(A, B) \
    _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi16_mask(k, A, B) \
    _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi16_mask(A, B) \
    _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi16_mask(k, A, B) \
    _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +0800348
/* Unsigned 16-bit comparisons with a fixed predicate (EQ, GE, GT, LE, LT, NE).
 * Each macro forwards to _mm512_cmp_epu16_mask / _mm512_mask_cmp_epu16_mask
 * with the matching _MM_CMPINT_* constant; k is the mask for the mask_ forms. */
#define _mm512_cmpeq_epu16_mask(A, B) \
    _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu16_mask(k, A, B) \
    _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu16_mask(A, B) \
    _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu16_mask(k, A, B) \
    _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu16_mask(A, B) \
    _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu16_mask(k, A, B) \
    _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu16_mask(A, B) \
    _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu16_mask(k, A, B) \
    _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu16_mask(A, B) \
    _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu16_mask(k, A, B) \
    _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu16_mask(A, B) \
    _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
    _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
Logan Chien2833ffb2018-10-09 10:03:24 +0800373
Logan Chienb0c84022018-11-09 16:19:54 +0800374static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800375_mm512_add_epi8 (__m512i __A, __m512i __B) {
376 return (__m512i) ((__v64qu) __A + (__v64qu) __B);
377}
378
Logan Chienb0c84022018-11-09 16:19:54 +0800379static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800380_mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
381 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
382 (__v64qi)_mm512_add_epi8(__A, __B),
383 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800384}
385
Logan Chienb0c84022018-11-09 16:19:54 +0800386static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800387_mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
388 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
389 (__v64qi)_mm512_add_epi8(__A, __B),
390 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800391}
392
Logan Chienb0c84022018-11-09 16:19:54 +0800393static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800394_mm512_sub_epi8 (__m512i __A, __m512i __B) {
395 return (__m512i) ((__v64qu) __A - (__v64qu) __B);
396}
397
Logan Chienb0c84022018-11-09 16:19:54 +0800398static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800399_mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
400 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
401 (__v64qi)_mm512_sub_epi8(__A, __B),
402 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800403}
404
Logan Chienb0c84022018-11-09 16:19:54 +0800405static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800406_mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
407 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
408 (__v64qi)_mm512_sub_epi8(__A, __B),
409 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800410}
411
Logan Chienb0c84022018-11-09 16:19:54 +0800412static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800413_mm512_add_epi16 (__m512i __A, __m512i __B) {
414 return (__m512i) ((__v32hu) __A + (__v32hu) __B);
415}
416
Logan Chienb0c84022018-11-09 16:19:54 +0800417static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800418_mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
419 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
420 (__v32hi)_mm512_add_epi16(__A, __B),
421 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800422}
423
Logan Chienb0c84022018-11-09 16:19:54 +0800424static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800425_mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
426 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
427 (__v32hi)_mm512_add_epi16(__A, __B),
428 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800429}
430
Logan Chienb0c84022018-11-09 16:19:54 +0800431static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800432_mm512_sub_epi16 (__m512i __A, __m512i __B) {
433 return (__m512i) ((__v32hu) __A - (__v32hu) __B);
434}
435
Logan Chienb0c84022018-11-09 16:19:54 +0800436static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800437_mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
438 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
439 (__v32hi)_mm512_sub_epi16(__A, __B),
440 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800441}
442
Logan Chienb0c84022018-11-09 16:19:54 +0800443static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800444_mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
445 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
446 (__v32hi)_mm512_sub_epi16(__A, __B),
447 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800448}
449
Logan Chienb0c84022018-11-09 16:19:54 +0800450static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800451_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
452 return (__m512i) ((__v32hu) __A * (__v32hu) __B);
453}
454
Logan Chienb0c84022018-11-09 16:19:54 +0800455static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800456_mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
457 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
458 (__v32hi)_mm512_mullo_epi16(__A, __B),
459 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800460}
461
Logan Chienb0c84022018-11-09 16:19:54 +0800462static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800463_mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
464 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
465 (__v32hi)_mm512_mullo_epi16(__A, __B),
466 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800467}
468
Logan Chienb0c84022018-11-09 16:19:54 +0800469static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800470_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
471{
472 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
473 (__v64qi) __W,
474 (__v64qi) __A);
475}
476
Logan Chienb0c84022018-11-09 16:19:54 +0800477static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800478_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
479{
480 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
481 (__v32hi) __W,
482 (__v32hi) __A);
483}
484
Logan Chienb0c84022018-11-09 16:19:54 +0800485static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800486_mm512_abs_epi8 (__m512i __A)
487{
Logan Chien55afb0a2018-10-15 10:42:14 +0800488 return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +0800489}
490
Logan Chienb0c84022018-11-09 16:19:54 +0800491static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800492_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
493{
Logan Chien55afb0a2018-10-15 10:42:14 +0800494 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
495 (__v64qi)_mm512_abs_epi8(__A),
496 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800497}
498
Logan Chienb0c84022018-11-09 16:19:54 +0800499static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800500_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
501{
Logan Chien55afb0a2018-10-15 10:42:14 +0800502 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
503 (__v64qi)_mm512_abs_epi8(__A),
504 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800505}
506
Logan Chienb0c84022018-11-09 16:19:54 +0800507static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800508_mm512_abs_epi16 (__m512i __A)
509{
Logan Chien55afb0a2018-10-15 10:42:14 +0800510 return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +0800511}
512
Logan Chienb0c84022018-11-09 16:19:54 +0800513static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800514_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
515{
Logan Chien55afb0a2018-10-15 10:42:14 +0800516 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
517 (__v32hi)_mm512_abs_epi16(__A),
518 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800519}
520
Logan Chienb0c84022018-11-09 16:19:54 +0800521static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800522_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
523{
Logan Chien55afb0a2018-10-15 10:42:14 +0800524 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
525 (__v32hi)_mm512_abs_epi16(__A),
526 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800527}
528
Logan Chienb0c84022018-11-09 16:19:54 +0800529static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800530_mm512_packs_epi32(__m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800531{
Logan Chien55afb0a2018-10-15 10:42:14 +0800532 return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800533}
534
Logan Chienb0c84022018-11-09 16:19:54 +0800535static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800536_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800537{
Logan Chien55afb0a2018-10-15 10:42:14 +0800538 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
539 (__v32hi)_mm512_packs_epi32(__A, __B),
540 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800541}
542
Logan Chienb0c84022018-11-09 16:19:54 +0800543static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800544_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800545{
Logan Chien55afb0a2018-10-15 10:42:14 +0800546 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
547 (__v32hi)_mm512_packs_epi32(__A, __B),
548 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800549}
550
Logan Chienb0c84022018-11-09 16:19:54 +0800551static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800552_mm512_packs_epi16(__m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800553{
Logan Chien55afb0a2018-10-15 10:42:14 +0800554 return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800555}
556
Logan Chienb0c84022018-11-09 16:19:54 +0800557static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800558_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800559{
Logan Chien55afb0a2018-10-15 10:42:14 +0800560 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
561 (__v64qi)_mm512_packs_epi16(__A, __B),
562 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800563}
564
Logan Chienb0c84022018-11-09 16:19:54 +0800565static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800566_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800567{
Logan Chien55afb0a2018-10-15 10:42:14 +0800568 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
569 (__v64qi)_mm512_packs_epi16(__A, __B),
570 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800571}
572
Logan Chienb0c84022018-11-09 16:19:54 +0800573static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800574_mm512_packus_epi32(__m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800575{
Logan Chien55afb0a2018-10-15 10:42:14 +0800576 return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800577}
578
Logan Chienb0c84022018-11-09 16:19:54 +0800579static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800580_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800581{
Logan Chien55afb0a2018-10-15 10:42:14 +0800582 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
583 (__v32hi)_mm512_packus_epi32(__A, __B),
584 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800585}
586
Logan Chienb0c84022018-11-09 16:19:54 +0800587static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800588_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800589{
Logan Chien55afb0a2018-10-15 10:42:14 +0800590 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
591 (__v32hi)_mm512_packus_epi32(__A, __B),
592 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800593}
594
Logan Chienb0c84022018-11-09 16:19:54 +0800595static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800596_mm512_packus_epi16(__m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800597{
Logan Chien55afb0a2018-10-15 10:42:14 +0800598 return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800599}
600
Logan Chienb0c84022018-11-09 16:19:54 +0800601static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800602_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800603{
Logan Chien55afb0a2018-10-15 10:42:14 +0800604 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
605 (__v64qi)_mm512_packus_epi16(__A, __B),
606 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800607}
608
Logan Chienb0c84022018-11-09 16:19:54 +0800609static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800610_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800611{
Logan Chien55afb0a2018-10-15 10:42:14 +0800612 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
613 (__v64qi)_mm512_packus_epi16(__A, __B),
614 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800615}
616
Logan Chienb0c84022018-11-09 16:19:54 +0800617static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800618_mm512_adds_epi8 (__m512i __A, __m512i __B)
619{
Logan Chienb0c84022018-11-09 16:19:54 +0800620 return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800621}
622
Logan Chienb0c84022018-11-09 16:19:54 +0800623static __inline__ __m512i __DEFAULT_FN_ATTRS512
624_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800625{
Logan Chienb0c84022018-11-09 16:19:54 +0800626 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
627 (__v64qi)_mm512_adds_epi8(__A, __B),
628 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800629}
630
Logan Chienb0c84022018-11-09 16:19:54 +0800631static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800632_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
633{
Logan Chienb0c84022018-11-09 16:19:54 +0800634 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
635 (__v64qi)_mm512_adds_epi8(__A, __B),
636 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800637}
638
Logan Chienb0c84022018-11-09 16:19:54 +0800639static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800640_mm512_adds_epi16 (__m512i __A, __m512i __B)
641{
Logan Chienb0c84022018-11-09 16:19:54 +0800642 return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800643}
644
Logan Chienb0c84022018-11-09 16:19:54 +0800645static __inline__ __m512i __DEFAULT_FN_ATTRS512
646_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800647{
Logan Chienb0c84022018-11-09 16:19:54 +0800648 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
649 (__v32hi)_mm512_adds_epi16(__A, __B),
650 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800651}
652
Logan Chienb0c84022018-11-09 16:19:54 +0800653static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800654_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
655{
Logan Chienb0c84022018-11-09 16:19:54 +0800656 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
657 (__v32hi)_mm512_adds_epi16(__A, __B),
658 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800659}
660
Logan Chienb0c84022018-11-09 16:19:54 +0800661static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800662_mm512_adds_epu8 (__m512i __A, __m512i __B)
663{
Logan Chienb0c84022018-11-09 16:19:54 +0800664 return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800665}
666
Logan Chienb0c84022018-11-09 16:19:54 +0800667static __inline__ __m512i __DEFAULT_FN_ATTRS512
668_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800669{
Logan Chienb0c84022018-11-09 16:19:54 +0800670 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
671 (__v64qi)_mm512_adds_epu8(__A, __B),
672 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800673}
674
Logan Chienb0c84022018-11-09 16:19:54 +0800675static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800676_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
677{
Logan Chienb0c84022018-11-09 16:19:54 +0800678 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
679 (__v64qi)_mm512_adds_epu8(__A, __B),
680 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800681}
682
Logan Chienb0c84022018-11-09 16:19:54 +0800683static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800684_mm512_adds_epu16 (__m512i __A, __m512i __B)
685{
Logan Chienb0c84022018-11-09 16:19:54 +0800686 return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800687}
688
Logan Chienb0c84022018-11-09 16:19:54 +0800689static __inline__ __m512i __DEFAULT_FN_ATTRS512
690_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800691{
Logan Chienb0c84022018-11-09 16:19:54 +0800692 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
693 (__v32hi)_mm512_adds_epu16(__A, __B),
694 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800695}
696
Logan Chienb0c84022018-11-09 16:19:54 +0800697static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800698_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
699{
Logan Chienb0c84022018-11-09 16:19:54 +0800700 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
701 (__v32hi)_mm512_adds_epu16(__A, __B),
702 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800703}
704
Logan Chienb0c84022018-11-09 16:19:54 +0800705static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800706_mm512_avg_epu8 (__m512i __A, __m512i __B)
707{
Logan Chiendf4f7662019-09-04 16:45:23 -0700708 return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800709}
710
Logan Chienb0c84022018-11-09 16:19:54 +0800711static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800712_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
713 __m512i __B)
714{
Logan Chien55afb0a2018-10-15 10:42:14 +0800715 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
716 (__v64qi)_mm512_avg_epu8(__A, __B),
717 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800718}
719
Logan Chienb0c84022018-11-09 16:19:54 +0800720static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800721_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
722{
Logan Chien55afb0a2018-10-15 10:42:14 +0800723 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
724 (__v64qi)_mm512_avg_epu8(__A, __B),
725 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800726}
727
Logan Chienb0c84022018-11-09 16:19:54 +0800728static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800729_mm512_avg_epu16 (__m512i __A, __m512i __B)
730{
Logan Chiendf4f7662019-09-04 16:45:23 -0700731 return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800732}
733
Logan Chienb0c84022018-11-09 16:19:54 +0800734static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800735_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
736 __m512i __B)
737{
Logan Chien55afb0a2018-10-15 10:42:14 +0800738 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
739 (__v32hi)_mm512_avg_epu16(__A, __B),
740 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800741}
742
Logan Chienb0c84022018-11-09 16:19:54 +0800743static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800744_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
745{
Logan Chien55afb0a2018-10-15 10:42:14 +0800746 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
747 (__v32hi)_mm512_avg_epu16(__A, __B),
748 (__v32hi) _mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800749}
750
Logan Chienb0c84022018-11-09 16:19:54 +0800751static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800752_mm512_max_epi8 (__m512i __A, __m512i __B)
753{
Logan Chien55afb0a2018-10-15 10:42:14 +0800754 return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800755}
756
Logan Chienb0c84022018-11-09 16:19:54 +0800757static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800758_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
759{
Logan Chien55afb0a2018-10-15 10:42:14 +0800760 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
761 (__v64qi)_mm512_max_epi8(__A, __B),
762 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800763}
764
Logan Chienb0c84022018-11-09 16:19:54 +0800765static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800766_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800767{
Logan Chien55afb0a2018-10-15 10:42:14 +0800768 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
769 (__v64qi)_mm512_max_epi8(__A, __B),
770 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800771}
772
Logan Chienb0c84022018-11-09 16:19:54 +0800773static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800774_mm512_max_epi16 (__m512i __A, __m512i __B)
775{
Logan Chien55afb0a2018-10-15 10:42:14 +0800776 return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800777}
778
Logan Chienb0c84022018-11-09 16:19:54 +0800779static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800780_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
781{
Logan Chien55afb0a2018-10-15 10:42:14 +0800782 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
783 (__v32hi)_mm512_max_epi16(__A, __B),
784 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800785}
786
Logan Chienb0c84022018-11-09 16:19:54 +0800787static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800788_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
789 __m512i __B)
790{
Logan Chien55afb0a2018-10-15 10:42:14 +0800791 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
792 (__v32hi)_mm512_max_epi16(__A, __B),
793 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800794}
795
Logan Chienb0c84022018-11-09 16:19:54 +0800796static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800797_mm512_max_epu8 (__m512i __A, __m512i __B)
798{
Logan Chien55afb0a2018-10-15 10:42:14 +0800799 return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800800}
801
Logan Chienb0c84022018-11-09 16:19:54 +0800802static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800803_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
804{
Logan Chien55afb0a2018-10-15 10:42:14 +0800805 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
806 (__v64qi)_mm512_max_epu8(__A, __B),
807 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800808}
809
Logan Chienb0c84022018-11-09 16:19:54 +0800810static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800811_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800812{
Logan Chien55afb0a2018-10-15 10:42:14 +0800813 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
814 (__v64qi)_mm512_max_epu8(__A, __B),
815 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800816}
817
Logan Chienb0c84022018-11-09 16:19:54 +0800818static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800819_mm512_max_epu16 (__m512i __A, __m512i __B)
820{
Logan Chien55afb0a2018-10-15 10:42:14 +0800821 return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800822}
823
Logan Chienb0c84022018-11-09 16:19:54 +0800824static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800825_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
826{
Logan Chien55afb0a2018-10-15 10:42:14 +0800827 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
828 (__v32hi)_mm512_max_epu16(__A, __B),
829 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800830}
831
Logan Chienb0c84022018-11-09 16:19:54 +0800832static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800833_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800834{
Logan Chien55afb0a2018-10-15 10:42:14 +0800835 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
836 (__v32hi)_mm512_max_epu16(__A, __B),
837 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800838}
839
Logan Chienb0c84022018-11-09 16:19:54 +0800840static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800841_mm512_min_epi8 (__m512i __A, __m512i __B)
842{
Logan Chien55afb0a2018-10-15 10:42:14 +0800843 return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800844}
845
Logan Chienb0c84022018-11-09 16:19:54 +0800846static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800847_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
848{
Logan Chien55afb0a2018-10-15 10:42:14 +0800849 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
850 (__v64qi)_mm512_min_epi8(__A, __B),
851 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800852}
853
Logan Chienb0c84022018-11-09 16:19:54 +0800854static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800855_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800856{
Logan Chien55afb0a2018-10-15 10:42:14 +0800857 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
858 (__v64qi)_mm512_min_epi8(__A, __B),
859 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800860}
861
Logan Chienb0c84022018-11-09 16:19:54 +0800862static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800863_mm512_min_epi16 (__m512i __A, __m512i __B)
864{
Logan Chien55afb0a2018-10-15 10:42:14 +0800865 return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800866}
867
Logan Chienb0c84022018-11-09 16:19:54 +0800868static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800869_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
870{
Logan Chien55afb0a2018-10-15 10:42:14 +0800871 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
872 (__v32hi)_mm512_min_epi16(__A, __B),
873 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800874}
875
Logan Chienb0c84022018-11-09 16:19:54 +0800876static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800877_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800878{
Logan Chien55afb0a2018-10-15 10:42:14 +0800879 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
880 (__v32hi)_mm512_min_epi16(__A, __B),
881 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800882}
883
Logan Chienb0c84022018-11-09 16:19:54 +0800884static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800885_mm512_min_epu8 (__m512i __A, __m512i __B)
886{
Logan Chien55afb0a2018-10-15 10:42:14 +0800887 return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800888}
889
Logan Chienb0c84022018-11-09 16:19:54 +0800890static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800891_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
892{
Logan Chien55afb0a2018-10-15 10:42:14 +0800893 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
894 (__v64qi)_mm512_min_epu8(__A, __B),
895 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800896}
897
Logan Chienb0c84022018-11-09 16:19:54 +0800898static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800899_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800900{
Logan Chien55afb0a2018-10-15 10:42:14 +0800901 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
902 (__v64qi)_mm512_min_epu8(__A, __B),
903 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800904}
905
Logan Chienb0c84022018-11-09 16:19:54 +0800906static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800907_mm512_min_epu16 (__m512i __A, __m512i __B)
908{
Logan Chien55afb0a2018-10-15 10:42:14 +0800909 return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800910}
911
Logan Chienb0c84022018-11-09 16:19:54 +0800912static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800913_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
914{
Logan Chien55afb0a2018-10-15 10:42:14 +0800915 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
916 (__v32hi)_mm512_min_epu16(__A, __B),
917 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800918}
919
Logan Chienb0c84022018-11-09 16:19:54 +0800920static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800921_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800922{
Logan Chien55afb0a2018-10-15 10:42:14 +0800923 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
924 (__v32hi)_mm512_min_epu16(__A, __B),
925 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800926}
927
Logan Chienb0c84022018-11-09 16:19:54 +0800928static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800929_mm512_shuffle_epi8(__m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800930{
Logan Chien55afb0a2018-10-15 10:42:14 +0800931 return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800932}
933
Logan Chienb0c84022018-11-09 16:19:54 +0800934static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800935_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800936{
Logan Chien55afb0a2018-10-15 10:42:14 +0800937 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
938 (__v64qi)_mm512_shuffle_epi8(__A, __B),
939 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800940}
941
Logan Chienb0c84022018-11-09 16:19:54 +0800942static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +0800943_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800944{
Logan Chien55afb0a2018-10-15 10:42:14 +0800945 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
946 (__v64qi)_mm512_shuffle_epi8(__A, __B),
947 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800948}
949
Logan Chienb0c84022018-11-09 16:19:54 +0800950static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800951_mm512_subs_epi8 (__m512i __A, __m512i __B)
952{
Logan Chienb0c84022018-11-09 16:19:54 +0800953 return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800954}
955
Logan Chienb0c84022018-11-09 16:19:54 +0800956static __inline__ __m512i __DEFAULT_FN_ATTRS512
957_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800958{
Logan Chienb0c84022018-11-09 16:19:54 +0800959 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
960 (__v64qi)_mm512_subs_epi8(__A, __B),
961 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800962}
963
Logan Chienb0c84022018-11-09 16:19:54 +0800964static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800965_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
966{
Logan Chienb0c84022018-11-09 16:19:54 +0800967 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
968 (__v64qi)_mm512_subs_epi8(__A, __B),
969 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800970}
971
Logan Chienb0c84022018-11-09 16:19:54 +0800972static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800973_mm512_subs_epi16 (__m512i __A, __m512i __B)
974{
Logan Chienb0c84022018-11-09 16:19:54 +0800975 return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800976}
977
Logan Chienb0c84022018-11-09 16:19:54 +0800978static __inline__ __m512i __DEFAULT_FN_ATTRS512
979_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800980{
Logan Chienb0c84022018-11-09 16:19:54 +0800981 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
982 (__v32hi)_mm512_subs_epi16(__A, __B),
983 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800984}
985
Logan Chienb0c84022018-11-09 16:19:54 +0800986static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800987_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
988{
Logan Chienb0c84022018-11-09 16:19:54 +0800989 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
990 (__v32hi)_mm512_subs_epi16(__A, __B),
991 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +0800992}
993
Logan Chienb0c84022018-11-09 16:19:54 +0800994static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +0800995_mm512_subs_epu8 (__m512i __A, __m512i __B)
996{
Logan Chienb0c84022018-11-09 16:19:54 +0800997 return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +0800998}
999
Logan Chienb0c84022018-11-09 16:19:54 +08001000static __inline__ __m512i __DEFAULT_FN_ATTRS512
1001_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001002{
Logan Chienb0c84022018-11-09 16:19:54 +08001003 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1004 (__v64qi)_mm512_subs_epu8(__A, __B),
1005 (__v64qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001006}
1007
Logan Chienb0c84022018-11-09 16:19:54 +08001008static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001009_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
1010{
Logan Chienb0c84022018-11-09 16:19:54 +08001011 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1012 (__v64qi)_mm512_subs_epu8(__A, __B),
1013 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001014}
1015
Logan Chienb0c84022018-11-09 16:19:54 +08001016static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001017_mm512_subs_epu16 (__m512i __A, __m512i __B)
1018{
Logan Chienb0c84022018-11-09 16:19:54 +08001019 return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001020}
1021
Logan Chienb0c84022018-11-09 16:19:54 +08001022static __inline__ __m512i __DEFAULT_FN_ATTRS512
1023_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001024{
Logan Chienb0c84022018-11-09 16:19:54 +08001025 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1026 (__v32hi)_mm512_subs_epu16(__A, __B),
1027 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001028}
1029
Logan Chienb0c84022018-11-09 16:19:54 +08001030static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001031_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1032{
Logan Chienb0c84022018-11-09 16:19:54 +08001033 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1034 (__v32hi)_mm512_subs_epu16(__A, __B),
1035 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001036}
1037
Logan Chienb0c84022018-11-09 16:19:54 +08001038static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001039_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001040{
Logan Chien55afb0a2018-10-15 10:42:14 +08001041 return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
1042 (__v32hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001043}
1044
Logan Chienb0c84022018-11-09 16:19:54 +08001045static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001046_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I,
1047 __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001048{
Logan Chien55afb0a2018-10-15 10:42:14 +08001049 return (__m512i)__builtin_ia32_selectw_512(__U,
1050 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1051 (__v32hi)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001052}
1053
Logan Chienb0c84022018-11-09 16:19:54 +08001054static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001055_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U,
1056 __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001057{
Logan Chien55afb0a2018-10-15 10:42:14 +08001058 return (__m512i)__builtin_ia32_selectw_512(__U,
1059 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1060 (__v32hi)__I);
Logan Chien2833ffb2018-10-09 10:03:24 +08001061}
1062
Logan Chienb0c84022018-11-09 16:19:54 +08001063static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001064_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
1065 __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001066{
Logan Chien55afb0a2018-10-15 10:42:14 +08001067 return (__m512i)__builtin_ia32_selectw_512(__U,
1068 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1069 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001070}
1071
Logan Chienb0c84022018-11-09 16:19:54 +08001072static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001073_mm512_mulhrs_epi16(__m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001074{
Logan Chien55afb0a2018-10-15 10:42:14 +08001075 return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001076}
1077
Logan Chienb0c84022018-11-09 16:19:54 +08001078static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001079_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001080{
Logan Chien55afb0a2018-10-15 10:42:14 +08001081 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1082 (__v32hi)_mm512_mulhrs_epi16(__A, __B),
1083 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001084}
1085
Logan Chienb0c84022018-11-09 16:19:54 +08001086static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001087_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001088{
Logan Chien55afb0a2018-10-15 10:42:14 +08001089 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1090 (__v32hi)_mm512_mulhrs_epi16(__A, __B),
1091 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001092}
1093
Logan Chienb0c84022018-11-09 16:19:54 +08001094static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001095_mm512_mulhi_epi16(__m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001096{
Logan Chien55afb0a2018-10-15 10:42:14 +08001097 return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001098}
1099
Logan Chienb0c84022018-11-09 16:19:54 +08001100static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001101_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
Logan Chien2833ffb2018-10-09 10:03:24 +08001102 __m512i __B)
1103{
Logan Chien55afb0a2018-10-15 10:42:14 +08001104 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1105 (__v32hi)_mm512_mulhi_epi16(__A, __B),
1106 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001107}
1108
Logan Chienb0c84022018-11-09 16:19:54 +08001109static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001110_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001111{
Logan Chien55afb0a2018-10-15 10:42:14 +08001112 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1113 (__v32hi)_mm512_mulhi_epi16(__A, __B),
1114 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001115}
1116
Logan Chienb0c84022018-11-09 16:19:54 +08001117static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001118_mm512_mulhi_epu16(__m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001119{
Logan Chien55afb0a2018-10-15 10:42:14 +08001120 return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001121}
1122
Logan Chienb0c84022018-11-09 16:19:54 +08001123static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001124_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001125{
Logan Chien55afb0a2018-10-15 10:42:14 +08001126 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1127 (__v32hi)_mm512_mulhi_epu16(__A, __B),
1128 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001129}
1130
Logan Chienb0c84022018-11-09 16:19:54 +08001131static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001132_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1133{
Logan Chien55afb0a2018-10-15 10:42:14 +08001134 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1135 (__v32hi)_mm512_mulhi_epu16(__A, __B),
1136 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001137}
1138
Logan Chienb0c84022018-11-09 16:19:54 +08001139static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001140_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
1141 return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
Logan Chien2833ffb2018-10-09 10:03:24 +08001142}
1143
Logan Chienb0c84022018-11-09 16:19:54 +08001144static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001145_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
1146 __m512i __Y) {
1147 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1148 (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1149 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001150}
1151
Logan Chienb0c84022018-11-09 16:19:54 +08001152static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001153_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
1154 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1155 (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1156 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001157}
1158
Logan Chienb0c84022018-11-09 16:19:54 +08001159static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001160_mm512_madd_epi16(__m512i __A, __m512i __B) {
1161 return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001162}
1163
Logan Chienb0c84022018-11-09 16:19:54 +08001164static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001165_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
1166 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1167 (__v16si)_mm512_madd_epi16(__A, __B),
1168 (__v16si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001169}
1170
Logan Chienb0c84022018-11-09 16:19:54 +08001171static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001172_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
1173 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1174 (__v16si)_mm512_madd_epi16(__A, __B),
1175 (__v16si)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001176}
1177
Logan Chienb0c84022018-11-09 16:19:54 +08001178static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001179_mm512_cvtsepi16_epi8 (__m512i __A) {
1180 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1181 (__v32qi)_mm256_setzero_si256(),
1182 (__mmask32) -1);
1183}
1184
Logan Chienb0c84022018-11-09 16:19:54 +08001185static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001186_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1187 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1188 (__v32qi)__O,
1189 __M);
1190}
1191
Logan Chienb0c84022018-11-09 16:19:54 +08001192static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001193_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) {
1194 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1195 (__v32qi) _mm256_setzero_si256(),
1196 __M);
1197}
1198
Logan Chienb0c84022018-11-09 16:19:54 +08001199static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001200_mm512_cvtusepi16_epi8 (__m512i __A) {
1201 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1202 (__v32qi) _mm256_setzero_si256(),
1203 (__mmask32) -1);
1204}
1205
Logan Chienb0c84022018-11-09 16:19:54 +08001206static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001207_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1208 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1209 (__v32qi) __O,
1210 __M);
1211}
1212
Logan Chienb0c84022018-11-09 16:19:54 +08001213static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001214_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
1215 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1216 (__v32qi) _mm256_setzero_si256(),
1217 __M);
1218}
1219
Logan Chienb0c84022018-11-09 16:19:54 +08001220static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001221_mm512_cvtepi16_epi8 (__m512i __A) {
1222 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08001223 (__v32qi) _mm256_undefined_si256(),
Logan Chien2833ffb2018-10-09 10:03:24 +08001224 (__mmask32) -1);
1225}
1226
Logan Chienb0c84022018-11-09 16:19:54 +08001227static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001228_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1229 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1230 (__v32qi) __O,
1231 __M);
1232}
1233
Logan Chienb0c84022018-11-09 16:19:54 +08001234static __inline__ __m256i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001235_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
1236 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1237 (__v32qi) _mm256_setzero_si256(),
1238 __M);
1239}
1240
Logan Chienb0c84022018-11-09 16:19:54 +08001241static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001242_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1243{
1244 __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1245}
1246
Logan Chienb0c84022018-11-09 16:19:54 +08001247static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001248_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1249{
1250 __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1251}
1252
Logan Chienb0c84022018-11-09 16:19:54 +08001253static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001254_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1255{
1256 __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1257}
1258
Logan Chienb0c84022018-11-09 16:19:54 +08001259static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001260_mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
1261 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1262 8, 64+8, 9, 64+9,
1263 10, 64+10, 11, 64+11,
1264 12, 64+12, 13, 64+13,
1265 14, 64+14, 15, 64+15,
1266 24, 64+24, 25, 64+25,
1267 26, 64+26, 27, 64+27,
1268 28, 64+28, 29, 64+29,
1269 30, 64+30, 31, 64+31,
1270 40, 64+40, 41, 64+41,
1271 42, 64+42, 43, 64+43,
1272 44, 64+44, 45, 64+45,
1273 46, 64+46, 47, 64+47,
1274 56, 64+56, 57, 64+57,
1275 58, 64+58, 59, 64+59,
1276 60, 64+60, 61, 64+61,
1277 62, 64+62, 63, 64+63);
1278}
1279
Logan Chienb0c84022018-11-09 16:19:54 +08001280static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001281_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1282 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1283 (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1284 (__v64qi)__W);
1285}
1286
Logan Chienb0c84022018-11-09 16:19:54 +08001287static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001288_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1289 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1290 (__v64qi)_mm512_unpackhi_epi8(__A, __B),
Logan Chien55afb0a2018-10-15 10:42:14 +08001291 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001292}
1293
Logan Chienb0c84022018-11-09 16:19:54 +08001294static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001295_mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
1296 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1297 4, 32+4, 5, 32+5,
1298 6, 32+6, 7, 32+7,
1299 12, 32+12, 13, 32+13,
1300 14, 32+14, 15, 32+15,
1301 20, 32+20, 21, 32+21,
1302 22, 32+22, 23, 32+23,
1303 28, 32+28, 29, 32+29,
1304 30, 32+30, 31, 32+31);
1305}
1306
Logan Chienb0c84022018-11-09 16:19:54 +08001307static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001308_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1309 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1310 (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1311 (__v32hi)__W);
1312}
1313
Logan Chienb0c84022018-11-09 16:19:54 +08001314static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001315_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1316 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1317 (__v32hi)_mm512_unpackhi_epi16(__A, __B),
Logan Chien55afb0a2018-10-15 10:42:14 +08001318 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001319}
1320
Logan Chienb0c84022018-11-09 16:19:54 +08001321static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001322_mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
1323 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1324 0, 64+0, 1, 64+1,
1325 2, 64+2, 3, 64+3,
1326 4, 64+4, 5, 64+5,
1327 6, 64+6, 7, 64+7,
1328 16, 64+16, 17, 64+17,
1329 18, 64+18, 19, 64+19,
1330 20, 64+20, 21, 64+21,
1331 22, 64+22, 23, 64+23,
1332 32, 64+32, 33, 64+33,
1333 34, 64+34, 35, 64+35,
1334 36, 64+36, 37, 64+37,
1335 38, 64+38, 39, 64+39,
1336 48, 64+48, 49, 64+49,
1337 50, 64+50, 51, 64+51,
1338 52, 64+52, 53, 64+53,
1339 54, 64+54, 55, 64+55);
1340}
1341
Logan Chienb0c84022018-11-09 16:19:54 +08001342static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001343_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1344 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1345 (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1346 (__v64qi)__W);
1347}
1348
Logan Chienb0c84022018-11-09 16:19:54 +08001349static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001350_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1351 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1352 (__v64qi)_mm512_unpacklo_epi8(__A, __B),
Logan Chien55afb0a2018-10-15 10:42:14 +08001353 (__v64qi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001354}
1355
Logan Chienb0c84022018-11-09 16:19:54 +08001356static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001357_mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
1358 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1359 0, 32+0, 1, 32+1,
1360 2, 32+2, 3, 32+3,
1361 8, 32+8, 9, 32+9,
1362 10, 32+10, 11, 32+11,
1363 16, 32+16, 17, 32+17,
1364 18, 32+18, 19, 32+19,
1365 24, 32+24, 25, 32+25,
1366 26, 32+26, 27, 32+27);
1367}
1368
Logan Chienb0c84022018-11-09 16:19:54 +08001369static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001370_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1371 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1372 (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1373 (__v32hi)__W);
1374}
1375
Logan Chienb0c84022018-11-09 16:19:54 +08001376static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001377_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1378 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1379 (__v32hi)_mm512_unpacklo_epi16(__A, __B),
Logan Chien55afb0a2018-10-15 10:42:14 +08001380 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001381}
1382
Logan Chienb0c84022018-11-09 16:19:54 +08001383static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001384_mm512_cvtepi8_epi16(__m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001385{
Logan Chien55afb0a2018-10-15 10:42:14 +08001386 /* This function always performs a signed extension, but __v32qi is a char
1387 which may be signed or unsigned, so use __v32qs. */
1388 return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi);
Logan Chien2833ffb2018-10-09 10:03:24 +08001389}
1390
Logan Chienb0c84022018-11-09 16:19:54 +08001391static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001392_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001393{
Logan Chien55afb0a2018-10-15 10:42:14 +08001394 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1395 (__v32hi)_mm512_cvtepi8_epi16(__A),
1396 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001397}
1398
Logan Chienb0c84022018-11-09 16:19:54 +08001399static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001400_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001401{
Logan Chien55afb0a2018-10-15 10:42:14 +08001402 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1403 (__v32hi)_mm512_cvtepi8_epi16(__A),
1404 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001405}
1406
Logan Chienb0c84022018-11-09 16:19:54 +08001407static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001408_mm512_cvtepu8_epi16(__m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001409{
Logan Chien55afb0a2018-10-15 10:42:14 +08001410 return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi);
Logan Chien2833ffb2018-10-09 10:03:24 +08001411}
1412
Logan Chienb0c84022018-11-09 16:19:54 +08001413static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001414_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001415{
Logan Chien55afb0a2018-10-15 10:42:14 +08001416 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1417 (__v32hi)_mm512_cvtepu8_epi16(__A),
1418 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001419}
1420
Logan Chienb0c84022018-11-09 16:19:54 +08001421static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001422_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001423{
Logan Chien55afb0a2018-10-15 10:42:14 +08001424 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1425 (__v32hi)_mm512_cvtepu8_epi16(__A),
1426 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001427}
1428
1429
/* Forwards A (as __v32hi) and the control value imm (as int) to the
   pshufhw512 builtin.  Kept as a macro, like the other imm-taking wrappers
   here. */
#define _mm512_shufflehi_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshufhw512( \
      (__v32hi)(__m512i)(A), \
      (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001432
/* Merge-masked _mm512_shufflehi_epi16: the shuffled words are blended with W
   under control of U by the word-select builtin. */
#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shufflehi_epi16((A), (imm)), \
      (__v32hi)(__m512i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001438
/* Zero-masked _mm512_shufflehi_epi16: the shuffled words are blended with an
   all-zero vector under control of U by the word-select builtin. */
#define _mm512_maskz_shufflehi_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shufflehi_epi16((A), (imm)), \
      (__v32hi)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08001444
/* Forwards A (as __v32hi) and the control value imm (as int) to the
   pshuflw512 builtin.  Kept as a macro, like the other imm-taking wrappers
   here. */
#define _mm512_shufflelo_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshuflw512( \
      (__v32hi)(__m512i)(A), \
      (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001447
1448
/* Merge-masked _mm512_shufflelo_epi16: the shuffled words are blended with W
   under control of U by the word-select builtin. */
#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shufflelo_epi16((A), (imm)), \
      (__v32hi)(__m512i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001454
1455
/* Zero-masked _mm512_shufflelo_epi16: the shuffled words are blended with an
   all-zero vector under control of U by the word-select builtin. */
#define _mm512_maskz_shufflelo_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shufflelo_epi16((A), (imm)), \
      (__v32hi)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08001461
Logan Chienb0c84022018-11-09 16:19:54 +08001462static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001463_mm512_sllv_epi16(__m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001464{
Logan Chien55afb0a2018-10-15 10:42:14 +08001465 return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001466}
1467
Logan Chienb0c84022018-11-09 16:19:54 +08001468static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001469_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001470{
Logan Chien55afb0a2018-10-15 10:42:14 +08001471 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1472 (__v32hi)_mm512_sllv_epi16(__A, __B),
1473 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001474}
1475
Logan Chienb0c84022018-11-09 16:19:54 +08001476static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001477_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001478{
Logan Chien55afb0a2018-10-15 10:42:14 +08001479 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1480 (__v32hi)_mm512_sllv_epi16(__A, __B),
1481 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001482}
1483
Logan Chienb0c84022018-11-09 16:19:54 +08001484static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001485_mm512_sll_epi16(__m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001486{
Logan Chien55afb0a2018-10-15 10:42:14 +08001487 return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001488}
1489
Logan Chienb0c84022018-11-09 16:19:54 +08001490static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001491_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001492{
Logan Chien55afb0a2018-10-15 10:42:14 +08001493 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1494 (__v32hi)_mm512_sll_epi16(__A, __B),
1495 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001496}
1497
Logan Chienb0c84022018-11-09 16:19:54 +08001498static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001499_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001500{
Logan Chien55afb0a2018-10-15 10:42:14 +08001501 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1502 (__v32hi)_mm512_sll_epi16(__A, __B),
1503 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001504}
1505
Logan Chienb0c84022018-11-09 16:19:54 +08001506static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07001507_mm512_slli_epi16(__m512i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001508{
Logan Chien55afb0a2018-10-15 10:42:14 +08001509 return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001510}
1511
Logan Chienb0c84022018-11-09 16:19:54 +08001512static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07001513_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A,
1514 unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001515{
Logan Chien55afb0a2018-10-15 10:42:14 +08001516 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1517 (__v32hi)_mm512_slli_epi16(__A, __B),
1518 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001519}
1520
Logan Chienb0c84022018-11-09 16:19:54 +08001521static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07001522_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001523{
Logan Chien55afb0a2018-10-15 10:42:14 +08001524 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1525 (__v32hi)_mm512_slli_epi16(__A, __B),
1526 (__v32hi)_mm512_setzero_si512());
1527}
1528
/* Forwards a (as __v8di) and the count imm (as int) to the
   pslldqi512_byteshift builtin. */
#define _mm512_bslli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_pslldqi512_byteshift( \
      (__v8di)(__m512i)(a), \
      (int)(imm)))
Logan Chien55afb0a2018-10-15 10:42:14 +08001531
Logan Chienb0c84022018-11-09 16:19:54 +08001532static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001533_mm512_srlv_epi16(__m512i __A, __m512i __B)
1534{
1535 return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001536}
1537
Logan Chienb0c84022018-11-09 16:19:54 +08001538static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001539_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001540{
Logan Chien55afb0a2018-10-15 10:42:14 +08001541 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1542 (__v32hi)_mm512_srlv_epi16(__A, __B),
1543 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001544}
1545
Logan Chienb0c84022018-11-09 16:19:54 +08001546static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001547_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001548{
Logan Chien55afb0a2018-10-15 10:42:14 +08001549 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1550 (__v32hi)_mm512_srlv_epi16(__A, __B),
1551 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001552}
1553
Logan Chienb0c84022018-11-09 16:19:54 +08001554static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001555_mm512_srav_epi16(__m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001556{
Logan Chien55afb0a2018-10-15 10:42:14 +08001557 return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001558}
1559
Logan Chienb0c84022018-11-09 16:19:54 +08001560static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001561_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001562{
Logan Chien55afb0a2018-10-15 10:42:14 +08001563 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1564 (__v32hi)_mm512_srav_epi16(__A, __B),
1565 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001566}
1567
Logan Chienb0c84022018-11-09 16:19:54 +08001568static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001569_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001570{
Logan Chien55afb0a2018-10-15 10:42:14 +08001571 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1572 (__v32hi)_mm512_srav_epi16(__A, __B),
1573 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001574}
1575
Logan Chienb0c84022018-11-09 16:19:54 +08001576static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001577_mm512_sra_epi16(__m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001578{
Logan Chien55afb0a2018-10-15 10:42:14 +08001579 return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001580}
1581
Logan Chienb0c84022018-11-09 16:19:54 +08001582static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001583_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001584{
Logan Chien55afb0a2018-10-15 10:42:14 +08001585 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1586 (__v32hi)_mm512_sra_epi16(__A, __B),
1587 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001588}
1589
Logan Chienb0c84022018-11-09 16:19:54 +08001590static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001591_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1592{
1593 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1594 (__v32hi)_mm512_sra_epi16(__A, __B),
1595 (__v32hi)_mm512_setzero_si512());
1596}
Logan Chien2833ffb2018-10-09 10:03:24 +08001597
Logan Chienb0c84022018-11-09 16:19:54 +08001598static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07001599_mm512_srai_epi16(__m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08001600{
1601 return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B);
1602}
Logan Chien2833ffb2018-10-09 10:03:24 +08001603
Logan Chienb0c84022018-11-09 16:19:54 +08001604static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07001605_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A,
1606 unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08001607{
1608 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1609 (__v32hi)_mm512_srai_epi16(__A, __B),
1610 (__v32hi)__W);
1611}
Logan Chien2833ffb2018-10-09 10:03:24 +08001612
Logan Chienb0c84022018-11-09 16:19:54 +08001613static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07001614_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08001615{
1616 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1617 (__v32hi)_mm512_srai_epi16(__A, __B),
1618 (__v32hi)_mm512_setzero_si512());
1619}
1620
Logan Chienb0c84022018-11-09 16:19:54 +08001621static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001622_mm512_srl_epi16(__m512i __A, __m128i __B)
1623{
1624 return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B);
1625}
1626
Logan Chienb0c84022018-11-09 16:19:54 +08001627static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001628_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1629{
1630 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1631 (__v32hi)_mm512_srl_epi16(__A, __B),
1632 (__v32hi)__W);
1633}
1634
Logan Chienb0c84022018-11-09 16:19:54 +08001635static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001636_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1637{
1638 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1639 (__v32hi)_mm512_srl_epi16(__A, __B),
1640 (__v32hi)_mm512_setzero_si512());
1641}
1642
Logan Chienb0c84022018-11-09 16:19:54 +08001643static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07001644_mm512_srli_epi16(__m512i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08001645{
1646 return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B);
1647}
1648
Logan Chienb0c84022018-11-09 16:19:54 +08001649static __inline__ __m512i __DEFAULT_FN_ATTRS512
Sasha Smundak0fc590b2020-10-07 08:11:59 -07001650_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A,
1651 unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08001652{
1653 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1654 (__v32hi)_mm512_srli_epi16(__A, __B),
1655 (__v32hi)__W);
1656}
1657
Logan Chienb0c84022018-11-09 16:19:54 +08001658static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien55afb0a2018-10-15 10:42:14 +08001659_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
1660{
1661 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1662 (__v32hi)_mm512_srli_epi16(__A, __B),
1663 (__v32hi)_mm512_setzero_si512());
1664}
1665
/* Forwards a (as __v8di) and the count imm (as int) to the
   psrldqi512_byteshift builtin. */
#define _mm512_bsrli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_psrldqi512_byteshift( \
      (__v8di)(__m512i)(a), \
      (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001668
Logan Chienb0c84022018-11-09 16:19:54 +08001669static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001670_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
1671{
1672 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1673 (__v32hi) __A,
1674 (__v32hi) __W);
1675}
1676
Logan Chienb0c84022018-11-09 16:19:54 +08001677static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001678_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
1679{
1680 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1681 (__v32hi) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08001682 (__v32hi) _mm512_setzero_si512 ());
Logan Chien2833ffb2018-10-09 10:03:24 +08001683}
1684
Logan Chienb0c84022018-11-09 16:19:54 +08001685static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001686_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
1687{
1688 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1689 (__v64qi) __A,
1690 (__v64qi) __W);
1691}
1692
Logan Chienb0c84022018-11-09 16:19:54 +08001693static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001694_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
1695{
1696 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1697 (__v64qi) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08001698 (__v64qi) _mm512_setzero_si512 ());
Logan Chien2833ffb2018-10-09 10:03:24 +08001699}
1700
Logan Chienb0c84022018-11-09 16:19:54 +08001701static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001702_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
1703{
Logan Chien55afb0a2018-10-15 10:42:14 +08001704 return (__m512i) __builtin_ia32_selectb_512(__M,
1705 (__v64qi)_mm512_set1_epi8(__A),
1706 (__v64qi) __O);
Logan Chien2833ffb2018-10-09 10:03:24 +08001707}
1708
Logan Chienb0c84022018-11-09 16:19:54 +08001709static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001710_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
1711{
Logan Chien55afb0a2018-10-15 10:42:14 +08001712 return (__m512i) __builtin_ia32_selectb_512(__M,
1713 (__v64qi) _mm512_set1_epi8(__A),
1714 (__v64qi) _mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001715}
1716
Logan Chiendf4f7662019-09-04 16:45:23 -07001717static __inline__ __mmask64 __DEFAULT_FN_ATTRS
Logan Chien2833ffb2018-10-09 10:03:24 +08001718_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
1719{
1720 return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
1721 (__mmask64) __B);
1722}
1723
Logan Chiendf4f7662019-09-04 16:45:23 -07001724static __inline__ __mmask32 __DEFAULT_FN_ATTRS
Logan Chien2833ffb2018-10-09 10:03:24 +08001725_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
1726{
1727 return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
1728 (__mmask32) __B);
1729}
1730
Logan Chien969aea62018-12-05 18:40:57 +08001731static __inline __m512i __DEFAULT_FN_ATTRS512
1732_mm512_loadu_epi16 (void const *__P)
1733{
1734 struct __loadu_epi16 {
Logan Chiendbcf4122019-03-21 10:50:25 +08001735 __m512i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08001736 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07001737 return ((const struct __loadu_epi16*)__P)->__v;
Logan Chien969aea62018-12-05 18:40:57 +08001738}
1739
Logan Chienb0c84022018-11-09 16:19:54 +08001740static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001741_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
1742{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07001743 return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08001744 (__v32hi) __W,
1745 (__mmask32) __U);
1746}
1747
Logan Chienb0c84022018-11-09 16:19:54 +08001748static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001749_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
1750{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07001751 return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08001752 (__v32hi)
Logan Chien55afb0a2018-10-15 10:42:14 +08001753 _mm512_setzero_si512 (),
Logan Chien2833ffb2018-10-09 10:03:24 +08001754 (__mmask32) __U);
1755}
1756
Logan Chien969aea62018-12-05 18:40:57 +08001757static __inline __m512i __DEFAULT_FN_ATTRS512
1758_mm512_loadu_epi8 (void const *__P)
1759{
1760 struct __loadu_epi8 {
Logan Chiendbcf4122019-03-21 10:50:25 +08001761 __m512i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08001762 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07001763 return ((const struct __loadu_epi8*)__P)->__v;
Logan Chien969aea62018-12-05 18:40:57 +08001764}
1765
Logan Chienb0c84022018-11-09 16:19:54 +08001766static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001767_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
1768{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07001769 return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08001770 (__v64qi) __W,
1771 (__mmask64) __U);
1772}
1773
Logan Chienb0c84022018-11-09 16:19:54 +08001774static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001775_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
1776{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07001777 return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08001778 (__v64qi)
Logan Chien55afb0a2018-10-15 10:42:14 +08001779 _mm512_setzero_si512 (),
Logan Chien2833ffb2018-10-09 10:03:24 +08001780 (__mmask64) __U);
1781}
Logan Chien969aea62018-12-05 18:40:57 +08001782
1783static __inline void __DEFAULT_FN_ATTRS512
1784_mm512_storeu_epi16 (void *__P, __m512i __A)
1785{
1786 struct __storeu_epi16 {
Logan Chiendbcf4122019-03-21 10:50:25 +08001787 __m512i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08001788 } __attribute__((__packed__, __may_alias__));
1789 ((struct __storeu_epi16*)__P)->__v = __A;
1790}
1791
Logan Chienb0c84022018-11-09 16:19:54 +08001792static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001793_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
1794{
1795 __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
1796 (__v32hi) __A,
1797 (__mmask32) __U);
1798}
1799
Logan Chien969aea62018-12-05 18:40:57 +08001800static __inline void __DEFAULT_FN_ATTRS512
1801_mm512_storeu_epi8 (void *__P, __m512i __A)
1802{
1803 struct __storeu_epi8 {
Logan Chiendbcf4122019-03-21 10:50:25 +08001804 __m512i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08001805 } __attribute__((__packed__, __may_alias__));
1806 ((struct __storeu_epi8*)__P)->__v = __A;
1807}
1808
Logan Chienb0c84022018-11-09 16:19:54 +08001809static __inline__ void __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001810_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
1811{
1812 __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
1813 (__v64qi) __A,
1814 (__mmask64) __U);
1815}
1816
Logan Chienb0c84022018-11-09 16:19:54 +08001817static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001818_mm512_test_epi8_mask (__m512i __A, __m512i __B)
1819{
Logan Chien55afb0a2018-10-15 10:42:14 +08001820 return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B),
1821 _mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001822}
1823
Logan Chienb0c84022018-11-09 16:19:54 +08001824static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001825_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1826{
Logan Chien55afb0a2018-10-15 10:42:14 +08001827 return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1828 _mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001829}
1830
Logan Chienb0c84022018-11-09 16:19:54 +08001831static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001832_mm512_test_epi16_mask (__m512i __A, __m512i __B)
1833{
Logan Chien55afb0a2018-10-15 10:42:14 +08001834 return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B),
1835 _mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001836}
1837
Logan Chienb0c84022018-11-09 16:19:54 +08001838static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001839_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1840{
Logan Chien55afb0a2018-10-15 10:42:14 +08001841 return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1842 _mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001843}
1844
Logan Chienb0c84022018-11-09 16:19:54 +08001845static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001846_mm512_testn_epi8_mask (__m512i __A, __m512i __B)
1847{
Logan Chien55afb0a2018-10-15 10:42:14 +08001848 return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001849}
1850
Logan Chienb0c84022018-11-09 16:19:54 +08001851static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001852_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1853{
Logan Chien55afb0a2018-10-15 10:42:14 +08001854 return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1855 _mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001856}
1857
Logan Chienb0c84022018-11-09 16:19:54 +08001858static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001859_mm512_testn_epi16_mask (__m512i __A, __m512i __B)
1860{
Logan Chien55afb0a2018-10-15 10:42:14 +08001861 return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B),
1862 _mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001863}
1864
Logan Chienb0c84022018-11-09 16:19:54 +08001865static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001866_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1867{
Logan Chien55afb0a2018-10-15 10:42:14 +08001868 return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1869 _mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001870}
1871
Logan Chienb0c84022018-11-09 16:19:54 +08001872static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001873_mm512_movepi8_mask (__m512i __A)
1874{
1875 return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
1876}
1877
Logan Chienb0c84022018-11-09 16:19:54 +08001878static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001879_mm512_movepi16_mask (__m512i __A)
1880{
1881 return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
1882}
1883
Logan Chienb0c84022018-11-09 16:19:54 +08001884static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001885_mm512_movm_epi8 (__mmask64 __A)
1886{
1887 return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
1888}
1889
Logan Chienb0c84022018-11-09 16:19:54 +08001890static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001891_mm512_movm_epi16 (__mmask32 __A)
1892{
1893 return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
1894}
1895
Logan Chienb0c84022018-11-09 16:19:54 +08001896static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001897_mm512_broadcastb_epi8 (__m128i __A)
1898{
Logan Chien55afb0a2018-10-15 10:42:14 +08001899 return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A,
Logan Chien2833ffb2018-10-09 10:03:24 +08001900 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1901 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1902 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1903 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1904}
1905
Logan Chienb0c84022018-11-09 16:19:54 +08001906static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001907_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
1908{
1909 return (__m512i)__builtin_ia32_selectb_512(__M,
1910 (__v64qi) _mm512_broadcastb_epi8(__A),
1911 (__v64qi) __O);
1912}
1913
Logan Chienb0c84022018-11-09 16:19:54 +08001914static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001915_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
1916{
1917 return (__m512i)__builtin_ia32_selectb_512(__M,
1918 (__v64qi) _mm512_broadcastb_epi8(__A),
1919 (__v64qi) _mm512_setzero_si512());
1920}
1921
Logan Chienb0c84022018-11-09 16:19:54 +08001922static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001923_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
1924{
Logan Chien55afb0a2018-10-15 10:42:14 +08001925 return (__m512i) __builtin_ia32_selectw_512(__M,
1926 (__v32hi) _mm512_set1_epi16(__A),
1927 (__v32hi) __O);
Logan Chien2833ffb2018-10-09 10:03:24 +08001928}
1929
Logan Chienb0c84022018-11-09 16:19:54 +08001930static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001931_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
1932{
Logan Chien55afb0a2018-10-15 10:42:14 +08001933 return (__m512i) __builtin_ia32_selectw_512(__M,
1934 (__v32hi) _mm512_set1_epi16(__A),
1935 (__v32hi) _mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001936}
1937
Logan Chienb0c84022018-11-09 16:19:54 +08001938static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001939_mm512_broadcastw_epi16 (__m128i __A)
1940{
Logan Chien55afb0a2018-10-15 10:42:14 +08001941 return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A,
Logan Chien2833ffb2018-10-09 10:03:24 +08001942 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1943 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1944}
1945
Logan Chienb0c84022018-11-09 16:19:54 +08001946static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001947_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
1948{
1949 return (__m512i)__builtin_ia32_selectw_512(__M,
1950 (__v32hi) _mm512_broadcastw_epi16(__A),
1951 (__v32hi) __O);
1952}
1953
Logan Chienb0c84022018-11-09 16:19:54 +08001954static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001955_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
1956{
1957 return (__m512i)__builtin_ia32_selectw_512(__M,
1958 (__v32hi) _mm512_broadcastw_epi16(__A),
1959 (__v32hi) _mm512_setzero_si512());
1960}
1961
Logan Chienb0c84022018-11-09 16:19:54 +08001962static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001963_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
1964{
Logan Chien55afb0a2018-10-15 10:42:14 +08001965 return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001966}
1967
Logan Chienb0c84022018-11-09 16:19:54 +08001968static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001969_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
1970 __m512i __B)
1971{
Logan Chien55afb0a2018-10-15 10:42:14 +08001972 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1973 (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1974 (__v32hi)_mm512_setzero_si512());
Logan Chien2833ffb2018-10-09 10:03:24 +08001975}
1976
Logan Chienb0c84022018-11-09 16:19:54 +08001977static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08001978_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
1979 __m512i __B)
1980{
Logan Chien55afb0a2018-10-15 10:42:14 +08001981 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1982 (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1983 (__v32hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001984}
1985
/* Expands to __builtin_ia32_palignr512 on A and B (as 64 x 8-bit lanes) with
 * N cast to int. Implemented as a macro rather than a function. */
#define _mm512_alignr_epi8(A, B, N)                                           \
  ((__m512i)__builtin_ia32_palignr512(                                        \
      (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(N)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001989
/* Expands to __builtin_ia32_selectb_512 over mask U, the result of
 * _mm512_alignr_epi8(A, B, N), and W (in that order). */
#define _mm512_mask_alignr_epi8(W, U, A, B, N)                                \
  ((__m512i)__builtin_ia32_selectb_512(                                       \
      (__mmask64)(U),                                                         \
      (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)),                        \
      (__v64qi)(__m512i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001994
/* Expands to __builtin_ia32_selectb_512 over mask U, the result of
 * _mm512_alignr_epi8(A, B, N), and an all-zero vector (in that order). */
#define _mm512_maskz_alignr_epi8(U, A, B, N)                                  \
  ((__m512i)__builtin_ia32_selectb_512(                                       \
      (__mmask64)(U),                                                         \
      (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)),                        \
      (__v64qi)(__m512i)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08001999
/* Expands to __builtin_ia32_dbpsadbw512 on A and B (as 64 x 8-bit lanes) with
 * imm cast to int. Implemented as a macro rather than a function. */
#define _mm512_dbsad_epu8(A, B, imm)                                          \
  ((__m512i)__builtin_ia32_dbpsadbw512(                                       \
      (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002003
/* Expands to __builtin_ia32_selectw_512 over mask U, the result of
 * _mm512_dbsad_epu8(A, B, imm), and W (in that order). */
#define _mm512_mask_dbsad_epu8(W, U, A, B, imm)                               \
  ((__m512i)__builtin_ia32_selectw_512(                                       \
      (__mmask32)(U),                                                         \
      (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)),                            \
      (__v32hi)(__m512i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002008
/* Expands to __builtin_ia32_selectw_512 over mask U, the result of
 * _mm512_dbsad_epu8(A, B, imm), and an all-zero vector (in that order). */
#define _mm512_maskz_dbsad_epu8(U, A, B, imm)                                 \
  ((__m512i)__builtin_ia32_selectw_512(                                       \
      (__mmask32)(U),                                                         \
      (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)),                            \
      (__v32hi)_mm512_setzero_si512()))
Logan Chien2833ffb2018-10-09 10:03:24 +08002013
Logan Chienb0c84022018-11-09 16:19:54 +08002014static __inline__ __m512i __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002015_mm512_sad_epu8 (__m512i __A, __m512i __B)
2016{
2017 return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
2018 (__v64qi) __B);
2019}
2020
Logan Chienb0c84022018-11-09 16:19:54 +08002021#undef __DEFAULT_FN_ATTRS512
Logan Chien2833ffb2018-10-09 10:03:24 +08002022#undef __DEFAULT_FN_ATTRS
2023
2024#endif