blob: 17fa77722c64f6fa4403b7e7647f6656ab8b6fb0 [file] [log] [blame]
/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VBMI2INTRIN_H
15#define __AVX512VBMI2INTRIN_H
16
/* Attribute set applied to every inline function defined in this header:
 * always inlined, no debug info, compiled for the "avx512vbmi2" target
 * feature, and declared with a minimum vector width of 512 bits. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, \
                 __nodebug__, \
                 __target__("avx512vbmi2"), \
                 __min_vector_width__(512)))
19
20
21static __inline__ __m512i __DEFAULT_FN_ATTRS
22_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
23{
24 return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
25 (__v32hi) __S,
26 __U);
27}
28
29static __inline__ __m512i __DEFAULT_FN_ATTRS
30_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
31{
32 return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
33 (__v32hi) _mm512_setzero_si512(),
34 __U);
35}
36
37static __inline__ __m512i __DEFAULT_FN_ATTRS
38_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
39{
40 return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
41 (__v64qi) __S,
42 __U);
43}
44
45static __inline__ __m512i __DEFAULT_FN_ATTRS
46_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
47{
48 return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
49 (__v64qi) _mm512_setzero_si512(),
50 __U);
51}
52
53static __inline__ void __DEFAULT_FN_ATTRS
54_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
55{
56 __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D,
57 __U);
58}
59
60static __inline__ void __DEFAULT_FN_ATTRS
61_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
62{
63 __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D,
64 __U);
65}
66
67static __inline__ __m512i __DEFAULT_FN_ATTRS
68_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
69{
70 return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
71 (__v32hi) __S,
72 __U);
73}
74
75static __inline__ __m512i __DEFAULT_FN_ATTRS
76_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
77{
78 return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
79 (__v32hi) _mm512_setzero_si512(),
80 __U);
81}
82
83static __inline__ __m512i __DEFAULT_FN_ATTRS
84_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
85{
86 return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
87 (__v64qi) __S,
88 __U);
89}
90
91static __inline__ __m512i __DEFAULT_FN_ATTRS
92_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
93{
94 return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
95 (__v64qi) _mm512_setzero_si512(),
96 __U);
97}
98
99static __inline__ __m512i __DEFAULT_FN_ATTRS
100_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
101{
102 return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
103 (__v32hi) __S,
104 __U);
105}
106
107static __inline__ __m512i __DEFAULT_FN_ATTRS
108_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
109{
110 return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
111 (__v32hi) _mm512_setzero_si512(),
112 __U);
113}
114
115static __inline__ __m512i __DEFAULT_FN_ATTRS
116_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
117{
118 return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
119 (__v64qi) __S,
120 __U);
121}
122
123static __inline__ __m512i __DEFAULT_FN_ATTRS
124_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
125{
126 return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
127 (__v64qi) _mm512_setzero_si512(),
128 __U);
129}
130
/* Immediate-count double shift left on 64-bit lanes: forwards A and B (as
 * __v8di) and the count I (as int) to __builtin_ia32_vpshldq512.
 * NOTE(review): I is presumably required to be a compile-time constant,
 * which would explain the macro form - confirm against the builtin. */
#define _mm512_shldi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), \
                                      (int)(I)))
Logan Chien55afb0a2018-10-15 10:42:14 +0800134
/* Merge-masked form of _mm512_shldi_epi64: the select builtin chooses, per
 * 64-bit lane under mask U, between the shifted value and the lane of S. */
#define _mm512_mask_shldi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
      (__v8di)(__m512i)(S)))
Logan Chien55afb0a2018-10-15 10:42:14 +0800139
/* Zero-masked form of _mm512_shldi_epi64: the select builtin chooses, per
 * 64-bit lane under mask U, between the shifted value and zero. */
#define _mm512_maskz_shldi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
      (__v8di)_mm512_setzero_si512()))
Logan Chien55afb0a2018-10-15 10:42:14 +0800144
/* Immediate-count double shift left on 32-bit lanes: forwards A and B (as
 * __v16si) and the count I (as int) to __builtin_ia32_vpshldd512.
 * NOTE(review): I is presumably required to be a compile-time constant,
 * which would explain the macro form - confirm against the builtin. */
#define _mm512_shldi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), \
                                      (int)(I)))
Logan Chien55afb0a2018-10-15 10:42:14 +0800148
/* Merge-masked form of _mm512_shldi_epi32: the select builtin chooses, per
 * 32-bit lane under mask U, between the shifted value and the lane of S. */
#define _mm512_mask_shldi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
      (__v16si)(__m512i)(S)))
Logan Chien55afb0a2018-10-15 10:42:14 +0800153
/* Zero-masked form of _mm512_shldi_epi32: the select builtin chooses, per
 * 32-bit lane under mask U, between the shifted value and zero. */
#define _mm512_maskz_shldi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
      (__v16si)_mm512_setzero_si512()))
Logan Chien55afb0a2018-10-15 10:42:14 +0800158
/* Immediate-count double shift left on 16-bit lanes: forwards A and B (as
 * __v32hi) and the count I (as int) to __builtin_ia32_vpshldw512.
 * NOTE(review): I is presumably required to be a compile-time constant,
 * which would explain the macro form - confirm against the builtin. */
#define _mm512_shldi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \
                                      (__v32hi)(__m512i)(B), \
                                      (int)(I)))
Logan Chien55afb0a2018-10-15 10:42:14 +0800162
/* Merge-masked form of _mm512_shldi_epi16: the select builtin chooses, per
 * 16-bit lane under mask U, between the shifted value and the lane of S. */
#define _mm512_mask_shldi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
      (__v32hi)(__m512i)(S)))
Logan Chien55afb0a2018-10-15 10:42:14 +0800167
/* Zero-masked form of _mm512_shldi_epi16: the select builtin chooses, per
 * 16-bit lane under mask U, between the shifted value and zero. */
#define _mm512_maskz_shldi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
      (__v32hi)_mm512_setzero_si512()))
Logan Chien55afb0a2018-10-15 10:42:14 +0800172
/* Immediate-count double shift right on 64-bit lanes: forwards A and B (as
 * __v8di) and the count I (as int) to __builtin_ia32_vpshrdq512.
 * NOTE(review): I is presumably required to be a compile-time constant,
 * which would explain the macro form - confirm against the builtin. */
#define _mm512_shrdi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), \
                                      (int)(I)))
Logan Chien55afb0a2018-10-15 10:42:14 +0800176
/* Merge-masked form of _mm512_shrdi_epi64: the select builtin chooses, per
 * 64-bit lane under mask U, between the shifted value and the lane of S. */
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
      (__v8di)(__m512i)(S)))
Logan Chien55afb0a2018-10-15 10:42:14 +0800181
/* Zero-masked form of _mm512_shrdi_epi64: the select builtin chooses, per
 * 64-bit lane under mask U, between the shifted value and zero. */
#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
      (__v8di)_mm512_setzero_si512()))
Logan Chien55afb0a2018-10-15 10:42:14 +0800186
/* Immediate-count double shift right on 32-bit lanes: forwards A and B (as
 * __v16si) and the count I (as int) to __builtin_ia32_vpshrdd512.
 * NOTE(review): I is presumably required to be a compile-time constant,
 * which would explain the macro form - confirm against the builtin. */
#define _mm512_shrdi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), \
                                      (int)(I)))
Logan Chien55afb0a2018-10-15 10:42:14 +0800190
/* Merge-masked form of _mm512_shrdi_epi32: the select builtin chooses, per
 * 32-bit lane under mask U, between the shifted value and the lane of S. */
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
      (__v16si)(__m512i)(S)))
Logan Chien55afb0a2018-10-15 10:42:14 +0800195
/* Zero-masked form of _mm512_shrdi_epi32: the select builtin chooses, per
 * 32-bit lane under mask U, between the shifted value and zero. */
#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
      (__v16si)_mm512_setzero_si512()))
Logan Chien55afb0a2018-10-15 10:42:14 +0800200
/* Immediate-count double shift right on 16-bit lanes: forwards A and B (as
 * __v32hi) and the count I (as int) to __builtin_ia32_vpshrdw512.
 * NOTE(review): I is presumably required to be a compile-time constant,
 * which would explain the macro form - confirm against the builtin. */
#define _mm512_shrdi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \
                                      (__v32hi)(__m512i)(B), \
                                      (int)(I)))
Logan Chien55afb0a2018-10-15 10:42:14 +0800204
/* Merge-masked form of _mm512_shrdi_epi16: the select builtin chooses, per
 * 16-bit lane under mask U, between the shifted value and the lane of S. */
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
      (__v32hi)(__m512i)(S)))
Logan Chien55afb0a2018-10-15 10:42:14 +0800209
/* Zero-masked form of _mm512_shrdi_epi16: the select builtin chooses, per
 * 16-bit lane under mask U, between the shifted value and zero. */
#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
      (__v32hi)_mm512_setzero_si512()))
Logan Chien55afb0a2018-10-15 10:42:14 +0800214
215static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800216_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800217{
Logan Chiendbcf4122019-03-21 10:50:25 +0800218 return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B,
219 (__v8di)__C);
Logan Chien55afb0a2018-10-15 10:42:14 +0800220}
221
222static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800223_mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800224{
Logan Chiendbcf4122019-03-21 10:50:25 +0800225 return (__m512i)__builtin_ia32_selectq_512(__U,
226 (__v8di)_mm512_shldv_epi64(__A, __B, __C),
227 (__v8di)__A);
Logan Chien55afb0a2018-10-15 10:42:14 +0800228}
229
230static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800231_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800232{
Logan Chiendbcf4122019-03-21 10:50:25 +0800233 return (__m512i)__builtin_ia32_selectq_512(__U,
234 (__v8di)_mm512_shldv_epi64(__A, __B, __C),
235 (__v8di)_mm512_setzero_si512());
Logan Chien55afb0a2018-10-15 10:42:14 +0800236}
237
238static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800239_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800240{
Logan Chiendbcf4122019-03-21 10:50:25 +0800241 return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B,
242 (__v16si)__C);
Logan Chien55afb0a2018-10-15 10:42:14 +0800243}
244
245static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800246_mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800247{
Logan Chiendbcf4122019-03-21 10:50:25 +0800248 return (__m512i)__builtin_ia32_selectd_512(__U,
249 (__v16si)_mm512_shldv_epi32(__A, __B, __C),
250 (__v16si)__A);
Logan Chien55afb0a2018-10-15 10:42:14 +0800251}
252
253static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800254_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800255{
Logan Chiendbcf4122019-03-21 10:50:25 +0800256 return (__m512i)__builtin_ia32_selectd_512(__U,
257 (__v16si)_mm512_shldv_epi32(__A, __B, __C),
258 (__v16si)_mm512_setzero_si512());
Logan Chien55afb0a2018-10-15 10:42:14 +0800259}
260
261static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800262_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800263{
Logan Chiendbcf4122019-03-21 10:50:25 +0800264 return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B,
265 (__v32hi)__C);
Logan Chien55afb0a2018-10-15 10:42:14 +0800266}
267
268static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800269_mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800270{
Logan Chiendbcf4122019-03-21 10:50:25 +0800271 return (__m512i)__builtin_ia32_selectw_512(__U,
272 (__v32hi)_mm512_shldv_epi16(__A, __B, __C),
273 (__v32hi)__A);
Logan Chien55afb0a2018-10-15 10:42:14 +0800274}
275
276static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800277_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800278{
Logan Chiendbcf4122019-03-21 10:50:25 +0800279 return (__m512i)__builtin_ia32_selectw_512(__U,
280 (__v32hi)_mm512_shldv_epi16(__A, __B, __C),
281 (__v32hi)_mm512_setzero_si512());
Logan Chien55afb0a2018-10-15 10:42:14 +0800282}
283
284static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800285_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800286{
Logan Chiendbcf4122019-03-21 10:50:25 +0800287 return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B,
288 (__v8di)__C);
Logan Chien55afb0a2018-10-15 10:42:14 +0800289}
290
291static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800292_mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800293{
Logan Chiendbcf4122019-03-21 10:50:25 +0800294 return (__m512i)__builtin_ia32_selectq_512(__U,
295 (__v8di)_mm512_shrdv_epi64(__A, __B, __C),
296 (__v8di)__A);
Logan Chien55afb0a2018-10-15 10:42:14 +0800297}
298
299static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800300_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800301{
Logan Chiendbcf4122019-03-21 10:50:25 +0800302 return (__m512i)__builtin_ia32_selectq_512(__U,
303 (__v8di)_mm512_shrdv_epi64(__A, __B, __C),
304 (__v8di)_mm512_setzero_si512());
Logan Chien55afb0a2018-10-15 10:42:14 +0800305}
306
307static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800308_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800309{
Logan Chiendbcf4122019-03-21 10:50:25 +0800310 return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B,
311 (__v16si)__C);
Logan Chien55afb0a2018-10-15 10:42:14 +0800312}
313
314static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800315_mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800316{
Logan Chiendbcf4122019-03-21 10:50:25 +0800317 return (__m512i) __builtin_ia32_selectd_512(__U,
318 (__v16si)_mm512_shrdv_epi32(__A, __B, __C),
319 (__v16si)__A);
Logan Chien55afb0a2018-10-15 10:42:14 +0800320}
321
322static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800323_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800324{
Logan Chiendbcf4122019-03-21 10:50:25 +0800325 return (__m512i) __builtin_ia32_selectd_512(__U,
326 (__v16si)_mm512_shrdv_epi32(__A, __B, __C),
327 (__v16si)_mm512_setzero_si512());
Logan Chien55afb0a2018-10-15 10:42:14 +0800328}
329
330static __inline__ __m512i __DEFAULT_FN_ATTRS
Logan Chiendbcf4122019-03-21 10:50:25 +0800331_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C)
Logan Chien55afb0a2018-10-15 10:42:14 +0800332{
Logan Chiendbcf4122019-03-21 10:50:25 +0800333 return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B,
334 (__v32hi)__C);
335}
336
337static __inline__ __m512i __DEFAULT_FN_ATTRS
338_mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
339{
340 return (__m512i)__builtin_ia32_selectw_512(__U,
341 (__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
342 (__v32hi)__A);
343}
344
345static __inline__ __m512i __DEFAULT_FN_ATTRS
346_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
347{
348 return (__m512i)__builtin_ia32_selectw_512(__U,
349 (__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
350 (__v32hi)_mm512_setzero_si512());
Logan Chien55afb0a2018-10-15 10:42:14 +0800351}
352
353
354#undef __DEFAULT_FN_ATTRS
355
356#endif
357