blob: acc3da233f19463ee41d50b47ef27056ead2a904 [file] [log] [blame]
/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -070024#ifndef __IMMINTRIN_H
25#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
26#endif
Stephen Hinesee4ca282014-12-02 17:05:12 -080027
28#ifndef __AVX512BWINTRIN_H
29#define __AVX512BWINTRIN_H
30
/* Lane masks: one bit per vector element. __mmask32 pairs with the 32-lane
 * 16-bit vectors and __mmask64 with the 64-lane 8-bit vectors used below. */
typedef unsigned __mmask32;
typedef unsigned long long __mmask64;

/* 64-byte vector views handed to the byte and word compare builtins. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
35
36
37/* Integer compare */
38
39static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
40_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
41 return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
42 (__mmask64)-1);
43}
44
45static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
46_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
47 return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
48 __u);
49}
50
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -070051static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
52_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) {
53 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
54 (__mmask64)-1);
55}
56
57static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
58_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
59 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
60 __u);
61}
62
Stephen Hinesee4ca282014-12-02 17:05:12 -080063static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
64_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
65 return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
66 (__mmask32)-1);
67}
68
69static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
70_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
71 return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
72 __u);
73}
74
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -070075static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
76_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) {
77 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
78 (__mmask32)-1);
79}
80
81static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
82_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
83 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
84 __u);
85}
86
87static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
88_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) {
89 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
90 (__mmask64)-1);
91}
92
93static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
94_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
95 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
96 __u);
97}
98
99static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
100_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) {
101 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
102 (__mmask64)-1);
103}
104
105static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
106_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
107 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
108 __u);
109}
110
111static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
112_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) {
113 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
114 (__mmask32)-1);
115}
116
117static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
118_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
119 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
120 __u);
121}
122
123static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
124_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) {
125 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
126 (__mmask32)-1);
127}
128
129static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
130_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
131 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
132 __u);
133}
134
135static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
136_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) {
137 return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
138 (__mmask64)-1);
139}
140
141static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
142_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
143 return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
144 __u);
145}
146
147static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
148_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) {
149 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
150 (__mmask64)-1);
151}
152
153static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
154_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
155 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
156 __u);
157}
158
159static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
160_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) {
161 return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
162 (__mmask32)-1);
163}
164
165static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
166_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
167 return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
168 __u);
169}
170
171static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
172_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) {
173 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
174 (__mmask32)-1);
175}
176
177static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
178_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
179 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
180 __u);
181}
182
183static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
184_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) {
185 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
186 (__mmask64)-1);
187}
188
189static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
190_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
191 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
192 __u);
193}
194
195static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
196_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) {
197 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
198 (__mmask64)-1);
199}
200
201static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
202_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
203 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
204 __u);
205}
206
207static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
208_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) {
209 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
210 (__mmask32)-1);
211}
212
213static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
214_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
215 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
216 __u);
217}
218
219static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
220_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) {
221 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
222 (__mmask32)-1);
223}
224
225static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
226_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
227 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
228 __u);
229}
230
231static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
232_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) {
233 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
234 (__mmask64)-1);
235}
236
237static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
238_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
239 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
240 __u);
241}
242
243static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
244_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) {
245 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
246 (__mmask64)-1);
247}
248
249static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
250_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
251 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
252 __u);
253}
254
255static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
256_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) {
257 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
258 (__mmask32)-1);
259}
260
261static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
262_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
263 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
264 __u);
265}
266
267static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
268_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) {
269 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
270 (__mmask32)-1);
271}
272
273static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
274_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
275 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
276 __u);
277}
278
279static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
280_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) {
281 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
282 (__mmask64)-1);
283}
284
285static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
286_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
287 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
288 __u);
289}
290
291static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
292_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) {
293 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
294 (__mmask64)-1);
295}
296
297static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
298_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
299 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
300 __u);
301}
302
303static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
304_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) {
305 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
306 (__mmask32)-1);
307}
308
309static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
310_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
311 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
312 __u);
313}
314
315static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
316_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) {
317 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
318 (__mmask32)-1);
319}
320
321static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
322_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
323 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
324 __u);
325}
326
/* Generic signed 8-bit compare of a and b over all 64 byte lanes.
 * p is the immediate comparison predicate; the fixed-predicate wrappers in
 * this header use 1 (LT), 2 (LE), 4 (NE) and 5 (GE) with this builtin.
 * The result is a 64-bit mask, one bit per lane. It is cast to __mmask64
 * (not __mmask16) so that lanes 16..63 are not truncated away. */
#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                         (__v64qi)(__m512i)(b), \
                                         (p), (__mmask64)-1); })
331
/* Generic signed 8-bit compare of a and b over 64 byte lanes, with m passed
 * to the builtin as the 64-bit write mask.
 * p is the immediate comparison predicate; the fixed-predicate wrappers in
 * this header use 1 (LT), 2 (LE), 4 (NE) and 5 (GE) with this builtin.
 * The result is cast to __mmask64 (not __mmask16) so that lanes 16..63 are
 * not truncated away. */
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                         (__v64qi)(__m512i)(b), \
                                         (p), (__mmask64)(m)); })
336
/* Generic unsigned 8-bit compare of a and b over all 64 byte lanes.
 * p is the immediate comparison predicate; the fixed-predicate wrappers in
 * this header use 0 (EQ), 1 (LT), 2 (LE), 4 (NE), 5 (GE) and 6 (GT) with
 * this builtin.
 * The result is a 64-bit mask, one bit per lane. It is cast to __mmask64
 * (not __mmask16) so that lanes 16..63 are not truncated away. */
#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), \
                                          (p), (__mmask64)-1); })
341
/* Generic unsigned 8-bit compare of a and b over 64 byte lanes, with m passed
 * to the builtin as the 64-bit write mask.
 * p is the immediate comparison predicate; the fixed-predicate wrappers in
 * this header use 0 (EQ), 1 (LT), 2 (LE), 4 (NE), 5 (GE) and 6 (GT) with
 * this builtin.
 * The result is cast to __mmask64 (not __mmask16) so that lanes 16..63 are
 * not truncated away. */
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), \
                                          (p), (__mmask64)(m)); })
346
/* Generic signed 16-bit compare of a and b over all 32 word lanes.
 * p is the immediate comparison predicate; the fixed-predicate wrappers in
 * this header use 1 (LT), 2 (LE), 4 (NE) and 5 (GE) with this builtin.
 * The result is a 32-bit mask, one bit per lane. It is cast to __mmask32
 * (not __mmask16) so that lanes 16..31 are not truncated away. */
#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                         (__v32hi)(__m512i)(b), \
                                         (p), (__mmask32)-1); })
351
/* Generic signed 16-bit compare of a and b over 32 word lanes, with m passed
 * to the builtin as the 32-bit write mask.
 * p is the immediate comparison predicate; the fixed-predicate wrappers in
 * this header use 1 (LT), 2 (LE), 4 (NE) and 5 (GE) with this builtin.
 * The result is cast to __mmask32 (not __mmask16) so that lanes 16..31 are
 * not truncated away. */
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                         (__v32hi)(__m512i)(b), \
                                         (p), (__mmask32)(m)); })
356
/* Generic unsigned 16-bit compare of a and b over all 32 word lanes.
 * p is the immediate comparison predicate; the fixed-predicate wrappers in
 * this header use 0 (EQ), 1 (LT), 2 (LE), 4 (NE), 5 (GE) and 6 (GT) with
 * this builtin.
 * The result is a 32-bit mask, one bit per lane. It is cast to __mmask32
 * (not __mmask16) so that lanes 16..31 are not truncated away. */
#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), \
                                          (p), (__mmask32)-1); })
361
/* Generic unsigned 16-bit compare of a and b over 32 word lanes, with m
 * passed to the builtin as the 32-bit write mask.
 * p is the immediate comparison predicate; the fixed-predicate wrappers in
 * this header use 0 (EQ), 1 (LT), 2 (LE), 4 (NE), 5 (GE) and 6 (GT) with
 * this builtin.
 * The result is cast to __mmask32 (not __mmask16) so that lanes 16..31 are
 * not truncated away. */
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), \
                                          (p), (__mmask32)(m)); })
366
Stephen Hinesee4ca282014-12-02 17:05:12 -0800367#endif