blob: d0591e406f7b5b7ad18a55e0d6b3f0734d49018c [file] [log] [blame]
/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -070024#ifndef __IMMINTRIN_H
25#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
26#endif
Stephen Hinesee4ca282014-12-02 17:05:12 -080027
28#ifndef __AVX512BWINTRIN_H
29#define __AVX512BWINTRIN_H
30
/* 64-byte vector views used by the wrappers in this header:
 * 64 lanes of char and 32 lanes of short. */
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));

/* Integer mask types; this header pairs __mmask32 with __v32hi operations
 * and __mmask64 with __v64qi operations. */
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;
35
Stephen Hines28c5e1e2015-08-13 18:18:46 -070036static __inline __v64qi __attribute__ ((__always_inline__, __nodebug__))
37_mm512_setzero_qi (void) {
38 return (__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0,
39 0, 0, 0, 0, 0, 0, 0, 0,
40 0, 0, 0, 0, 0, 0, 0, 0,
41 0, 0, 0, 0, 0, 0, 0, 0,
42 0, 0, 0, 0, 0, 0, 0, 0,
43 0, 0, 0, 0, 0, 0, 0, 0,
44 0, 0, 0, 0, 0, 0, 0, 0,
45 0, 0, 0, 0, 0, 0, 0, 0 };
46}
47
48static __inline __v32hi __attribute__ ((__always_inline__, __nodebug__))
49_mm512_setzero_hi (void) {
50 return (__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0,
51 0, 0, 0, 0, 0, 0, 0, 0,
52 0, 0, 0, 0, 0, 0, 0, 0,
53 0, 0, 0, 0, 0, 0, 0, 0 };
54}
Stephen Hinesee4ca282014-12-02 17:05:12 -080055
56/* Integer compare */
57
58static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
59_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
60 return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
61 (__mmask64)-1);
62}
63
64static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
65_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
66 return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
67 __u);
68}
69
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -070070static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
71_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) {
72 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
73 (__mmask64)-1);
74}
75
76static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
77_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
78 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
79 __u);
80}
81
Stephen Hinesee4ca282014-12-02 17:05:12 -080082static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
83_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
84 return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
85 (__mmask32)-1);
86}
87
88static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
89_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
90 return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
91 __u);
92}
93
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -070094static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
95_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) {
96 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
97 (__mmask32)-1);
98}
99
100static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
101_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
102 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
103 __u);
104}
105
106static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
107_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) {
108 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
109 (__mmask64)-1);
110}
111
112static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
113_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
114 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
115 __u);
116}
117
118static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
119_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) {
120 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
121 (__mmask64)-1);
122}
123
124static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
125_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
126 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
127 __u);
128}
129
130static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
131_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) {
132 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
133 (__mmask32)-1);
134}
135
136static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
137_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
138 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
139 __u);
140}
141
142static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
143_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) {
144 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
145 (__mmask32)-1);
146}
147
148static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
149_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
150 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
151 __u);
152}
153
154static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
155_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) {
156 return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
157 (__mmask64)-1);
158}
159
160static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
161_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
162 return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
163 __u);
164}
165
166static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
167_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) {
168 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
169 (__mmask64)-1);
170}
171
172static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
173_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
174 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
175 __u);
176}
177
178static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
179_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) {
180 return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
181 (__mmask32)-1);
182}
183
184static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
185_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
186 return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
187 __u);
188}
189
190static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
191_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) {
192 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
193 (__mmask32)-1);
194}
195
196static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
197_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
198 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
199 __u);
200}
201
202static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
203_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) {
204 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
205 (__mmask64)-1);
206}
207
208static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
209_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
210 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
211 __u);
212}
213
214static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
215_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) {
216 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
217 (__mmask64)-1);
218}
219
220static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
221_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
222 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
223 __u);
224}
225
226static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
227_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) {
228 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
229 (__mmask32)-1);
230}
231
232static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
233_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
234 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
235 __u);
236}
237
238static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
239_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) {
240 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
241 (__mmask32)-1);
242}
243
244static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
245_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
246 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
247 __u);
248}
249
250static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
251_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) {
252 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
253 (__mmask64)-1);
254}
255
256static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
257_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
258 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
259 __u);
260}
261
262static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
263_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) {
264 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
265 (__mmask64)-1);
266}
267
268static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
269_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
270 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
271 __u);
272}
273
274static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
275_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) {
276 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
277 (__mmask32)-1);
278}
279
280static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
281_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
282 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
283 __u);
284}
285
286static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
287_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) {
288 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
289 (__mmask32)-1);
290}
291
292static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
293_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
294 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
295 __u);
296}
297
298static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
299_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) {
300 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
301 (__mmask64)-1);
302}
303
304static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
305_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
306 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
307 __u);
308}
309
310static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
311_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) {
312 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
313 (__mmask64)-1);
314}
315
316static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
317_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
318 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
319 __u);
320}
321
322static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
323_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) {
324 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
325 (__mmask32)-1);
326}
327
328static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
329_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
330 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
331 __u);
332}
333
334static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
335_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) {
336 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
337 (__mmask32)-1);
338}
339
340static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
341_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
342 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
343 __u);
344}
345
Stephen Hines28c5e1e2015-08-13 18:18:46 -0700346static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
347_mm512_add_epi8 (__m512i __A, __m512i __B) {
348 return (__m512i) ((__v64qi) __A + (__v64qi) __B);
349}
350
351static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
352_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
353 return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
354 (__v64qi) __B,
355 (__v64qi) __W,
356 (__mmask64) __U);
357}
358
359static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
360_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
361 return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
362 (__v64qi) __B,
363 (__v64qi)
364 _mm512_setzero_qi (),
365 (__mmask64) __U);
366}
367
368static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
369_mm512_sub_epi8 (__m512i __A, __m512i __B) {
370 return (__m512i) ((__v64qi) __A - (__v64qi) __B);
371}
372
373static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
374_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
375 return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
376 (__v64qi) __B,
377 (__v64qi) __W,
378 (__mmask64) __U);
379}
380
381static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
382_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
383 return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
384 (__v64qi) __B,
385 (__v64qi)
386 _mm512_setzero_qi (),
387 (__mmask64) __U);
388}
389
390static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
391_mm512_add_epi16 (__m512i __A, __m512i __B) {
392 return (__m512i) ((__v32hi) __A + (__v32hi) __B);
393}
394
395static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
396_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
397 return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
398 (__v32hi) __B,
399 (__v32hi) __W,
400 (__mmask32) __U);
401}
402
403static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
404_mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
405 return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
406 (__v32hi) __B,
407 (__v32hi)
408 _mm512_setzero_hi (),
409 (__mmask32) __U);
410}
411
412static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
413_mm512_sub_epi16 (__m512i __A, __m512i __B) {
414 return (__m512i) ((__v32hi) __A - (__v32hi) __B);
415}
416
417static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
418_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
419 return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
420 (__v32hi) __B,
421 (__v32hi) __W,
422 (__mmask32) __U);
423}
424
425static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
426_mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
427 return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
428 (__v32hi) __B,
429 (__v32hi)
430 _mm512_setzero_hi (),
431 (__mmask32) __U);
432}
433
434static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
435_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
436 return (__m512i) ((__v32hi) __A * (__v32hi) __B);
437}
438
439static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
440_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
441 return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
442 (__v32hi) __B,
443 (__v32hi) __W,
444 (__mmask32) __U);
445}
446
447static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
448_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
449 return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
450 (__v32hi) __B,
451 (__v32hi)
452 _mm512_setzero_hi (),
453 (__mmask32) __U);
454}
455
/* Generic epi8 compare: a and b are viewed as 64 x 8-bit lanes and passed to
 * __builtin_ia32_cmpb512_mask with predicate p (must be a compile-time
 * constant) and every mask bit set.
 * The result carries one bit per lane, so it is cast to __mmask64; a
 * narrower mask type would silently discard the upper lanes. */
#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                         (__v64qi)(__m512i)(b), \
                                         (p), (__mmask64)-1); })
460
/* Generic masked epi8 compare: a and b are viewed as 64 x 8-bit lanes and
 * passed to __builtin_ia32_cmpb512_mask with predicate p (must be a
 * compile-time constant) and m as the mask operand.
 * The result carries one bit per lane, so it is cast to __mmask64; a
 * narrower mask type would silently discard the upper lanes. */
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                         (__v64qi)(__m512i)(b), \
                                         (p), (__mmask64)(m)); })
465
/* Generic epu8 compare: a and b are viewed as 64 x 8-bit lanes and passed to
 * __builtin_ia32_ucmpb512_mask with predicate p (must be a compile-time
 * constant) and every mask bit set.
 * The result carries one bit per lane, so it is cast to __mmask64; a
 * narrower mask type would silently discard the upper lanes. */
#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), \
                                          (p), (__mmask64)-1); })
470
/* Generic masked epu8 compare: a and b are viewed as 64 x 8-bit lanes and
 * passed to __builtin_ia32_ucmpb512_mask with predicate p (must be a
 * compile-time constant) and m as the mask operand.
 * The result carries one bit per lane, so it is cast to __mmask64; a
 * narrower mask type would silently discard the upper lanes. */
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), \
                                          (p), (__mmask64)(m)); })
475
/* Generic epi16 compare: a and b are viewed as 32 x 16-bit lanes and passed
 * to __builtin_ia32_cmpw512_mask with predicate p (must be a compile-time
 * constant) and every mask bit set.
 * The result carries one bit per lane, so it is cast to __mmask32; a
 * narrower mask type would silently discard the upper lanes. */
#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                         (__v32hi)(__m512i)(b), \
                                         (p), (__mmask32)-1); })
480
/* Generic masked epi16 compare: a and b are viewed as 32 x 16-bit lanes and
 * passed to __builtin_ia32_cmpw512_mask with predicate p (must be a
 * compile-time constant) and m as the mask operand.
 * The result carries one bit per lane, so it is cast to __mmask32; a
 * narrower mask type would silently discard the upper lanes. */
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                         (__v32hi)(__m512i)(b), \
                                         (p), (__mmask32)(m)); })
485
/* Generic epu16 compare: a and b are viewed as 32 x 16-bit lanes and passed
 * to __builtin_ia32_ucmpw512_mask with predicate p (must be a compile-time
 * constant) and every mask bit set.
 * The result carries one bit per lane, so it is cast to __mmask32; a
 * narrower mask type would silently discard the upper lanes. */
#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), \
                                          (p), (__mmask32)-1); })
490
/* Generic masked epu16 compare: a and b are viewed as 32 x 16-bit lanes and
 * passed to __builtin_ia32_ucmpw512_mask with predicate p (must be a
 * compile-time constant) and m as the mask operand.
 * The result carries one bit per lane, so it is cast to __mmask32; a
 * narrower mask type would silently discard the upper lanes. */
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), \
                                          (p), (__mmask32)(m)); })
495
Stephen Hinesee4ca282014-12-02 17:05:12 -0800496#endif