blob: 5889401d105537c9147895b42857f38d1d37abbf [file] [log] [blame]
/*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __IFMAVLINTRIN_H
15#define __IFMAVLINTRIN_H
16
/* Define the default attributes for the functions in this file.
 *
 * Both variants force inlining, suppress debug info, and enable the
 * "avx512ifma,avx512vl" target features for the annotated function; they
 * differ only in the __min_vector_width__ value (128 for the __m128i
 * intrinsics, 256 for the __m256i intrinsics). */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256)))



Logan Chien55afb0a2018-10-15 10:42:14 +080023static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080024_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
25{
Logan Chien55afb0a2018-10-15 10:42:14 +080026 return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di) __X, (__v2di) __Y,
27 (__v2di) __Z);
Logan Chien2833ffb2018-10-09 10:03:24 +080028}
29
Logan Chien55afb0a2018-10-15 10:42:14 +080030static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080031_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
32{
Logan Chien55afb0a2018-10-15 10:42:14 +080033 return (__m128i)__builtin_ia32_selectq_128(__M,
34 (__v2di)_mm_madd52hi_epu64(__W, __X, __Y),
35 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +080036}
37
Logan Chien55afb0a2018-10-15 10:42:14 +080038static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080039_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
40{
Logan Chien55afb0a2018-10-15 10:42:14 +080041 return (__m128i)__builtin_ia32_selectq_128(__M,
42 (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z),
43 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +080044}
45
Logan Chien55afb0a2018-10-15 10:42:14 +080046static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +080047_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
48{
Logan Chien55afb0a2018-10-15 10:42:14 +080049 return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
50 (__v4di)__Z);
Logan Chien2833ffb2018-10-09 10:03:24 +080051}
52
Logan Chien55afb0a2018-10-15 10:42:14 +080053static __inline__ __m256i __DEFAULT_FN_ATTRS256
54_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +080055{
Logan Chien55afb0a2018-10-15 10:42:14 +080056 return (__m256i)__builtin_ia32_selectq_256(__M,
57 (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y),
58 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +080059}
60
Logan Chien55afb0a2018-10-15 10:42:14 +080061static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +080062_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
63{
Logan Chien55afb0a2018-10-15 10:42:14 +080064 return (__m256i)__builtin_ia32_selectq_256(__M,
65 (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z),
66 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +080067}
68
Logan Chien55afb0a2018-10-15 10:42:14 +080069static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080070_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
71{
Logan Chien55afb0a2018-10-15 10:42:14 +080072 return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
73 (__v2di)__Z);
Logan Chien2833ffb2018-10-09 10:03:24 +080074}
75
Logan Chien55afb0a2018-10-15 10:42:14 +080076static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080077_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
78{
Logan Chien55afb0a2018-10-15 10:42:14 +080079 return (__m128i)__builtin_ia32_selectq_128(__M,
80 (__v2di)_mm_madd52lo_epu64(__W, __X, __Y),
81 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +080082}
83
Logan Chien55afb0a2018-10-15 10:42:14 +080084static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080085_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
86{
Logan Chien55afb0a2018-10-15 10:42:14 +080087 return (__m128i)__builtin_ia32_selectq_128(__M,
88 (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z),
89 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +080090}
91
Logan Chien55afb0a2018-10-15 10:42:14 +080092static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +080093_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
94{
Logan Chien55afb0a2018-10-15 10:42:14 +080095 return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
96 (__v4di)__Z);
Logan Chien2833ffb2018-10-09 10:03:24 +080097}
98
Logan Chien55afb0a2018-10-15 10:42:14 +080099static __inline__ __m256i __DEFAULT_FN_ATTRS256
100_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
Logan Chien2833ffb2018-10-09 10:03:24 +0800101{
Logan Chien55afb0a2018-10-15 10:42:14 +0800102 return (__m256i)__builtin_ia32_selectq_256(__M,
103 (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y),
104 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800105}
106
Logan Chien55afb0a2018-10-15 10:42:14 +0800107static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800108_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
109{
Logan Chien55afb0a2018-10-15 10:42:14 +0800110 return (__m256i)__builtin_ia32_selectq_256(__M,
111 (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z),
112 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800113}
114
115
Logan Chien55afb0a2018-10-15 10:42:14 +0800116#undef __DEFAULT_FN_ATTRS128
117#undef __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800118
119#endif