/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
10#ifndef __IMMINTRIN_H
11#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __GFNIINTRIN_H
15#define __GFNIINTRIN_H
16
17
/* Expands to __builtin_ia32_vgf2p8affineinvqb_v16qi applied to A and B (each
 * converted to __m128i and reinterpreted as __v16qi) and to I converted to
 * char; the result is cast to __m128i.
 * The whole expansion is parenthesized so it acts as one primary expression:
 * without that, a postfix operator written after the macro call (e.g. [0])
 * would apply to the builtin call before the leading cast.
 * NOTE(review): going by the name this is the GF(2^8) "affine inverse"
 * operation with I as its immediate byte -- confirm against the Intel
 * intrinsics documentation. */
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
  ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
                                                   (__v16qi)(__m128i)(B), \
                                                   (char)(I)))
Coby Tayreef4811eb2017-12-27 08:37:47 +000022
/* Expands to __builtin_ia32_selectb_128 with U converted to __mmask16, the
 * value of _mm_gf2p8affineinv_epi64_epi8(A, B, I) viewed as __v16qi, and S
 * (converted to __m128i) viewed as __v16qi; the result is cast to __m128i.
 * The whole expansion is parenthesized so that operators at the use site
 * cannot bind to the builtin call ahead of the leading cast.
 * NOTE(review): presumably bytes whose bit in U is set take the computed
 * value and the remaining bytes come from S -- confirm. */
#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectb_128( \
      (__mmask16)(U), \
      (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \
      (__v16qi)(__m128i)(S)))
Coby Tayreef4811eb2017-12-27 08:37:47 +000027
28
/* Forwards to _mm_mask_gf2p8affineinv_epi64_epi8 with _mm_setzero_si128() as
 * the S argument and U, A, B, I passed through; the result is cast to
 * __m128i.  The whole expansion is parenthesized so that operators at the use
 * site cannot bind to the inner call ahead of the leading cast. */
#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
  ((__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \
                                               U, A, B, I))
Coby Tayreef4811eb2017-12-27 08:37:47 +000032
33
/* Expands to __builtin_ia32_vgf2p8affineinvqb_v32qi applied to A and B (each
 * converted to __m256i and reinterpreted as __v32qi) and to I converted to
 * char; the result is cast to __m256i.
 * The whole expansion is parenthesized so it acts as one primary expression:
 * without that, a postfix operator written after the macro call would apply
 * to the builtin call before the leading cast.
 * NOTE(review): going by the name this is the 256-bit GF(2^8) "affine
 * inverse" operation -- confirm against the Intel intrinsics documentation. */
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
  ((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
                                                   (__v32qi)(__m256i)(B), \
                                                   (char)(I)))
Coby Tayreef4811eb2017-12-27 08:37:47 +000038
/* Expands to __builtin_ia32_selectb_256 with U converted to __mmask32, the
 * value of _mm256_gf2p8affineinv_epi64_epi8(A, B, I) viewed as __v32qi, and S
 * (converted to __m256i) viewed as __v32qi; the result is cast to __m256i.
 * The whole expansion is parenthesized so that operators at the use site
 * cannot bind to the builtin call ahead of the leading cast.
 * NOTE(review): presumably bytes whose bit in U is set take the computed
 * value and the remaining bytes come from S -- confirm. */
#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectb_256( \
      (__mmask32)(U), \
      (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \
      (__v32qi)(__m256i)(S)))
Coby Tayreef4811eb2017-12-27 08:37:47 +000043
/* Forwards to _mm256_mask_gf2p8affineinv_epi64_epi8 with
 * _mm256_setzero_si256() as the S argument and U, A, B, I passed through; the
 * result is cast to __m256i.  The whole expansion is parenthesized so that
 * operators at the use site cannot bind to the inner call ahead of the
 * leading cast. */
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
  ((__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8( \
      (__m256i)_mm256_setzero_si256(), U, A, B, I))
Coby Tayreef4811eb2017-12-27 08:37:47 +000047
48
/* Expands to __builtin_ia32_vgf2p8affineinvqb_v64qi applied to A and B (each
 * converted to __m512i and reinterpreted as __v64qi) and to I converted to
 * char; the result is cast to __m512i.
 * The whole expansion is parenthesized so it acts as one primary expression:
 * without that, a postfix operator written after the macro call would apply
 * to the builtin call before the leading cast.
 * NOTE(review): going by the name this is the 512-bit GF(2^8) "affine
 * inverse" operation -- confirm against the Intel intrinsics documentation. */
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \
  ((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \
                                                   (__v64qi)(__m512i)(B), \
                                                   (char)(I)))
Coby Tayreef4811eb2017-12-27 08:37:47 +000053
/* Expands to __builtin_ia32_selectb_512 with U converted to __mmask64, the
 * value of _mm512_gf2p8affineinv_epi64_epi8(A, B, I) viewed as __v64qi, and S
 * (converted to __m512i) viewed as __v64qi; the result is cast to __m512i.
 * The whole expansion is parenthesized so that operators at the use site
 * cannot bind to the builtin call ahead of the leading cast.
 * NOTE(review): presumably bytes whose bit in U is set take the computed
 * value and the remaining bytes come from S -- confirm. */
#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectb_512( \
      (__mmask64)(U), \
      (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \
      (__v64qi)(__m512i)(S)))
Coby Tayreef4811eb2017-12-27 08:37:47 +000058
/* Forwards to _mm512_mask_gf2p8affineinv_epi64_epi8 with
 * _mm512_setzero_si512() as the S argument and U, A, B, I passed through; the
 * result is cast to __m512i.  The whole expansion is parenthesized so that
 * operators at the use site cannot bind to the inner call ahead of the
 * leading cast. */
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
  ((__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8( \
      (__m512i)_mm512_setzero_si512(), U, A, B, I))
Coby Tayreef4811eb2017-12-27 08:37:47 +000062
/* Expands to __builtin_ia32_vgf2p8affineqb_v16qi applied to A and B (each
 * converted to __m128i and reinterpreted as __v16qi) and to I converted to
 * char; the result is cast to __m128i.
 * The whole expansion is parenthesized so it acts as one primary expression:
 * without that, a postfix operator written after the macro call (e.g. [0])
 * would apply to the builtin call before the leading cast.
 * NOTE(review): going by the name this is the GF(2^8) affine transform with I
 * as its immediate byte -- confirm against the Intel intrinsics
 * documentation. */
#define _mm_gf2p8affine_epi64_epi8(A, B, I) \
  ((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
                                                (__v16qi)(__m128i)(B), \
                                                (char)(I)))
Coby Tayreef4811eb2017-12-27 08:37:47 +000067
/* Expands to __builtin_ia32_selectb_128 with U converted to __mmask16, the
 * value of _mm_gf2p8affine_epi64_epi8(A, B, I) viewed as __v16qi, and S
 * (converted to __m128i) viewed as __v16qi; the result is cast to __m128i.
 * The whole expansion is parenthesized so that operators at the use site
 * cannot bind to the builtin call ahead of the leading cast.
 * NOTE(review): presumably bytes whose bit in U is set take the computed
 * value and the remaining bytes come from S -- confirm. */
#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectb_128( \
      (__mmask16)(U), \
      (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
      (__v16qi)(__m128i)(S)))
Coby Tayreef4811eb2017-12-27 08:37:47 +000072
73
/* Forwards to _mm_mask_gf2p8affine_epi64_epi8 with _mm_setzero_si128() as the
 * S argument and U, A, B, I passed through; the result is cast to __m128i.
 * The whole expansion is parenthesized so that operators at the use site
 * cannot bind to the inner call ahead of the leading cast. */
#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
  ((__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \
                                            U, A, B, I))
Coby Tayreef4811eb2017-12-27 08:37:47 +000077
78
/* Expands to __builtin_ia32_vgf2p8affineqb_v32qi applied to A and B (each
 * converted to __m256i and reinterpreted as __v32qi) and to I converted to
 * char; the result is cast to __m256i.
 * The whole expansion is parenthesized so it acts as one primary expression:
 * without that, a postfix operator written after the macro call would apply
 * to the builtin call before the leading cast.
 * NOTE(review): going by the name this is the 256-bit GF(2^8) affine
 * transform -- confirm against the Intel intrinsics documentation. */
#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \
  ((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
                                                (__v32qi)(__m256i)(B), \
                                                (char)(I)))
Coby Tayreef4811eb2017-12-27 08:37:47 +000083
/* Expands to __builtin_ia32_selectb_256 with U converted to __mmask32, the
 * value of _mm256_gf2p8affine_epi64_epi8(A, B, I) viewed as __v32qi, and S
 * (converted to __m256i) viewed as __v32qi; the result is cast to __m256i.
 * The whole expansion is parenthesized so that operators at the use site
 * cannot bind to the builtin call ahead of the leading cast.
 * NOTE(review): presumably bytes whose bit in U is set take the computed
 * value and the remaining bytes come from S -- confirm. */
#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectb_256( \
      (__mmask32)(U), \
      (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \
      (__v32qi)(__m256i)(S)))
Coby Tayreef4811eb2017-12-27 08:37:47 +000088
/* Forwards to _mm256_mask_gf2p8affine_epi64_epi8 with _mm256_setzero_si256()
 * as the S argument and U, A, B, I passed through; the result is cast to
 * __m256i.  The whole expansion is parenthesized so that operators at the use
 * site cannot bind to the inner call ahead of the leading cast. */
#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
  ((__m256i)_mm256_mask_gf2p8affine_epi64_epi8( \
      (__m256i)_mm256_setzero_si256(), U, A, B, I))
Coby Tayreef4811eb2017-12-27 08:37:47 +000092
93
/* Expands to __builtin_ia32_vgf2p8affineqb_v64qi applied to A and B (each
 * converted to __m512i and reinterpreted as __v64qi) and to I converted to
 * char; the result is cast to __m512i.
 * The whole expansion is parenthesized so it acts as one primary expression:
 * without that, a postfix operator written after the macro call would apply
 * to the builtin call before the leading cast.
 * NOTE(review): going by the name this is the 512-bit GF(2^8) affine
 * transform -- confirm against the Intel intrinsics documentation. */
#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \
  ((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
                                                (__v64qi)(__m512i)(B), \
                                                (char)(I)))
Coby Tayreef4811eb2017-12-27 08:37:47 +000098
/* Expands to __builtin_ia32_selectb_512 with U converted to __mmask64, the
 * value of _mm512_gf2p8affine_epi64_epi8(A, B, I) viewed as __v64qi, and S
 * (converted to __m512i) viewed as __v64qi; the result is cast to __m512i.
 * The whole expansion is parenthesized so that operators at the use site
 * cannot bind to the builtin call ahead of the leading cast.
 * NOTE(review): presumably bytes whose bit in U is set take the computed
 * value and the remaining bytes come from S -- confirm. */
#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectb_512( \
      (__mmask64)(U), \
      (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \
      (__v64qi)(__m512i)(S)))
Coby Tayreef4811eb2017-12-27 08:37:47 +0000103
/* Forwards to _mm512_mask_gf2p8affine_epi64_epi8 with _mm512_setzero_si512()
 * as the S argument and U, A, B, I passed through; the result is cast to
 * __m512i.  The whole expansion is parenthesized so that operators at the use
 * site cannot bind to the inner call ahead of the leading cast. */
#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
  ((__m512i)_mm512_mask_gf2p8affine_epi64_epi8( \
      (__m512i)_mm512_setzero_si512(), U, A, B, I))
Coby Tayreef4811eb2017-12-27 08:37:47 +0000107
/* Attribute sets applied to the inline functions defined below.  All of them
 * request always-inline, no debug info, a target-feature list and a minimum
 * vector width; only the last two differ between the variants. */

/* Unmasked 128-bit form: "gfni" only, minimum vector width 128. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("gfni"), \
                 __min_vector_width__(128)))

/* Unmasked YMM form: "avx,gfni", minimum vector width 256. */
#define __DEFAULT_FN_ATTRS_Y \
  __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), \
                 __min_vector_width__(256)))

/* ZMM forms: "avx512bw,gfni", minimum vector width 512. */
#define __DEFAULT_FN_ATTRS_Z \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), \
                 __min_vector_width__(512)))

/* VLX forms: "avx512bw,avx512vl,gfni" with minimum vector width 128 or 256. */
#define __DEFAULT_FN_ATTRS_VL128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512bw,avx512vl,gfni"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_VL256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512bw,avx512vl,gfni"), \
                 __min_vector_width__(256)))
Coby Tayreef4811eb2017-12-27 08:37:47 +0000120
121static __inline__ __m128i __DEFAULT_FN_ATTRS
122_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
123{
124 return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
125 (__v16qi) __B);
126}
127
Craig Topper74c10e32018-07-09 19:00:16 +0000128static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128
Coby Tayreef4811eb2017-12-27 08:37:47 +0000129_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)
130{
131 return (__m128i) __builtin_ia32_selectb_128(__U,
132 (__v16qi) _mm_gf2p8mul_epi8(__A, __B),
133 (__v16qi) __S);
134}
135
Craig Topper74c10e32018-07-09 19:00:16 +0000136static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128
Coby Tayreef4811eb2017-12-27 08:37:47 +0000137_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)
138{
139 return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(),
140 __U, __A, __B);
141}
142
Craig Topper66ef4182018-05-07 21:47:11 +0000143static __inline__ __m256i __DEFAULT_FN_ATTRS_Y
Coby Tayreef4811eb2017-12-27 08:37:47 +0000144_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
145{
146 return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,
147 (__v32qi) __B);
148}
149
Craig Topper74c10e32018-07-09 19:00:16 +0000150static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256
Coby Tayreef4811eb2017-12-27 08:37:47 +0000151_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)
152{
153 return (__m256i) __builtin_ia32_selectb_256(__U,
154 (__v32qi) _mm256_gf2p8mul_epi8(__A, __B),
155 (__v32qi) __S);
156}
157
Craig Topper74c10e32018-07-09 19:00:16 +0000158static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256
Coby Tayreef4811eb2017-12-27 08:37:47 +0000159_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)
160{
161 return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(),
162 __U, __A, __B);
163}
164
Craig Topper66ef4182018-05-07 21:47:11 +0000165static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
Coby Tayreef4811eb2017-12-27 08:37:47 +0000166_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)
167{
168 return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,
169 (__v64qi) __B);
170}
171
Craig Topper66ef4182018-05-07 21:47:11 +0000172static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
Coby Tayreef4811eb2017-12-27 08:37:47 +0000173_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)
174{
175 return (__m512i) __builtin_ia32_selectb_512(__U,
176 (__v64qi) _mm512_gf2p8mul_epi8(__A, __B),
177 (__v64qi) __S);
178}
179
Craig Topper66ef4182018-05-07 21:47:11 +0000180static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
Coby Tayreef4811eb2017-12-27 08:37:47 +0000181_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)
182{
Craig Topperdff5b312018-05-30 18:02:11 +0000183 return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(),
Coby Tayreef4811eb2017-12-27 08:37:47 +0000184 __U, __A, __B);
185}
186
/* Remove the attribute helper macros so they do not remain defined after this
 * header. */
#undef __DEFAULT_FN_ATTRS_VL256
#undef __DEFAULT_FN_ATTRS_VL128
#undef __DEFAULT_FN_ATTRS_Z
#undef __DEFAULT_FN_ATTRS_Y
#undef __DEFAULT_FN_ATTRS
Coby Tayreef4811eb2017-12-27 08:37:47 +0000192
Craig Topper73d1d402018-05-30 22:33:21 +0000193#endif /* __GFNIINTRIN_H */
Coby Tayreef4811eb2017-12-27 08:37:47 +0000194