blob: cc8b72528d012699700aa02652ac51223a487c48 [file] [log] [blame]
/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512vlcdintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512VLCDINTRIN_H
14#define __AVX512VLCDINTRIN_H
15
16/* Define the default attributes for the functions in this file. */
Logan Chien55afb0a2018-10-15 10:42:14 +080017#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128)))
18#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256)))
Logan Chien2833ffb2018-10-09 10:03:24 +080019
20
Logan Chien55afb0a2018-10-15 10:42:14 +080021static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080022_mm_broadcastmb_epi64 (__mmask8 __A)
23{
Logan Chien55afb0a2018-10-15 10:42:14 +080024 return (__m128i) _mm_set1_epi64x((long long) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +080025}
26
Logan Chien55afb0a2018-10-15 10:42:14 +080027static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +080028_mm256_broadcastmb_epi64 (__mmask8 __A)
29{
Logan Chien55afb0a2018-10-15 10:42:14 +080030 return (__m256i) _mm256_set1_epi64x((long long)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +080031}
32
Logan Chien55afb0a2018-10-15 10:42:14 +080033static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080034_mm_broadcastmw_epi32 (__mmask16 __A)
35{
Logan Chien55afb0a2018-10-15 10:42:14 +080036 return (__m128i) _mm_set1_epi32((int)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +080037}
38
Logan Chien55afb0a2018-10-15 10:42:14 +080039static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +080040_mm256_broadcastmw_epi32 (__mmask16 __A)
41{
Logan Chien55afb0a2018-10-15 10:42:14 +080042 return (__m256i) _mm256_set1_epi32((int)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +080043}
44
45
Logan Chien55afb0a2018-10-15 10:42:14 +080046static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080047_mm_conflict_epi64 (__m128i __A)
48{
Logan Chiendbcf4122019-03-21 10:50:25 +080049 return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +080050}
51
Logan Chien55afb0a2018-10-15 10:42:14 +080052static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080053_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
54{
Logan Chiendbcf4122019-03-21 10:50:25 +080055 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
56 (__v2di)_mm_conflict_epi64(__A),
57 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +080058}
59
Logan Chien55afb0a2018-10-15 10:42:14 +080060static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080061_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
62{
Logan Chiendbcf4122019-03-21 10:50:25 +080063 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
64 (__v2di)_mm_conflict_epi64(__A),
65 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +080066}
67
Logan Chien55afb0a2018-10-15 10:42:14 +080068static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +080069_mm256_conflict_epi64 (__m256i __A)
70{
Logan Chiendbcf4122019-03-21 10:50:25 +080071 return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +080072}
73
Logan Chien55afb0a2018-10-15 10:42:14 +080074static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +080075_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
76{
Logan Chiendbcf4122019-03-21 10:50:25 +080077 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
78 (__v4di)_mm256_conflict_epi64(__A),
79 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +080080}
81
Logan Chien55afb0a2018-10-15 10:42:14 +080082static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +080083_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
84{
Logan Chiendbcf4122019-03-21 10:50:25 +080085 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
86 (__v4di)_mm256_conflict_epi64(__A),
87 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +080088}
89
Logan Chien55afb0a2018-10-15 10:42:14 +080090static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080091_mm_conflict_epi32 (__m128i __A)
92{
Logan Chiendbcf4122019-03-21 10:50:25 +080093 return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +080094}
95
Logan Chien55afb0a2018-10-15 10:42:14 +080096static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080097_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
98{
Logan Chiendbcf4122019-03-21 10:50:25 +080099 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
100 (__v4si)_mm_conflict_epi32(__A),
101 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800102}
103
Logan Chien55afb0a2018-10-15 10:42:14 +0800104static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800105_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
106{
Logan Chiendbcf4122019-03-21 10:50:25 +0800107 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
108 (__v4si)_mm_conflict_epi32(__A),
109 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800110}
111
Logan Chien55afb0a2018-10-15 10:42:14 +0800112static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800113_mm256_conflict_epi32 (__m256i __A)
114{
Logan Chiendbcf4122019-03-21 10:50:25 +0800115 return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +0800116}
117
Logan Chien55afb0a2018-10-15 10:42:14 +0800118static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800119_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
120{
Logan Chiendbcf4122019-03-21 10:50:25 +0800121 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
122 (__v8si)_mm256_conflict_epi32(__A),
123 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800124}
125
Logan Chien55afb0a2018-10-15 10:42:14 +0800126static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800127_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
128{
Logan Chiendbcf4122019-03-21 10:50:25 +0800129 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
130 (__v8si)_mm256_conflict_epi32(__A),
131 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800132}
133
Logan Chien55afb0a2018-10-15 10:42:14 +0800134static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800135_mm_lzcnt_epi32 (__m128i __A)
136{
Logan Chien55afb0a2018-10-15 10:42:14 +0800137 return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +0800138}
139
Logan Chien55afb0a2018-10-15 10:42:14 +0800140static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800141_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
142{
Logan Chien55afb0a2018-10-15 10:42:14 +0800143 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
144 (__v4si)_mm_lzcnt_epi32(__A),
145 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800146}
147
Logan Chien55afb0a2018-10-15 10:42:14 +0800148static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800149_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
150{
Logan Chien55afb0a2018-10-15 10:42:14 +0800151 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
152 (__v4si)_mm_lzcnt_epi32(__A),
153 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800154}
155
Logan Chien55afb0a2018-10-15 10:42:14 +0800156static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800157_mm256_lzcnt_epi32 (__m256i __A)
158{
Logan Chien55afb0a2018-10-15 10:42:14 +0800159 return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +0800160}
161
Logan Chien55afb0a2018-10-15 10:42:14 +0800162static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800163_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
164{
Logan Chien55afb0a2018-10-15 10:42:14 +0800165 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
166 (__v8si)_mm256_lzcnt_epi32(__A),
167 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800168}
169
Logan Chien55afb0a2018-10-15 10:42:14 +0800170static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800171_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
172{
Logan Chien55afb0a2018-10-15 10:42:14 +0800173 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
174 (__v8si)_mm256_lzcnt_epi32(__A),
175 (__v8si)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800176}
177
Logan Chien55afb0a2018-10-15 10:42:14 +0800178static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800179_mm_lzcnt_epi64 (__m128i __A)
180{
Logan Chien55afb0a2018-10-15 10:42:14 +0800181 return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +0800182}
183
Logan Chien55afb0a2018-10-15 10:42:14 +0800184static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800185_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
186{
Logan Chien55afb0a2018-10-15 10:42:14 +0800187 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
188 (__v2di)_mm_lzcnt_epi64(__A),
189 (__v2di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800190}
191
Logan Chien55afb0a2018-10-15 10:42:14 +0800192static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800193_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
194{
Logan Chien55afb0a2018-10-15 10:42:14 +0800195 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
196 (__v2di)_mm_lzcnt_epi64(__A),
197 (__v2di)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800198}
199
Logan Chien55afb0a2018-10-15 10:42:14 +0800200static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800201_mm256_lzcnt_epi64 (__m256i __A)
202{
Logan Chien55afb0a2018-10-15 10:42:14 +0800203 return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +0800204}
205
Logan Chien55afb0a2018-10-15 10:42:14 +0800206static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800207_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
208{
Logan Chien55afb0a2018-10-15 10:42:14 +0800209 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
210 (__v4di)_mm256_lzcnt_epi64(__A),
211 (__v4di)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800212}
213
Logan Chien55afb0a2018-10-15 10:42:14 +0800214static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800215_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
216{
Logan Chien55afb0a2018-10-15 10:42:14 +0800217 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
218 (__v4di)_mm256_lzcnt_epi64(__A),
219 (__v4di)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800220}
221
Logan Chien55afb0a2018-10-15 10:42:14 +0800222#undef __DEFAULT_FN_ATTRS128
223#undef __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800224
225#endif /* __AVX512VLCDINTRIN_H */