[X86] Add kortest intrinsics for 8, 32, and 64 bit masks. Add new intrinsic names for 16 bit masks.
This matches gcc and icc despite not being documented in the Intel Intrinsics Guide.
llvm-svn: 340798
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index fc5096d..ce2288e 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1749,8 +1749,14 @@
TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kordi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 46c073e..116863a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10012,14 +10012,21 @@
return EmitX86MaskedCompare(*this, CC, false, Ops);
}
+ case X86::BI__builtin_ia32_kortestcqi:
case X86::BI__builtin_ia32_kortestchi:
- case X86::BI__builtin_ia32_kortestzhi: {
+ case X86::BI__builtin_ia32_kortestcsi:
+ case X86::BI__builtin_ia32_kortestcdi: {
Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
- Value *C;
- if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
- C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
- else
- C = llvm::Constant::getNullValue(Builder.getInt16Ty());
+ Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
+ Value *Cmp = Builder.CreateICmpEQ(Or, C);
+ return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
+ }
+ case X86::BI__builtin_ia32_kortestzqi:
+ case X86::BI__builtin_ia32_kortestzhi:
+ case X86::BI__builtin_ia32_kortestzsi:
+ case X86::BI__builtin_ia32_kortestzdi: {
+ Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
+ Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
Value *Cmp = Builder.CreateICmpEQ(Or, C);
return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
}
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 6f39b2e..a08833b 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -107,6 +107,42 @@
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
}
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
+ *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
+ return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
+ *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
+ return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
+}
+
/* Integer compare */
#define _mm512_cmp_epi8_mask(a, b, p) \
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index a887078..563e94b 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -68,6 +68,24 @@
return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
}
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
+ *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
+ return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
+}
+
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
return (__m512i) ((__v8du) __A * (__v8du) __B);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 47cc86a..b5ef07a 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8351,6 +8351,24 @@
return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
+ *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
+ return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
+}
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
diff --git a/clang/test/CodeGen/avx512bw-builtins.c b/clang/test/CodeGen/avx512bw-builtins.c
index 7a3a7d0..808b1c0 100644
--- a/clang/test/CodeGen/avx512bw-builtins.c
+++ b/clang/test/CodeGen/avx512bw-builtins.c
@@ -134,6 +134,98 @@
__E, __F);
}
+unsigned char test_kortestz_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestz_mask32_u8
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[OR:%.*]] = or <32 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <32 x i1> [[OR]] to i32
+ // CHECK: [[CMP:%.*]] = icmp eq i32 [[CAST]], 0
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestz_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D));
+}
+
+unsigned char test_kortestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestc_mask32_u8
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[OR:%.*]] = or <32 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <32 x i1> [[OR]] to i32
+ // CHECK: [[CMP:%.*]] = icmp eq i32 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestc_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D));
+}
+
+unsigned char test_kortest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
+ // CHECK-LABEL: @test_kortest_mask32_u8
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[OR:%.*]] = or <32 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <32 x i1> [[OR]] to i32
+ // CHECK: [[CMP:%.*]] = icmp eq i32 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ // CHECK: [[LHS2:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS2:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[OR2:%.*]] = or <32 x i1> [[LHS2]], [[RHS2]]
+ // CHECK: [[CAST2:%.*]] = bitcast <32 x i1> [[OR2]] to i32
+ // CHECK: [[CMP2:%.*]] = icmp eq i32 [[CAST2]], 0
+ // CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32
+ // CHECK: trunc i32 [[ZEXT2]] to i8
+ return _kortest_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D), CF);
+}
+
+unsigned char test_kortestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestz_mask64_u8
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[OR:%.*]] = or <64 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <64 x i1> [[OR]] to i64
+ // CHECK: [[CMP:%.*]] = icmp eq i64 [[CAST]], 0
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestz_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D));
+}
+
+unsigned char test_kortestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestc_mask64_u8
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[OR:%.*]] = or <64 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <64 x i1> [[OR]] to i64
+ // CHECK: [[CMP:%.*]] = icmp eq i64 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestc_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D));
+}
+
+unsigned char test_kortest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
+ // CHECK-LABEL: @test_kortest_mask64_u8
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[OR:%.*]] = or <64 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <64 x i1> [[OR]] to i64
+ // CHECK: [[CMP:%.*]] = icmp eq i64 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ // CHECK: [[LHS2:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS2:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[OR2:%.*]] = or <64 x i1> [[LHS2]], [[RHS2]]
+ // CHECK: [[CAST2:%.*]] = bitcast <64 x i1> [[OR2]] to i64
+ // CHECK: [[CMP2:%.*]] = icmp eq i64 [[CAST2]], 0
+ // CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32
+ // CHECK: trunc i32 [[ZEXT2]] to i8
+ return _kortest_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D), CF);
+}
+
__mmask64 test_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
// CHECK-LABEL: @test_mm512_cmpeq_epi8_mask
// CHECK: icmp eq <64 x i8> %{{.*}}, %{{.*}}
diff --git a/clang/test/CodeGen/avx512dq-builtins.c b/clang/test/CodeGen/avx512dq-builtins.c
index 0dd0049..ee339a0 100644
--- a/clang/test/CodeGen/avx512dq-builtins.c
+++ b/clang/test/CodeGen/avx512dq-builtins.c
@@ -68,6 +68,52 @@
__E, __F);
}
+unsigned char test_kortestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestz_mask8_u8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <8 x i1> [[OR]] to i8
+ // CHECK: [[CMP:%.*]] = icmp eq i8 [[CAST]], 0
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestz_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D));
+}
+
+unsigned char test_kortestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestc_mask8_u8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <8 x i1> [[OR]] to i8
+ // CHECK: [[CMP:%.*]] = icmp eq i8 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestc_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D));
+}
+
+unsigned char test_kortest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
+ // CHECK-LABEL: @test_kortest_mask8_u8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <8 x i1> [[OR]] to i8
+ // CHECK: [[CMP:%.*]] = icmp eq i8 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ // CHECK: [[LHS2:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS2:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[OR2:%.*]] = or <8 x i1> [[LHS2]], [[RHS2]]
+ // CHECK: [[CAST2:%.*]] = bitcast <8 x i1> [[OR2]] to i8
+ // CHECK: [[CMP2:%.*]] = icmp eq i8 [[CAST2]], 0
+ // CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32
+ // CHECK: trunc i32 [[ZEXT2]] to i8
+ return _kortest_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D), CF);
+}
+
__m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mullo_epi64
// CHECK: mul <8 x i64>
diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c
index 326d6d44..39ac42f 100644
--- a/clang/test/CodeGen/avx512f-builtins.c
+++ b/clang/test/CodeGen/avx512f-builtins.c
@@ -8149,6 +8149,52 @@
_mm512_cmpneq_epu32_mask(__C, __D));
}
+unsigned char test_kortestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestz_mask16_u8
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
+ // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], 0
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestz_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D));
+}
+
+unsigned char test_kortestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestc_mask16_u8
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
+ // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestc_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D));
+}
+
+unsigned char test_kortest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
+ // CHECK-LABEL: @test_kortest_mask16_u8
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
+ // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ // CHECK: [[LHS2:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS2:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[OR2:%.*]] = or <16 x i1> [[LHS2]], [[RHS2]]
+ // CHECK: [[CAST2:%.*]] = bitcast <16 x i1> [[OR2]] to i16
+ // CHECK: [[CMP2:%.*]] = icmp eq i16 [[CAST2]], 0
+ // CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32
+ // CHECK: trunc i32 [[ZEXT2]] to i8
+ return _kortest_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D), CF);
+}
+
__mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: @test_mm512_kunpackb
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>