[X86] Stop implicitly enabling avx512vl when avx512bf16 is enabled.
Previously, we did this so that the 256-bit selectw builtin could be used to implement the 512->256-bit conversion intrinsic.
With this commit, we instead use a masked convert builtin; custom code in CGBuiltin emits both the intrinsic call and the 256-bit select for it. The header then only needs to call that one builtin.
llvm-svn: 360924
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 8e70720..4396357 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -11885,6 +11885,22 @@
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
}
+ case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
+ case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
+ IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
+ break;
+ case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
+ IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
+ break;
+ }
+ Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
+ return EmitX86Select(*this, Ops[2], Res, Ops[1]);
+ }
+
case X86::BI__emul:
case X86::BI__emulu: {
llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);