Recommit "[FPEnv][X86] Platform-specific builtin constrained FP enablement"
With "REQUIRES: x86-registered-target" added to the tests.
Also remove some unneeded FIXMEs, but add a FIXME for the bad IR generated for
FMADDSUB/FMSUBADD with constrained FP (exceptions are not suppressed on the
unselected elements).
Original patch by Kevin P. Neal
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 44947b4..7e0c531 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10094,8 +10094,14 @@
Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
} else {
llvm::Type *Ty = A->getType();
- Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
- Res = CGF.Builder.CreateCall(FMA, {A, B, C} );
+ Function *FMA;
+ if (CGF.Builder.getIsFPConstrained()) {
+ FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
+ Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
+ } else {
+ FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Res = CGF.Builder.CreateCall(FMA, {A, B, C});
+ }
if (IsAddSub) {
// Negate even elts in C using a mask.
@@ -10104,8 +10110,14 @@
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i + (i % 2) * NumElts;
+ // FIXME: This code isn't exception safe for constrained FP. We need to
+ // suppress exceptions on the unselected elements.
Value *NegC = CGF.Builder.CreateFNeg(C);
- Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
+ Value *FMSub;
+ if (CGF.Builder.getIsFPConstrained())
+ FMSub = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, NegC} );
+ else
+ FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
Res = CGF.Builder.CreateShuffleVector(FMSub, Res, Indices);
}
}
@@ -10164,6 +10176,10 @@
Intrinsic::x86_avx512_vfmadd_f64;
Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
{Ops[0], Ops[1], Ops[2], Ops[4]});
+ } else if (CGF.Builder.getIsFPConstrained()) {
+ Function *FMA = CGF.CGM.getIntrinsic(
+ Intrinsic::experimental_constrained_fma, Ops[0]->getType());
+ Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
} else {
Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
@@ -11892,8 +11908,15 @@
case X86::BI__builtin_ia32_sqrtss:
case X86::BI__builtin_ia32_sqrtsd: {
Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
- Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
- A = Builder.CreateCall(F, {A});
+ Function *F;
+ if (Builder.getIsFPConstrained()) {
+ F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
+ A->getType());
+ A = Builder.CreateConstrainedFPCall(F, {A});
+ } else {
+ F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
+ A = Builder.CreateCall(F, {A});
+ }
return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
}
case X86::BI__builtin_ia32_sqrtsd_round_mask:
@@ -11908,8 +11931,15 @@
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
}
Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
- Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
- A = Builder.CreateCall(F, A);
+ Function *F;
+ if (Builder.getIsFPConstrained()) {
+ F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
+ A->getType());
+ A = Builder.CreateConstrainedFPCall(F, A);
+ } else {
+ F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
+ A = Builder.CreateCall(F, A);
+ }
Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
@@ -11931,8 +11961,14 @@
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
}
}
- Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
- return Builder.CreateCall(F, Ops[0]);
+ if (Builder.getIsFPConstrained()) {
+ Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
+ Ops[0]->getType());
+ return Builder.CreateConstrainedFPCall(F, Ops[0]);
+ } else {
+ Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
+ return Builder.CreateCall(F, Ops[0]);
+ }
}
case X86::BI__builtin_ia32_pabsb128:
case X86::BI__builtin_ia32_pabsw128: