[AMDGPU] Improve reciprocal handling
When denormals are supported, we produce a full division for
1.0f / x. That can still be replaced by the faster version:
bool c = fabs(x) > 0x1.0p+96f;
float s = c ? 0x1.0p-32f : 1.0f;
x *= s;
return s * v_rcp_f32(x)
when the requested accuracy is 2.5 ulp or less. The same version
is used for non-1.0 numerators when denormals are not supported; in that
case a 1.0 numerator uses just v_rcp_f32.
The optimization of 1/x is extended to the case -1/x, which is the
same except for the resulting sign bit.
OpenCL conformance passed with denormals both enabled and disabled.
Differential Revision: https://reviews.llvm.org/D47805
llvm-svn: 334142
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index f0b9c9e..53fb9e3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -372,13 +372,18 @@
return true;
}
-static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
+static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
if (!CNum)
- return false;
+ return HasDenormals;
+
+ if (UnsafeDiv)
+ return true;
+
+ bool IsOne = CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0);
// Reciprocal f32 is handled separately without denormals.
- return UnsafeDiv || CNum->isExactlyValue(+1.0);
+ return HasDenormals ^ IsOne;
}
// Insert an intrinsic for fast fdiv for safe math situations where we can
@@ -404,7 +409,7 @@
FMF.allowReciprocal();
// With UnsafeDiv node will be optimized to just rcp and mul.
- if (ST->hasFP32Denormals() || UnsafeDiv)
+ if (UnsafeDiv)
return false;
IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
@@ -418,6 +423,7 @@
Value *NewFDiv = nullptr;
+ bool HasDenormals = ST->hasFP32Denormals();
if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
NewFDiv = UndefValue::get(VT);
@@ -428,7 +434,7 @@
Value *DenEltI = Builder.CreateExtractElement(Den, I);
Value *NewElt;
- if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) {
+ if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasDenormals)) {
NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
} else {
NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
@@ -437,7 +443,7 @@
NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
}
} else {
- if (!shouldKeepFDivF32(Num, UnsafeDiv))
+ if (!shouldKeepFDivF32(Num, UnsafeDiv, HasDenormals))
NewFDiv = Builder.CreateCall(Decl, { Num, Den });
}
@@ -447,7 +453,7 @@
FDiv.eraseFromParent();
}
- return true;
+ return !!NewFDiv;
}
static bool hasUnsafeFPMath(const Function &F) {