[PowerPC] Make use of the TargetRecip system
r238842 added the TargetRecip system for controlling use of reciprocal
estimates for sqrt and division using a set of parameters that can be set by
the frontend. Clang now supports a sophisticated -mrecip option, and this will
allow that option to effectively control the relevant code-generation
functionality of the PPC backend.
llvm-svn: 241985
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ceda291..0ed9b05 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9067,6 +9067,19 @@
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
+static std::string getRecipOp(const char *Base, EVT VT) {
+ std::string RecipOp(Base);
+ if (VT.getScalarType() == MVT::f64)
+ RecipOp += "d";
+ else
+ RecipOp += "f";
+
+ if (VT.isVector())
+ RecipOp = "vec-" + RecipOp;
+
+ return RecipOp;
+}
+
SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
DAGCombinerInfo &DCI,
unsigned &RefinementSteps,
@@ -9078,13 +9091,12 @@
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- // Convergence is quadratic, so we essentially double the number of digits
- // correct after every iteration. For both FRE and FRSQRTE, the minimum
- // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
- // 2^-14. IEEE float has 23 digits and double has 52 digits.
- RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
- if (VT.getScalarType() == MVT::f64)
- ++RefinementSteps;
+ TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+ std::string RecipOp = getRecipOp("sqrt", VT);
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
UseOneConstNR = true;
return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
@@ -9101,13 +9113,12 @@
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- // Convergence is quadratic, so we essentially double the number of digits
- // correct after every iteration. For both FRE and FRSQRTE, the minimum
- // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
- // 2^-14. IEEE float has 23 digits and double has 52 digits.
- RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
- if (VT.getScalarType() == MVT::f64)
- ++RefinementSteps;
+ TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+ std::string RecipOp = getRecipOp("div", VT);
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
return SDValue();
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 074bc87..1daf244 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -172,7 +172,26 @@
: LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
TLOF(createTLOF(getTargetTriple())),
- TargetABI(computeTargetABI(TT, Options)) {
+ TargetABI(computeTargetABI(TT, Options)),
+ Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
+
+ // For the estimates, convergence is quadratic, so we essentially double the
+ // number of digits correct after every iteration. For both FRE and FRSQRTE,
+ // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+ // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
+ unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
+ RefinementSteps64 = RefinementSteps + 1;
+
+ this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
+ this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
+ this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
+ this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
+
+ this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
+ this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
+ this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
+ this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
+
initAsmInfo();
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index 5c0f7e6..6496339 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -29,6 +29,8 @@
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
PPCABI TargetABI;
+ PPCSubtarget Subtarget;
+
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public: