[PowerPC] Make use of the TargetRecip system r238842 added the TargetRecip system for controlling use of reciprocal estimates for sqrt and division using a set of parameters that can be set by the frontend. Clang now supports a sophisticated -mrecip option, and this will allow that option to effectively control the relevant code-generation functionality of the PPC backend. llvm-svn: 241985

commit: cbf08925efc9047a6e457101d5ffdb36774c769d [log] [tgz]
author: Hal Finkel <hfinkel@anl.gov> Sun Jul 12 02:33:57 2015 +0000
committer: Hal Finkel <hfinkel@anl.gov> Sun Jul 12 02:33:57 2015 +0000
tree: 2fa82b7a9585d0778c80d703d6a1ad99a5afa716
parent: 965cea5670ffc8b8a2904656203174b0033bd901 [diff]
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ceda291..0ed9b05 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

@@ -9067,6 +9067,19 @@
 // Target Optimization Hooks
 //===----------------------------------------------------------------------===//
 
+static std::string getRecipOp(const char *Base, EVT VT) {
+  std::string RecipOp(Base);
+  if (VT.getScalarType() == MVT::f64)
+    RecipOp += "d";
+  else
+    RecipOp += "f";
+
+  if (VT.isVector())
+    RecipOp = "vec-" + RecipOp;
+
+  return RecipOp;
+}
+
 SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
                                             DAGCombinerInfo &DCI,
                                             unsigned &RefinementSteps,
@@ -9078,13 +9091,12 @@
       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
-    // Convergence is quadratic, so we essentially double the number of digits
-    // correct after every iteration. For both FRE and FRSQRTE, the minimum
-    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
-    // 2^-14. IEEE float has 23 digits and double has 52 digits.
-    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
-    if (VT.getScalarType() == MVT::f64)
-      ++RefinementSteps;
+    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+    std::string RecipOp = getRecipOp("sqrt", VT);
+    if (!Recips.isEnabled(RecipOp))
+      return SDValue();
+
+    RefinementSteps = Recips.getRefinementSteps(RecipOp);
     UseOneConstNR = true;
     return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
   }
@@ -9101,13 +9113,12 @@
       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
-    // Convergence is quadratic, so we essentially double the number of digits
-    // correct after every iteration. For both FRE and FRSQRTE, the minimum
-    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
-    // 2^-14. IEEE float has 23 digits and double has 52 digits.
-    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
-    if (VT.getScalarType() == MVT::f64)
-      ++RefinementSteps;
+    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+    std::string RecipOp = getRecipOp("div", VT);
+    if (!Recips.isEnabled(RecipOp))
+      return SDValue();
+
+    RefinementSteps = Recips.getRefinementSteps(RecipOp);
     return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
   }
   return SDValue();

diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 074bc87..1daf244 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp

@@ -172,7 +172,26 @@
     : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
                         computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
       TLOF(createTLOF(getTargetTriple())),
-      TargetABI(computeTargetABI(TT, Options)) {
+      TargetABI(computeTargetABI(TT, Options)),
+      Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
+
+  // For the estimates, convergence is quadratic, so we essentially double the
+  // number of digits correct after every iteration. For both FRE and FRSQRTE,
+  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
+  unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
+           RefinementSteps64 = RefinementSteps + 1;
+
+  this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
+  this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
+  this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
+  this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
+
+  this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
+  this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
+  this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
+  this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
+
   initAsmInfo();
 }
 

diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index 5c0f7e6..6496339 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h

@@ -29,6 +29,8 @@
 private:
   std::unique_ptr<TargetLoweringObjectFile> TLOF;
   PPCABI TargetABI;
+  PPCSubtarget Subtarget;
+
   mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
 
 public:
commit	cbf08925efc9047a6e457101d5ffdb36774c769d	[log] [tgz]
author	Hal Finkel <hfinkel@anl.gov>	Sun Jul 12 02:33:57 2015 +0000
committer	Hal Finkel <hfinkel@anl.gov>	Sun Jul 12 02:33:57 2015 +0000
tree	2fa82b7a9585d0778c80d703d6a1ad99a5afa716
parent	965cea5670ffc8b8a2904656203174b0033bd901 [diff]