Split the estimate() interface into separate functions for each type. NFC. It was hacky to use an opcode as a switch because it won't always match (rsqrte != sqrte), and it looks like we'll need to add more special casing per arch than I had hoped for. E.g., x86 will prefer a different NR estimate implementation. ARM will want to use its 'step' instructions. There also don't appear to have been any new estimate instructions in any arch in a long, long time. Altivec vloge and vexpte may have been the first and last in that field... llvm-svn: 218698

commit: 8fde95cb2b547a98b55e13f005ca00b899c095fb [log] [tgz]
author: Sanjay Patel <spatel@rotateright.com> Tue Sep 30 20:28:48 2014 +0000
committer: Sanjay Patel <spatel@rotateright.com> Tue Sep 30 20:28:48 2014 +0000
tree: ab61764af3435ad1b99aac48b5508cfb7b3bdbd9
parent: 916cca728fbc5b8458b30a59e06d72a11f691f65 [diff] [blame]
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5750e2f..e0396fd 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

@@ -7458,25 +7458,14 @@
 // Target Optimization Hooks
 //===----------------------------------------------------------------------===//
 
-SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand,
-                                       DAGCombinerInfo &DCI,
-                                       unsigned &RefinementSteps) const {
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps) const {
   EVT VT = Operand.getValueType();
-  SDValue RV;
-  if (Opcode == ISD::FSQRT) {
-    if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
-        (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
-        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-        (VT == MVT::v2f64 && Subtarget.hasVSX()))
-      RV = DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
-  } else if (Opcode == ISD::FDIV) {
-    if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
-        (VT == MVT::f64 && Subtarget.hasFRE())  ||
-        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-        (VT == MVT::v2f64 && Subtarget.hasVSX()))
-      RV = DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
-  }
-  if (RV.getNode()) {
+  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+      (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
+      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
     // Convergence is quadratic, so we essentially double the number of digits
     // correct after every iteration. For both FRE and FRSQRTE, the minimum
     // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
@@ -7484,8 +7473,29 @@
     RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
     if (VT.getScalarType() == MVT::f64)
       ++RefinementSteps;
+    return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
   }
-  return RV;
+  return SDValue();
+}
+
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps) const {
+  EVT VT = Operand.getValueType();
+  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+      (VT == MVT::f64 && Subtarget.hasFRE())  ||
+      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+    // Convergence is quadratic, so we essentially double the number of digits
+    // correct after every iteration. For both FRE and FRSQRTE, the minimum
+    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+    // 2^-14. IEEE float has 23 digits and double has 52 digits.
+    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
+    if (VT.getScalarType() == MVT::f64)
+      ++RefinementSteps;
+    return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+  }
+  return SDValue();
 }
 
 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
commit	8fde95cb2b547a98b55e13f005ca00b899c095fb	[log] [tgz]
author	Sanjay Patel <spatel@rotateright.com>	Tue Sep 30 20:28:48 2014 +0000
committer	Sanjay Patel <spatel@rotateright.com>	Tue Sep 30 20:28:48 2014 +0000
tree	ab61764af3435ad1b99aac48b5508cfb7b3bdbd9
parent	916cca728fbc5b8458b30a59e06d72a11f691f65 [diff] [blame]