[Target] move reciprocal estimate settings from TargetOptions to TargetLowering

The motivation for the change is that we can't have pseudo-global settings for codegen living in TargetOptions because that doesn't work with LTO.

Ideally, these reciprocal attributes will be moved to the instruction-level via FMF, metadata, or something else. But making them function attributes is at least an improvement over the current state.

The ingredients of this patch are:

- Remove the reciprocal estimate command-line debug option.
- Add TargetRecip to TargetLowering.
- Remove TargetRecip from TargetOptions.
- Clean up the TargetRecip implementation to work with this new scheme.
- Set the default reciprocal settings in TargetLoweringBase (everything is off).
- Update the PowerPC defaults, users, and tests.
- Update the x86 defaults, users, and tests.

Note that if this patch needs to be reverted, the related clang patch checked in at r283251 should be reverted too.

Differential Revision: https://reviews.llvm.org/D24816

llvm-svn: 283252

commit: bfdbea6481a2247fcc63c27e3acd393000fd8d41 [log] [tgz]
author: Sanjay Patel <spatel@rotateright.com> Tue Oct 04 20:46:43 2016 +0000
committer: Sanjay Patel <spatel@rotateright.com> Tue Oct 04 20:46:43 2016 +0000
tree: e7dfce8045101809ecdb13200171bdfab26beda5
parent: 0bb72c14240496d74f9263fad6aba830625499bd [diff]
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index be66f7e..74ed7bc 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp

@@ -838,6 +838,7 @@
   InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple());
   InitCmpLibcallCCs(CmpLibcallCCs);
   InitLibcallCallingConvs(LibcallCallingConvs);
+  ReciprocalEstimates.set("all", false, 0);
 }
 
 void TargetLoweringBase::initActions() {
@@ -1485,6 +1486,22 @@
   return MVT::i32; // return the default value
 }
 
+TargetRecip
+TargetLoweringBase::getTargetRecipForFunc(MachineFunction &MF) const {
+  const Function *F = MF.getFunction();
+  StringRef RecipAttrName = "reciprocal-estimates";
+  if (!F->hasFnAttribute(RecipAttrName))
+    return ReciprocalEstimates;
+
+  // Make a copy of the target's default reciprocal codegen settings.
+  TargetRecip Recips = ReciprocalEstimates;
+
+  // Override any settings that are customized for this function.
+  StringRef RecipString = F->getFnAttribute(RecipAttrName).getValueAsString();
+  Recips.set(RecipString);
+  return Recips;
+}
+
 /// getVectorTypeBreakdown - Vector types are broken down into some number of
 /// legal first class types.  For example, MVT::v8f32 maps to 2 MVT::v4f32
 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b1d2fe1..05e823d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

@@ -901,6 +901,23 @@
     setTargetDAGCombine(ISD::FSQRT);
   }
 
+  // For the estimates, convergence is quadratic, so we essentially double the
+  // number of digits correct after every iteration. For both FRE and FRSQRTE,
+  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
+  unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
+           RefinementSteps64 = RefinementSteps + 1;
+
+  ReciprocalEstimates.set("sqrtf", true, RefinementSteps);
+  ReciprocalEstimates.set("vec-sqrtf", true, RefinementSteps);
+  ReciprocalEstimates.set("divf", true, RefinementSteps);
+  ReciprocalEstimates.set("vec-divf", true, RefinementSteps);
+
+  ReciprocalEstimates.set("sqrtd", true, RefinementSteps64);
+  ReciprocalEstimates.set("vec-sqrtd", true, RefinementSteps64);
+  ReciprocalEstimates.set("divd", true, RefinementSteps64);
+  ReciprocalEstimates.set("vec-divd", true, RefinementSteps64);
+
   // Darwin long double math library functions have $LDBL128 appended.
   if (Subtarget.isDarwin()) {
     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -9646,7 +9663,7 @@
       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
-    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+    TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
     std::string RecipOp = getRecipOp("sqrt", VT);
     if (!Recips.isEnabled(RecipOp))
       return SDValue();
@@ -9668,7 +9685,7 @@
       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
-    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+    TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
     std::string RecipOp = getRecipOp("div", VT);
     if (!Recips.isEnabled(RecipOp))
       return SDValue();

diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 1bb6b67..9b78739 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp

@@ -204,23 +204,6 @@
       TargetABI(computeTargetABI(TT, Options)),
       Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
 
-  // For the estimates, convergence is quadratic, so we essentially double the
-  // number of digits correct after every iteration. For both FRE and FRSQRTE,
-  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
-  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
-  unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
-           RefinementSteps64 = RefinementSteps + 1;
-
-  this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
-  this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
-  this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
-  this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
-
-  this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
-  this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
-  this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
-  this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
-
   initAsmInfo();
 }
 

diff --git a/llvm/lib/Target/TargetRecip.cpp b/llvm/lib/Target/TargetRecip.cpp
index 183fa50..938ed9f 100644
--- a/llvm/lib/Target/TargetRecip.cpp
+++ b/llvm/lib/Target/TargetRecip.cpp

@@ -16,7 +16,9 @@
 
 #include "llvm/Target/TargetRecip.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
@@ -36,9 +38,7 @@
   "vec-sqrtf",
 };
 
-// The uninitialized state is needed for the enabled settings and refinement
-// steps because custom settings may arrive via the command-line before target
-// defaults are set.
+/// All operations are disabled by default and refinement steps are set to zero.
 TargetRecip::TargetRecip() {
   unsigned NumStrings = llvm::array_lengthof(RecipOps);
   for (unsigned i = 0; i < NumStrings; ++i)
@@ -137,18 +137,8 @@
         assert(Iter == RecipMap.end() && "Float entry missing from map");
         report_fatal_error("Invalid option for -recip.");
       }
-      
-      // The option was specified without a float or double suffix.
-      if (RecipMap[Val.str() + 'd'].Enabled != Uninitialized) {
-        // Make sure that the double entry was not already specified.
-        // The float entry will be checked below.
-        report_fatal_error("Duplicate option for -recip.");
-      }
     }
     
-    if (Iter->second.Enabled != Uninitialized)
-      report_fatal_error("Duplicate option for -recip.");
-    
     // Mark the matched option as found. Do not allow duplicate specifiers.
     Iter->second.Enabled = !IsDisabled;
     if (!RefStepString.empty())
@@ -164,50 +154,45 @@
   }
 }
 
-TargetRecip::TargetRecip(const std::vector<std::string> &Args) :
-  TargetRecip() {
-  unsigned NumArgs = Args.size();
+void TargetRecip::set(StringRef &RecipString) {
+  SmallVector<StringRef, 4> RecipStringVector;
+  SplitString(RecipString, RecipStringVector, ",");
+  std::vector<std::string> RecipVector;
+  for (unsigned i = 0; i < RecipStringVector.size(); ++i)
+    RecipVector.push_back(RecipStringVector[i].str());
+
+  unsigned NumArgs = RecipVector.size();
 
   // Check if "all", "default", or "none" was specified.
-  if (NumArgs == 1 && parseGlobalParams(Args[0]))
+  if (NumArgs == 1 && parseGlobalParams(RecipVector[0]))
     return;
- 
-  parseIndividualParams(Args);
+
+  parseIndividualParams(RecipVector);
 }
 
 bool TargetRecip::isEnabled(StringRef Key) const {
   ConstRecipIter Iter = RecipMap.find(Key);
   assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
-  assert(Iter->second.Enabled != Uninitialized &&
-         "Enablement setting was not initialized");
   return Iter->second.Enabled;
 }
 
 unsigned TargetRecip::getRefinementSteps(StringRef Key) const {
   ConstRecipIter Iter = RecipMap.find(Key);
   assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
-  assert(Iter->second.RefinementSteps != Uninitialized &&
-         "Refinement step setting was not initialized");
   return Iter->second.RefinementSteps;
 }
 
-/// Custom settings (previously initialized values) override target defaults.
-void TargetRecip::setDefaults(StringRef Key, bool Enable,
-                              unsigned RefSteps) {
+void TargetRecip::set(StringRef Key, bool Enable, unsigned RefSteps) {
   if (Key == "all") {
     for (auto &KV : RecipMap) {
       RecipParams &RP = KV.second;
-      if (RP.Enabled == Uninitialized)
-        RP.Enabled = Enable;
-      if (RP.RefinementSteps == Uninitialized)
-        RP.RefinementSteps = RefSteps;
+      RP.Enabled = Enable;
+      RP.RefinementSteps = RefSteps;
     }
   } else {
     RecipParams &RP = RecipMap[Key];
-    if (RP.Enabled == Uninitialized)
-      RP.Enabled = Enable;
-    if (RP.RefinementSteps == Uninitialized)
-      RP.RefinementSteps = RefSteps;
+    RP.Enabled = Enable;
+    RP.RefinementSteps = RefSteps;
   }
 }
 

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dbe38e6..6fbd9dc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp

@@ -53,6 +53,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRecip.h"
 #include "X86IntrinsicsInfo.h"
 #include <bitset>
 #include <numeric>
@@ -84,6 +85,15 @@
   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
+  // By default (and when -ffast-math is on), enable estimate codegen with 1
+  // refinement step for floats (not doubles) except scalar division. Scalar
+  // division estimates are disabled because they break too much real-world
+  // code. These defaults are intended to match GCC behavior.
+  ReciprocalEstimates.set("sqrtf", true, 1);
+  ReciprocalEstimates.set("divf", false, 1);
+  ReciprocalEstimates.set("vec-sqrtf", true, 1);
+  ReciprocalEstimates.set("vec-divf", true, 1);
+
   // For 64-bit, since we have so many registers, use the ILP scheduler.
   // For 32-bit, use the register pressure specific scheduling.
   // For Atom, always use ILP scheduling.
@@ -15206,7 +15216,7 @@
   else
     return SDValue();
 
-  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+  TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
   if (!Recips.isEnabled(RecipOp))
     return SDValue();
 
@@ -15238,7 +15248,7 @@
   else
     return SDValue();
 
-  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+  TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
   if (!Recips.isEnabled(RecipOp))
     return SDValue();
 

diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index c92f9d8..d231581 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp

@@ -166,16 +166,6 @@
   if ((TT.isOSWindows() && TT.getArch() == Triple::x86_64) || TT.isPS4())
     this->Options.TrapUnreachable = true;
 
-  // By default (and when -ffast-math is on), enable estimate codegen for
-  // everything except scalar division. By default, use 1 refinement step for
-  // all operations. Defaults may be overridden by using command-line options.
-  // Scalar division estimates are disabled because they break too much
-  // real-world code. These defaults match GCC behavior.
-  this->Options.Reciprocals.setDefaults("sqrtf", true, 1);
-  this->Options.Reciprocals.setDefaults("divf", false, 1);
-  this->Options.Reciprocals.setDefaults("vec-sqrtf", true, 1);
-  this->Options.Reciprocals.setDefaults("vec-divf", true, 1);
-
   initAsmInfo();
 }
commit	bfdbea6481a2247fcc63c27e3acd393000fd8d41	[log] [tgz]
author	Sanjay Patel <spatel@rotateright.com>	Tue Oct 04 20:46:43 2016 +0000
committer	Sanjay Patel <spatel@rotateright.com>	Tue Oct 04 20:46:43 2016 +0000
tree	e7dfce8045101809ecdb13200171bdfab26beda5
parent	0bb72c14240496d74f9263fad6aba830625499bd [diff]