[Target] move reciprocal estimate settings from TargetOptions to TargetLowering

The motivation for the change is that we can't have pseudo-global settings for
codegen living in TargetOptions because that doesn't work with LTO.

Ideally, these reciprocal attributes will be moved to the instruction-level via
FMF, metadata, or something else. But making them function attributes is at least
an improvement over the current state.

The ingredients of this patch are:

    Remove the reciprocal estimate command-line debug option.
    Add TargetRecip to TargetLowering.
    Remove TargetRecip from TargetOptions.
    Clean up the TargetRecip implementation to work with this new scheme.
    Set the default reciprocal settings in TargetLoweringBase (everything is off).
    Update the PowerPC defaults, users, and tests.
    Update the x86 defaults, users, and tests.

Note that if this patch needs to be reverted, the related clang patch checked in
at r283251 should be reverted too.

Differential Revision: https://reviews.llvm.org/D24816

llvm-svn: 283252
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index be66f7e..74ed7bc 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -838,6 +838,7 @@
   InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple());
   InitCmpLibcallCCs(CmpLibcallCCs);
   InitLibcallCallingConvs(LibcallCallingConvs);
+  ReciprocalEstimates.set("all", false, 0);
 }
 
 void TargetLoweringBase::initActions() {
@@ -1485,6 +1486,22 @@
   return MVT::i32; // return the default value
 }
 
+TargetRecip
+TargetLoweringBase::getTargetRecipForFunc(MachineFunction &MF) const {
+  const Function *F = MF.getFunction();
+  StringRef RecipAttrName = "reciprocal-estimates";
+  if (!F->hasFnAttribute(RecipAttrName))
+    return ReciprocalEstimates;
+
+  // Make a copy of the target's default reciprocal codegen settings.
+  TargetRecip Recips = ReciprocalEstimates;
+
+  // Override any settings that are customized for this function.
+  StringRef RecipString = F->getFnAttribute(RecipAttrName).getValueAsString();
+  Recips.set(RecipString);
+  return Recips;
+}
+
 /// getVectorTypeBreakdown - Vector types are broken down into some number of
 /// legal first class types.  For example, MVT::v8f32 maps to 2 MVT::v4f32
 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b1d2fe1..05e823d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -901,6 +901,23 @@
     setTargetDAGCombine(ISD::FSQRT);
   }
 
+  // For the estimates, convergence is quadratic, so we essentially double the
+  // number of digits correct after every iteration. For both FRE and FRSQRTE,
+  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
+  unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
+           RefinementSteps64 = RefinementSteps + 1;
+
+  ReciprocalEstimates.set("sqrtf", true, RefinementSteps);
+  ReciprocalEstimates.set("vec-sqrtf", true, RefinementSteps);
+  ReciprocalEstimates.set("divf", true, RefinementSteps);
+  ReciprocalEstimates.set("vec-divf", true, RefinementSteps);
+
+  ReciprocalEstimates.set("sqrtd", true, RefinementSteps64);
+  ReciprocalEstimates.set("vec-sqrtd", true, RefinementSteps64);
+  ReciprocalEstimates.set("divd", true, RefinementSteps64);
+  ReciprocalEstimates.set("vec-divd", true, RefinementSteps64);
+
   // Darwin long double math library functions have $LDBL128 appended.
   if (Subtarget.isDarwin()) {
     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -9646,7 +9663,7 @@
       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
-    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+    TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
     std::string RecipOp = getRecipOp("sqrt", VT);
     if (!Recips.isEnabled(RecipOp))
       return SDValue();
@@ -9668,7 +9685,7 @@
       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
-    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+    TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
     std::string RecipOp = getRecipOp("div", VT);
     if (!Recips.isEnabled(RecipOp))
       return SDValue();
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 1bb6b67..9b78739 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -204,23 +204,6 @@
       TargetABI(computeTargetABI(TT, Options)),
       Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
 
-  // For the estimates, convergence is quadratic, so we essentially double the
-  // number of digits correct after every iteration. For both FRE and FRSQRTE,
-  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
-  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
-  unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
-           RefinementSteps64 = RefinementSteps + 1;
-
-  this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
-  this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
-  this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
-  this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
-
-  this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
-  this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
-  this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
-  this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
-
   initAsmInfo();
 }
 
diff --git a/llvm/lib/Target/TargetRecip.cpp b/llvm/lib/Target/TargetRecip.cpp
index 183fa50..938ed9f 100644
--- a/llvm/lib/Target/TargetRecip.cpp
+++ b/llvm/lib/Target/TargetRecip.cpp
@@ -16,7 +16,9 @@
 
 #include "llvm/Target/TargetRecip.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
@@ -36,9 +38,7 @@
   "vec-sqrtf",
 };
 
-// The uninitialized state is needed for the enabled settings and refinement
-// steps because custom settings may arrive via the command-line before target
-// defaults are set.
+/// All operations are disabled by default and refinement steps are set to zero.
 TargetRecip::TargetRecip() {
   unsigned NumStrings = llvm::array_lengthof(RecipOps);
   for (unsigned i = 0; i < NumStrings; ++i)
@@ -137,18 +137,8 @@
         assert(Iter == RecipMap.end() && "Float entry missing from map");
         report_fatal_error("Invalid option for -recip.");
       }
-      
-      // The option was specified without a float or double suffix.
-      if (RecipMap[Val.str() + 'd'].Enabled != Uninitialized) {
-        // Make sure that the double entry was not already specified.
-        // The float entry will be checked below.
-        report_fatal_error("Duplicate option for -recip.");
-      }
     }
     
-    if (Iter->second.Enabled != Uninitialized)
-      report_fatal_error("Duplicate option for -recip.");
-    
     // Mark the matched option as found. Do not allow duplicate specifiers.
     Iter->second.Enabled = !IsDisabled;
     if (!RefStepString.empty())
@@ -164,50 +154,45 @@
   }
 }
 
-TargetRecip::TargetRecip(const std::vector<std::string> &Args) :
-  TargetRecip() {
-  unsigned NumArgs = Args.size();
+void TargetRecip::set(StringRef &RecipString) {
+  SmallVector<StringRef, 4> RecipStringVector;
+  SplitString(RecipString, RecipStringVector, ",");
+  std::vector<std::string> RecipVector;
+  for (unsigned i = 0; i < RecipStringVector.size(); ++i)
+    RecipVector.push_back(RecipStringVector[i].str());
+
+  unsigned NumArgs = RecipVector.size();
 
   // Check if "all", "default", or "none" was specified.
-  if (NumArgs == 1 && parseGlobalParams(Args[0]))
+  if (NumArgs == 1 && parseGlobalParams(RecipVector[0]))
     return;
- 
-  parseIndividualParams(Args);
+
+  parseIndividualParams(RecipVector);
 }
 
 bool TargetRecip::isEnabled(StringRef Key) const {
   ConstRecipIter Iter = RecipMap.find(Key);
   assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
-  assert(Iter->second.Enabled != Uninitialized &&
-         "Enablement setting was not initialized");
   return Iter->second.Enabled;
 }
 
 unsigned TargetRecip::getRefinementSteps(StringRef Key) const {
   ConstRecipIter Iter = RecipMap.find(Key);
   assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
-  assert(Iter->second.RefinementSteps != Uninitialized &&
-         "Refinement step setting was not initialized");
   return Iter->second.RefinementSteps;
 }
 
-/// Custom settings (previously initialized values) override target defaults.
-void TargetRecip::setDefaults(StringRef Key, bool Enable,
-                              unsigned RefSteps) {
+void TargetRecip::set(StringRef Key, bool Enable, unsigned RefSteps) {
   if (Key == "all") {
     for (auto &KV : RecipMap) {
       RecipParams &RP = KV.second;
-      if (RP.Enabled == Uninitialized)
-        RP.Enabled = Enable;
-      if (RP.RefinementSteps == Uninitialized)
-        RP.RefinementSteps = RefSteps;
+      RP.Enabled = Enable;
+      RP.RefinementSteps = RefSteps;
     }
   } else {
     RecipParams &RP = RecipMap[Key];
-    if (RP.Enabled == Uninitialized)
-      RP.Enabled = Enable;
-    if (RP.RefinementSteps == Uninitialized)
-      RP.RefinementSteps = RefSteps;
+    RP.Enabled = Enable;
+    RP.RefinementSteps = RefSteps;
   }
 }
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dbe38e6..6fbd9dc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53,6 +53,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRecip.h"
 #include "X86IntrinsicsInfo.h"
 #include <bitset>
 #include <numeric>
@@ -84,6 +85,15 @@
   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
+  // By default (and when -ffast-math is on), enable estimate codegen with 1
+  // refinement step for floats (not doubles) except scalar division. Scalar
+  // division estimates are disabled because they break too much real-world
+  // code. These defaults are intended to match GCC behavior.
+  ReciprocalEstimates.set("sqrtf", true, 1);
+  ReciprocalEstimates.set("divf", false, 1);
+  ReciprocalEstimates.set("vec-sqrtf", true, 1);
+  ReciprocalEstimates.set("vec-divf", true, 1);
+
   // For 64-bit, since we have so many registers, use the ILP scheduler.
   // For 32-bit, use the register pressure specific scheduling.
   // For Atom, always use ILP scheduling.
@@ -15206,7 +15216,7 @@
   else
     return SDValue();
 
-  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+  TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
   if (!Recips.isEnabled(RecipOp))
     return SDValue();
 
@@ -15238,7 +15248,7 @@
   else
     return SDValue();
 
-  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+  TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
   if (!Recips.isEnabled(RecipOp))
     return SDValue();
 
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index c92f9d8..d231581 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -166,16 +166,6 @@
   if ((TT.isOSWindows() && TT.getArch() == Triple::x86_64) || TT.isPS4())
     this->Options.TrapUnreachable = true;
 
-  // By default (and when -ffast-math is on), enable estimate codegen for
-  // everything except scalar division. By default, use 1 refinement step for
-  // all operations. Defaults may be overridden by using command-line options.
-  // Scalar division estimates are disabled because they break too much
-  // real-world code. These defaults match GCC behavior.
-  this->Options.Reciprocals.setDefaults("sqrtf", true, 1);
-  this->Options.Reciprocals.setDefaults("divf", false, 1);
-  this->Options.Reciprocals.setDefaults("vec-sqrtf", true, 1);
-  this->Options.Reciprocals.setDefaults("vec-divf", true, 1);
-
   initAsmInfo();
 }