Allow invariant loads in the SCoP description

  This patch allows invariant loads to be used in the SCoP description,
  e.g., as loop bounds, conditions or in memory access functions.

  First we collect "required invariant loads" during SCoP detection that
  would otherwise make an expression we care about non-affine. To this
  end a new level of abstraction was introduced before
  SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and
  ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide
  if we want a load inside the region to be optimistically assumed
  invariant or not. If we do, it will be marked as required and in the
  SCoP generation we bail if it is actually not invariant. If we don't
  it will be a non-affine expression as before. At the moment we
  optimistically assume all "hoistable" (namely non-loop-carried) loads
  to be invariant. This causes us to expand some SCoPs and dismiss them
  later but it also allows us to detect a lot we would dismiss directly
  if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also
  allow potential aliases between optimistically assumed invariant loads
  and other pointers as our runtime alias checks are sound in case the
  loads are actually invariant. Together with the invariant checks this
  combination allows to handle a lot more than LICM can.

  The code generation of the invariant loads had to be extended as we
  can now have dependences between parameters and invariant (hoisted)
  loads as well as the other way around, e.g.,
    test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll
  First, it is important to note that we cannot have real cycles but
  only dependences from a hoisted load to a parameter and from another
  parameter to that hoisted load (and so on). To handle such cases we
  materialize llvm::Values for parameters that are referred by a hoisted
  load on demand and then materialize the remaining parameters. Second,
  there are new kinds of dependences between hoisted loads caused by the
  constraints on their execution. If a hoisted load is conditionally
  executed it might depend on the value of another hoisted load. To deal
  with such situations we sort them already in the ScopInfo such that
  they can be generated in the order they are listed in the
  Scop::InvariantAccesses list (see compareInvariantAccesses). The
  dependences between hoisted loads caused by indirect accesses are
  handled the same way as before.

llvm-svn: 249607
diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp
index ec54e4f..fefdf67 100644
--- a/polly/lib/Analysis/ScopDetection.cpp
+++ b/polly/lib/Analysis/ScopDetection.cpp
@@ -51,7 +51,6 @@
 #include "polly/ScopDetection.h"
 #include "polly/ScopDetectionDiagnostic.h"
 #include "polly/Support/SCEVValidator.h"
-#include "polly/Support/ScopHelper.h"
 #include "polly/Support/ScopLocation.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -258,9 +257,10 @@
   if (Verify) {
     BoxedLoopsSetTy DummyBoxedLoopsSet;
     NonAffineSubRegionSetTy DummyNonAffineSubRegionSet;
+    InvariantLoadsSetTy DummyILS;
     DetectionContext Context(const_cast<Region &>(R), *AA,
                              DummyNonAffineSubRegionSet, DummyBoxedLoopsSet,
-                             false /*verifying*/);
+                             DummyILS, false /*verifying*/);
     return isValidRegion(Context);
   }
 
@@ -302,15 +302,39 @@
   return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
 }
 
+bool ScopDetection::onlyValidRequiredInvariantLoads(
+    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
+  Region &CurRegion = Context.CurRegion;
+
+  for (LoadInst *Load : RequiredILS)
+    if (!isHoistableLoad(Load, CurRegion, *LI, *SE))
+      return false;
+
+  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
+
+  return true;
+}
+
+bool ScopDetection::isAffine(const SCEV *S, DetectionContext &Context,
+                             Value *BaseAddress) const {
+
+  InvariantLoadsSetTy AccessILS;
+  if (!isAffineExpr(&Context.CurRegion, S, *SE, BaseAddress, &AccessILS))
+    return false;
+
+  if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
+    return false;
+
+  return true;
+}
+
 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
                                   Value *Condition, bool IsLoopBranch,
                                   DetectionContext &Context) const {
-  Region &CurRegion = Context.CurRegion;
-
   Loop *L = LI->getLoopFor(&BB);
   const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L);
 
-  if (isAffineExpr(&CurRegion, ConditionSCEV, *SE))
+  if (isAffine(ConditionSCEV, Context))
     return true;
 
   if (!IsLoopBranch && AllowNonAffineSubRegions &&
@@ -327,8 +351,6 @@
 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                   Value *Condition, bool IsLoopBranch,
                                   DetectionContext &Context) const {
-  Region &CurRegion = Context.CurRegion;
-
   // Non constant conditions of branches need to be ICmpInst.
   if (!isa<ICmpInst>(Condition)) {
     if (!IsLoopBranch && AllowNonAffineSubRegions &&
@@ -361,7 +383,7 @@
   const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L);
   const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L);
 
-  if (isAffineExpr(&CurRegion, LHS, *SE) && isAffineExpr(&CurRegion, RHS, *SE))
+  if (isAffine(LHS, Context) && isAffine(RHS, Context))
     return true;
 
   if (!IsLoopBranch && AllowNonAffineSubRegions &&
@@ -452,18 +474,6 @@
     if (!isInvariant(*Operand, Reg))
       return false;
 
-  // When the instruction is a load instruction, check that no write to memory
-  // in the region aliases with the load.
-  if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
-    auto Loc = MemoryLocation::get(LI);
-
-    // Check if any basic block in the region can modify the location pointed to
-    // by 'Loc'.  If so, 'Val' is (likely) not invariant in the region.
-    for (const BasicBlock *BB : Reg.blocks())
-      if (AA->canBasicBlockModify(*BB, Loc))
-        return false;
-  }
-
   return true;
 }
 
@@ -547,7 +557,7 @@
         const Instruction *Insn = Pair.first;
         const SCEV *AF = Pair.second;
 
-        if (!isAffineExpr(&CurRegion, AF, *SE, BaseValue)) {
+        if (!isAffine(AF, Context, BaseValue)) {
           invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
                                          BaseValue);
           if (!KeepGoing)
@@ -574,7 +584,7 @@
       MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
 
       if (!AF) {
-        if (isAffineExpr(&CurRegion, Pair.second, *SE, BaseValue))
+        if (isAffine(Pair.second, Context, BaseValue))
           Acc->DelinearizedSubscripts.push_back(Pair.second);
         else
           IsNonAffine = true;
@@ -584,7 +594,7 @@
         if (Acc->DelinearizedSubscripts.size() == 0)
           IsNonAffine = true;
         for (const SCEV *S : Acc->DelinearizedSubscripts)
-          if (!isAffineExpr(&CurRegion, S, *SE, BaseValue))
+          if (!isAffine(S, Context, BaseValue))
             IsNonAffine = true;
       }
 
@@ -655,11 +665,11 @@
   if (PollyDelinearize && !isVariantInNonAffineLoop) {
     Context.Accesses[BasePointer].push_back({&Inst, AccessFunction});
 
-    if (!isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue))
+    if (!isAffine(AccessFunction, Context, BaseValue))
       Context.NonAffineAccesses.insert(BasePointer);
   } else if (!AllowNonAffine) {
     if (isVariantInNonAffineLoop ||
-        !isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue))
+        !isAffine(AccessFunction, Context, BaseValue))
       return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true,
                                             AccessFunction, &Inst, BaseValue);
   }
@@ -693,9 +703,16 @@
       // the beginning of the SCoP. This breaks if the base pointer is defined
       // inside the scop. Hence, we can only create a run-time check if we are
       // sure the base pointer is not an instruction defined inside the scop.
+      // However, we can ignore loads that will be hoisted.
       for (const auto &Ptr : AS) {
         Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
         if (Inst && CurRegion.contains(Inst)) {
+          auto *Load = dyn_cast<LoadInst>(Inst);
+          if (Load && isHoistableLoad(Load, CurRegion, *LI, *SE)) {
+            Context.RequiredILS.insert(Load);
+            continue;
+          }
+
           CanBuildRunTimeCheck = false;
           break;
         }
@@ -815,7 +832,8 @@
   while (ExpandedRegion) {
     DetectionContext Context(
         *ExpandedRegion, *AA, NonAffineSubRegionMap[ExpandedRegion.get()],
-        BoxedLoopsMap[ExpandedRegion.get()], false /* verifying */);
+        BoxedLoopsMap[ExpandedRegion.get()],
+        RequiredInvariantLoadsMap[ExpandedRegion.get()], false /* verifying */);
     DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
     // Only expand when we did not collect errors.
 
@@ -877,11 +895,12 @@
   ValidRegions.remove(&R);
   BoxedLoopsMap.erase(&R);
   NonAffineSubRegionMap.erase(&R);
+  RequiredInvariantLoadsMap.erase(&R);
 }
 
 void ScopDetection::findScops(Region &R) {
   DetectionContext Context(R, *AA, NonAffineSubRegionMap[&R], BoxedLoopsMap[&R],
-                           false /*verifying*/);
+                           RequiredInvariantLoadsMap[&R], false /*verifying*/);
 
   bool RegionIsValid = false;
   if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) {
@@ -1121,14 +1140,23 @@
   return &BLMIt->second;
 }
 
+const InvariantLoadsSetTy *
+ScopDetection::getRequiredInvariantLoads(const Region *R) const {
+  auto I = RequiredInvariantLoadsMap.find(R);
+  if (I == RequiredInvariantLoadsMap.end())
+    return nullptr;
+  return &I->second;
+}
+
 void polly::ScopDetection::verifyRegion(const Region &R) const {
   assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
 
   BoxedLoopsSetTy DummyBoxedLoopsSet;
   NonAffineSubRegionSetTy DummyNonAffineSubRegionSet;
+  InvariantLoadsSetTy DummyILS;
   DetectionContext Context(const_cast<Region &>(R), *AA,
                            DummyNonAffineSubRegionSet, DummyBoxedLoopsSet,
-                           true /*verifying*/);
+                           DummyILS, true /*verifying*/);
   isValidRegion(Context);
 }
 
@@ -1162,6 +1190,7 @@
   InsnToMemAcc.clear();
   BoxedLoopsMap.clear();
   NonAffineSubRegionMap.clear();
+  RequiredInvariantLoadsMap.clear();
 
   // Do not clear the invalid function set.
 }