[ScopDetect/Info] Allow unconditional hoisting of loads from dereferenceable ptrs

If LLVM pointers are annotated with dereferenceable attributes/metadata, or
LLVM can look at the allocation from which a pointer is derived, we know that
dereferencing these pointers is safe and can be done unconditionally. We use
this information to prove that loads from certain pointers are safe to hoist
and then hoist them unconditionally.

llvm-svn: 297375
diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp
index 909c114..44b1782 100644
--- a/polly/lib/Analysis/ScopDetection.cpp
+++ b/polly/lib/Analysis/ScopDetection.cpp
@@ -53,6 +53,7 @@
 #include "polly/Support/ScopLocation.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -343,6 +344,8 @@
 bool ScopDetection::onlyValidRequiredInvariantLoads(
     InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
   Region &CurRegion = Context.CurRegion;
+  const DataLayout &DL =
+      CurRegion.getEntry()->getParent()->getParent()->getDataLayout();
 
   if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
     return false;
@@ -351,10 +354,16 @@
     if (!isHoistableLoad(Load, CurRegion, *LI, *SE, *DT))
       return false;
 
-    for (auto NonAffineRegion : Context.NonAffineSubRegionSet)
+    for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
+
+      if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
+                                      Load->getAlignment(), DL))
+        continue;
+
       if (NonAffineRegion->contains(Load) &&
           Load->getParent() != NonAffineRegion->getEntry())
         return false;
+    }
   }
 
   Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 0a8b4bb..574428c 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -3719,6 +3719,11 @@
   if (hasNonHoistableBasePtrInScop(Access, Writes))
     return nullptr;
 
+  auto &DL = getFunction().getParent()->getDataLayout();
+  if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(),
+                                  DL))
+    return isl_set_empty(getParamSpace());
+
   // Skip accesses in non-affine subregions as they might not be executed
   // under the same condition as the entry of the non-affine subregion.
   if (BB != LI->getParent())
diff --git a/polly/test/Isl/CodeGen/reduction_2.ll b/polly/test/Isl/CodeGen/reduction_2.ll
index 3ee742a..886d7a7 100644
--- a/polly/test/Isl/CodeGen/reduction_2.ll
+++ b/polly/test/Isl/CodeGen/reduction_2.ll
@@ -89,15 +89,13 @@
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
-; Negative test. At the moment we will optimistically assume RED[0] in the conditional after the
-; loop might be invariant and expand the SCoP from the loop to include the conditional. However,
-; during SCoP generation we will realize that RED[0] is in fact not invariant and bail.
+; At some point this was a negative test, where we optimistically assumed RED[0]
+; in the conditional after the loop is invariant and expanded the SCoP from
+; the loop to include the conditional. However, during SCoP generation we
+; realized that RED[0] is in fact not invariant and bailed.
 ;
-; Possible solutions could be:
-;   - Do not optimistically assume it to be invariant (as before this commit), however we would loose
-;     a lot of invariant cases due to possible aliasing.
-;   - Reduce the size of the SCoP if an assumed invariant access is in fact not invariant instead of
-;     rejecting the whole region.
+; Today, LLVM can derive that the load is indeed invariant, and Polly uses this
+; information to hoist the invariant load of RED[0] unconditionally.
 ;
-; CHECK-NOT: for (int c0 = 0; c0 <= 1018; c0 += 1)
-; CHECK-NOT:   Stmt_for_body(c0);
+; CHECK: for (int c0 = 0; c0 <= 1018; c0 += 1)
+; CHECK-NEXT:   Stmt_for_body(c0);
diff --git a/polly/test/ScopInfo/invariant_load_dereferenceable.ll b/polly/test/ScopInfo/invariant_load_dereferenceable.ll
new file mode 100644
index 0000000..420bbfe
--- /dev/null
+++ b/polly/test/ScopInfo/invariant_load_dereferenceable.ll
@@ -0,0 +1,112 @@
+; RUN: opt %loadPolly -polly-detect -polly-scops \
+; RUN: -polly-invariant-load-hoisting=true \
+; RUN: -analyze < %s | FileCheck %s
+
+; CHECK-NOT: Function: foo_undereferanceable
+
+; CHECK:       Function: foo_dereferanceable
+
+; CHECK:       Invariant Accesses: {
+; CHECK-NEXT:               ReadAccess :=	[Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                   [sizeA] -> { Stmt_for_body_j__TO__for_latch_j[i0, i1] -> MemRef_sizeA_ptr[0] };
+; CHECK-NEXT:               Execution Context: [sizeA] -> {  :  }
+; CHECK-NEXT:       }
+
+; CHECK:            MayWriteAccess :=	[Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:               [sizeA] -> { Stmt_for_body_j__TO__for_latch_j[i0, i1] -> MemRef_A[i1, i0] };
+
+; CHECK-NOT: Function: foo_undereferanceable
+
+define void @foo_dereferanceable(double* %A, double* %B, i64* dereferenceable(8) %sizeA_ptr,
+		i32 %lb.i, i32 %lb.j, i32 %ub.i, i32 %ub.j) {
+entry:
+	br label %for.i
+
+for.i:
+	%indvar.i = phi i32 [0, %entry], [%indvar.next.i, %for.latch.i]
+	%indvar.next.i = add i32 %indvar.i, 1
+	%cmp.i = icmp sle i32 %indvar.i, 1024
+	br i1 %cmp.i, label %for.body.i, label %exit
+
+for.body.i:
+	br label %for.j
+
+for.j:
+	%indvar.j = phi i32 [0, %for.body.i], [%indvar.next.j, %for.latch.j]
+	%indvar.next.j = add i32 %indvar.j, 1
+	%cmp.j = icmp sle i32 %indvar.j, 1024
+	br i1 %cmp.j, label %for.body.j, label %for.latch.i
+
+for.body.j:
+	%prod = mul i32 %indvar.j, %indvar.j
+	%cmp = icmp sle i32 %prod, 1024
+	br i1 %cmp, label %stmt, label %for.latch.j
+
+stmt:
+	%sext.i = sext i32 %indvar.i to i64
+	%sext.j = sext i32 %indvar.j to i64
+
+	%sizeA = load i64, i64* %sizeA_ptr
+	%prodA = mul i64 %sext.j, %sizeA
+	%offsetA = add i64 %sext.i, %prodA
+	%ptrA = getelementptr double, double* %A, i64 %offsetA
+	store double 42.0, double* %ptrA
+
+	br label %for.latch.j
+
+for.latch.j:
+	br label %for.j
+
+for.latch.i:
+	br label %for.i
+
+exit:
+	ret void
+}
+
+define void @foo_undereferanceable(double* %A, double* %B, i64* %sizeA_ptr) {
+entry:
+	br label %for.i
+
+for.i:
+	%indvar.i = phi i32 [0, %entry], [%indvar.next.i, %for.latch.i]
+	%indvar.next.i = add i32 %indvar.i, 1
+	%cmp.i = icmp sle i32 %indvar.i, 1024
+	br i1 %cmp.i, label %for.body.i, label %exit
+
+for.body.i:
+	br label %for.j
+
+for.j:
+	%indvar.j = phi i32 [0, %for.body.i], [%indvar.next.j, %for.latch.j]
+	%indvar.next.j = add i32 %indvar.j, 1
+	%cmp.j = icmp sle i32 %indvar.j, 1024
+	br i1 %cmp.j, label %for.body.j, label %for.latch.i
+
+for.body.j:
+	%prod = mul i32 %indvar.j, %indvar.j
+	%cmp = icmp sle i32 %prod, 1024
+	br i1 %cmp, label %stmt, label %for.latch.j
+
+stmt:
+	%sext.i = sext i32 %indvar.i to i64
+	%sext.j = sext i32 %indvar.j to i64
+
+	%sizeA = load i64, i64* %sizeA_ptr
+	%prodA = mul i64 %sext.j, %sizeA
+	%offsetA = add i64 %sext.i, %prodA
+	%ptrA = getelementptr double, double* %A, i64 %offsetA
+	store double 42.0, double* %ptrA
+
+	br label %for.latch.j
+
+for.latch.j:
+	br label %for.j
+
+for.latch.i:
+	br label %for.i
+
+exit:
+	ret void
+}
+