Identify and hoist definitively invariant loads
As a first step towards assumed invariant loads (loads that are not
written in some context), we now detect and hoist definitively
invariant loads. These invariant loads are preloaded during code
generation and used in the optimized version of the SCoP. If the load
is only executed conditionally, the preloaded version is guarded by
the same condition, hence we never access memory that would not have
been accessed otherwise. This is also the feature that most clearly
distinguishes this transformation from LICM.
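
To illustrate the guarded preload (a hypothetical example, not part
of this patch): in the loop below the load of A[p] is invariant, but
it is only executed when N > 512. LICM could not hoist it without
proving that A[p] is dereferenceable; with the guarded preload the
hoisted load is emitted under the same N > 512 condition, so no
memory is touched that the original program would not have touched:

  void foo(float *restrict A, float *restrict C, long N, long p) {
    for (long i = 0; i < N; i++)
      if (N > 512)      /* load of A[p] only happens if N > 512 */
        C[i] += A[p];   /* the preload is guarded the same way  */
  }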
As hoisting can leave statements empty, we also simplify the SCoP and
remove empty statements that would otherwise cause artifacts during
code generation.
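
A rough sketch of the empty-statement case (again hypothetical, not
taken from this patch): the conditional block below contains nothing
but the invariant load of A[p]. Once that load is preloaded, the
statement modeling the block has no accesses left, i.e. it is empty,
and the simplification step removes it so code generation does not
emit dead scaffolding for it:

  void bar(float *restrict A, float *restrict C, long N, long p) {
    float tmp = 0.0f;
    for (long i = 0; i < N; i++) {
      if (i & 1)
        tmp = A[p];   /* this block becomes empty after hoisting */
      C[i] += tmp;
    }
  }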
Differential Revision: http://reviews.llvm.org/D13194
llvm-svn: 248861
diff --git a/polly/test/Isl/CodeGen/exprModDiv.ll b/polly/test/Isl/CodeGen/exprModDiv.ll
index 42d555d..ce59ad8 100644
--- a/polly/test/Isl/CodeGen/exprModDiv.ll
+++ b/polly/test/Isl/CodeGen/exprModDiv.ll
@@ -6,7 +6,7 @@
;
; void exprModDiv(float *A, float *B, float *C, long N, long p) {
; for (long i = 0; i < N; i++)
-; C[i] += A[i] + B[i] + A[p] + B[p];
+; C[i] += A[i] + B[i] + A[i] + B[i + p];
; }
;
;
@@ -32,21 +32,21 @@
; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 127 * floord(-p - 1, 127) + 127]
-; CHECK: %20 = sub nsw i64 0, %p
-; CHECK: %21 = sub nsw i64 %20, 1
-; CHECK: %pexp.fdiv_q.0 = sub i64 %21, 127
+; CHECK: %17 = sub nsw i64 0, %p
+; CHECK: %18 = sub nsw i64 %17, 1
+; CHECK: %pexp.fdiv_q.0 = sub i64 %18, 127
; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1
-; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %21, 0
-; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %21
+; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %18, 0
+; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %18
; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127
-; CHECK: %22 = mul nsw i64 127, %pexp.fdiv_q.4
-; CHECK: %23 = add nsw i64 %p, %22
-; CHECK: %24 = add nsw i64 %23, 127
-; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %24
+; CHECK: %19 = mul nsw i64 127, %pexp.fdiv_q.4
+; CHECK: %20 = add nsw i64 %p, %19
+; CHECK: %21 = add nsw i64 %20, 127
+; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %21
; A[p / 127]
; CHECK: %pexp.div = sdiv exact i64 %p, 127
-; CHECK: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div
+; CHECK: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div
; A[i % 128]
; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128
@@ -58,17 +58,17 @@
; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 128 * floord(-p - 1, 128) + 128]
-; POW2: %20 = sub nsw i64 0, %p
-; POW2: %21 = sub nsw i64 %20, 1
-; POW2: %polly.fdiv_q.shr = ashr i64 %21, 7
-; POW2: %22 = mul nsw i64 128, %polly.fdiv_q.shr
-; POW2: %23 = add nsw i64 %p, %22
-; POW2: %24 = add nsw i64 %23, 128
-; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %24
+; POW2: %17 = sub nsw i64 0, %p
+; POW2: %18 = sub nsw i64 %17, 1
+; POW2: %polly.fdiv_q.shr = ashr i64 %18, 7
+; POW2: %19 = mul nsw i64 128, %polly.fdiv_q.shr
+; POW2: %20 = add nsw i64 %p, %19
+; POW2: %21 = add nsw i64 %20, 128
+; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %21
; A[p / 128]
; POW2: %pexp.div = sdiv exact i64 %p, 128
-; POW2: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div
+; POW2: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -87,10 +87,11 @@
%arrayidx1 = getelementptr inbounds float, float* %B, i64 %i.0
%tmp1 = load float, float* %arrayidx1, align 4
%add = fadd float %tmp, %tmp1
- %arrayidx2 = getelementptr inbounds float, float* %A, i64 %p
+ %arrayidx2 = getelementptr inbounds float, float* %A, i64 %i.0
%tmp2 = load float, float* %arrayidx2, align 4
%add3 = fadd float %add, %tmp2
- %arrayidx4 = getelementptr inbounds float, float* %B, i64 %p
+ %padd = add nsw i64 %p, %i.0
+ %arrayidx4 = getelementptr inbounds float, float* %B, i64 %padd
%tmp3 = load float, float* %arrayidx4, align 4
%add5 = fadd float %add3, %tmp3
%arrayidx6 = getelementptr inbounds float, float* %C, i64 %i.0