Rework loop predication pass
We've found a serious issue with the current implementation of loop predication.
The current implementation relies on SCEV and this turned out to be problematic.
To fix the problem we had to rework the pass substantially. We have had the
reworked implementation in our downstream tree for a while. This is the initial
patch of the series of changes to upstream the new implementation.
For now the transformation is limited to the following case:
* The loop has a single latch with either ult or slt icmp condition.
* The step of the IV used in the latch condition is 1.
* The IV of the latch condition is the same as the post increment IV of the guard condition.
* The guard condition is ult.
See the review or the LoopPredication.cpp header for the details about the
problem and the new implementation.
Reviewed By: sanjoy, mkazantsev
Differential Revision: https://reviews.llvm.org/D37569
llvm-svn: 313981
diff --git a/llvm/test/Transforms/LoopPredication/nested.ll b/llvm/test/Transforms/LoopPredication/nested.ll
index 6b40cde..796839f 100644
--- a/llvm/test/Transforms/LoopPredication/nested.ll
+++ b/llvm/test/Transforms/LoopPredication/nested.ll
@@ -10,8 +10,6 @@
br i1 %tmp5, label %exit, label %outer.loop.preheader
outer.loop.preheader:
-; CHECK: outer.loop.preheader:
-; CHECK: [[iteration_count:[^ ]+]] = add i32 %l, -1
br label %outer.loop
outer.loop:
@@ -22,7 +20,10 @@
inner.loop.preheader:
; CHECK: inner.loop.preheader:
-; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length
+; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %l, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
+; CHECK-NEXT: br label %inner.loop
br label %inner.loop
inner.loop:
@@ -31,7 +32,7 @@
%inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
%j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ]
- %within.bounds = icmp slt i32 %j, %length
+ %within.bounds = icmp ult i32 %j, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%j.i64 = zext i32 %j to i64
@@ -62,8 +63,10 @@
outer.loop.preheader:
; CHECK: outer.loop.preheader:
-; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1
-; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length
+; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
+; CHECK-NEXT: br label %outer.loop
br label %outer.loop
outer.loop:
@@ -82,7 +85,7 @@
%inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
%j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ]
- %within.bounds = icmp slt i32 %i, %length
+ %within.bounds = icmp ult i32 %i, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%i.i64 = zext i32 %i to i64
@@ -112,14 +115,15 @@
br i1 %tmp5, label %exit, label %outer.loop.preheader
outer.loop.preheader:
+; CHECK: outer.loop.preheader:
+; CHECK-NEXT: [[first_iteration_check_outer:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[limit_check_outer:[^ ]+]] = icmp sle i32 %n, %length
+; CHECK-NEXT: [[wide_cond_outer:[^ ]+]] = and i1 [[first_iteration_check_outer]], [[limit_check_outer]]
+; CHECK-NEXT: br label %outer.loop
br label %outer.loop
outer.loop:
; CHECK: outer.loop:
-; CHECK: [[i_1:[^ ]+]] = add i32 %i, 1
-; CHECK-NEXT: [[l_sgt_i_1:[^ ]+]] = icmp sgt i32 %l, [[i_1]]
-; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[l_sgt_i_1]], i32 %l, i32 [[i_1]]
-; CHECK-NEXT: [[max_j:[^ ]+]] = add i32 [[smax]], -1
%outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
%i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
%tmp6 = icmp sle i32 %l, 0
@@ -127,16 +131,69 @@
inner.loop.preheader:
; CHECK: inner.loop.preheader:
-; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_j]], %length
+; CHECK: [[limit_check_inner:[^ ]+]] = icmp sle i32 %l, %length
+; CHECK: br label %inner.loop
br label %inner.loop
inner.loop:
; CHECK: inner.loop:
+; CHECK: [[wide_cond:[^ ]+]] = and i1 [[limit_check_inner]], [[wide_cond_outer]]
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
%inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
%j = phi i32 [ %j.next, %inner.loop ], [ %i, %inner.loop.preheader ]
- %within.bounds = icmp slt i32 %j, %length
+ %within.bounds = icmp ult i32 %j, %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %j.i64 = zext i32 %j to i64
+ %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64
+ %array.j = load i32, i32* %array.j.ptr, align 4
+ %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j
+
+ %j.next = add nsw i32 %j, 1
+ %inner.continue = icmp slt i32 %j.next, %l
+ br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
+
+outer.loop.inc:
+ %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
+ %i.next = add nsw i32 %i, 1
+ %outer.continue = icmp slt i32 %i.next, %n
+ br i1 %outer.continue, label %outer.loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
+ ret i32 %result
+}
+
+define i32 @cant_expand_guard_check_start(i32* %array, i32 %length, i32 %n, i32 %l, i32 %maybezero) {
+; CHECK-LABEL: @cant_expand_guard_check_start
+entry:
+ %tmp5 = icmp sle i32 %n, 0
+ br i1 %tmp5, label %exit, label %outer.loop.preheader
+
+outer.loop.preheader:
+ br label %outer.loop
+
+outer.loop:
+ %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+ %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+ %tmp6 = icmp sle i32 %l, 0
+ %div = udiv i32 %i, %maybezero
+ br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
+
+inner.loop.preheader:
+; CHECK: inner.loop.preheader:
+; CHECK: br label %inner.loop
+ br label %inner.loop
+
+inner.loop:
+; CHECK: inner.loop:
+; CHECK: %within.bounds = icmp ult i32 %j, %length
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+ %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
+ %j = phi i32 [ %j.next, %inner.loop ], [ %div, %inner.loop.preheader ]
+
+ %within.bounds = icmp ult i32 %j, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%j.i64 = zext i32 %j to i64