[LoopPredication] Handle the case when the guard and the latch IV have different offsets
This is a follow-up change for D37569.
Currently the transformation is limited to the case when:
* The loop has a single latch with the condition of the form: ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=.
* The step of the IV used in the latch condition is 1.
* The IV of the latch condition is the same as the post increment IV of the guard condition.
* The guard condition is of the form i u< guardLimit.
This patch enables the transform when the latch condition is of the form
latchStart + i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=,
and the guard condition is of the form
guardStart + i u< guardLimit.
Reviewed By: anna
Differential Revision: https://reviews.llvm.org/D39097
llvm-svn: 316768
diff --git a/llvm/test/Transforms/LoopPredication/basic.ll b/llvm/test/Transforms/LoopPredication/basic.ll
index e6c31d1..5984290 100644
--- a/llvm/test/Transforms/LoopPredication/basic.ll
+++ b/llvm/test/Transforms/LoopPredication/basic.ll
@@ -255,6 +255,157 @@
ret i32 %result
}
+define i32 @signed_loop_0_to_n_preincrement_latch_check(i32* %array, i32 %length, i32 %n) { ; guard and latch both test the pre-increment IV %i
+; CHECK-LABEL: @signed_loop_0_to_n_preincrement_latch_check
+entry:
+ %tmp5 = icmp sle i32 %n, 0 ; bypass the loop when %n <= 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader: ; expected after the pass: (%n s<= %length - 1) & (0 u< %length) is computed in this block
+; CHECK: loop.preheader:
+; CHECK: [[length_minus_1:[^ ]+]] = add i32 %length, -1
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, [[length_minus_1]]
+; CHECK-NEXT: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop: ; expected after the pass: the guard consumes the condition computed in the preheader
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] ; running sum of loaded elements
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] ; IV: starts at 0, advanced by %i.next
+ %within.bounds = icmp ult i32 %i, %length ; guard condition: %i u< %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4 ; read array[%i]
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add i32 %i, 1
+ %continue = icmp slt i32 %i, %n ; latch compares %i (value before the increment), not %i.next
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ; 0 when the loop was bypassed, otherwise the sum
+ ret i32 %result
+}
+
+define i32 @signed_loop_0_to_n_preincrement_latch_check_postincrement_guard_check(i32* %array, i32 %length, i32 %n) { ; guard tests %i.next while the latch tests %i
+; CHECK-LABEL: @signed_loop_0_to_n_preincrement_latch_check_postincrement_guard_check
+entry:
+ %tmp5 = icmp sle i32 %n, 0 ; bypass the loop when %n <= 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader: ; expected after the pass: (%n s<= %length - 2) & (1 u< %length) is computed in this block
+; CHECK: loop.preheader:
+; CHECK: [[length_minus_2:[^ ]+]] = add i32 %length, -2
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, [[length_minus_2]]
+; CHECK-NEXT: [[first_iteration_check:[^ ]+]] = icmp ult i32 1, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop: ; expected after the pass: the guard consumes the condition computed in the preheader
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] ; running sum of loaded elements
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] ; IV: starts at 0, advanced by %i.next
+
+ %i.next = add i32 %i, 1 ; computed before the guard because the guard uses it
+ %within.bounds = icmp ult i32 %i.next, %length ; guard condition: %i + 1 u< %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4 ; read array[%i]
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %continue = icmp slt i32 %i, %n ; latch compares %i (value before the increment), not %i.next
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ; 0 when the loop was bypassed, otherwise the sum
+ ret i32 %result
+}
+
+define i32 @signed_loop_0_to_n_sle_latch_offset_ult_check(i32* %array, i32 %length, i32 %n) { ; guard tests %i + 1; latch is %i.next s<= %n
+; CHECK-LABEL: @signed_loop_0_to_n_sle_latch_offset_ult_check
+entry:
+ %tmp5 = icmp sle i32 %n, 0 ; bypass the loop when %n <= 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader: ; expected after the pass: (%n s< %length - 1) & (1 u< %length) is computed in this block
+; CHECK: loop.preheader:
+; CHECK: [[length_minus_1:[^ ]+]] = add i32 %length, -1
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp slt i32 %n, [[length_minus_1]]
+; CHECK-NEXT: [[first_iteration_check:[^ ]+]] = icmp ult i32 1, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop: ; expected after the pass: the guard consumes the condition computed in the preheader
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] ; running sum of loaded elements
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] ; IV: starts at 0, advanced by %i.next
+ %i.offset = add i32 %i, 1 ; separate add from %i.next, used only by the guard
+ %within.bounds = icmp ult i32 %i.offset, %length ; guard condition: %i + 1 u< %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4 ; read array[%i]
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add i32 %i, 1
+ %continue = icmp sle i32 %i.next, %n ; latch uses the post-increment value with a signed <= predicate
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ; 0 when the loop was bypassed, otherwise the sum
+ ret i32 %result
+}
+
+define i32 @signed_loop_0_to_n_offset_sle_latch_offset_ult_check(i32* %array, i32 %length, i32 %n) { ; guard tests %i + 1; latch tests %i.next + 1
+; CHECK-LABEL: @signed_loop_0_to_n_offset_sle_latch_offset_ult_check
+entry:
+ %tmp5 = icmp sle i32 %n, 0 ; bypass the loop when %n <= 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader: ; expected after the pass: (%n s< %length) & (1 u< %length), with %length used directly in the limit compare
+; CHECK: loop.preheader:
+; CHECK: [[limit_check:[^ ]+]] = icmp slt i32 %n, %length
+; CHECK-NEXT: [[first_iteration_check:[^ ]+]] = icmp ult i32 1, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop: ; expected after the pass: the guard consumes the condition computed in the preheader
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] ; running sum of loaded elements
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] ; IV: starts at 0, advanced by %i.next
+ %i.offset = add i32 %i, 1 ; separate add from %i.next, used only by the guard
+ %within.bounds = icmp ult i32 %i.offset, %length ; guard condition: %i + 1 u< %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4 ; read array[%i]
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %i.next = add i32 %i, 1
+ %i.next.offset = add i32 %i.next, 1 ; latch operand is one past %i.next
+ %continue = icmp sle i32 %i.next.offset, %n ; latch condition: %i.next + 1 s<= %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ; 0 when the loop was bypassed, otherwise the sum
+ ret i32 %result
+}
+
define i32 @unsupported_latch_pred_loop_0_to_n(i32* %array, i32 %length, i32 %n) {
; CHECK-LABEL: @unsupported_latch_pred_loop_0_to_n
entry:
@@ -362,8 +513,88 @@
ret i32 %result
}
-define i32 @signed_loop_0_to_n_unrelated_iv_range_check(i32* %array, i32 %start, i32 %length, i32 %n) {
-; CHECK-LABEL: @signed_loop_0_to_n_unrelated_iv_range_check
+define i32 @signed_loop_start_to_n_offset_iv_range_check(i32* %array, i32 %start.i,
+ i32 %start.j, i32 %length,
+ i32 %n) { ; guard IV %j and latch IV %i both step by 1 but start at %start.j and %start.i
+; CHECK-LABEL: @signed_loop_start_to_n_offset_iv_range_check
+entry:
+ %tmp5 = icmp sle i32 %n, 0 ; bypass the loop when %n <= 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader: ; expected after the pass: (%n s<= %length + %start.i - %start.j) & (%start.j u< %length)
+; CHECK: loop.preheader:
+; CHECK: [[length_plus_start_i:[^ ]+]] = add i32 %length, %start.i
+; CHECK-NEXT: [[limit:[^ ]+]] = sub i32 [[length_plus_start_i]], %start.j
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, [[limit]]
+; CHECK-NEXT: [[first_iteration_check:[^ ]+]] = icmp ult i32 %start.j, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop: ; expected after the pass: the guard consumes the condition computed in the preheader
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] ; running sum of loaded elements
+ %i = phi i32 [ %i.next, %loop ], [ %start.i, %loop.preheader ] ; latch IV, starts at %start.i
+ %j = phi i32 [ %j.next, %loop ], [ %start.j, %loop.preheader ] ; guard IV, starts at %start.j
+
+ %within.bounds = icmp ult i32 %j, %length ; guard condition: %j u< %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4 ; read array[%i] (indexed by %i, not the guarded %j)
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %j.next = add i32 %j, 1
+ %i.next = add i32 %i, 1
+ %continue = icmp slt i32 %i.next, %n ; latch condition: %i.next s< %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ; 0 when the loop was bypassed, otherwise the sum
+ ret i32 %result
+}
+
+define i32 @signed_loop_0_to_n_different_iv_types(i32* %array, i16 %length, i32 %n) { ; guard IV %j is i16, latch IV %i is i32; guard is expected to stay unchanged
+; CHECK-LABEL: @signed_loop_0_to_n_different_iv_types
+entry:
+ %tmp5 = icmp sle i32 %n, 0 ; bypass the loop when %n <= 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader: ; expected after the pass: nothing is inserted here, only the branch remains
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop: ; expected after the pass: the guard still uses the original %within.bounds compare
+; CHECK: loop:
+; CHECK: %within.bounds = icmp ult i16 %j, %length
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] ; running sum of loaded elements
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] ; 32-bit latch IV
+ %j = phi i16 [ %j.next, %loop ], [ 0, %loop.preheader ] ; 16-bit guard IV
+
+ %within.bounds = icmp ult i16 %j, %length ; guard condition on the i16 IV
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4 ; read array[%i]
+ %loop.acc.next = add i32 %loop.acc, %array.i
+
+ %j.next = add i16 %j, 1
+ %i.next = add i32 %i, 1
+ %continue = icmp slt i32 %i.next, %n ; latch condition on the i32 IV
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ; 0 when the loop was bypassed, otherwise the sum
+ ret i32 %result
+}
+
+define i32 @signed_loop_0_to_n_different_iv_strides(i32* %array, i32 %length, i32 %n) {
+; CHECK-LABEL: @signed_loop_0_to_n_different_iv_strides
entry:
%tmp5 = icmp sle i32 %n, 0
br i1 %tmp5, label %exit, label %loop.preheader
@@ -379,7 +610,7 @@
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
%i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
- %j = phi i32 [ %j.next, %loop ], [ %start, %loop.preheader ]
+ %j = phi i32 [ %j.next, %loop ], [ 0, %loop.preheader ]
%within.bounds = icmp ult i32 %j, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
@@ -389,7 +620,7 @@
%array.i = load i32, i32* %array.i.ptr, align 4
%loop.acc.next = add i32 %loop.acc, %array.i
- %j.next = add nsw i32 %j, 1
+ %j.next = add nsw i32 %j, 2
%i.next = add nsw i32 %i, 1
%continue = icmp slt i32 %i.next, %n
br i1 %continue, label %loop, label %exit