Reapply the new LoopStrengthReduction code, with compile time and
bug fixes, and with improved heuristics for analyzing foreign-loop
addrecs.
This change also flattens IVUsers, eliminating the stride-oriented
groupings, which makes it easier to work with.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95975 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll
index 72ec8ef..52ab871 100644
--- a/test/CodeGen/ARM/arm-negative-stride.ll
+++ b/test/CodeGen/ARM/arm-negative-stride.ll
@@ -1,7 +1,32 @@
; RUN: llc < %s -march=arm | FileCheck %s
+; This loop is rewritten with an indvar which counts down, which
+; frees up a register from holding the trip count.
+
define void @test(i32* %P, i32 %A, i32 %i) nounwind {
entry:
+; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2]
+ icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
+ br i1 %0, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
+ %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
+ %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
+ store i32 %A, i32* %tmp2
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
+ br i1 %1, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+; This loop has a non-address use of the count-up indvar, so
+; it'll remain. Now the original store uses a negative-stride address.
+
+define void @test_with_forced_iv(i32* %P, i32 %A, i32 %i) nounwind {
+entry:
; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2]
icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
br i1 %0, label %return, label %bb
@@ -11,6 +36,7 @@
%i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
%tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
store i32 %A, i32* %tmp2
+ store i32 %indvar, i32* null
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
br i1 %1, label %return, label %bb
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
index 507ec2c..1bbb96d 100644
--- a/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed}
-; RUN: llc < %s -stats |& grep {.*Number of re-materialization}
+; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
; This test really wants to check that the resultant "cond_true" block only
; has a single store in it, and that cond_true55 only has code to materialize
; the constant and do a store. We do *not* want something like this: