Re-implement the main strength-reduction portion of LoopStrengthReduction.
This new version is much more aggressive about doing "full" reduction in
cases where it reduces register pressure, and also more aggressive about
rewriting induction variables to count down (or up) to zero when doing so
reduces register pressure.

It currently uses fairly simplistic algorithms for finding reuse
opportunities, but it introduces a new framework allows it to combine
multiple strategies at once to form hybrid solutions, instead of doing
all full-reduction or all base+index.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94061 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll b/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
index 7c7a21c..99cb856 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
+++ b/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -loop-reduce -S | grep ugt
-; PR2535
+; RUN: llc -march=x86-64 < %s -o - | grep {cmpl	\\$\[1\], %}
 
 @.str = internal constant [4 x i8] c"%d\0A\00"
 
@@ -16,7 +15,7 @@
         %add166 = or i32 %mul15, 1              ; <i32> [#uses=1] *
         call i32 (i8*, ...)* @printf( i8* noalias  getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
         %inc = add i32 %i.0, 1          ; <i32> [#uses=3]
-        %cmp = icmp ult i32 %inc, 1027          ; <i1> [#uses=1]
+        %cmp = icmp ne i32 %inc, 1027          ; <i1> [#uses=1]
         br i1 %cmp, label %forbody, label %afterfor
 
 afterfor:               ; preds = %forcond
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
index 36941ad..d9abc8b 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
+++ b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
@@ -1,10 +1,9 @@
-; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpl	\$4}
+; RUN: llc < %s -o - | grep {testl	%ecx, %ecx}
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9"
 
-; This is like change-compare-stride-trickiness-1.ll except the comparison
-; happens before the relevant use, so the comparison stride can't be
-; easily changed.
+; The comparison happens before the relevant use, but it can still be rewritten
+; to compare with zero.
 
 define void @foo() nounwind {
 entry:
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
index 8a3978b..ea8a259 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
+++ b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
@@ -1,10 +1,10 @@
-; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpq	\$8}
+; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmp.	\$8}
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9"
 
-; This is like change-compare-stride-trickiness-0.ll except the comparison
-; happens after the relevant use, so the comparison stride can be
-; easily changed.
+; The comparison happens after the relevant use, so the stride can easily
+; be changed. The comparison can be done in a narrower mode than the
+; induction variable.
 
 define void @foo() nounwind {
 entry:
diff --git a/test/Transforms/LoopStrengthReduce/count-to-zero.ll b/test/Transforms/LoopStrengthReduce/count-to-zero.ll
index 8cc3b5c..feb79f8 100644
--- a/test/Transforms/LoopStrengthReduce/count-to-zero.ll
+++ b/test/Transforms/LoopStrengthReduce/count-to-zero.ll
@@ -19,7 +19,7 @@
   %tmp4 = add i32 %c_addr.1, -1                   ; <i32> [#uses=1]
   %c_addr.1.be = select i1 %tmp2, i32 %tmp3, i32 %tmp4 ; <i32> [#uses=1]
   %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=1]
-; CHECK: sub i32 %lsr.iv, 1
+; CHECK: add i32 %lsr.iv, -1
   br label %bb6
 
 bb6:                                              ; preds = %bb3, %entry
diff --git a/test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll b/test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll
deleted file mode 100644
index 4ad5d14..0000000
--- a/test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt < %s -loop-reduce -S | FileCheck %s
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
-entry:
-  br i1 undef, label %bb4.preheader, label %bb.nph8
-
-bb4.preheader:                                    ; preds = %entry
-  br label %bb4
-
-bb1:                                              ; preds = %bb4
-  br i1 undef, label %bb.nph8, label %bb3
-
-bb3:                                              ; preds = %bb1
-  %phitmp = add i32 %indvar, 1                    ; <i32> [#uses=1]
-  br label %bb4
-
-bb4:                                              ; preds = %bb3, %bb4.preheader
-; CHECK: %lsr.iv = phi
-; CHECK: %lsr.iv.next = add i32 %lsr.iv, 1
-; CHECK: %0 = icmp slt i32 %lsr.iv.next, %argc
-  %indvar = phi i32 [ 1, %bb4.preheader ], [ %phitmp, %bb3 ] ; <i32> [#uses=2]
-  %0 = icmp slt i32 %indvar, %argc                ; <i1> [#uses=1]
-  br i1 %0, label %bb1, label %bb.nph8
-
-bb.nph8:                                          ; preds = %bb4, %bb1, %entry
-  unreachable
-}