[LV] For some IVs, use vector phis instead of widening in the loop body

Previously, whenever we needed a vector IV, we would create it on the fly, by splatting the scalar IV and adding a step vector. Instead, we can create a real vector IV. This tends to save a couple of instructions per iteration.

This only changes the behavior for the most basic case: integer primary IVs with a constant step.

Differential Revision: http://reviews.llvm.org/D20315

llvm-svn: 271410

commit: 3a3c64d23e3d7be3baa93882cc2a23098a032849 [log] [tgz]
author: Michael Kuperstein <mkuper@google.com> Wed Jun 01 17:16:46 2016 +0000
committer: Michael Kuperstein <mkuper@google.com> Wed Jun 01 17:16:46 2016 +0000
tree: 9ab0686375b9e8a35d94ba9910aecbbc9bc2749a
parent: 2c3933f4029ee94ea9070d047293ac2fa2b1f6f0 [diff] [blame]
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 8e3cf36..c2d4d96 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll

@@ -1,4 +1,6 @@
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -27,8 +29,6 @@
   ret void
 }
 
-; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
-
 ; Make sure we remove unneeded vectorization of induction variables.
 ; In order for instcombine to cleanup the vectorized induction variables that we
 ; create in the loop vectorizer we need to perform some form of redundancy
@@ -241,3 +241,64 @@
  exit:
   ret void
 }
+
+; Check that we generate vectorized IVs in the pre-header
+; instead of widening the scalar IV inside the loop, when
+; we know how to do that.
+; IND-LABEL: veciv
+; IND: vector.body:
+; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; IND: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %step.add, %vector.body ]
+; IND: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
+; IND: %index.next = add i32 %index, 2
+; IND: %[[CMP:.*]] = icmp eq i32 %index.next
+; IND: br i1 %[[CMP]]
+; UNROLL-LABEL: veciv
+; UNROLL: vector.body:
+; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %step.add1, %vector.body ]
+; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
+; UNROLL: %step.add1 = add <2 x i32> %vec.ind, <i32 4, i32 4>
+; UNROLL: %index.next = add i32 %index, 4
+; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next
+; UNROLL: br i1 %[[CMP]]
+define void @veciv(i32* nocapture %a, i32 %start, i32 %k) { ; NOTE(review): %start is never referenced in the body
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; i32 induction variable, starts at 0
+  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv ; address of a[iv]
+  store i32 %indvars.iv, i32* %arrayidx, align 4 ; a[iv] = iv: the IV's value itself is stored, not only used as an index
+  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 ; constant step of 1
+  %exitcond = icmp eq i32 %indvars.iv.next, %k ; leave the loop once iv + 1 == %k
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+; IND-LABEL: trunciv
+; IND: vector.body:
+; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; IND: %[[VECIND:.*]] = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %[[STEPADD:.*]], %vector.body ]
+; IND: %[[STEPADD]] = add <2 x i32> %[[VECIND]], <i32 2, i32 2>
+; IND: %index.next = add i64 %index, 2
+; IND: %[[CMP:.*]] = icmp eq i64 %index.next
+; IND: br i1 %[[CMP]]
+define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) { ; NOTE(review): %start is never referenced in the body
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; i64 induction variable, starts at 0
+  %trunc.iv = trunc i64 %indvars.iv to i32 ; i32 truncation of the i64 IV; the only form in which the IV reaches the address and the store
+  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv ; address of a[trunc(iv)]
+  store i32 %trunc.iv, i32* %arrayidx, align 4 ; a[trunc(iv)] = trunc(iv)
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; constant step of 1 on the i64 IV
+  %exitcond = icmp eq i64 %indvars.iv.next, %k ; exit test uses the untruncated i64 IV
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  ret void
+}
commit	3a3c64d23e3d7be3baa93882cc2a23098a032849	[log] [tgz]
author	Michael Kuperstein <mkuper@google.com>	Wed Jun 01 17:16:46 2016 +0000
committer	Michael Kuperstein <mkuper@google.com>	Wed Jun 01 17:16:46 2016 +0000
tree	9ab0686375b9e8a35d94ba9910aecbbc9bc2749a
parent	2c3933f4029ee94ea9070d047293ac2fa2b1f6f0 [diff] [blame]