[LV] Avoid unnecessary IV scalar-to-vector-to-scalar conversions

This patch prevents increases in the number of instructions, pre-instcombine, due to induction variable scalarization. An increase in instructions can lead to an increase in the compile-time required to simplify the induction variables. We now maintain a new map for scalarized induction variables to prevent us from converting between the scalar and vector forms. This patch should resolve compile-time regressions seen after r274627.

llvm-svn: 275419

commit: 3c3b4a257b2fd34e572f9df28404619734251b76 [log] [tgz]
author: Matthew Simpson <mssimpso@codeaurora.org> Thu Jul 14 14:36:06 2016 +0000
committer: Matthew Simpson <mssimpso@codeaurora.org> Thu Jul 14 14:36:06 2016 +0000
tree: 162ec31f71590eb99e334167a7ab2d0d4e6e4d66
parent: 4eaedde53031bf4c4342b983b08019c206698cee [diff]
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index beee397..c1f0bd9 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll

@@ -1,6 +1,7 @@
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses -instcombine -S | FileCheck %s --check-prefix=INTERLEAVE
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -73,6 +74,26 @@
 ; for (int i = 0; i < n; ++i)
 ;   sum += a[i];
 ;
+; CHECK-LABEL: @scalarize_induction_variable_01(
+; CHECK: vector.body:
+; CHECK:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK:   %[[i0:.+]] = add i64 %index, 0
+; CHECK:   %[[i1:.+]] = add i64 %index, 1
+; CHECK:   getelementptr inbounds i64, i64* %a, i64 %[[i0]]
+; CHECK:   getelementptr inbounds i64, i64* %a, i64 %[[i1]]
+;
+; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_01(
+; UNROLL-NO-IC: vector.body:
+; UNROLL-NO-IC:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL-NO-IC:   %[[i0:.+]] = add i64 %index, 0
+; UNROLL-NO-IC:   %[[i1:.+]] = add i64 %index, 1
+; UNROLL-NO-IC:   %[[i2:.+]] = add i64 %index, 2
+; UNROLL-NO-IC:   %[[i3:.+]] = add i64 %index, 3
+; UNROLL-NO-IC:   getelementptr inbounds i64, i64* %a, i64 %[[i0]]
+; UNROLL-NO-IC:   getelementptr inbounds i64, i64* %a, i64 %[[i1]]
+; UNROLL-NO-IC:   getelementptr inbounds i64, i64* %a, i64 %[[i2]]
+; UNROLL-NO-IC:   getelementptr inbounds i64, i64* %a, i64 %[[i3]]
+;
 ; IND-LABEL: @scalarize_induction_variable_01(
 ; IND:     vector.body:
 ; IND:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
@@ -112,6 +133,34 @@
 ; for (int i = 0; i < n; i += 8)
 ;   s += (a[i] + b[i] + 1.0f);
 ;
+; CHECK-LABEL: @scalarize_induction_variable_02(
+; CHECK: vector.body:
+; CHECK:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK:   %offset.idx = shl i64 %index, 3
+; CHECK:   %[[i0:.+]] = add i64 %offset.idx, 0
+; CHECK:   %[[i1:.+]] = add i64 %offset.idx, 8
+; CHECK:   getelementptr inbounds float, float* %a, i64 %[[i0]]
+; CHECK:   getelementptr inbounds float, float* %a, i64 %[[i1]]
+; CHECK:   getelementptr inbounds float, float* %b, i64 %[[i0]]
+; CHECK:   getelementptr inbounds float, float* %b, i64 %[[i1]]
+;
+; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02(
+; UNROLL-NO-IC: vector.body:
+; UNROLL-NO-IC:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL-NO-IC:   %offset.idx = shl i64 %index, 3
+; UNROLL-NO-IC:   %[[i0:.+]] = add i64 %offset.idx, 0
+; UNROLL-NO-IC:   %[[i1:.+]] = add i64 %offset.idx, 8
+; UNROLL-NO-IC:   %[[i2:.+]] = add i64 %offset.idx, 16
+; UNROLL-NO-IC:   %[[i3:.+]] = add i64 %offset.idx, 24
+; UNROLL-NO-IC:   getelementptr inbounds float, float* %a, i64 %[[i0]]
+; UNROLL-NO-IC:   getelementptr inbounds float, float* %a, i64 %[[i1]]
+; UNROLL-NO-IC:   getelementptr inbounds float, float* %a, i64 %[[i2]]
+; UNROLL-NO-IC:   getelementptr inbounds float, float* %a, i64 %[[i3]]
+; UNROLL-NO-IC:   getelementptr inbounds float, float* %b, i64 %[[i0]]
+; UNROLL-NO-IC:   getelementptr inbounds float, float* %b, i64 %[[i1]]
+; UNROLL-NO-IC:   getelementptr inbounds float, float* %b, i64 %[[i2]]
+; UNROLL-NO-IC:   getelementptr inbounds float, float* %b, i64 %[[i3]]
+;
 ; IND-LABEL: @scalarize_induction_variable_02(
 ; IND: vector.body:
 ; IND:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]

diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
index 7eb3510..24ffb61 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll

@@ -8,21 +8,13 @@
 ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; CHECK: %offset.idx = sub i64 %startval, %index
 ; CHECK: %[[a0:.+]] = add i64 %offset.idx, 0
-; CHECK: %[[v0:.+]] = insertelement <4 x i64> undef, i64 %[[a0]], i64 0
 ; CHECK: %[[a1:.+]] = add i64 %offset.idx, -1
-; CHECK: %[[v1:.+]] = insertelement <4 x i64> %[[v0]], i64 %[[a1]], i64 1
 ; CHECK: %[[a2:.+]] = add i64 %offset.idx, -2
-; CHECK: %[[v2:.+]] = insertelement <4 x i64> %[[v1]], i64 %[[a2]], i64 2
 ; CHECK: %[[a3:.+]] = add i64 %offset.idx, -3
-; CHECK: %[[v3:.+]] = insertelement <4 x i64> %[[v2]], i64 %[[a3]], i64 3
 ; CHECK: %[[a4:.+]] = add i64 %offset.idx, -4
-; CHECK: %[[v4:.+]] = insertelement <4 x i64> undef, i64 %[[a4]], i64 0
 ; CHECK: %[[a5:.+]] = add i64 %offset.idx, -5
-; CHECK: %[[v5:.+]] = insertelement <4 x i64> %[[v4]], i64 %[[a5]], i64 1
 ; CHECK: %[[a6:.+]] = add i64 %offset.idx, -6
-; CHECK: %[[v6:.+]] = insertelement <4 x i64> %[[v5]], i64 %[[a6]], i64 2
 ; CHECK: %[[a7:.+]] = add i64 %offset.idx, -7
-; CHECK: %[[v7:.+]] = insertelement <4 x i64> %[[v6]], i64 %[[a7]], i64 3
 
 define i32 @reverse_induction_i64(i64 %startval, i32 * %ptr) {
 entry:
@@ -48,21 +40,13 @@
 ; CHECK: %index = phi i128 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; CHECK: %offset.idx = sub i128 %startval, %index
 ; CHECK: %[[a0:.+]] = add i128 %offset.idx, 0
-; CHECK: %[[v0:.+]] = insertelement <4 x i128> undef, i128 %[[a0]], i64 0
 ; CHECK: %[[a1:.+]] = add i128 %offset.idx, -1
-; CHECK: %[[v1:.+]] = insertelement <4 x i128> %[[v0]], i128 %[[a1]], i64 1
 ; CHECK: %[[a2:.+]] = add i128 %offset.idx, -2
-; CHECK: %[[v2:.+]] = insertelement <4 x i128> %[[v1]], i128 %[[a2]], i64 2
 ; CHECK: %[[a3:.+]] = add i128 %offset.idx, -3
-; CHECK: %[[v3:.+]] = insertelement <4 x i128> %[[v2]], i128 %[[a3]], i64 3
 ; CHECK: %[[a4:.+]] = add i128 %offset.idx, -4
-; CHECK: %[[v4:.+]] = insertelement <4 x i128> undef, i128 %[[a4]], i64 0
 ; CHECK: %[[a5:.+]] = add i128 %offset.idx, -5
-; CHECK: %[[v5:.+]] = insertelement <4 x i128> %[[v4]], i128 %[[a5]], i64 1
 ; CHECK: %[[a6:.+]] = add i128 %offset.idx, -6
-; CHECK: %[[v6:.+]] = insertelement <4 x i128> %[[v5]], i128 %[[a6]], i64 2
 ; CHECK: %[[a7:.+]] = add i128 %offset.idx, -7
-; CHECK: %[[v7:.+]] = insertelement <4 x i128> %[[v6]], i128 %[[a7]], i64 3
 
 define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) {
 entry:
@@ -88,21 +72,13 @@
 ; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; CHECK: %offset.idx = sub i16 %startval, {{.*}}
 ; CHECK: %[[a0:.+]] = add i16 %offset.idx, 0
-; CHECK: %[[v0:.+]] = insertelement <4 x i16> undef, i16 %[[a0]], i64 0
 ; CHECK: %[[a1:.+]] = add i16 %offset.idx, -1
-; CHECK: %[[v1:.+]] = insertelement <4 x i16> %[[v0]], i16 %[[a1]], i64 1
 ; CHECK: %[[a2:.+]] = add i16 %offset.idx, -2
-; CHECK: %[[v2:.+]] = insertelement <4 x i16> %[[v1]], i16 %[[a2]], i64 2
 ; CHECK: %[[a3:.+]] = add i16 %offset.idx, -3
-; CHECK: %[[v3:.+]] = insertelement <4 x i16> %[[v2]], i16 %[[a3]], i64 3
 ; CHECK: %[[a4:.+]] = add i16 %offset.idx, -4
-; CHECK: %[[v4:.+]] = insertelement <4 x i16> undef, i16 %[[a4]], i64 0
 ; CHECK: %[[a5:.+]] = add i16 %offset.idx, -5
-; CHECK: %[[v5:.+]] = insertelement <4 x i16> %[[v4]], i16 %[[a5]], i64 1
 ; CHECK: %[[a6:.+]] = add i16 %offset.idx, -6
-; CHECK: %[[v6:.+]] = insertelement <4 x i16> %[[v5]], i16 %[[a6]], i64 2
 ; CHECK: %[[a7:.+]] = add i16 %offset.idx, -7
-; CHECK: %[[v7:.+]] = insertelement <4 x i16> %[[v6]], i16 %[[a7]], i64 3
 
 define i32 @reverse_induction_i16(i16 %startval, i32 * %ptr) {
 entry:
commit	3c3b4a257b2fd34e572f9df28404619734251b76	[log] [tgz]
author	Matthew Simpson <mssimpso@codeaurora.org>	Thu Jul 14 14:36:06 2016 +0000
committer	Matthew Simpson <mssimpso@codeaurora.org>	Thu Jul 14 14:36:06 2016 +0000
tree	162ec31f71590eb99e334167a7ab2d0d4e6e4d66
parent	4eaedde53031bf4c4342b983b08019c206698cee [diff]