| Sanjay Patel | b653de1 | 2014-09-10 17:58:16 +0000 | [diff] [blame] | 1 | ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s | 
| Michael Kuperstein | 3a3c64d | 2016-06-01 17:16:46 +0000 | [diff] [blame] | 2 | ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND | 
|  | 3 | ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL | 
| Matthew Simpson | 3c3b4a2 | 2016-07-14 14:36:06 +0000 | [diff] [blame] | 4 | ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC | 
| Matthew Simpson | 433cb1d | 2016-07-06 14:26:59 +0000 | [diff] [blame] | 5 | ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses -instcombine -S | FileCheck %s --check-prefix=INTERLEAVE | 
| Arnold Schwaighofer | 2e7a922 | 2013-05-14 00:21:18 +0000 | [diff] [blame] | 6 |  | 
|  | 7 | target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" | 
|  | 8 |  | 
|  | 9 | ; Make sure that we can handle multiple integer induction variables. | 
| Matthew Simpson | df124a7 | 2017-02-10 16:15:26 +0000 | [diff] [blame] | 10 | ; | 
| Matt Arsenault | e64c7c7 | 2013-10-02 20:29:00 +0000 | [diff] [blame] | 11 | ; CHECK-LABEL: @multi_int_induction( | 
| Matthew Simpson | df124a7 | 2017-02-10 16:15:26 +0000 | [diff] [blame] | 12 | ; CHECK:       vector.body: | 
|  | 13 | ; CHECK-NEXT:    %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 14 | ; CHECK-NEXT:    %vec.ind = phi <2 x i32> [ <i32 190, i32 191>, %vector.ph ], [ %vec.ind.next, %vector.body ] | 
|  | 15 | ; CHECK:         [[TMP3:%.*]] = add i64 %index, 0 | 
|  | 16 | ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* %A, i64 [[TMP3]] | 
| Daniel Neilson | 9e4bbe8 | 2018-05-01 15:35:08 +0000 | [diff] [blame] | 17 | ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 | 
| Matthew Simpson | df124a7 | 2017-02-10 16:15:26 +0000 | [diff] [blame] | 18 | ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>* | 
|  | 19 | ; CHECK-NEXT:    store <2 x i32> %vec.ind, <2 x i32>* [[TMP6]], align 4 | 
|  | 20 | ; CHECK:         %index.next = add i64 %index, 2 | 
|  | 21 | ; CHECK-NEXT:    %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2> | 
|  | 22 | ; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body | 
; Scalar source loop with two integer inductions that advance together:
; %indvars.iv (i64, starts at 0) indexes %A, and %count.09 (i32, starts at 190)
; is the value written to each element.
| Arnold Schwaighofer | 2e7a922 | 2013-05-14 00:21:18 +0000 | [diff] [blame] | 23 | define void @multi_int_induction(i32* %A, i32 %N) { | 
|  | 24 | for.body.lr.ph: | 
|  | 25 | br label %for.body | 
|  | 26 |  | 
|  | 27 | for.body: | 
|  | 28 | %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] | 
|  | 29 | %count.09 = phi i32 [ 190, %for.body.lr.ph ], [ %inc, %for.body ] | 
| David Blaikie | 79e6c74 | 2015-02-27 19:29:02 +0000 | [diff] [blame] | 30 | %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv | 
| Arnold Schwaighofer | 2e7a922 | 2013-05-14 00:21:18 +0000 | [diff] [blame] | 31 | store i32 %count.09, i32* %arrayidx2, align 4 | 
|  | 32 | %inc = add nsw i32 %count.09, 1 | 
|  | 33 | %indvars.iv.next = add i64 %indvars.iv, 1 | 
; The loop keeps running while the truncated 64-bit index differs from %N.
|  | 34 | %lftr.wideiv = trunc i64 %indvars.iv.next to i32 | 
|  | 35 | %exitcond = icmp ne i32 %lftr.wideiv, %N | 
|  | 36 | br i1 %exitcond, label %for.body, label %for.end | 
|  | 37 |  | 
|  | 38 | for.end: | 
|  | 39 | ret void | 
|  | 40 | } | 
|  | 41 |  | 
| Arnold Schwaighofer | a846a7f | 2013-11-01 22:18:19 +0000 | [diff] [blame] | 42 | ; Make sure we remove unneeded vectorization of induction variables. | 
|  | 43 | ; In order for instcombine to clean up the vectorized induction variables that we | 
|  | 44 | ; create in the loop vectorizer we need to perform some form of redundancy | 
|  | 45 | ; elimination to get rid of multiple uses. | 
|  | 46 |  | 
|  | 47 | ; IND-LABEL: scalar_use | 
|  | 48 |  | 
|  | 49 | ; IND:     br label %vector.body | 
|  | 50 | ; IND:     vector.body: | 
|  | 51 | ;   Vectorized induction variable. | 
|  | 52 | ; IND-NOT:  insertelement <2 x i64> | 
|  | 53 | ; IND-NOT:  shufflevector <2 x i64> | 
|  | 54 | ; IND:     br {{.*}}, label %vector.body | 
|  | 55 |  | 
; Scalar source loop: each iteration computes
;   a[iv + offset] = a[iv + offset] + b * a[iv + offset2]
; so %iv is only consumed by the two scalar adds that feed the address
; computations.
|  | 56 | define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) { | 
|  | 57 | entry: | 
|  | 58 | br label %for.body | 
|  | 59 |  | 
|  | 60 | for.body: | 
|  | 61 | %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | 
; First use of the induction variable: address of the element that is both
; loaded and stored.
|  | 62 | %ind.sum = add i64 %iv, %offset | 
| David Blaikie | 79e6c74 | 2015-02-27 19:29:02 +0000 | [diff] [blame] | 63 | %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum | 
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 64 | %l1 = load float, float* %arr.idx, align 4 | 
; Second use of the induction variable: address of the element scaled by %b.
| Arnold Schwaighofer | a846a7f | 2013-11-01 22:18:19 +0000 | [diff] [blame] | 65 | %ind.sum2 = add i64 %iv, %offset2 | 
| David Blaikie | 79e6c74 | 2015-02-27 19:29:02 +0000 | [diff] [blame] | 66 | %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2 | 
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 67 | %l2 = load float, float* %arr.idx2, align 4 | 
| Arnold Schwaighofer | a846a7f | 2013-11-01 22:18:19 +0000 | [diff] [blame] | 68 | %m = fmul fast float %b, %l2 | 
|  | 69 | %ad = fadd fast float %l1, %m | 
|  | 70 | store float %ad, float* %arr.idx, align 4 | 
|  | 71 | %iv.next = add nuw nsw i64 %iv, 1 | 
; Leave the loop once the incremented index equals %n.
|  | 72 | %exitcond = icmp eq i64 %iv.next, %n | 
|  | 73 | br i1 %exitcond, label %loopexit, label %for.body | 
|  | 74 |  | 
|  | 75 | loopexit: | 
|  | 76 | ret void | 
|  | 77 | } | 
| Arnold Schwaighofer | b72cb4e | 2013-11-18 13:14:32 +0000 | [diff] [blame] | 78 |  | 
| Matthew Simpson | 433cb1d | 2016-07-06 14:26:59 +0000 | [diff] [blame] | 79 | ; Make sure we don't create a vector induction phi node that is unused. | 
|  | 80 | ; Scalarize the step vectors instead. | 
|  | 81 | ; | 
|  | 82 | ; for (int i = 0; i < n; ++i) | 
|  | 83 | ;   sum += a[i]; | 
|  | 84 | ; | 
| Matthew Simpson | 3c3b4a2 | 2016-07-14 14:36:06 +0000 | [diff] [blame] | 85 | ; CHECK-LABEL: @scalarize_induction_variable_01( | 
|  | 86 | ; CHECK: vector.body: | 
|  | 87 | ; CHECK:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 88 | ; CHECK:   %[[i0:.+]] = add i64 %index, 0 | 
| Matthew Simpson | 3c3b4a2 | 2016-07-14 14:36:06 +0000 | [diff] [blame] | 89 | ; CHECK:   getelementptr inbounds i64, i64* %a, i64 %[[i0]] | 
| Matthew Simpson | 3c3b4a2 | 2016-07-14 14:36:06 +0000 | [diff] [blame] | 90 | ; | 
|  | 91 | ; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_01( | 
|  | 92 | ; UNROLL-NO-IC: vector.body: | 
|  | 93 | ; UNROLL-NO-IC:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 94 | ; UNROLL-NO-IC:   %[[i0:.+]] = add i64 %index, 0 | 
| Matthew Simpson | 3c3b4a2 | 2016-07-14 14:36:06 +0000 | [diff] [blame] | 95 | ; UNROLL-NO-IC:   %[[i2:.+]] = add i64 %index, 2 | 
| Matthew Simpson | 3c3b4a2 | 2016-07-14 14:36:06 +0000 | [diff] [blame] | 96 | ; UNROLL-NO-IC:   getelementptr inbounds i64, i64* %a, i64 %[[i0]] | 
| Matthew Simpson | 3c3b4a2 | 2016-07-14 14:36:06 +0000 | [diff] [blame] | 97 | ; UNROLL-NO-IC:   getelementptr inbounds i64, i64* %a, i64 %[[i2]] | 
| Matthew Simpson | 3c3b4a2 | 2016-07-14 14:36:06 +0000 | [diff] [blame] | 98 | ; | 
| Matthew Simpson | 433cb1d | 2016-07-06 14:26:59 +0000 | [diff] [blame] | 99 | ; IND-LABEL: @scalarize_induction_variable_01( | 
|  | 100 | ; IND:     vector.body: | 
|  | 101 | ; IND:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 102 | ; IND-NOT:   add i64 {{.*}}, 2 | 
|  | 103 | ; IND:       getelementptr inbounds i64, i64* %a, i64 %index | 
|  | 104 | ; | 
|  | 105 | ; UNROLL-LABEL: @scalarize_induction_variable_01( | 
|  | 106 | ; UNROLL:     vector.body: | 
|  | 107 | ; UNROLL:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 108 | ; UNROLL-NOT:   add i64 {{.*}}, 4 | 
|  | 109 | ; UNROLL:       %[[g1:.+]] = getelementptr inbounds i64, i64* %a, i64 %index | 
| Daniel Neilson | 9e4bbe8 | 2018-05-01 15:35:08 +0000 | [diff] [blame] | 110 | ; UNROLL:       getelementptr inbounds i64, i64* %[[g1]], i64 2 | 
| Matthew Simpson | 433cb1d | 2016-07-06 14:26:59 +0000 | [diff] [blame] | 111 |  | 
; Scalar source loop: integer sum reduction over a[i] with a unit-stride
; induction variable. %i is used only to form the load address.
|  | 112 | define i64 @scalarize_induction_variable_01(i64 *%a, i64 %n) { | 
|  | 113 | entry: | 
|  | 114 | br label %for.body | 
|  | 115 |  | 
|  | 116 | for.body: | 
|  | 117 | %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] | 
|  | 118 | %sum = phi i64 [ %2, %for.body ], [ 0, %entry ] | 
|  | 119 | %0 = getelementptr inbounds i64, i64* %a, i64 %i | 
|  | 120 | %1 = load i64, i64* %0, align 8 | 
|  | 121 | %2 = add i64 %1, %sum | 
|  | 122 | %i.next = add nuw nsw i64 %i, 1 | 
; Signed compare: iterate while the incremented index is below %n.
|  | 123 | %cond = icmp slt i64 %i.next, %n | 
|  | 124 | br i1 %cond, label %for.body, label %for.end | 
|  | 125 |  | 
|  | 126 | for.end: | 
; Single-input phi that carries the final sum out of the loop.
|  | 127 | %3  = phi i64 [ %2, %for.body ] | 
|  | 128 | ret i64 %3 | 
|  | 129 | } | 
|  | 130 |  | 
|  | 131 | ; Make sure we scalarize the step vectors used for the pointer arithmetic. We | 
|  | 132 | ; can't easily simplify vectorized step vectors. | 
|  | 133 | ; | 
|  | 134 | ; float s = 0; | 
|  | 135 | ; for (int i = 0; i < n; i += 8) | 
|  | 136 | ;   s += (a[i] + b[i] + 1.0f); | 
|  | 137 | ; | 
| Matthew Simpson | 3c3b4a2 | 2016-07-14 14:36:06 +0000 | [diff] [blame] | 138 | ; CHECK-LABEL: @scalarize_induction_variable_02( | 
|  | 139 | ; CHECK: vector.body: | 
|  | 140 | ; CHECK:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
| Max Kazantsev | b073696 | 2018-10-08 05:46:29 +0000 | [diff] [blame] | 141 | ; CHECK:   %offset.idx = mul i64 %index, 8 | 
| Matthew Simpson | 3c3b4a2 | 2016-07-14 14:36:06 +0000 | [diff] [blame] | 142 | ; CHECK:   %[[i0:.+]] = add i64 %offset.idx, 0 | 
|  | 143 | ; CHECK:   %[[i1:.+]] = add i64 %offset.idx, 8 | 
|  | 144 | ; CHECK:   getelementptr inbounds float, float* %a, i64 %[[i0]] | 
|  | 145 | ; CHECK:   getelementptr inbounds float, float* %a, i64 %[[i1]] | 
|  | 146 | ; CHECK:   getelementptr inbounds float, float* %b, i64 %[[i0]] | 
|  | 147 | ; CHECK:   getelementptr inbounds float, float* %b, i64 %[[i1]] | 
|  | 148 | ; | 
|  | 149 | ; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02( | 
|  | 150 | ; UNROLL-NO-IC: vector.body: | 
|  | 151 | ; UNROLL-NO-IC:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
| Max Kazantsev | b073696 | 2018-10-08 05:46:29 +0000 | [diff] [blame] | 152 | ; UNROLL-NO-IC:   %offset.idx = mul i64 %index, 8 | 
| Matthew Simpson | 3c3b4a2 | 2016-07-14 14:36:06 +0000 | [diff] [blame] | 153 | ; UNROLL-NO-IC:   %[[i0:.+]] = add i64 %offset.idx, 0 | 
|  | 154 | ; UNROLL-NO-IC:   %[[i1:.+]] = add i64 %offset.idx, 8 | 
|  | 155 | ; UNROLL-NO-IC:   %[[i2:.+]] = add i64 %offset.idx, 16 | 
|  | 156 | ; UNROLL-NO-IC:   %[[i3:.+]] = add i64 %offset.idx, 24 | 
|  | 157 | ; UNROLL-NO-IC:   getelementptr inbounds float, float* %a, i64 %[[i0]] | 
|  | 158 | ; UNROLL-NO-IC:   getelementptr inbounds float, float* %a, i64 %[[i1]] | 
|  | 159 | ; UNROLL-NO-IC:   getelementptr inbounds float, float* %a, i64 %[[i2]] | 
|  | 160 | ; UNROLL-NO-IC:   getelementptr inbounds float, float* %a, i64 %[[i3]] | 
|  | 161 | ; UNROLL-NO-IC:   getelementptr inbounds float, float* %b, i64 %[[i0]] | 
|  | 162 | ; UNROLL-NO-IC:   getelementptr inbounds float, float* %b, i64 %[[i1]] | 
|  | 163 | ; UNROLL-NO-IC:   getelementptr inbounds float, float* %b, i64 %[[i2]] | 
|  | 164 | ; UNROLL-NO-IC:   getelementptr inbounds float, float* %b, i64 %[[i3]] | 
|  | 165 | ; | 
| Matthew Simpson | 433cb1d | 2016-07-06 14:26:59 +0000 | [diff] [blame] | 166 | ; IND-LABEL: @scalarize_induction_variable_02( | 
|  | 167 | ; IND: vector.body: | 
|  | 168 | ; IND:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 169 | ; IND:   %[[i0:.+]] = shl i64 %index, 3 | 
|  | 170 | ; IND:   %[[i1:.+]] = or i64 %[[i0]], 8 | 
|  | 171 | ; IND:   getelementptr inbounds float, float* %a, i64 %[[i0]] | 
|  | 172 | ; IND:   getelementptr inbounds float, float* %a, i64 %[[i1]] | 
|  | 173 | ; | 
|  | 174 | ; UNROLL-LABEL: @scalarize_induction_variable_02( | 
|  | 175 | ; UNROLL: vector.body: | 
|  | 176 | ; UNROLL:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 177 | ; UNROLL:   %[[i0:.+]] = shl i64 %index, 3 | 
|  | 178 | ; UNROLL:   %[[i1:.+]] = or i64 %[[i0]], 8 | 
|  | 179 | ; UNROLL:   %[[i2:.+]] = or i64 %[[i0]], 16 | 
|  | 180 | ; UNROLL:   %[[i3:.+]] = or i64 %[[i0]], 24 | 
|  | 181 | ; UNROLL:   getelementptr inbounds float, float* %a, i64 %[[i0]] | 
|  | 182 | ; UNROLL:   getelementptr inbounds float, float* %a, i64 %[[i1]] | 
|  | 183 | ; UNROLL:   getelementptr inbounds float, float* %a, i64 %[[i2]] | 
|  | 184 | ; UNROLL:   getelementptr inbounds float, float* %a, i64 %[[i3]] | 
|  | 185 |  | 
; Scalar source loop: float reduction s += 1.0 + a[i] + b[i], where the
; induction variable advances by 8 per iteration and is used only to address
; %a and %b.
|  | 186 | define float @scalarize_induction_variable_02(float* %a, float* %b, i64 %n) { | 
|  | 187 | entry: | 
|  | 188 | br label %for.body | 
|  | 189 |  | 
|  | 190 | for.body: | 
|  | 191 | %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] | 
|  | 192 | %s = phi float [ 0.0, %entry ], [ %6, %for.body ] | 
|  | 193 | %0 = getelementptr inbounds float, float* %a, i64 %i | 
|  | 194 | %1 = load float, float* %0, align 4 | 
|  | 195 | %2 = getelementptr inbounds float, float* %b, i64 %i | 
|  | 196 | %3 = load float, float* %2, align 4 | 
; Reduction chain; every fadd carries the fast flag.
|  | 197 | %4 = fadd fast float %s, 1.0 | 
|  | 198 | %5 = fadd fast float %4, %1 | 
|  | 199 | %6 = fadd fast float %5, %3 | 
; Non-unit step: the index moves forward by 8 elements.
|  | 200 | %i.next = add nuw nsw i64 %i, 8 | 
|  | 201 | %cond = icmp slt i64 %i.next, %n | 
|  | 202 | br i1 %cond, label %for.body, label %for.end | 
|  | 203 |  | 
|  | 204 | for.end: | 
; Single-input phi that carries the final reduction value out of the loop.
|  | 205 | %s.lcssa = phi float [ %6, %for.body ] | 
|  | 206 | ret float %s.lcssa | 
|  | 207 | } | 
|  | 208 |  | 
|  | 209 | ; Make sure we scalarize the step vectors used for the pointer arithmetic. We | 
|  | 210 | ; can't easily simplify vectorized step vectors. (Interleaved accesses.) | 
|  | 211 | ; | 
|  | 212 | ; for (int i = 0; i < n; ++i) | 
|  | 213 | ;   p[i].f ^= y; | 
|  | 214 | ; | 
|  | 215 | ; INTERLEAVE-LABEL: @scalarize_induction_variable_03( | 
|  | 216 | ; INTERLEAVE: vector.body: | 
|  | 217 | ; INTERLEAVE:   %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 218 | ; INTERLEAVE:   %[[i1:.+]] = or i64 %[[i0]], 1 | 
|  | 219 | ; INTERLEAVE:   %[[i2:.+]] = or i64 %[[i0]], 2 | 
|  | 220 | ; INTERLEAVE:   %[[i3:.+]] = or i64 %[[i0]], 3 | 
|  | 221 | ; INTERLEAVE:   %[[i4:.+]] = or i64 %[[i0]], 4 | 
|  | 222 | ; INTERLEAVE:   %[[i5:.+]] = or i64 %[[i0]], 5 | 
|  | 223 | ; INTERLEAVE:   %[[i6:.+]] = or i64 %[[i0]], 6 | 
|  | 224 | ; INTERLEAVE:   %[[i7:.+]] = or i64 %[[i0]], 7 | 
| Matthew Simpson | 18d8898 | 2016-08-02 15:25:16 +0000 | [diff] [blame] | 225 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i0]], i32 1 | 
|  | 226 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i1]], i32 1 | 
|  | 227 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i2]], i32 1 | 
|  | 228 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i3]], i32 1 | 
|  | 229 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i4]], i32 1 | 
|  | 230 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i5]], i32 1 | 
|  | 231 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i6]], i32 1 | 
|  | 232 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i7]], i32 1 | 
| Matthew Simpson | 433cb1d | 2016-07-06 14:26:59 +0000 | [diff] [blame] | 233 |  | 
; Two-field i32 struct; the loops below that use it touch only field index 1.
| Matthew Simpson | 18d8898 | 2016-08-02 15:25:16 +0000 | [diff] [blame] | 234 | %pair.i32 = type { i32, i32 } | 
; Scalar source loop: read-modify-write of the second field of p[i], xor-ing
; it with the loop-invariant %y. The induction variable is used only in the
; getelementptr.
|  | 235 | define void @scalarize_induction_variable_03(%pair.i32 *%p, i32 %y, i64 %n) { | 
| Matthew Simpson | 433cb1d | 2016-07-06 14:26:59 +0000 | [diff] [blame] | 236 | entry: | 
|  | 237 | br label %for.body | 
|  | 238 |  | 
|  | 239 | for.body: | 
|  | 240 | %i  = phi i64 [ %i.next, %for.body ], [ 0, %entry ] | 
; Address of field 1 of element %i; the same pointer serves the load and store.
| Matthew Simpson | 18d8898 | 2016-08-02 15:25:16 +0000 | [diff] [blame] | 241 | %f = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1 | 
| Matthew Simpson | 433cb1d | 2016-07-06 14:26:59 +0000 | [diff] [blame] | 242 | %0 = load i32, i32* %f, align 8 | 
|  | 243 | %1 = xor i32 %0, %y | 
|  | 244 | store i32 %1, i32* %f, align 8 | 
|  | 245 | %i.next = add nuw nsw i64 %i, 1 | 
|  | 246 | %cond = icmp slt i64 %i.next, %n | 
|  | 247 | br i1 %cond, label %for.body, label %for.end | 
|  | 248 |  | 
|  | 249 | for.end: | 
|  | 250 | ret void | 
|  | 251 | } | 
| Arnold Schwaighofer | b72cb4e | 2013-11-18 13:14:32 +0000 | [diff] [blame] | 252 |  | 
| Matthew Simpson | 58f5628 | 2016-08-02 14:29:41 +0000 | [diff] [blame] | 253 | ; Make sure we scalarize the step vectors used for the pointer arithmetic. We | 
|  | 254 | ; can't easily simplify vectorized step vectors. (Interleaved accesses.) | 
|  | 255 | ; | 
|  | 256 | ; for (int i = 0; i < n; ++i) | 
|  | 257 | ;   p[i].f = a[i * 4]; | 
|  | 258 | ; | 
|  | 259 | ; INTERLEAVE-LABEL: @scalarize_induction_variable_04( | 
|  | 260 | ; INTERLEAVE: vector.body: | 
|  | 261 | ; INTERLEAVE:   %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 262 | ; INTERLEAVE:   %[[i1:.+]] = or i64 %[[i0]], 1 | 
|  | 263 | ; INTERLEAVE:   %[[i2:.+]] = or i64 %[[i0]], 2 | 
|  | 264 | ; INTERLEAVE:   %[[i3:.+]] = or i64 %[[i0]], 3 | 
|  | 265 | ; INTERLEAVE:   %[[i4:.+]] = or i64 %[[i0]], 4 | 
|  | 266 | ; INTERLEAVE:   %[[i5:.+]] = or i64 %[[i0]], 5 | 
|  | 267 | ; INTERLEAVE:   %[[i6:.+]] = or i64 %[[i0]], 6 | 
|  | 268 | ; INTERLEAVE:   %[[i7:.+]] = or i64 %[[i0]], 7 | 
| Matthew Simpson | 18d8898 | 2016-08-02 15:25:16 +0000 | [diff] [blame] | 269 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i0]], i32 1 | 
|  | 270 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i1]], i32 1 | 
|  | 271 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i2]], i32 1 | 
|  | 272 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i3]], i32 1 | 
|  | 273 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i4]], i32 1 | 
|  | 274 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i5]], i32 1 | 
|  | 275 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i6]], i32 1 | 
|  | 276 | ; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i7]], i32 1 | 
| Matthew Simpson | 58f5628 | 2016-08-02 14:29:41 +0000 | [diff] [blame] | 277 |  | 
; Scalar source loop: copies a[i * 4] into the second field of p[i]. The
; induction variable feeds both the shifted index into %a and the struct
; getelementptr into %p.
| Matthew Simpson | 18d8898 | 2016-08-02 15:25:16 +0000 | [diff] [blame] | 278 | define void @scalarize_induction_variable_04(i32* %a, %pair.i32* %p, i32 %n) { | 
| Matthew Simpson | 58f5628 | 2016-08-02 14:29:41 +0000 | [diff] [blame] | 279 | entry: | 
|  | 280 | br label %for.body | 
|  | 281 |  | 
|  | 282 | for.body: | 
|  | 283 | %i = phi i64 [ %i.next, %for.body ], [ 0, %entry] | 
; i << 2, i.e. the source index is four times the induction variable.
|  | 284 | %0 = shl nsw i64 %i, 2 | 
|  | 285 | %1 = getelementptr inbounds i32, i32* %a, i64 %0 | 
|  | 286 | %2 = load i32, i32* %1, align 1 | 
| Matthew Simpson | 18d8898 | 2016-08-02 15:25:16 +0000 | [diff] [blame] | 287 | %3 = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1 | 
| Matthew Simpson | 58f5628 | 2016-08-02 14:29:41 +0000 | [diff] [blame] | 288 | store i32 %2, i32* %3, align 1 | 
|  | 289 | %i.next = add nuw nsw i64 %i, 1 | 
; The exit test is done in 32 bits: the incremented index is truncated and
; compared for equality with %n.
|  | 290 | %4 = trunc i64 %i.next to i32 | 
|  | 291 | %cond = icmp eq i32 %4, %n | 
|  | 292 | br i1 %cond, label %for.end, label %for.body | 
|  | 293 |  | 
|  | 294 | for.end: | 
|  | 295 | ret void | 
|  | 296 | } | 
|  | 297 |  | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 298 | ; PR30542. Ensure we generate all the scalar steps for the induction variable. | 
|  | 299 | ; The scalar induction variable is used by a getelementptr instruction | 
|  | 300 | ; (uniform), and a udiv (non-uniform). | 
|  | 301 | ; | 
|  | 302 | ; int sum = 0; | 
|  | 303 | ; for (int i = 0; i < n; ++i) { | 
|  | 304 | ;   int x = a[i]; | 
|  | 305 | ;   if (c) | 
|  | 306 | ;     x /= i; | 
|  | 307 | ;   sum += x; | 
|  | 308 | ; } | 
|  | 309 | ; | 
|  | 310 | ; CHECK-LABEL: @scalarize_induction_variable_05( | 
|  | 311 | ; CHECK: vector.body: | 
| Gil Rapaport | 5c875c3 | 2017-04-25 18:14:24 +0000 | [diff] [blame] | 312 | ; CHECK:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue{{[0-9]+}} ] | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 313 | ; CHECK:   %[[I0:.+]] = add i32 %index, 0 | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 314 | ; CHECK:   getelementptr inbounds i32, i32* %a, i32 %[[I0]] | 
|  | 315 | ; CHECK: pred.udiv.if: | 
|  | 316 | ; CHECK:   udiv i32 {{.*}}, %[[I0]] | 
| Gil Rapaport | 5c875c3 | 2017-04-25 18:14:24 +0000 | [diff] [blame] | 317 | ; CHECK: pred.udiv.if{{[0-9]+}}: | 
| Matthew Simpson | c62266d | 2016-10-25 18:59:45 +0000 | [diff] [blame] | 318 | ; CHECK:   %[[I1:.+]] = add i32 %index, 1 | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 319 | ; CHECK:   udiv i32 {{.*}}, %[[I1]] | 
|  | 320 | ; | 
|  | 321 | ; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_05( | 
|  | 322 | ; UNROLL-NO-IC: vector.body: | 
| Gil Rapaport | 5c875c3 | 2017-04-25 18:14:24 +0000 | [diff] [blame] | 323 | ; UNROLL-NO-IC:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue{{[0-9]+}} ] | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 324 | ; UNROLL-NO-IC:   %[[I0:.+]] = add i32 %index, 0 | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 325 | ; UNROLL-NO-IC:   %[[I2:.+]] = add i32 %index, 2 | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 326 | ; UNROLL-NO-IC:   getelementptr inbounds i32, i32* %a, i32 %[[I0]] | 
|  | 327 | ; UNROLL-NO-IC:   getelementptr inbounds i32, i32* %a, i32 %[[I2]] | 
|  | 328 | ; UNROLL-NO-IC: pred.udiv.if: | 
|  | 329 | ; UNROLL-NO-IC:   udiv i32 {{.*}}, %[[I0]] | 
| Gil Rapaport | 5c875c3 | 2017-04-25 18:14:24 +0000 | [diff] [blame] | 330 | ; UNROLL-NO-IC: pred.udiv.if{{[0-9]+}}: | 
| Matthew Simpson | c62266d | 2016-10-25 18:59:45 +0000 | [diff] [blame] | 331 | ; UNROLL-NO-IC:   %[[I1:.+]] = add i32 %index, 1 | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 332 | ; UNROLL-NO-IC:   udiv i32 {{.*}}, %[[I1]] | 
| Gil Rapaport | 5c875c3 | 2017-04-25 18:14:24 +0000 | [diff] [blame] | 333 | ; UNROLL-NO-IC: pred.udiv.if{{[0-9]+}}: | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 334 | ; UNROLL-NO-IC:   udiv i32 {{.*}}, %[[I2]] | 
| Gil Rapaport | 5c875c3 | 2017-04-25 18:14:24 +0000 | [diff] [blame] | 335 | ; UNROLL-NO-IC: pred.udiv.if{{[0-9]+}}: | 
| Matthew Simpson | c62266d | 2016-10-25 18:59:45 +0000 | [diff] [blame] | 336 | ; UNROLL-NO-IC:   %[[I3:.+]] = add i32 %index, 3 | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 337 | ; UNROLL-NO-IC:   udiv i32 {{.*}}, %[[I3]] | 
|  | 338 | ; | 
|  | 339 | ; IND-LABEL: @scalarize_induction_variable_05( | 
|  | 340 | ; IND: vector.body: | 
| Gil Rapaport | 5c875c3 | 2017-04-25 18:14:24 +0000 | [diff] [blame] | 341 | ; IND:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue{{[0-9]+}} ] | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 342 | ; IND:   %[[E0:.+]] = sext i32 %index to i64 | 
|  | 343 | ; IND:   getelementptr inbounds i32, i32* %a, i64 %[[E0]] | 
|  | 344 | ; IND: pred.udiv.if: | 
|  | 345 | ; IND:   udiv i32 {{.*}}, %index | 
| Gil Rapaport | 5c875c3 | 2017-04-25 18:14:24 +0000 | [diff] [blame] | 346 | ; IND: pred.udiv.if{{[0-9]+}}: | 
| Matthew Simpson | c62266d | 2016-10-25 18:59:45 +0000 | [diff] [blame] | 347 | ; IND:   %[[I1:.+]] = or i32 %index, 1 | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 348 | ; IND:   udiv i32 {{.*}}, %[[I1]] | 
|  | 349 | ; | 
|  | 350 | ; UNROLL-LABEL: @scalarize_induction_variable_05( | 
|  | 351 | ; UNROLL: vector.body: | 
| Gil Rapaport | 5c875c3 | 2017-04-25 18:14:24 +0000 | [diff] [blame] | 352 | ; UNROLL:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue{{[0-9]+}} ] | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 353 | ; UNROLL:   %[[I2:.+]] = or i32 %index, 2 | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 354 | ; UNROLL:   %[[E0:.+]] = sext i32 %index to i64 | 
|  | 355 | ; UNROLL:   %[[G0:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[E0]] | 
| Daniel Neilson | 9e4bbe8 | 2018-05-01 15:35:08 +0000 | [diff] [blame] | 356 | ; UNROLL:   getelementptr inbounds i32, i32* %[[G0]], i64 2 | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 357 | ; UNROLL: pred.udiv.if: | 
|  | 358 | ; UNROLL:   udiv i32 {{.*}}, %index | 
| Gil Rapaport | 5c875c3 | 2017-04-25 18:14:24 +0000 | [diff] [blame] | 359 | ; UNROLL: pred.udiv.if{{[0-9]+}}: | 
| Matthew Simpson | c62266d | 2016-10-25 18:59:45 +0000 | [diff] [blame] | 360 | ; UNROLL:   %[[I1:.+]] = or i32 %index, 1 | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 361 | ; UNROLL:   udiv i32 {{.*}}, %[[I1]] | 
| Gil Rapaport | 5c875c3 | 2017-04-25 18:14:24 +0000 | [diff] [blame] | 362 | ; UNROLL: pred.udiv.if{{[0-9]+}}: | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 363 | ; UNROLL:   udiv i32 {{.*}}, %[[I2]] | 
| Gil Rapaport | 5c875c3 | 2017-04-25 18:14:24 +0000 | [diff] [blame] | 364 | ; UNROLL: pred.udiv.if{{[0-9]+}}: | 
| Matthew Simpson | c62266d | 2016-10-25 18:59:45 +0000 | [diff] [blame] | 365 | ; UNROLL:   %[[I3:.+]] = or i32 %index, 3 | 
| Matthew Simpson | 7808833 | 2016-09-30 15:13:52 +0000 | [diff] [blame] | 366 | ; UNROLL:   udiv i32 {{.*}}, %[[I3]] | 
|  | 367 |  | 
; Scalar source loop: sum reduction over a[i], where the loaded value is
; divided by the induction variable when the loop-invariant flag %c is set.
; %i therefore has two users: the getelementptr and the conditional udiv.
; NOTE(review): parameter %x is not referenced anywhere in the body.
|  | 368 | define i32 @scalarize_induction_variable_05(i32* %a, i32 %x, i1 %c, i32 %n) { | 
|  | 369 | entry: | 
|  | 370 | br label %for.body | 
|  | 371 |  | 
|  | 372 | for.body: | 
|  | 373 | %i = phi i32 [ 0, %entry ], [ %i.next, %if.end ] | 
|  | 374 | %sum = phi i32 [ 0, %entry ], [ %tmp4, %if.end ] | 
|  | 375 | %tmp0 = getelementptr inbounds i32, i32* %a, i32 %i | 
|  | 376 | %tmp1 = load i32, i32* %tmp0, align 4 | 
|  | 377 | br i1 %c, label %if.then, label %if.end | 
|  | 378 |  | 
|  | 379 | if.then: | 
; Executed only when %c is true; the divisor is the induction variable itself.
|  | 380 | %tmp2 = udiv i32 %tmp1, %i | 
|  | 381 | br label %if.end | 
|  | 382 |  | 
|  | 383 | if.end: | 
; Select the divided value or the raw load depending on the path taken.
|  | 384 | %tmp3 = phi i32 [ %tmp2, %if.then ], [ %tmp1, %for.body ] | 
|  | 385 | %tmp4 = add i32 %tmp3, %sum | 
|  | 386 | %i.next = add nuw nsw i32 %i, 1 | 
|  | 387 | %cond = icmp slt i32 %i.next, %n | 
|  | 388 | br i1 %cond, label %for.body, label %for.end | 
|  | 389 |  | 
|  | 390 | for.end: | 
; Single-input phi that carries the final sum out of the loop.
|  | 391 | %tmp5  = phi i32 [ %tmp4, %if.end ] | 
|  | 392 | ret i32 %tmp5 | 
|  | 393 | } | 
|  | 394 |  | 
| Matthew Simpson | 18d8898 | 2016-08-02 15:25:16 +0000 | [diff] [blame] | 395 | ; Ensure we generate both a vector and a scalar induction variable. In this | 
|  | 396 | ; test, the induction variable is used by an instruction that will be | 
|  | 397 | ; vectorized (trunc) as well as an instruction that will remain in scalar form | 
|  | 398 | ; (getelementptr). | 
|  | 399 | ; | 
|  | 400 | ; CHECK-LABEL: @iv_vector_and_scalar_users( | 
|  | 401 | ; CHECK: vector.body: | 
|  | 402 | ; CHECK:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 403 | ; CHECK:   %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %vec.ind.next, %vector.body ] | 
|  | 404 | ; CHECK:   %vec.ind1 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next2, %vector.body ] | 
|  | 405 | ; CHECK:   %[[i0:.+]] = add i64 %index, 0 | 
|  | 406 | ; CHECK:   %[[i1:.+]] = add i64 %index, 1 | 
|  | 407 | ; CHECK:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i0]], i32 1 | 
|  | 408 | ; CHECK:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1 | 
|  | 409 | ; CHECK:   %index.next = add i64 %index, 2 | 
|  | 410 | ; CHECK:   %vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2> | 
|  | 411 | ; CHECK:   %vec.ind.next2 = add <2 x i32> %vec.ind1, <i32 2, i32 2> | 
|  | 412 | ; | 
|  | 413 | ; IND-LABEL: @iv_vector_and_scalar_users( | 
|  | 414 | ; IND: vector.body: | 
|  | 415 | ; IND:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 416 | ; IND:   %vec.ind1 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next2, %vector.body ] | 
|  | 417 | ; IND:   %[[i1:.+]] = or i64 %index, 1 | 
|  | 418 | ; IND:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %index, i32 1 | 
|  | 419 | ; IND:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1 | 
|  | 420 | ; IND:   %index.next = add i64 %index, 2 | 
|  | 421 | ; IND:   %vec.ind.next2 = add <2 x i32> %vec.ind1, <i32 2, i32 2> | 
|  | 422 | ; | 
|  | 423 | ; UNROLL-LABEL: @iv_vector_and_scalar_users( | 
|  | 424 | ; UNROLL: vector.body: | 
|  | 425 | ; UNROLL:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 426 | ; UNROLL:   %vec.ind2 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next5, %vector.body ] | 
|  | 427 | ; UNROLL:   %[[i1:.+]] = or i64 %index, 1 | 
|  | 428 | ; UNROLL:   %[[i2:.+]] = or i64 %index, 2 | 
|  | 429 | ; UNROLL:   %[[i3:.+]] = or i64 %index, 3 | 
|  | 430 | ; UNROLL:   %step.add3 = add <2 x i32> %vec.ind2, <i32 2, i32 2> | 
|  | 431 | ; UNROLL:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %index, i32 1 | 
|  | 432 | ; UNROLL:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1 | 
|  | 433 | ; UNROLL:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i2]], i32 1 | 
|  | 434 | ; UNROLL:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i3]], i32 1 | 
|  | 435 | ; UNROLL:   %index.next = add i64 %index, 4 | 
|  | 436 | ; UNROLL:   %vec.ind.next5 = add <2 x i32> %vec.ind2, <i32 4, i32 4> | 
|  | 437 |  | 
; Two-field i16 struct; the loop below writes only field index 1.
|  | 438 | %pair.i16 = type { i16, i16 } | 
; Scalar source loop: stores (i16)(a + (i32)i) into the second field of p[i].
; The induction variable is consumed by a trunc that feeds the arithmetic and
; by the getelementptr that forms the store address.
|  | 439 | define void @iv_vector_and_scalar_users(%pair.i16* %p, i32 %a, i32 %n) { | 
|  | 440 | entry: | 
|  | 441 | br label %for.body | 
|  | 442 |  | 
|  | 443 | for.body: | 
|  | 444 | %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] | 
; Value path: narrow the index, add %a, narrow again to the stored width.
|  | 445 | %0 = trunc i64 %i to i32 | 
|  | 446 | %1 = add i32 %a, %0 | 
|  | 447 | %2 = trunc i32 %1 to i16 | 
; Address path: field 1 of element %i.
|  | 448 | %3 = getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %i, i32 1 | 
|  | 449 | store i16 %2, i16* %3, align 2 | 
|  | 450 | %i.next = add nuw nsw i64 %i, 1 | 
; The exit test compares the truncated incremented index with %n.
|  | 451 | %4 = trunc i64 %i.next to i32 | 
|  | 452 | %cond = icmp eq i32 %4, %n | 
|  | 453 | br i1 %cond, label %for.end, label %for.body | 
|  | 454 |  | 
|  | 455 | for.end: | 
|  | 456 | ret void | 
|  | 457 | } | 
|  | 458 |  | 
| Arnold Schwaighofer | b72cb4e | 2013-11-18 13:14:32 +0000 | [diff] [blame] | 459 | ; Make sure that the loop exit count computation does not overflow for i8 and | 
|  | 460 | ; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the | 
|  | 461 | ; induction variable to a bigger type the exit count computation will overflow | 
|  | 462 | ; to 0. | 
|  | 463 | ; PR17532 | 
|  | 464 |  | 
|  | 465 | ; CHECK-LABEL: i8_loop | 
| Benjamin Kramer | c10563d | 2014-01-11 21:06:00 +0000 | [diff] [blame] | 466 | ; CHECK: icmp eq i32 {{.*}}, 256 | 
; Scalar source loop driven by an i8 counter that starts at 0 and is
; decremented before the exit compare, so it has to wrap all the way around
; before it equals 0 again (256 iterations). The returned value is the last
; result of and-ing the carried i32 with 4.
| Arnold Schwaighofer | b72cb4e | 2013-11-18 13:14:32 +0000 | [diff] [blame] | 467 | define i32 @i8_loop() nounwind readnone ssp uwtable { | 
|  | 468 | br label %1 | 
|  | 469 |  | 
|  | 470 | ; <label>:1                                       ; preds = %1, %0 | 
|  | 471 | %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ] | 
|  | 472 | %b.0 = phi i8 [ 0, %0 ], [ %3, %1 ] | 
|  | 473 | %2 = and i32 %a.0, 4 | 
; Adding -1 to an i8 that starts at 0 wraps to 255 on the first iteration.
|  | 474 | %3 = add i8 %b.0, -1 | 
|  | 475 | %4 = icmp eq i8 %3, 0 | 
|  | 476 | br i1 %4, label %5, label %1 | 
|  | 477 |  | 
|  | 478 | ; <label>:5                                       ; preds = %1 | 
|  | 479 | ret i32 %2 | 
|  | 480 | } | 
|  | 481 |  | 
|  | 482 | ; CHECK-LABEL: i16_loop | 
| Benjamin Kramer | c10563d | 2014-01-11 21:06:00 +0000 | [diff] [blame] | 483 | ; CHECK: icmp eq i32 {{.*}}, 65536 | 
| Arnold Schwaighofer | b72cb4e | 2013-11-18 13:14:32 +0000 | [diff] [blame] | 484 |  | 
|  |  | ; Same shape as the i8 variant with an i16 counter: %b.0 starts at 0 and is | 
|  |  | ; decremented until it is 0 again, so the body runs 65536 times. The last result | 
|  |  | ; of and-ing %a.0 with 4 is returned. | 
|  | 485 | define i32 @i16_loop() nounwind readnone ssp uwtable { | 
|  | 486 | br label %1 | 
|  | 487 |  | 
|  | 488 | ; <label>:1                                       ; preds = %1, %0 | 
|  | 489 | %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ] | 
|  | 490 | %b.0 = phi i16 [ 0, %0 ], [ %3, %1 ] | 
|  | 491 | %2 = and i32 %a.0, 4 | 
|  |  | ; On the first iteration 0 + -1 wraps to 65535 in i16. | 
|  | 492 | %3 = add i16 %b.0, -1 | 
|  | 493 | %4 = icmp eq i16 %3, 0 | 
|  | 494 | br i1 %4, label %5, label %1 | 
|  | 495 |  | 
|  | 496 | ; <label>:5                                       ; preds = %1 | 
|  | 497 | ret i32 %2 | 
|  | 498 | } | 
| Arnold Schwaighofer | e206768 | 2014-05-29 22:10:01 +0000 | [diff] [blame] | 499 |  | 
|  | 500 | ; This loop has a backedge taken count of i32_max. We need to check for this | 
|  | 501 | ; condition and branch directly to the scalar loop. | 
|  | 502 |  | 
|  | 503 | ; CHECK-LABEL: max_i32_backedgetaken | 
| Ayal Zaks | 8c452d7 | 2017-07-19 05:16:39 +0000 | [diff] [blame] | 504 | ; CHECK:  br i1 true, label %scalar.ph, label %vector.ph | 
| Arnold Schwaighofer | e206768 | 2014-05-29 22:10:01 +0000 | [diff] [blame] | 505 |  | 
| Matthew Simpson | 433cb1d | 2016-07-06 14:26:59 +0000 | [diff] [blame] | 506 | ; CHECK: middle.block: | 
|  | 507 | ; CHECK:  %[[v9:.+]] = extractelement <2 x i32> %bin.rdx, i32 0 | 
| Arnold Schwaighofer | e206768 | 2014-05-29 22:10:01 +0000 | [diff] [blame] | 508 | ; CHECK: scalar.ph: | 
| Matthew Simpson | 433cb1d | 2016-07-06 14:26:59 +0000 | [diff] [blame] | 509 | ; CHECK:  %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %[[v0:.+]] ] | 
| Ayal Zaks | 8c452d7 | 2017-07-19 05:16:39 +0000 | [diff] [blame] | 510 | ; CHECK:  %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ %[[v9]], %middle.block ] | 
| Arnold Schwaighofer | e206768 | 2014-05-29 22:10:01 +0000 | [diff] [blame] | 511 |  | 
|  |  | ; The i32 counter %b.0 starts at 0 and is decremented until it is 0 again, i.e. | 
|  |  | ; it must wrap through the whole i32 range before the loop exits. The last | 
|  |  | ; result of and-ing %a.0 with 4 is returned. | 
|  | 512 | define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable { | 
|  | 513 |  | 
|  | 514 | br label %1 | 
|  | 515 |  | 
|  | 516 | ; <label>:1                                       ; preds = %1, %0 | 
|  | 517 | %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ] | 
|  | 518 | %b.0 = phi i32 [ 0, %0 ], [ %3, %1 ] | 
|  | 519 | %2 = and i32 %a.0, 4 | 
|  |  | ; On the first iteration 0 + -1 wraps to the all-ones i32 value. | 
|  | 520 | %3 = add i32 %b.0, -1 | 
|  | 521 | %4 = icmp eq i32 %3, 0 | 
|  | 522 | br i1 %4, label %5, label %1 | 
|  | 523 |  | 
|  | 524 | ; <label>:5                                       ; preds = %1 | 
|  | 525 | ret i32 %2 | 
|  | 526 | } | 
| Arnold Schwaighofer | c11107c | 2014-06-22 03:38:59 +0000 | [diff] [blame] | 527 |  | 
|  | 528 | ; When generating the overflow check we must make sure that the induction start value | 
|  | 529 | ; is defined before the branch to the scalar preheader. | 
|  | 530 |  | 
|  | 531 | ; CHECK-LABEL: testoverflowcheck | 
|  | 532 | ; CHECK: entry | 
|  | 533 | ; CHECK: %[[LOAD:.*]] = load i8 | 
| Arnold Schwaighofer | c11107c | 2014-06-22 03:38:59 +0000 | [diff] [blame] | 534 | ; CHECK: br | 
|  | 535 |  | 
|  | 536 | ; CHECK: scalar.ph | 
| James Molloy | c07701b | 2015-09-02 10:14:54 +0000 | [diff] [blame] | 537 | ; CHECK: phi i8 [ %{{.*}}, %middle.block ], [ %[[LOAD]], %entry ] | 
| Arnold Schwaighofer | c11107c | 2014-06-22 03:38:59 +0000 | [diff] [blame] | 538 |  | 
|  |  | ; Globals read by @testoverflowcheck: @e supplies the i8 induction start value, | 
|  |  | ; @d the and mask, and @c the initial value of the and reduction. | 
|  | 539 | @e = global i8 1, align 1 | 
|  | 540 | @d = common global i32 0, align 4 | 
|  | 541 | @c = common global i32 0, align 4 | 
|  |  | ; Increments an i8 counter, whose start value is loaded at run time in the entry | 
|  |  | ; block, until it wraps to 0, and-ing the running value with the loaded mask on | 
|  |  | ; every iteration. Returns the final and result. | 
|  | 542 | define i32 @testoverflowcheck() { | 
|  | 543 | entry: | 
|  |  | ; Induction start value; it is defined here, before the branch out of entry. | 
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 544 | %.pr.i = load i8, i8* @e, align 1 | 
|  | 545 | %0 = load i32, i32* @d, align 4 | 
|  | 546 | %c.promoted.i = load i32, i32* @c, align 4 | 
| Arnold Schwaighofer | c11107c | 2014-06-22 03:38:59 +0000 | [diff] [blame] | 547 | br label %cond.end.i | 
|  | 548 |  | 
|  | 549 | cond.end.i: | 
|  | 550 | %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ] | 
|  | 551 | %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ] | 
|  | 552 | %and.i = and i32 %0, %and3.i | 
|  |  | ; The add has no nuw/nsw flags; the loop exits when the counter wraps to 0. | 
|  | 553 | %inc.i = add i8 %inc4.i, 1 | 
|  | 554 | %tobool.i = icmp eq i8 %inc.i, 0 | 
|  | 555 | br i1 %tobool.i, label %loopexit, label %cond.end.i | 
|  | 556 |  | 
|  | 557 | loopexit: | 
|  | 558 | ret i32 %and.i | 
|  | 559 | } | 
| Silviu Baranga | c05bab8 | 2016-05-05 15:20:39 +0000 | [diff] [blame] | 560 |  | 
|  | 561 | ; The SCEV expression of %sphi is (zext i8 {%t,+,1}<%loop> to i32) | 
|  | 562 | ; In order to recognize %sphi as an induction PHI and vectorize this loop, | 
|  | 563 | ; we need to convert the SCEV expression into an AddRecExpr. | 
|  | 564 | ; The expression gets converted to {zext i8 %t to i32,+,1}. | 
|  | 565 |  | 
|  | 566 | ; CHECK-LABEL: wrappingindvars1 | 
|  | 567 | ; CHECK-LABEL: vector.scevcheck | 
| Michael Kuperstein | c5edcde | 2016-06-09 18:03:15 +0000 | [diff] [blame] | 568 | ; CHECK-LABEL: vector.ph | 
|  | 569 | ; CHECK: %[[START:.*]] = add <2 x i32> %{{.*}}, <i32 0, i32 1> | 
| Silviu Baranga | c05bab8 | 2016-05-05 15:20:39 +0000 | [diff] [blame] | 570 | ; CHECK-LABEL: vector.body | 
| Michael Kuperstein | c5edcde | 2016-06-09 18:03:15 +0000 | [diff] [blame] | 571 | ; CHECK: %[[PHI:.*]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.*]], %vector.body ] | 
|  | 572 | ; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 2, i32 2> | 
|  |  | ; Stores %sphi to %A[%idx] on every iteration. %idx is an i8 counter starting at | 
|  |  | ; %t, and %sphi is the zero-extension to i32 of that counter (its start value is | 
|  |  | ; zext %t, its next value is zext of the incremented counter). The loop is only | 
|  |  | ; entered when zext %t is unsigned-less-than 42. | 
| Silviu Baranga | c05bab8 | 2016-05-05 15:20:39 +0000 | [diff] [blame] | 573 | define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) { | 
|  | 574 | entry: | 
|  | 575 | %st = zext i8 %t to i16 | 
|  | 576 | %ext = zext i8 %t to i32 | 
|  | 577 | %ecmp = icmp ult i16 %st, 42 | 
|  | 578 | br i1 %ecmp, label %loop, label %exit | 
|  | 579 |  | 
|  | 580 | loop: | 
|  | 581 |  | 
|  | 582 | %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ] | 
|  | 583 | %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ] | 
|  | 584 | %sphi = phi i32 [ %ext, %entry ], [%idx.inc.ext, %loop] | 
|  | 585 |  | 
|  | 586 | %ptr = getelementptr inbounds i32, i32* %A, i8 %idx | 
|  | 587 | store i32 %sphi, i32* %ptr | 
|  | 588 |  | 
|  |  | ; The i8 increment carries no nuw/nsw flags, unlike the i32 counter below. | 
|  | 589 | %idx.inc = add i8 %idx, 1 | 
|  | 590 | %idx.inc.ext = zext i8 %idx.inc to i32 | 
|  | 591 | %idx.b.inc = add nuw nsw i32 %idx.b, 1 | 
|  | 592 |  | 
|  |  | ; The exit test uses the pre-increment value of the i32 counter. | 
|  | 593 | %c = icmp ult i32 %idx.b, %len | 
|  | 594 | br i1 %c, label %loop, label %exit | 
|  | 595 |  | 
|  | 596 | exit: | 
|  | 597 | ret void | 
|  | 598 | } | 
|  | 599 |  | 
|  | 600 | ; The SCEV expression of %sphi is (4 * (zext i8 {%t,+,1}<%loop> to i32)) | 
|  | 601 | ; In order to recognize %sphi as an induction PHI and vectorize this loop, | 
|  | 602 | ; we need to convert the SCEV expression into an AddRecExpr. | 
|  | 603 | ; The expression gets converted to ({4 * (zext %t to i32),+,4}). | 
|  | 604 | ; CHECK-LABEL: wrappingindvars2 | 
|  | 605 | ; CHECK-LABEL: vector.scevcheck | 
| Michael Kuperstein | c5edcde | 2016-06-09 18:03:15 +0000 | [diff] [blame] | 606 | ; CHECK-LABEL: vector.ph | 
|  | 607 | ; CHECK: %[[START:.*]] = add <2 x i32> %{{.*}}, <i32 0, i32 4> | 
| Silviu Baranga | c05bab8 | 2016-05-05 15:20:39 +0000 | [diff] [blame] | 608 | ; CHECK-LABEL: vector.body | 
| Michael Kuperstein | c5edcde | 2016-06-09 18:03:15 +0000 | [diff] [blame] | 609 | ; CHECK: %[[PHI:.*]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.*]], %vector.body ] | 
|  | 610 | ; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 8, i32 8> | 
|  |  | ; Variant of the loop in @wrappingindvars1 where the stored value %sphi is four | 
|  |  | ; times the zero-extended i8 counter: it starts at 4 * zext %t and its next value | 
|  |  | ; is 4 * zext of the incremented counter. The loop is only entered when zext %t | 
|  |  | ; is unsigned-less-than 42. | 
| Silviu Baranga | c05bab8 | 2016-05-05 15:20:39 +0000 | [diff] [blame] | 611 | define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) { | 
|  | 612 |  | 
|  | 613 | entry: | 
|  | 614 | %st = zext i8 %t to i16 | 
|  | 615 | %ext = zext i8 %t to i32 | 
|  | 616 | %ext.mul = mul i32 %ext, 4 | 
|  | 617 |  | 
|  | 618 | %ecmp = icmp ult i16 %st, 42 | 
|  | 619 | br i1 %ecmp, label %loop, label %exit | 
|  | 620 |  | 
|  | 621 | loop: | 
|  | 622 |  | 
|  | 623 | %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ] | 
|  | 624 | %sphi = phi i32 [ %ext.mul, %entry ], [%mul, %loop] | 
|  | 625 | %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ] | 
|  | 626 |  | 
|  | 627 | %ptr = getelementptr inbounds i32, i32* %A, i8 %idx | 
|  | 628 | store i32 %sphi, i32* %ptr | 
|  | 629 |  | 
|  |  | ; The i8 increment carries no nuw/nsw flags, unlike the i32 counter below. | 
|  | 630 | %idx.inc = add i8 %idx, 1 | 
|  | 631 | %idx.inc.ext = zext i8 %idx.inc to i32 | 
|  | 632 | %mul = mul i32 %idx.inc.ext, 4 | 
|  | 633 | %idx.b.inc = add nuw nsw i32 %idx.b, 1 | 
|  | 634 |  | 
|  |  | ; The exit test uses the pre-increment value of the i32 counter. | 
|  | 635 | %c = icmp ult i32 %idx.b, %len | 
|  | 636 | br i1 %c, label %loop, label %exit | 
|  | 637 |  | 
|  | 638 | exit: | 
|  | 639 | ret void | 
|  | 640 | } | 
| Michael Kuperstein | 3a3c64d | 2016-06-01 17:16:46 +0000 | [diff] [blame] | 641 |  | 
|  | 642 | ; Check that we generate vectorized IVs in the pre-header | 
|  | 643 | ; instead of widening the scalar IV inside the loop, when | 
|  | 644 | ; we know how to do that. | 
|  | 645 | ; IND-LABEL: veciv | 
|  | 646 | ; IND: vector.body: | 
|  | 647 | ; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
| Matthew Simpson | 102729c | 2016-07-21 21:20:15 +0000 | [diff] [blame] | 648 | ; IND: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ] | 
| Michael Kuperstein | 3a3c64d | 2016-06-01 17:16:46 +0000 | [diff] [blame] | 649 | ; IND: %index.next = add i32 %index, 2 | 
| Matthew Simpson | 102729c | 2016-07-21 21:20:15 +0000 | [diff] [blame] | 650 | ; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2> | 
| Michael Kuperstein | 3a3c64d | 2016-06-01 17:16:46 +0000 | [diff] [blame] | 651 | ; IND: %[[CMP:.*]] = icmp eq i32 %index.next | 
|  | 652 | ; IND: br i1 %[[CMP]] | 
|  | 653 | ; UNROLL-LABEL: veciv | 
|  | 654 | ; UNROLL: vector.body: | 
|  | 655 | ; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
| Matthew Simpson | 102729c | 2016-07-21 21:20:15 +0000 | [diff] [blame] | 656 | ; UNROLL: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ] | 
| Michael Kuperstein | 3a3c64d | 2016-06-01 17:16:46 +0000 | [diff] [blame] | 657 | ; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2> | 
| Michael Kuperstein | 3a3c64d | 2016-06-01 17:16:46 +0000 | [diff] [blame] | 658 | ; UNROLL: %index.next = add i32 %index, 4 | 
| Matthew Simpson | 102729c | 2016-07-21 21:20:15 +0000 | [diff] [blame] | 659 | ; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 4, i32 4> | 
| Michael Kuperstein | 3a3c64d | 2016-06-01 17:16:46 +0000 | [diff] [blame] | 660 | ; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next | 
|  | 661 | ; UNROLL: br i1 %[[CMP]] | 
|  |  | ; Stores the i32 induction variable, which starts at 0 and steps by 1, into | 
|  |  | ; %a[iv]; the loop exits when iv + 1 equals %k. The %start parameter is not | 
|  |  | ; referenced by the body. | 
|  | 662 | define void @veciv(i32* nocapture %a, i32 %start, i32 %k) { | 
|  | 663 | for.body.preheader: | 
|  | 664 | br label %for.body | 
|  | 665 |  | 
|  | 666 | for.body: | 
|  | 667 | %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] | 
|  |  | ; The same induction value is both the array index and the stored value. | 
|  | 668 | %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv | 
|  | 669 | store i32 %indvars.iv, i32* %arrayidx, align 4 | 
|  | 670 | %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 | 
|  | 671 | %exitcond = icmp eq i32 %indvars.iv.next, %k | 
|  | 672 | br i1 %exitcond, label %exit, label %for.body | 
|  | 673 |  | 
|  | 674 | exit: | 
|  | 675 | ret void | 
|  | 676 | } | 
|  | 677 |  | 
|  | 678 | ; IND-LABEL: trunciv | 
|  | 679 | ; IND: vector.body: | 
|  | 680 | ; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 681 | ; IND: %[[VECIND:.*]] = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %[[STEPADD:.*]], %vector.body ] | 
| Michael Kuperstein | 3a3c64d | 2016-06-01 17:16:46 +0000 | [diff] [blame] | 682 | ; IND: %index.next = add i64 %index, 2 | 
| Matthew Simpson | 102729c | 2016-07-21 21:20:15 +0000 | [diff] [blame] | 683 | ; IND: %[[STEPADD]] = add <2 x i32> %[[VECIND]], <i32 2, i32 2> | 
| Michael Kuperstein | 3a3c64d | 2016-06-01 17:16:46 +0000 | [diff] [blame] | 684 | ; IND: %[[CMP:.*]] = icmp eq i64 %index.next | 
|  | 685 | ; IND: br i1 %[[CMP]] | 
|  |  | ; The induction variable is i64 (start 0, step 1), but only its i32 truncation | 
|  |  | ; is used: as the index into %a and as the stored value. The loop exits when | 
|  |  | ; iv + 1 equals the i64 bound %k. The %start parameter is not referenced by the | 
|  |  | ; body. | 
|  | 686 | define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) { | 
|  | 687 | for.body.preheader: | 
|  | 688 | br label %for.body | 
|  | 689 |  | 
|  | 690 | for.body: | 
|  | 691 | %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] | 
|  | 692 | %trunc.iv = trunc i64 %indvars.iv to i32 | 
|  | 693 | %arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv | 
|  | 694 | store i32 %trunc.iv, i32* %arrayidx, align 4 | 
|  |  | ; The increment and exit compare stay in i64. | 
|  | 695 | %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | 
|  | 696 | %exitcond = icmp eq i64 %indvars.iv.next, %k | 
|  | 697 | br i1 %exitcond, label %exit, label %for.body | 
|  | 698 |  | 
|  | 699 | exit: | 
|  | 700 | ret void | 
|  | 701 | } | 
| Michael Kuperstein | c5edcde | 2016-06-09 18:03:15 +0000 | [diff] [blame] | 702 |  | 
| Matthew Simpson | 18d8898 | 2016-08-02 15:25:16 +0000 | [diff] [blame] | 703 | ; CHECK-LABEL: @nonprimary( | 
|  | 704 | ; CHECK: vector.ph: | 
|  | 705 | ; CHECK:   %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0 | 
|  | 706 | ; CHECK:   %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer | 
|  | 707 | ; CHECK:   %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1> | 
|  | 708 | ; CHECK: vector.body: | 
|  | 709 | ; CHECK:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 710 | ; CHECK:   %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ] | 
|  | 711 | ; CHECK:   %offset.idx = add i32 %i, %index | 
|  | 712 | ; CHECK:   %[[A1:.*]] = add i32 %offset.idx, 0 | 
| Matthew Simpson | 18d8898 | 2016-08-02 15:25:16 +0000 | [diff] [blame] | 713 | ; CHECK:   %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i32 %[[A1]] | 
| Daniel Neilson | 9e4bbe8 | 2018-05-01 15:35:08 +0000 | [diff] [blame] | 714 | ; CHECK:   %[[G3:.*]] = getelementptr inbounds i32, i32* %[[G1]], i32 0 | 
| Matthew Simpson | 18d8898 | 2016-08-02 15:25:16 +0000 | [diff] [blame] | 715 | ; CHECK:   %[[B1:.*]] = bitcast i32* %[[G3]] to <2 x i32>* | 
|  | 716 | ; CHECK:   store <2 x i32> %vec.ind, <2 x i32>* %[[B1]] | 
|  | 717 | ; CHECK:   %index.next = add i32 %index, 2 | 
|  | 718 | ; CHECK:   %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2> | 
|  | 719 | ; CHECK:   %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec | 
|  | 720 | ; CHECK:   br i1 %[[CMP]] | 
|  | 721 | ; | 
|  | 722 | ; IND-LABEL: @nonprimary( | 
|  | 723 | ; IND: vector.ph: | 
|  | 724 | ; IND:   %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0 | 
|  | 725 | ; IND:   %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer | 
|  | 726 | ; IND:   %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1> | 
|  | 727 | ; IND: vector.body: | 
|  | 728 | ; IND:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 729 | ; IND:   %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ] | 
|  | 730 | ; IND:   %[[A1:.*]] = add i32 %index, %i | 
|  | 731 | ; IND:   %[[S1:.*]] = sext i32 %[[A1]] to i64 | 
|  | 732 | ; IND:   %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i64 %[[S1]] | 
|  | 733 | ; IND:   %[[B1:.*]] = bitcast i32* %[[G1]] to <2 x i32>* | 
|  | 734 | ; IND:   store <2 x i32> %vec.ind, <2 x i32>* %[[B1]] | 
|  | 735 | ; IND:   %index.next = add i32 %index, 2 | 
|  | 736 | ; IND:   %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2> | 
|  | 737 | ; IND:   %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec | 
|  | 738 | ; IND:   br i1 %[[CMP]] | 
|  | 739 | ; | 
|  | 740 | ; UNROLL-LABEL: @nonprimary( | 
|  | 741 | ; UNROLL: vector.ph: | 
|  | 742 | ; UNROLL:   %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0 | 
|  | 743 | ; UNROLL:   %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer | 
|  | 744 | ; UNROLL:   %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1> | 
|  | 745 | ; UNROLL: vector.body: | 
|  | 746 | ; UNROLL:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 747 | ; UNROLL:   %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ] | 
|  | 748 | ; UNROLL:   %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2> | 
|  | 749 | ; UNROLL:   %[[A1:.*]] = add i32 %index, %i | 
|  | 750 | ; UNROLL:   %[[S1:.*]] = sext i32 %[[A1]] to i64 | 
|  | 751 | ; UNROLL:   %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i64 %[[S1]] | 
|  | 752 | ; UNROLL:   %[[B1:.*]] = bitcast i32* %[[G1]] to <2 x i32>* | 
|  | 753 | ; UNROLL:   store <2 x i32> %vec.ind, <2 x i32>* %[[B1]] | 
| Daniel Neilson | 9e4bbe8 | 2018-05-01 15:35:08 +0000 | [diff] [blame] | 754 | ; UNROLL:   %[[G2:.*]] = getelementptr inbounds i32, i32* %[[G1]], i64 2 | 
| Matthew Simpson | 18d8898 | 2016-08-02 15:25:16 +0000 | [diff] [blame] | 755 | ; UNROLL:   %[[B2:.*]] = bitcast i32* %[[G2]] to <2 x i32>* | 
|  | 756 | ; UNROLL:   store <2 x i32> %step.add, <2 x i32>* %[[B2]] | 
|  | 757 | ; UNROLL:   %index.next = add i32 %index, 4 | 
|  | 758 | ; UNROLL:   %vec.ind.next = add <2 x i32> %vec.ind, <i32 4, i32 4> | 
|  | 759 | ; UNROLL:   %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec | 
|  | 760 | ; UNROLL:   br i1 %[[CMP]] | 
|  |  | ; The i32 induction variable starts at the run-time value %i rather than 0 and | 
|  |  | ; steps by 1. Each iteration stores it into %a[iv]; the loop exits when iv + 1 | 
|  |  | ; equals %k. The %start parameter is not referenced by the body. | 
| Michael Kuperstein | c5edcde | 2016-06-09 18:03:15 +0000 | [diff] [blame] | 761 | define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) { | 
|  | 762 | for.body.preheader: | 
|  | 763 | br label %for.body | 
|  | 764 |  | 
|  | 765 | for.body: | 
|  | 766 | %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ %i, %for.body.preheader ] | 
|  | 767 | %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv | 
|  | 768 | store i32 %indvars.iv, i32* %arrayidx, align 4 | 
| Matthew Simpson | 18d8898 | 2016-08-02 15:25:16 +0000 | [diff] [blame] | 769 | %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 | 
| Michael Kuperstein | c5edcde | 2016-06-09 18:03:15 +0000 | [diff] [blame] | 770 | %exitcond = icmp eq i32 %indvars.iv.next, %k | 
|  | 771 | br i1 %exitcond, label %exit, label %for.body | 
|  | 772 |  | 
|  | 773 | exit: | 
|  | 774 | ret void | 
|  | 775 | } | 
| Matthew Simpson | f09d13e5 | 2017-02-14 16:28:32 +0000 | [diff] [blame] | 776 |  | 
|  | 777 | ; CHECK-LABEL: @non_primary_iv_trunc( | 
|  | 778 | ; CHECK:       vector.body: | 
|  | 779 | ; CHECK-NEXT:    %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 780 | ; CHECK:         [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ] | 
|  | 781 | ; CHECK:         [[TMP3:%.*]] = add i64 %index, 0 | 
|  | 782 | ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* %a, i64 [[TMP3]] | 
| Daniel Neilson | 9e4bbe8 | 2018-05-01 15:35:08 +0000 | [diff] [blame] | 783 | ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 | 
| Matthew Simpson | f09d13e5 | 2017-02-14 16:28:32 +0000 | [diff] [blame] | 784 | ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>* | 
|  | 785 | ; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP6]], align 4 | 
|  | 786 | ; CHECK-NEXT:    %index.next = add i64 %index, 2 | 
|  | 787 | ; CHECK:         [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4> | 
|  | 788 | ; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body | 
|  |  | ; Two i64 induction variables, both starting at 0: %i steps by 1 and indexes %a, | 
|  |  | ; while %j steps by 2 and is truncated to i32 to form the stored value. The loop | 
|  |  | ; continues while i + 1 is signed-less-than %n. | 
|  | 789 | define void @non_primary_iv_trunc(i32* %a, i64 %n) { | 
|  | 790 | entry: | 
|  | 791 | br label %for.body | 
|  | 792 |  | 
|  | 793 | for.body: | 
|  | 794 | %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] | 
|  | 795 | %j = phi i64 [ %j.next, %for.body ], [ 0, %entry ] | 
|  | 796 | %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i | 
|  |  | ; Only the truncated form of %j is used inside the loop body. | 
|  | 797 | %tmp1 = trunc i64 %j to i32 | 
|  | 798 | store i32 %tmp1, i32* %tmp0, align 4 | 
|  | 799 | %i.next = add nuw nsw i64 %i, 1 | 
|  | 800 | %j.next = add nuw nsw i64 %j, 2 | 
|  | 801 | %cond = icmp slt i64 %i.next, %n | 
|  | 802 | br i1 %cond, label %for.body, label %for.end | 
|  | 803 |  | 
|  | 804 | for.end: | 
|  | 805 | ret void | 
|  | 806 | } | 
| Matthew Simpson | b8ff4a4 | 2017-03-27 20:07:38 +0000 | [diff] [blame] | 807 |  | 
|  | 808 | ; PR32419. Ensure we transform truncated non-primary induction variables. In | 
|  | 809 | ; the test case below we replace %tmp1 with a new induction variable. Because | 
|  | 810 | ; the truncated value is non-primary, we must compute an offset from the | 
|  | 811 | ; primary induction variable. | 
|  | 812 | ; | 
|  | 813 | ; CHECK-LABEL: @PR32419( | 
|  | 814 | ; CHECK:       vector.body: | 
|  | 815 | ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE4:.*]] ] | 
|  | 816 | ; CHECK:         [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] | 
|  | 817 | ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 | 
|  | 818 | ; CHECK:         [[TMP8:%.*]] = add i16 [[TMP1]], 0 | 
|  | 819 | ; CHECK-NEXT:    [[TMP9:%.*]] = urem i16 %b, [[TMP8]] | 
|  | 820 | ; CHECK:         [[TMP15:%.*]] = add i16 [[TMP1]], 1 | 
|  | 821 | ; CHECK-NEXT:    [[TMP16:%.*]] = urem i16 %b, [[TMP15]] | 
|  | 822 | ; CHECK:       [[PRED_UREM_CONTINUE4]]: | 
|  | 823 | ; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body | 
|  | 824 | ; | 
|  |  | ; Iterates %i from -20 up to, but not including, 0. Each iteration truncates %i | 
|  |  | ; to i16 and, when that value is non-zero, computes %b urem it; the sign-extended | 
|  |  | ; result (or 0 when the urem is skipped) is or-ed into an accumulator that starts | 
|  |  | ; at %a. Returns the final accumulator value. | 
|  | 825 | define i32 @PR32419(i32 %a, i16 %b) { | 
|  | 826 | entry: | 
|  | 827 | br label %for.body | 
|  | 828 |  | 
|  | 829 | for.body: | 
|  | 830 | %i = phi i32 [ -20, %entry ], [ %i.next, %for.inc ] | 
|  | 831 | %tmp0 = phi i32 [ %a, %entry ], [ %tmp6, %for.inc ] | 
|  | 832 | %tmp1 = trunc i32 %i to i16 | 
|  |  | ; Skip the urem block when the divisor would be zero. | 
|  | 833 | %tmp2 = icmp eq i16 %tmp1, 0 | 
|  | 834 | br i1 %tmp2, label %for.inc, label %for.cond | 
|  | 835 |  | 
|  | 836 | for.cond: | 
|  | 837 | %tmp3 = urem i16 %b, %tmp1 | 
|  | 838 | br label %for.inc | 
|  | 839 |  | 
|  | 840 | for.inc: | 
|  |  | ; Merge the conditional urem result with 0 for the skipped path. | 
|  | 841 | %tmp4 = phi i16 [ %tmp3, %for.cond ], [ 0, %for.body ] | 
|  | 842 | %tmp5 = sext i16 %tmp4 to i32 | 
|  | 843 | %tmp6 = or i32 %tmp0, %tmp5 | 
|  | 844 | %i.next = add nsw i32 %i, 1 | 
|  | 845 | %cond = icmp eq i32 %i.next, 0 | 
|  | 846 | br i1 %cond, label %for.end, label %for.body | 
|  | 847 |  | 
|  | 848 | for.end: | 
|  | 849 | %tmp7 = phi i32 [ %tmp6, %for.inc ] | 
|  | 850 | ret i32 %tmp7 | 
|  | 851 | } | 
| Anna Thomas | 0691483 | 2017-05-09 14:29:33 +0000 | [diff] [blame] | 852 |  | 
|  | 853 | ; Ensure that the shuffle vector for first order recurrence is inserted | 
|  | 854 | ; correctly after all the phis. These new phis correspond to new IVs | 
|  | 855 | ; that are generated by optimizing non-free truncs of IVs to IVs themselves | 
|  |  | ; The loop runs an i64 induction variable from 1 until its increment equals 114, | 
|  |  | ; alongside an i32 induction variable %x that also starts at 1. The i64 variable | 
|  |  | ; is truncated to i32 twice (%c6 and %indvars.iv.tr), and %y carries %c6 over | 
|  |  | ; from the previous iteration (42 on entry). The i64 sum %c23 is returned. | 
|  | 856 | define i64 @trunc_with_first_order_recurrence() { | 
|  | 857 | ; CHECK-LABEL: trunc_with_first_order_recurrence | 
|  | 858 | ; CHECK-LABEL: vector.body: | 
|  | 859 | ; CHECK-NEXT:    %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | 
|  | 860 | ; CHECK-NEXT:    %vec.phi = phi <2 x i64> | 
|  | 861 | ; CHECK-NEXT:    %vec.ind = phi <2 x i64> [ <i64 1, i64 2>, %vector.ph ], [ %vec.ind.next, %vector.body ] | 
|  | 862 | ; CHECK-NEXT:    %vec.ind2 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next3, %vector.body ] | 
|  | 863 | ; CHECK-NEXT:    %vector.recur = phi <2 x i32> [ <i32 undef, i32 42>, %vector.ph ], [ %vec.ind5, %vector.body ] | 
|  | 864 | ; CHECK-NEXT:    %vec.ind5 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next6, %vector.body ] | 
|  | 865 | ; CHECK-NEXT:    %vec.ind7 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next8, %vector.body ] | 
|  | 866 | ; CHECK-NEXT:    shufflevector <2 x i32> %vector.recur, <2 x i32> %vec.ind5, <2 x i32> <i32 1, i32 2> | 
|  | 867 | entry: | 
|  | 868 | br label %loop | 
|  | 869 |  | 
|  |  | ; The exit block is listed before the loop block it is reached from. | 
|  | 870 | exit:                                        ; preds = %loop | 
|  | 871 | %.lcssa = phi i64 [ %c23, %loop ] | 
|  | 872 | ret i64 %.lcssa | 
|  | 873 |  | 
|  | 874 | loop:                                         ; preds = %loop, %entry | 
|  | 875 | %c5 = phi i64 [ %c23, %loop ], [ 0, %entry ] | 
|  | 876 | %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 1, %entry ] | 
|  | 877 | %x = phi i32 [ %c24, %loop ], [ 1, %entry ] | 
|  |  | ; %y receives the previous iteration's %c6; its entry value is 42. | 
|  | 878 | %y = phi i32 [ %c6, %loop ], [ 42, %entry ] | 
|  | 879 | %c6 = trunc i64 %indvars.iv to i32 | 
|  | 880 | %c8 = mul i32 %x, %c6 | 
|  | 881 | %c9 = add i32 %c8, 42 | 
|  | 882 | %c10 = add i32 %y, %c6 | 
|  | 883 | %c11 = add i32 %c10, %c9 | 
|  | 884 | %c12 = sext i32 %c11 to i64 | 
|  | 885 | %c13 = add i64 %c5, %c12 | 
|  |  | ; Second truncation of the same i64 induction variable. | 
|  | 886 | %indvars.iv.tr = trunc i64 %indvars.iv to i32 | 
|  | 887 | %c14 = shl i32 %indvars.iv.tr, 1 | 
|  | 888 | %c15 = add i32 %c9, %c14 | 
|  | 889 | %c16 = sext i32 %c15 to i64 | 
|  | 890 | %c23 = add i64 %c13, %c16 | 
|  | 891 | %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | 
|  | 892 | %c24 = add nuw nsw i32 %x, 1 | 
|  | 893 | %exitcond.i = icmp eq i64 %indvars.iv.next, 114 | 
|  | 894 | br i1 %exitcond.i, label %exit, label %loop | 
|  | 895 |  | 
|  | 896 | } | 