blob: 6213b4a7c2e9d10f3d29af10554a1301c2b957a3 [file] [log] [blame]
Sanjay Patelb653de12014-09-10 17:58:16 +00001; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
Michael Kuperstein3a3c64d2016-06-01 17:16:46 +00002; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
3; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
Matthew Simpson3c3b4a22016-07-14 14:36:06 +00004; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
Matthew Simpson433cb1d2016-07-06 14:26:59 +00005; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses -instcombine -S | FileCheck %s --check-prefix=INTERLEAVE
Arnold Schwaighofer2e7a9222013-05-14 00:21:18 +00006
7target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
8
9; Make sure that we can handle multiple integer induction variables.
Matt Arsenaulte64c7c72013-10-02 20:29:00 +000010; CHECK-LABEL: @multi_int_induction(
Arnold Schwaighofer2e7a9222013-05-14 00:21:18 +000011; CHECK: vector.body:
12; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
James Molloyc07701b2015-09-02 10:14:54 +000013; CHECK: %[[VAR:.*]] = trunc i64 %index to i32
Arnold Schwaighofer2e7a9222013-05-14 00:21:18 +000014; CHECK: %offset.idx = add i32 190, %[[VAR]]
15define void @multi_int_induction(i32* %A, i32 %N) {
; Writes the values 190, 191, ... into consecutive elements of %A. The loop
; carries two integer induction PHIs: an i64 element index and an i32 value.
16for.body.lr.ph:
17 br label %for.body
18
19for.body:
; i64 index starts at 0, the stored i32 value starts at 190; both step by 1.
20 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
21 %count.09 = phi i32 [ 190, %for.body.lr.ph ], [ %inc, %for.body ]
David Blaikie79e6c742015-02-27 19:29:02 +000022 %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
Arnold Schwaighofer2e7a9222013-05-14 00:21:18 +000023 store i32 %count.09, i32* %arrayidx2, align 4
24 %inc = add nsw i32 %count.09, 1
25 %indvars.iv.next = add i64 %indvars.iv, 1
; The exit test truncates the incremented i64 index to i32 and keeps looping
; while it differs from %N.
26 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
27 %exitcond = icmp ne i32 %lftr.wideiv, %N
28 br i1 %exitcond, label %for.body, label %for.end
29
30for.end:
31 ret void
32}
33
Arnold Schwaighofera846a7f2013-11-01 22:18:19 +000034; Make sure we remove unneeded vectorization of induction variables.
35; In order for instcombine to clean up the vectorized induction variables that we
36; create in the loop vectorizer we need to perform some form of redundancy
37; elimination to get rid of multiple uses.
38
39; IND-LABEL: scalar_use
40
41; IND: br label %vector.body
42; IND: vector.body:
43; Vectorized induction variable.
44; IND-NOT: insertelement <2 x i64>
45; IND-NOT: shufflevector <2 x i64>
46; IND: br {{.*}}, label %vector.body
47
48define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
; Each iteration performs a[iv + offset] += b * a[iv + offset2] with fast-math
; float operations. The induction variable only feeds the two address
; computations and the exit compare.
49entry:
50 br label %for.body
51
52for.body:
53 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
; First address: a[iv + offset]; it is both loaded from and stored to.
54 %ind.sum = add i64 %iv, %offset
David Blaikie79e6c742015-02-27 19:29:02 +000055 %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
David Blaikiea79ac142015-02-27 21:17:42 +000056 %l1 = load float, float* %arr.idx, align 4
; Second address: a[iv + offset2]; load only.
Arnold Schwaighofera846a7f2013-11-01 22:18:19 +000057 %ind.sum2 = add i64 %iv, %offset2
David Blaikie79e6c742015-02-27 19:29:02 +000058 %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
David Blaikiea79ac142015-02-27 21:17:42 +000059 %l2 = load float, float* %arr.idx2, align 4
Arnold Schwaighofera846a7f2013-11-01 22:18:19 +000060 %m = fmul fast float %b, %l2
61 %ad = fadd fast float %l1, %m
62 store float %ad, float* %arr.idx, align 4
; Leave the loop once the incremented index equals %n.
63 %iv.next = add nuw nsw i64 %iv, 1
64 %exitcond = icmp eq i64 %iv.next, %n
65 br i1 %exitcond, label %loopexit, label %for.body
66
67loopexit:
68 ret void
69}
Arnold Schwaighoferb72cb4e2013-11-18 13:14:32 +000070
Matthew Simpson433cb1d2016-07-06 14:26:59 +000071; Make sure we don't create a vector induction phi node that is unused.
72; Scalarize the step vectors instead.
73;
74; for (int i = 0; i < n; ++i)
75; sum += a[i];
76;
Matthew Simpson3c3b4a22016-07-14 14:36:06 +000077; CHECK-LABEL: @scalarize_induction_variable_01(
78; CHECK: vector.body:
79; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
80; CHECK: %[[i0:.+]] = add i64 %index, 0
Matthew Simpson3c3b4a22016-07-14 14:36:06 +000081; CHECK: getelementptr inbounds i64, i64* %a, i64 %[[i0]]
Matthew Simpson3c3b4a22016-07-14 14:36:06 +000082;
83; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_01(
84; UNROLL-NO-IC: vector.body:
85; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
86; UNROLL-NO-IC: %[[i0:.+]] = add i64 %index, 0
Matthew Simpson3c3b4a22016-07-14 14:36:06 +000087; UNROLL-NO-IC: %[[i2:.+]] = add i64 %index, 2
Matthew Simpson3c3b4a22016-07-14 14:36:06 +000088; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i0]]
Matthew Simpson3c3b4a22016-07-14 14:36:06 +000089; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i2]]
Matthew Simpson3c3b4a22016-07-14 14:36:06 +000090;
Matthew Simpson433cb1d2016-07-06 14:26:59 +000091; IND-LABEL: @scalarize_induction_variable_01(
92; IND: vector.body:
93; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
94; IND-NOT: add i64 {{.*}}, 2
95; IND: getelementptr inbounds i64, i64* %a, i64 %index
96;
97; UNROLL-LABEL: @scalarize_induction_variable_01(
98; UNROLL: vector.body:
99; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
100; UNROLL-NOT: add i64 {{.*}}, 4
101; UNROLL: %[[g1:.+]] = getelementptr inbounds i64, i64* %a, i64 %index
102; UNROLL: getelementptr i64, i64* %[[g1]], i64 2
103
104define i64 @scalarize_induction_variable_01(i64 *%a, i64 %n) {
; Returns the running sum of a[0], a[1], ... as an i64. The induction variable
; is used only to address %a and to drive the exit compare.
105entry:
106 br label %for.body
107
108for.body:
; %i is the element index, %sum the accumulated total; both start at 0.
109 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
110 %sum = phi i64 [ %2, %for.body ], [ 0, %entry ]
111 %0 = getelementptr inbounds i64, i64* %a, i64 %i
112 %1 = load i64, i64* %0, align 8
113 %2 = add i64 %1, %sum
; Keep looping while the incremented index is signed-less-than %n.
114 %i.next = add nuw nsw i64 %i, 1
115 %cond = icmp slt i64 %i.next, %n
116 br i1 %cond, label %for.body, label %for.end
117
118for.end:
; Single-incoming phi that carries the final sum out of the loop.
119 %3 = phi i64 [ %2, %for.body ]
120 ret i64 %3
121}
122
123; Make sure we scalarize the step vectors used for the pointer arithmetic. We
124; can't easily simplify vectorized step vectors.
125;
126; float s = 0;
127; for (int i = 0; i < n; i += 8)
128; s += (a[i] + b[i] + 1.0f);
129;
Matthew Simpson3c3b4a22016-07-14 14:36:06 +0000130; CHECK-LABEL: @scalarize_induction_variable_02(
131; CHECK: vector.body:
132; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
133; CHECK: %offset.idx = shl i64 %index, 3
134; CHECK: %[[i0:.+]] = add i64 %offset.idx, 0
135; CHECK: %[[i1:.+]] = add i64 %offset.idx, 8
136; CHECK: getelementptr inbounds float, float* %a, i64 %[[i0]]
137; CHECK: getelementptr inbounds float, float* %a, i64 %[[i1]]
138; CHECK: getelementptr inbounds float, float* %b, i64 %[[i0]]
139; CHECK: getelementptr inbounds float, float* %b, i64 %[[i1]]
140;
141; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02(
142; UNROLL-NO-IC: vector.body:
143; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
144; UNROLL-NO-IC: %offset.idx = shl i64 %index, 3
145; UNROLL-NO-IC: %[[i0:.+]] = add i64 %offset.idx, 0
146; UNROLL-NO-IC: %[[i1:.+]] = add i64 %offset.idx, 8
147; UNROLL-NO-IC: %[[i2:.+]] = add i64 %offset.idx, 16
148; UNROLL-NO-IC: %[[i3:.+]] = add i64 %offset.idx, 24
149; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i0]]
150; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i1]]
151; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i2]]
152; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i3]]
153; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i0]]
154; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i1]]
155; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i2]]
156; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i3]]
157;
Matthew Simpson433cb1d2016-07-06 14:26:59 +0000158; IND-LABEL: @scalarize_induction_variable_02(
159; IND: vector.body:
160; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
161; IND: %[[i0:.+]] = shl i64 %index, 3
162; IND: %[[i1:.+]] = or i64 %[[i0]], 8
163; IND: getelementptr inbounds float, float* %a, i64 %[[i0]]
164; IND: getelementptr inbounds float, float* %a, i64 %[[i1]]
165;
166; UNROLL-LABEL: @scalarize_induction_variable_02(
167; UNROLL: vector.body:
168; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
169; UNROLL: %[[i0:.+]] = shl i64 %index, 3
170; UNROLL: %[[i1:.+]] = or i64 %[[i0]], 8
171; UNROLL: %[[i2:.+]] = or i64 %[[i0]], 16
172; UNROLL: %[[i3:.+]] = or i64 %[[i0]], 24
173; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i0]]
174; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i1]]
175; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i2]]
176; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i3]]
177
178define float @scalarize_induction_variable_02(float* %a, float* %b, i64 %n) {
; Accumulates s = s + 1.0 + a[i] + b[i] with fast-math adds, where the index
; advances by 8 each iteration, and returns the final value of s.
179entry:
180 br label %for.body
181
182for.body:
183 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
184 %s = phi float [ 0.0, %entry ], [ %6, %for.body ]
; The same index %i addresses both input arrays.
185 %0 = getelementptr inbounds float, float* %a, i64 %i
186 %1 = load float, float* %0, align 4
187 %2 = getelementptr inbounds float, float* %b, i64 %i
188 %3 = load float, float* %2, align 4
189 %4 = fadd fast float %s, 1.0
190 %5 = fadd fast float %4, %1
191 %6 = fadd fast float %5, %3
; Non-unit stride: the index steps by 8; continue while it is signed-less-than %n.
192 %i.next = add nuw nsw i64 %i, 8
193 %cond = icmp slt i64 %i.next, %n
194 br i1 %cond, label %for.body, label %for.end
195
196for.end:
; Single-incoming phi that carries the final accumulator out of the loop.
197 %s.lcssa = phi float [ %6, %for.body ]
198 ret float %s.lcssa
199}
200
201; Make sure we scalarize the step vectors used for the pointer arithmetic. We
202; can't easily simplify vectorized step vectors. (Interleaved accesses.)
203;
204; for (int i = 0; i < n; ++i)
205; a[i].f ^= y;
206;
207; INTERLEAVE-LABEL: @scalarize_induction_variable_03(
208; INTERLEAVE: vector.body:
209; INTERLEAVE: %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
210; INTERLEAVE: %[[i1:.+]] = or i64 %[[i0]], 1
211; INTERLEAVE: %[[i2:.+]] = or i64 %[[i0]], 2
212; INTERLEAVE: %[[i3:.+]] = or i64 %[[i0]], 3
213; INTERLEAVE: %[[i4:.+]] = or i64 %[[i0]], 4
214; INTERLEAVE: %[[i5:.+]] = or i64 %[[i0]], 5
215; INTERLEAVE: %[[i6:.+]] = or i64 %[[i0]], 6
216; INTERLEAVE: %[[i7:.+]] = or i64 %[[i0]], 7
Matthew Simpson18d88982016-08-02 15:25:16 +0000217; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i0]], i32 1
218; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i1]], i32 1
219; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i2]], i32 1
220; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i3]], i32 1
221; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i4]], i32 1
222; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i5]], i32 1
223; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i6]], i32 1
224; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i7]], i32 1
Matthew Simpson433cb1d2016-07-06 14:26:59 +0000225
; Struct of two i32 fields; the function below only touches field index 1.
Matthew Simpson18d88982016-08-02 15:25:16 +0000226%pair.i32 = type { i32, i32 }
227define void @scalarize_induction_variable_03(%pair.i32 *%p, i32 %y, i64 %n) {
; Each iteration xors field 1 of p[i] with %y in place (load, xor, store to the
; same address).
Matthew Simpson433cb1d2016-07-06 14:26:59 +0000228entry:
229 br label %for.body
230
231for.body:
232 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; Address of the second i32 member of the i-th pair.
Matthew Simpson18d88982016-08-02 15:25:16 +0000233 %f = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
Matthew Simpson433cb1d2016-07-06 14:26:59 +0000234 %0 = load i32, i32* %f, align 8
235 %1 = xor i32 %0, %y
236 store i32 %1, i32* %f, align 8
; Keep looping while the incremented index is signed-less-than %n.
237 %i.next = add nuw nsw i64 %i, 1
238 %cond = icmp slt i64 %i.next, %n
239 br i1 %cond, label %for.body, label %for.end
240
241for.end:
242 ret void
243}
Arnold Schwaighoferb72cb4e2013-11-18 13:14:32 +0000244
Matthew Simpson58f56282016-08-02 14:29:41 +0000245; Make sure we scalarize the step vectors used for the pointer arithmetic. We
246; can't easily simplify vectorized step vectors. (Interleaved accesses.)
247;
248; for (int i = 0; i < n; ++i)
249; p[i].f = a[i * 4];
250;
251; INTERLEAVE-LABEL: @scalarize_induction_variable_04(
252; INTERLEAVE: vector.body:
253; INTERLEAVE: %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
254; INTERLEAVE: %[[i1:.+]] = or i64 %[[i0]], 1
255; INTERLEAVE: %[[i2:.+]] = or i64 %[[i0]], 2
256; INTERLEAVE: %[[i3:.+]] = or i64 %[[i0]], 3
257; INTERLEAVE: %[[i4:.+]] = or i64 %[[i0]], 4
258; INTERLEAVE: %[[i5:.+]] = or i64 %[[i0]], 5
259; INTERLEAVE: %[[i6:.+]] = or i64 %[[i0]], 6
260; INTERLEAVE: %[[i7:.+]] = or i64 %[[i0]], 7
Matthew Simpson18d88982016-08-02 15:25:16 +0000261; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i0]], i32 1
262; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i1]], i32 1
263; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i2]], i32 1
264; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i3]], i32 1
265; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i4]], i32 1
266; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i5]], i32 1
267; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i6]], i32 1
268; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i7]], i32 1
Matthew Simpson58f56282016-08-02 14:29:41 +0000269
Matthew Simpson18d88982016-08-02 15:25:16 +0000270define void @scalarize_induction_variable_04(i32* %a, %pair.i32* %p, i32 %n) {
; Each iteration copies a[i * 4] into field 1 of p[i]. The i64 induction
; variable feeds a shifted array index, a struct address and a truncated exit
; compare.
Matthew Simpson58f56282016-08-02 14:29:41 +0000271entry:
272 br label %for.body
273
274for.body:
275 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry]
; The source index is i << 2, i.e. i * 4. Both memory accesses use align 1.
276 %0 = shl nsw i64 %i, 2
277 %1 = getelementptr inbounds i32, i32* %a, i64 %0
278 %2 = load i32, i32* %1, align 1
Matthew Simpson18d88982016-08-02 15:25:16 +0000279 %3 = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
Matthew Simpson58f56282016-08-02 14:29:41 +0000280 store i32 %2, i32* %3, align 1
; %n is i32, so the incremented i64 index is truncated before the equality test.
281 %i.next = add nuw nsw i64 %i, 1
282 %4 = trunc i64 %i.next to i32
283 %cond = icmp eq i32 %4, %n
284 br i1 %cond, label %for.end, label %for.body
285
286for.end:
287 ret void
288}
289
Matthew Simpson78088332016-09-30 15:13:52 +0000290; PR30542. Ensure we generate all the scalar steps for the induction variable.
291; The scalar induction variable is used by a getelementptr instruction
292; (uniform), and a udiv (non-uniform).
293;
294; int sum = 0;
295; for (int i = 0; i < n; ++i) {
296; int x = a[i];
297; if (c)
298; x /= i;
299; sum += x;
300; }
301;
302; CHECK-LABEL: @scalarize_induction_variable_05(
303; CHECK: vector.body:
304; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue2 ]
305; CHECK: %[[I0:.+]] = add i32 %index, 0
Matthew Simpson78088332016-09-30 15:13:52 +0000306; CHECK: getelementptr inbounds i32, i32* %a, i32 %[[I0]]
307; CHECK: pred.udiv.if:
308; CHECK: udiv i32 {{.*}}, %[[I0]]
309; CHECK: pred.udiv.if1:
Matthew Simpsonc62266d2016-10-25 18:59:45 +0000310; CHECK: %[[I1:.+]] = add i32 %index, 1
Matthew Simpson78088332016-09-30 15:13:52 +0000311; CHECK: udiv i32 {{.*}}, %[[I1]]
312;
313; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_05(
314; UNROLL-NO-IC: vector.body:
315; UNROLL-NO-IC: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue11 ]
316; UNROLL-NO-IC: %[[I0:.+]] = add i32 %index, 0
Matthew Simpson78088332016-09-30 15:13:52 +0000317; UNROLL-NO-IC: %[[I2:.+]] = add i32 %index, 2
Matthew Simpson78088332016-09-30 15:13:52 +0000318; UNROLL-NO-IC: getelementptr inbounds i32, i32* %a, i32 %[[I0]]
319; UNROLL-NO-IC: getelementptr inbounds i32, i32* %a, i32 %[[I2]]
320; UNROLL-NO-IC: pred.udiv.if:
321; UNROLL-NO-IC: udiv i32 {{.*}}, %[[I0]]
322; UNROLL-NO-IC: pred.udiv.if6:
Matthew Simpsonc62266d2016-10-25 18:59:45 +0000323; UNROLL-NO-IC: %[[I1:.+]] = add i32 %index, 1
Matthew Simpson78088332016-09-30 15:13:52 +0000324; UNROLL-NO-IC: udiv i32 {{.*}}, %[[I1]]
325; UNROLL-NO-IC: pred.udiv.if8:
326; UNROLL-NO-IC: udiv i32 {{.*}}, %[[I2]]
327; UNROLL-NO-IC: pred.udiv.if10:
Matthew Simpsonc62266d2016-10-25 18:59:45 +0000328; UNROLL-NO-IC: %[[I3:.+]] = add i32 %index, 3
Matthew Simpson78088332016-09-30 15:13:52 +0000329; UNROLL-NO-IC: udiv i32 {{.*}}, %[[I3]]
330;
331; IND-LABEL: @scalarize_induction_variable_05(
332; IND: vector.body:
333; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue2 ]
Matthew Simpson78088332016-09-30 15:13:52 +0000334; IND: %[[E0:.+]] = sext i32 %index to i64
335; IND: getelementptr inbounds i32, i32* %a, i64 %[[E0]]
336; IND: pred.udiv.if:
337; IND: udiv i32 {{.*}}, %index
338; IND: pred.udiv.if1:
Matthew Simpsonc62266d2016-10-25 18:59:45 +0000339; IND: %[[I1:.+]] = or i32 %index, 1
Matthew Simpson78088332016-09-30 15:13:52 +0000340; IND: udiv i32 {{.*}}, %[[I1]]
341;
342; UNROLL-LABEL: @scalarize_induction_variable_05(
343; UNROLL: vector.body:
344; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue11 ]
Matthew Simpson78088332016-09-30 15:13:52 +0000345; UNROLL: %[[I2:.+]] = or i32 %index, 2
Matthew Simpson78088332016-09-30 15:13:52 +0000346; UNROLL: %[[E0:.+]] = sext i32 %index to i64
347; UNROLL: %[[G0:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[E0]]
348; UNROLL: getelementptr i32, i32* %[[G0]], i64 2
349; UNROLL: pred.udiv.if:
350; UNROLL: udiv i32 {{.*}}, %index
351; UNROLL: pred.udiv.if6:
Matthew Simpsonc62266d2016-10-25 18:59:45 +0000352; UNROLL: %[[I1:.+]] = or i32 %index, 1
Matthew Simpson78088332016-09-30 15:13:52 +0000353; UNROLL: udiv i32 {{.*}}, %[[I1]]
354; UNROLL: pred.udiv.if8:
355; UNROLL: udiv i32 {{.*}}, %[[I2]]
356; UNROLL: pred.udiv.if10:
Matthew Simpsonc62266d2016-10-25 18:59:45 +0000357; UNROLL: %[[I3:.+]] = or i32 %index, 3
Matthew Simpson78088332016-09-30 15:13:52 +0000358; UNROLL: udiv i32 {{.*}}, %[[I3]]
359
360define i32 @scalarize_induction_variable_05(i32* %a, i32 %x, i1 %c, i32 %n) {
; Sums the elements of %a. When %c is true, each loaded element is first
; divided (udiv) by the current index %i before being added. %c is a function
; argument, so the branch condition is the same on every iteration. %x is not
; referenced in the body.
361entry:
362 br label %for.body
363
364for.body:
; Note that %i starts at 0, so the first udiv on the %c path has a zero divisor.
365 %i = phi i32 [ 0, %entry ], [ %i.next, %if.end ]
366 %sum = phi i32 [ 0, %entry ], [ %tmp4, %if.end ]
367 %tmp0 = getelementptr inbounds i32, i32* %a, i32 %i
368 %tmp1 = load i32, i32* %tmp0, align 4
369 br i1 %c, label %if.then, label %if.end
370
371if.then:
; Conditionally executed division whose divisor is the induction variable.
372 %tmp2 = udiv i32 %tmp1, %i
373 br label %if.end
374
375if.end:
; Select the divided or the raw element depending on the path taken.
376 %tmp3 = phi i32 [ %tmp2, %if.then ], [ %tmp1, %for.body ]
377 %tmp4 = add i32 %tmp3, %sum
378 %i.next = add nuw nsw i32 %i, 1
379 %cond = icmp slt i32 %i.next, %n
380 br i1 %cond, label %for.body, label %for.end
381
382for.end:
; Single-incoming phi that carries the final sum out of the loop.
383 %tmp5 = phi i32 [ %tmp4, %if.end ]
384 ret i32 %tmp5
385}
386
Matthew Simpson18d88982016-08-02 15:25:16 +0000387; Ensure we generate both a vector and a scalar induction variable. In this
388; test, the induction variable is used by an instruction that will be
389; vectorized (trunc) as well as an instruction that will remain in scalar form
390; (getelementptr).
391;
392; CHECK-LABEL: @iv_vector_and_scalar_users(
393; CHECK: vector.body:
394; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
395; CHECK: %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
396; CHECK: %vec.ind1 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next2, %vector.body ]
397; CHECK: %[[i0:.+]] = add i64 %index, 0
398; CHECK: %[[i1:.+]] = add i64 %index, 1
399; CHECK: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i0]], i32 1
400; CHECK: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1
401; CHECK: %index.next = add i64 %index, 2
402; CHECK: %vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2>
403; CHECK: %vec.ind.next2 = add <2 x i32> %vec.ind1, <i32 2, i32 2>
404;
405; IND-LABEL: @iv_vector_and_scalar_users(
406; IND: vector.body:
407; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
408; IND: %vec.ind1 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next2, %vector.body ]
409; IND: %[[i1:.+]] = or i64 %index, 1
410; IND: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %index, i32 1
411; IND: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1
412; IND: %index.next = add i64 %index, 2
413; IND: %vec.ind.next2 = add <2 x i32> %vec.ind1, <i32 2, i32 2>
414;
415; UNROLL-LABEL: @iv_vector_and_scalar_users(
416; UNROLL: vector.body:
417; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
418; UNROLL: %vec.ind2 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next5, %vector.body ]
419; UNROLL: %[[i1:.+]] = or i64 %index, 1
420; UNROLL: %[[i2:.+]] = or i64 %index, 2
421; UNROLL: %[[i3:.+]] = or i64 %index, 3
422; UNROLL: %step.add3 = add <2 x i32> %vec.ind2, <i32 2, i32 2>
423; UNROLL: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %index, i32 1
424; UNROLL: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1
425; UNROLL: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i2]], i32 1
426; UNROLL: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i3]], i32 1
427; UNROLL: %index.next = add i64 %index, 4
428; UNROLL: %vec.ind.next5 = add <2 x i32> %vec.ind2, <i32 4, i32 4>
429
; Struct of two i16 fields; the function below only writes field index 1.
430%pair.i16 = type { i16, i16 }
431define void @iv_vector_and_scalar_users(%pair.i16* %p, i32 %a, i32 %n) {
; Each iteration stores the low 16 bits of (%a + i) into field 1 of p[i]. The
; i64 induction variable is consumed both by a trunc (value computation) and by
; a getelementptr (address computation).
432entry:
433 br label %for.body
434
435for.body:
436 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; Value path: narrow the index to i32, add %a, then narrow to i16.
437 %0 = trunc i64 %i to i32
438 %1 = add i32 %a, %0
439 %2 = trunc i32 %1 to i16
; Address path: second i16 member of the i-th pair.
440 %3 = getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %i, i32 1
441 store i16 %2, i16* %3, align 2
; %n is i32, so the incremented i64 index is truncated before the equality test.
442 %i.next = add nuw nsw i64 %i, 1
443 %4 = trunc i64 %i.next to i32
444 %cond = icmp eq i32 %4, %n
445 br i1 %cond, label %for.end, label %for.body
446
447for.end:
448 ret void
449}
450
Arnold Schwaighoferb72cb4e2013-11-18 13:14:32 +0000451; Make sure that the loop exit count computation does not overflow for i8 and
452; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the
453; induction variable to a bigger type the exit count computation will overflow
454; to 0.
455; PR17532
456
457; CHECK-LABEL: i8_loop
Benjamin Kramerc10563d2014-01-11 21:06:00 +0000458; CHECK: icmp eq i32 {{.*}}, 256
Arnold Schwaighoferb72cb4e2013-11-18 13:14:32 +0000459define i32 @i8_loop() nounwind readnone ssp uwtable {
; An i8 counter starts at 0 and has -1 added on every iteration; the loop exits
; when the counter is 0 again after the add. Each iteration also computes
; a = a & 4 (starting from 1), and the last such value is returned.
460 br label %1
461
462; <label>:1 ; preds = %1, %0
463 %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
464 %b.0 = phi i8 [ 0, %0 ], [ %3, %1 ]
465 %2 = and i32 %a.0, 4
; The i8 add is not marked nuw/nsw.
466 %3 = add i8 %b.0, -1
467 %4 = icmp eq i8 %3, 0
468 br i1 %4, label %5, label %1
469
470; <label>:5 ; preds = %1
471 ret i32 %2
472}
473
474; CHECK-LABEL: i16_loop
Benjamin Kramerc10563d2014-01-11 21:06:00 +0000475; CHECK: icmp eq i32 {{.*}}, 65536
Arnold Schwaighoferb72cb4e2013-11-18 13:14:32 +0000476
477define i32 @i16_loop() nounwind readnone ssp uwtable {
; An i16 counter starts at 0 and has -1 added on every iteration; the loop exits
; when the counter is 0 again after the add. Each iteration also computes
; a = a & 4 (starting from 1), and the last such value is returned.
478 br label %1
479
480; <label>:1 ; preds = %1, %0
481 %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
482 %b.0 = phi i16 [ 0, %0 ], [ %3, %1 ]
483 %2 = and i32 %a.0, 4
; The i16 add is not marked nuw/nsw.
484 %3 = add i16 %b.0, -1
485 %4 = icmp eq i16 %3, 0
486 br i1 %4, label %5, label %1
487
488; <label>:5 ; preds = %1
489 ret i32 %2
490}
Arnold Schwaighofere2067682014-05-29 22:10:01 +0000491
492; This loop has a backedge taken count of i32_max. We need to check for this
493; condition and branch directly to the scalar loop.
494
495; CHECK-LABEL: max_i32_backedgetaken
James Molloycba92302015-09-02 10:15:22 +0000496; CHECK: br i1 true, label %scalar.ph, label %min.iters.checked
Arnold Schwaighofere2067682014-05-29 22:10:01 +0000497
Matthew Simpson433cb1d2016-07-06 14:26:59 +0000498; CHECK: middle.block:
499; CHECK: %[[v9:.+]] = extractelement <2 x i32> %bin.rdx, i32 0
Arnold Schwaighofere2067682014-05-29 22:10:01 +0000500; CHECK: scalar.ph:
Matthew Simpson433cb1d2016-07-06 14:26:59 +0000501; CHECK: %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %[[v0:.+]] ]
502; CHECK: %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ 1, %min.iters.checked ], [ %[[v9]], %middle.block ]
Arnold Schwaighofere2067682014-05-29 22:10:01 +0000503
504define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
; An i32 counter starts at 0 and has -1 added on every iteration; the loop exits
; when the counter is 0 again after the add. Each iteration also computes
; a = a & 4 (starting from 1), and the last such value is returned.
505
506 br label %1
507
508; <label>:1 ; preds = %1, %0
509 %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
510 %b.0 = phi i32 [ 0, %0 ], [ %3, %1 ]
511 %2 = and i32 %a.0, 4
; The i32 add is not marked nuw/nsw.
512 %3 = add i32 %b.0, -1
513 %4 = icmp eq i32 %3, 0
514 br i1 %4, label %5, label %1
515
516; <label>:5 ; preds = %1
517 ret i32 %2
518}
Arnold Schwaighoferc11107c2014-06-22 03:38:59 +0000519
520; When generating the overflow check we must ensure that the induction start value
521; is defined before the branch to the scalar preheader.
522
523; CHECK-LABEL: testoverflowcheck
524; CHECK: entry
525; CHECK: %[[LOAD:.*]] = load i8
Arnold Schwaighoferc11107c2014-06-22 03:38:59 +0000526; CHECK: br
527
528; CHECK: scalar.ph
James Molloyc07701b2015-09-02 10:14:54 +0000529; CHECK: phi i8 [ %{{.*}}, %middle.block ], [ %[[LOAD]], %entry ]
Arnold Schwaighoferc11107c2014-06-22 03:38:59 +0000530
; Globals read by @testoverflowcheck: @e supplies the i8 start value of the
; loop counter, @d the loop-invariant mask, @c the initial accumulator.
531@e = global i8 1, align 1
532@d = common global i32 0, align 4
533@c = common global i32 0, align 4
534define i32 @testoverflowcheck() {
; Repeatedly ands an accumulator (initially the value of @c) with the value of
; @d while an i8 counter, whose start value is loaded from @e, is incremented
; until it becomes 0. Returns the final accumulator.
535entry:
; The induction start value %.pr.i is a load, i.e. not a compile-time constant.
David Blaikiea79ac142015-02-27 21:17:42 +0000536 %.pr.i = load i8, i8* @e, align 1
537 %0 = load i32, i32* @d, align 4
538 %c.promoted.i = load i32, i32* @c, align 4
Arnold Schwaighoferc11107c2014-06-22 03:38:59 +0000539 br label %cond.end.i
540
541cond.end.i:
542 %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ]
543 %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ]
544 %and.i = and i32 %0, %and3.i
; The i8 increment is not marked nuw/nsw; the loop ends when it yields 0.
545 %inc.i = add i8 %inc4.i, 1
546 %tobool.i = icmp eq i8 %inc.i, 0
547 br i1 %tobool.i, label %loopexit, label %cond.end.i
548
549loopexit:
550 ret i32 %and.i
551}
Silviu Barangac05bab82016-05-05 15:20:39 +0000552
553; The SCEV expression of %sphi is (zext i8 {%t,+,1}<%loop> to i32)
554; In order to recognize %sphi as an induction PHI and vectorize this loop,
555; we need to convert the SCEV expression into an AddRecExpr.
556; The expression gets converted to {zext i8 %t to i32,+,1}.
557
558; CHECK-LABEL: wrappingindvars1
559; CHECK-LABEL: vector.scevcheck
Michael Kupersteinc5edcde2016-06-09 18:03:15 +0000560; CHECK-LABEL: vector.ph
561; CHECK: %[[START:.*]] = add <2 x i32> %{{.*}}, <i32 0, i32 1>
Silviu Barangac05bab82016-05-05 15:20:39 +0000562; CHECK-LABEL: vector.body
Michael Kupersteinc5edcde2016-06-09 18:03:15 +0000563; CHECK: %[[PHI:.*]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.*]], %vector.body ]
564; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 2, i32 2>
Silviu Barangac05bab82016-05-05 15:20:39 +0000565define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) {
; Stores %sphi into A[idx] on every iteration, where idx is an i8 index starting
; at %t and %sphi is the zero-extension of that i8 index to i32. A separate i32
; counter %idx.b controls the trip count.
566 entry:
567 %st = zext i8 %t to i16
568 %ext = zext i8 %t to i32
; The loop is entered only when %t, compared as an unsigned i16, is below 42.
569 %ecmp = icmp ult i16 %st, 42
570 br i1 %ecmp, label %loop, label %exit
571
572 loop:
573
574 %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
575 %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]
576 %sphi = phi i32 [ %ext, %entry ], [%idx.inc.ext, %loop]
577
578 %ptr = getelementptr inbounds i32, i32* %A, i8 %idx
579 store i32 %sphi, i32* %ptr
580
; The i8 increment carries no nuw/nsw flags; its zext feeds %sphi on the backedge.
581 %idx.inc = add i8 %idx, 1
582 %idx.inc.ext = zext i8 %idx.inc to i32
583 %idx.b.inc = add nuw nsw i32 %idx.b, 1
584
; The exit test uses the pre-increment counter %idx.b, compared unsigned with %len.
585 %c = icmp ult i32 %idx.b, %len
586 br i1 %c, label %loop, label %exit
587
588 exit:
589 ret void
590}
591
592; The SCEV expression of %sphi is (4 * (zext i8 {%t,+,1}<%loop> to i32))
593; In order to recognize %sphi as an induction PHI and vectorize this loop,
594; we need to convert the SCEV expression into an AddRecExpr.
595; The expression gets converted to ({4 * (zext %t to i32),+,4}).
596; CHECK-LABEL: wrappingindvars2
597; CHECK-LABEL: vector.scevcheck
Michael Kupersteinc5edcde2016-06-09 18:03:15 +0000598; CHECK-LABEL: vector.ph
599; CHECK: %[[START:.*]] = add <2 x i32> %{{.*}}, <i32 0, i32 4>
Silviu Barangac05bab82016-05-05 15:20:39 +0000600; CHECK-LABEL: vector.body
Michael Kupersteinc5edcde2016-06-09 18:03:15 +0000601; CHECK: %[[PHI:.*]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.*]], %vector.body ]
602; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 8, i32 8>
Silviu Barangac05bab82016-05-05 15:20:39 +0000603define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) {
; Stores %sphi into A[idx] on every iteration, where idx is an i8 index starting
; at %t and %sphi is 4 times the zero-extension of that i8 index to i32. A
; separate i32 counter %idx.b controls the trip count.
604
605entry:
606 %st = zext i8 %t to i16
607 %ext = zext i8 %t to i32
608 %ext.mul = mul i32 %ext, 4
609
; The loop is entered only when %t, compared as an unsigned i16, is below 42.
610 %ecmp = icmp ult i16 %st, 42
611 br i1 %ecmp, label %loop, label %exit
612
613 loop:
614
615 %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
616 %sphi = phi i32 [ %ext.mul, %entry ], [%mul, %loop]
617 %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]
618
619 %ptr = getelementptr inbounds i32, i32* %A, i8 %idx
620 store i32 %sphi, i32* %ptr
621
; The i8 increment carries no nuw/nsw flags; its zext times 4 feeds %sphi on
; the backedge.
622 %idx.inc = add i8 %idx, 1
623 %idx.inc.ext = zext i8 %idx.inc to i32
624 %mul = mul i32 %idx.inc.ext, 4
625 %idx.b.inc = add nuw nsw i32 %idx.b, 1
626
; The exit test uses the pre-increment counter %idx.b, compared unsigned with %len.
627 %c = icmp ult i32 %idx.b, %len
628 br i1 %c, label %loop, label %exit
629
630 exit:
631 ret void
632}
Michael Kuperstein3a3c64d2016-06-01 17:16:46 +0000633
634; Check that we generate vectorized IVs in the pre-header
635; instead of widening the scalar IV inside the loop, when
636; we know how to do that.
637; IND-LABEL: veciv
638; IND: vector.body:
639; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
Matthew Simpson102729c2016-07-21 21:20:15 +0000640; IND: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
Michael Kuperstein3a3c64d2016-06-01 17:16:46 +0000641; IND: %index.next = add i32 %index, 2
Matthew Simpson102729c2016-07-21 21:20:15 +0000642; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
Michael Kuperstein3a3c64d2016-06-01 17:16:46 +0000643; IND: %[[CMP:.*]] = icmp eq i32 %index.next
644; IND: br i1 %[[CMP]]
645; UNROLL-LABEL: veciv
646; UNROLL: vector.body:
647; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
Matthew Simpson102729c2016-07-21 21:20:15 +0000648; UNROLL: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
Michael Kuperstein3a3c64d2016-06-01 17:16:46 +0000649; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
Michael Kuperstein3a3c64d2016-06-01 17:16:46 +0000650; UNROLL: %index.next = add i32 %index, 4
Matthew Simpson102729c2016-07-21 21:20:15 +0000651; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 4, i32 4>
Michael Kuperstein3a3c64d2016-06-01 17:16:46 +0000652; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next
653; UNROLL: br i1 %[[CMP]]
654define void @veciv(i32* nocapture %a, i32 %start, i32 %k) {
; Stores the i32 induction variable itself into a[i], so the same value is used
; both as the address index and as the stored data. %start is not referenced in
; the body.
655for.body.preheader:
656 br label %for.body
657
658for.body:
659 %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
660 %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
661 store i32 %indvars.iv, i32* %arrayidx, align 4
; Leave the loop once the incremented index equals %k.
662 %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
663 %exitcond = icmp eq i32 %indvars.iv.next, %k
664 br i1 %exitcond, label %exit, label %for.body
665
666exit:
667 ret void
668}
669
670; IND-LABEL: trunciv
671; IND: vector.body:
672; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
673; IND: %[[VECIND:.*]] = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %[[STEPADD:.*]], %vector.body ]
Michael Kuperstein3a3c64d2016-06-01 17:16:46 +0000674; IND: %index.next = add i64 %index, 2
Matthew Simpson102729c2016-07-21 21:20:15 +0000675; IND: %[[STEPADD]] = add <2 x i32> %[[VECIND]], <i32 2, i32 2>
Michael Kuperstein3a3c64d2016-06-01 17:16:46 +0000676; IND: %[[CMP:.*]] = icmp eq i64 %index.next
677; IND: br i1 %[[CMP]]
678define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
; Same shape as a loop storing its own index, but the induction variable is i64
; and is truncated to i32 before being used as both the address index and the
; stored value. %start is not referenced in the body.
679for.body.preheader:
680 br label %for.body
681
682for.body:
683 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
; The truncated copy of the index is the only value used for the store.
684 %trunc.iv = trunc i64 %indvars.iv to i32
685 %arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv
686 store i32 %trunc.iv, i32* %arrayidx, align 4
; The exit compare is done on the full-width i64 index.
687 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
688 %exitcond = icmp eq i64 %indvars.iv.next, %k
689 br i1 %exitcond, label %exit, label %for.body
690
691exit:
692 ret void
693}
Michael Kupersteinc5edcde2016-06-09 18:03:15 +0000694
; nonprimary - the loop's only induction variable starts at %i instead of 0.
; All three check groups below expect the vectorizer to introduce its own
; scalar %index starting at 0, to form the store address from %index and %i,
; and to build a separate vector induction %vec.ind whose start value is
; splat(%i) + <0, 1>.
;
; First group: width 2, interleave 1, no instcombine.
; CHECK-LABEL: @nonprimary(
; CHECK: vector.ph:
; CHECK: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
; CHECK: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
; CHECK: vector.body:
; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
; CHECK: %offset.idx = add i32 %i, %index
; CHECK: %[[A1:.*]] = add i32 %offset.idx, 0
; CHECK: %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i32 %[[A1]]
; CHECK: %[[G3:.*]] = getelementptr i32, i32* %[[G1]], i32 0
; CHECK: %[[B1:.*]] = bitcast i32* %[[G3]] to <2 x i32>*
; CHECK: store <2 x i32> %vec.ind, <2 x i32>* %[[B1]]
; CHECK: %index.next = add i32 %index, 2
; CHECK: %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
; CHECK: %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec
; CHECK: br i1 %[[CMP]]
;
; Second group: width 2, interleave 1, followed by instcombine.
; IND-LABEL: @nonprimary(
; IND: vector.ph:
; IND: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
; IND: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; IND: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
; IND: vector.body:
; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
; IND: %[[A1:.*]] = add i32 %index, %i
; IND: %[[S1:.*]] = sext i32 %[[A1]] to i64
; IND: %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i64 %[[S1]]
; IND: %[[B1:.*]] = bitcast i32* %[[G1]] to <2 x i32>*
; IND: store <2 x i32> %vec.ind, <2 x i32>* %[[B1]]
; IND: %index.next = add i32 %index, 2
; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
; IND: %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec
; IND: br i1 %[[CMP]]
;
; Third group: width 2, interleave 2, followed by instcombine. The second
; unrolled part stores %step.add two elements past the first store.
; UNROLL-LABEL: @nonprimary(
; UNROLL: vector.ph:
; UNROLL: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
; UNROLL: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; UNROLL: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
; UNROLL: vector.body:
; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
; UNROLL: %[[A1:.*]] = add i32 %index, %i
; UNROLL: %[[S1:.*]] = sext i32 %[[A1]] to i64
; UNROLL: %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i64 %[[S1]]
; UNROLL: %[[B1:.*]] = bitcast i32* %[[G1]] to <2 x i32>*
; UNROLL: store <2 x i32> %vec.ind, <2 x i32>* %[[B1]]
; UNROLL: %[[G2:.*]] = getelementptr i32, i32* %[[G1]], i64 2
; UNROLL: %[[B2:.*]] = bitcast i32* %[[G2]] to <2 x i32>*
; UNROLL: store <2 x i32> %step.add, <2 x i32>* %[[B2]]
; UNROLL: %index.next = add i32 %index, 4
; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 4, i32 4>
; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec
; UNROLL: br i1 %[[CMP]]
define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) {
; Note: %start is not referenced anywhere in the body.
for.body.preheader:
  br label %for.body

for.body:
  ; The induction variable starts at the runtime value %i, not at 0.
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ %i, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
  ; The IV is the stored value as well as the index: a[iv] = iv.
  store i32 %indvars.iv, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %k
  br i1 %exitcond, label %exit, label %for.body

exit:
  ret void
}