; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Check vectorization on an interleaved load group of factor 2 and an interleaved
; store group of factor 2.

; int AB[1024];
; int CD[1024];
; void test_array_load2_store2(int C, int D) {
;   for (int i = 0; i < 1024; i+=2) {
;     int A = AB[i];
;     int B = AB[i+1];
;     CD[i] = A + C;
;     CD[i+1] = B * D;
;   }
; }

; CHECK-LABEL: @test_array_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: add nsw <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4

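; Note (a sketch of the expected codegen, not additional checked output):
; %wide.vec loads AB[i..i+7]; the masks <0,2,4,6> and <1,3,5,7> de-interleave
; it into the A and B operands; the final mask <0,4,1,5,2,6,3,7> then
; re-interleaves the add and mul results so memory receives
; {A0+C, B0*D, A1+C, B1*D, ...}.
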
@AB = common global [1024 x i32] zeroinitializer, align 4
@CD = common global [1024 x i32] zeroinitializer, align 4

define void @test_array_load2_store2(i32 %C, i32 %D) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx0, align 4
  %tmp1 = or i64 %indvars.iv, 1
  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %tmp1
  %tmp2 = load i32, i32* %arrayidx1, align 4
  %add = add nsw i32 %tmp, %C
  %mul = mul nsw i32 %tmp2, %D
  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv
  store i32 %add, i32* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %tmp1
  store i32 %mul, i32* %arrayidx3, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp slt i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}

; Check vectorization on an interleaved load group of factor 3 and an
; interleaved store group of factor 3.

; int A[3072];
; struct ST3 S[1024];
; void test_struct_array_load3_store3() {
;   int *ptr = A;
;   for (int i = 0; i < 1024; i++) {
;     int X1 = *ptr++;
;     int X2 = *ptr++;
;     int X3 = *ptr++;
;     S[i].x = X1 + 1;
;     S[i].y = X2 + 2;
;     S[i].z = X3 + 3;
;   }
; }

; CHECK-LABEL: @test_struct_array_load3_store3(
; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 1, i32 1, i32 1, i32 1>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 2, i32 2, i32 2, i32 2>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* {{.*}}, align 4

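; Note: for a factor-3 group the wide type is <12 x i32>. The two <8 x i32>
; shuffles above first concatenate the x and y results, then widen the z
; result with undef lanes, so that the final <12 x i32> mask can pick
; <0,4,8, 1,5,9, ...> and emit the struct fields in x, y, z order.
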
%struct.ST3 = type { i32, i32, i32 }
@A = common global [3072 x i32] zeroinitializer, align 4
@S = common global [1024 x %struct.ST3] zeroinitializer, align 4

define void @test_struct_array_load3_store3() {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %ptr.016 = phi i32* [ getelementptr inbounds ([3072 x i32], [3072 x i32]* @A, i64 0, i64 0), %entry ], [ %incdec.ptr2, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %ptr.016, i64 1
  %tmp = load i32, i32* %ptr.016, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %ptr.016, i64 2
  %tmp1 = load i32, i32* %incdec.ptr, align 4
  %incdec.ptr2 = getelementptr inbounds i32, i32* %ptr.016, i64 3
  %tmp2 = load i32, i32* %incdec.ptr1, align 4
  %add = add nsw i32 %tmp, 1
  %x = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 0
  store i32 %add, i32* %x, align 4
  %add3 = add nsw i32 %tmp1, 2
  %y = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 1
  store i32 %add3, i32* %y, align 4
  %add6 = add nsw i32 %tmp2, 3
  %z = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 2
  store i32 %add6, i32* %z, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Check vectorization on an interleaved load group of factor 4.

; struct ST4 {
;   int x;
;   int y;
;   int z;
;   int w;
; };
; int test_struct_load4(struct ST4 *S) {
;   int r = 0;
;   for (int i = 0; i < 1024; i++) {
;     r += S[i].x;
;     r -= S[i].y;
;     r += S[i].z;
;     r -= S[i].w;
;   }
;   return r;
; }

; CHECK-LABEL: @test_struct_load4(
; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* {{.*}}, align 4
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK: add nsw <4 x i32>
; CHECK: sub <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: sub <4 x i32>

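; Note: a single <16 x i32> load covers four consecutive ST4 structs; the four
; stride-4 shuffles above recover the x, y, z and w lanes feeding the add/sub
; reduction. The subs carry no nsw flag because the scalar subs below do not.
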
%struct.ST4 = type { i32, i32, i32, i32 }

define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %r.022 = phi i32 [ 0, %entry ], [ %sub8, %for.body ]
  %x = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %x, align 4
  %add = add nsw i32 %tmp, %r.022
  %y = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 1
  %tmp1 = load i32, i32* %y, align 4
  %sub = sub i32 %add, %tmp1
  %z = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 2
  %tmp2 = load i32, i32* %z, align 4
  %add5 = add nsw i32 %sub, %tmp2
  %w = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 3
  %tmp3 = load i32, i32* %w, align 4
  %sub8 = sub i32 %add5, %tmp3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret i32 %sub8
}

; Check vectorization on an interleaved store group of factor 4.

; void test_struct_store4(int *A, struct ST4 *B) {
;   int *ptr = A;
;   for (int i = 0; i < 1024; i++) {
;     int X = *ptr++;
;     B[i].x = X + 1;
;     B[i].y = X * 2;
;     B[i].z = X + 3;
;     B[i].w = X + 4;
;   }
; }

; CHECK-LABEL: @test_struct_store4(
; CHECK: %[[LD:.*]] = load <4 x i32>, <4 x i32>*
; CHECK: add nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK: shl nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK: add nsw <4 x i32> %[[LD]], <i32 3, i32 3, i32 3, i32 3>
; CHECK: add nsw <4 x i32> %[[LD]], <i32 4, i32 4, i32 4, i32 4>
; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; CHECK: store <16 x i32> %interleaved.vec, <16 x i32>* {{.*}}, align 4

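; Note: the four <4 x i32> results are first concatenated pairwise into two
; <8 x i32> vectors; the final <16 x i32> mask <0,4,8,12, 1,5,9,13, ...> then
; interleaves them so each ST4 struct is written as {X+1, X*2, X+3, X+4} by a
; single wide store.
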
define void @test_struct_store4(i32* noalias nocapture readonly %A, %struct.ST4* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %ptr.024 = phi i32* [ %A, %entry ], [ %incdec.ptr, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %ptr.024, i64 1
  %tmp = load i32, i32* %ptr.024, align 4
  %add = add nsw i32 %tmp, 1
  %x = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 0
  store i32 %add, i32* %x, align 4
  %mul = shl nsw i32 %tmp, 1
  %y = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 1
  store i32 %mul, i32* %y, align 4
  %add3 = add nsw i32 %tmp, 3
  %z = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 2
  store i32 %add3, i32* %z, align 4
  %add6 = add nsw i32 %tmp, 4
  %w = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 3
  store i32 %add6, i32* %w, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check vectorization on a reverse interleaved load group of factor 2 and
; a reverse interleaved store group of factor 2.

; struct ST2 {
;   int x;
;   int y;
; };
;
; void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) {
;   for (int i = 1023; i >= 0; i--) {
;     int a = A[i].x + i;  // interleaved load of index 0
;     int b = A[i].y - i;  // interleaved load of index 1
;     B[i].x = a;          // interleaved store of index 0
;     B[i].y = b;          // interleaved store of index 1
;   }
; }

; CHECK-LABEL: @test_reversed_load2_store2(
; CHECK: %[[G0:.+]] = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %offset.idx, i32 0
; CHECK: %[[G1:.+]] = getelementptr i32, i32* %[[G0]], i64 -6
; CHECK: %[[B0:.+]] = bitcast i32* %[[G1]] to <8 x i32>*
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %[[B0]], align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: add nsw <4 x i32>
; CHECK: sub nsw <4 x i32>
; CHECK: %[[G2:.+]] = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %offset.idx, i32 1
; CHECK: %[[G3:.+]] = getelementptr i32, i32* %[[G2]], i64 -7
; CHECK: %[[B1:.+]] = bitcast i32* %[[G3]] to <8 x i32>*
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %[[B1]], align 4

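; Note on the offsets above: %offset.idx points at A[i].x, i.e. i32 index 2*i.
; A reverse group of four ST2 structs spans i32 indices 2*i-6 .. 2*i+1, so the
; wide load starts 6 i32s before A[i].x (hence -6) and the wide store starts
; 7 i32s before B[i].y (hence -7). The extra <3,2,1,0> shuffles reverse each
; lane vector to match the descending induction variable.
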
%struct.ST2 = type { i32, i32 }

define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly %A, %struct.ST2* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
  %x = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %x, align 4
  %tmp1 = trunc i64 %indvars.iv to i32
  %add = add nsw i32 %tmp, %tmp1
  %y = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 1
  %tmp2 = load i32, i32* %y, align 4
  %sub = sub nsw i32 %tmp2, %tmp1
  %x5 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 0
  store i32 %add, i32* %x5, align 4
  %y8 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 1
  store i32 %sub, i32* %y8, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on an interleaved load group of factor 2 with 1 gap
; (missing the load of odd elements). Because the vectorized loop would
; speculatively access memory out-of-bounds, we must execute at least one
; iteration of the scalar loop.

; void even_load_static_tc(int *A, int *B) {
;   for (unsigned i = 0; i < 1024; i+=2)
;     B[i/2] = A[i] * 2;
; }

; CHECK-LABEL: @even_load_static_tc(
; CHECK: vector.body:
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: icmp eq i64 %index.next, 508
; CHECK: middle.block:
; CHECK: br i1 false, label %for.cond.cleanup, label %scalar.ph

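; Note: the scalar trip count is 512 (i = 0, 2, ..., 1022). Because the group
; has a gap, only 508 of those iterations are vectorized (the icmp against 508
; above) and the last 4 run in the scalar epilogue; otherwise the final wide
; load would touch A[1023], an element the original loop never reads.
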
define void @even_load_static_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx, align 4
  %mul = shl nsw i32 %tmp, 1
  %tmp1 = lshr exact i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1
  store i32 %mul, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on an interleaved load group of factor 2 with 1 gap
; (missing the load of odd elements). Because the vectorized loop would
; speculatively access memory out-of-bounds, we must execute at least one
; iteration of the scalar loop.

; void even_load_dynamic_tc(int *A, int *B, unsigned N) {
;   for (unsigned i = 0; i < N; i+=2)
;     B[i/2] = A[i] * 2;
; }

; CHECK-LABEL: @even_load_dynamic_tc(
; CHECK: min.iters.checked:
; CHECK: %n.mod.vf = and i64 %[[N:[a-zA-Z0-9]+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
; CHECK: vector.body:
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: icmp eq i64 %index.next, %n.vec
; CHECK: middle.block:
; CHECK: br i1 false, label %for.cond.cleanup, label %scalar.ph

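; Note: %n.mod.vf is the trip count modulo VF (4). The select replaces a zero
; remainder with a full vector's worth of iterations, so %n.vec is always
; strictly less than the trip count and the scalar loop runs at least once,
; covering the gapped group's final elements.
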
define void @even_load_dynamic_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i64 %N) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx, align 4
  %mul = shl nsw i32 %tmp, 1
  %tmp1 = lshr exact i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1
  store i32 %mul, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, %N
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on a reverse interleaved load group of factor 2 with 1
; gap and a reverse interleaved store group of factor 2. The interleaved load
; group should be removed since it has a gap and is reverse.

; struct pair {
;   int x;
;   int y;
; };
;
; void load_gap_reverse(struct pair *P1, struct pair *P2, int X) {
;   for (int i = 1023; i >= 0; i--) {
;     int a = X + i;
;     int b = P2[i].y - i;
;     P1[i].x = a;
;     P2[i].y = b;
;   }
; }

; CHECK-LABEL: @load_gap_reverse(
; CHECK-NOT: %wide.vec = load <8 x i64>, <8 x i64>* %{{.*}}, align 8
; CHECK-NOT: %strided.vec = shufflevector <8 x i64> %wide.vec, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>

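; Note: only CHECK-NOTs are used here. A group that is both reversed and
; gapped is invalidated rather than vectorized as a unit, so neither an
; <8 x i64> wide load nor a strided shuffle may appear in the output.
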
%pair = type { i64, i64 }

define void @load_gap_reverse(%pair* noalias nocapture readonly %P1, %pair* noalias nocapture readonly %P2, i64 %X) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ 1023, %entry ], [ %i.next, %for.body ]
  %0 = add nsw i64 %X, %i
  %1 = getelementptr inbounds %pair, %pair* %P1, i64 %i, i32 0
  %2 = getelementptr inbounds %pair, %pair* %P2, i64 %i, i32 1
  %3 = load i64, i64* %2, align 8
  %4 = sub nsw i64 %3, %i
  store i64 %0, i64* %1, align 8
  store i64 %4, i64* %2, align 8
  %i.next = add nsw i64 %i, -1
  %cond = icmp sgt i64 %i, 0
  br i1 %cond, label %for.body, label %for.exit

for.exit:
  ret void
}

; Check vectorization on interleaved access groups identified from mixed
; loads/stores.

; void mixed_load2_store2(int *A, int *B) {
;   for (unsigned i = 0; i < 1024; i+=2) {
;     B[i] = A[i] * A[i+1];
;     B[i+1] = A[i] + A[i+1];
;   }
; }

; CHECK-LABEL: @mixed_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: %interleaved.vec = shufflevector <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec

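; Note: the scalar loop loads A[i] and A[i+1] twice each (once for the mul,
; once for the add), but all four loads read the two lanes of one group, so a
; single %wide.vec load feeds both the product and the sum before they are
; re-interleaved into one wide store to B.
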
define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx, align 4
  %tmp1 = or i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %tmp1
  %tmp2 = load i32, i32* %arrayidx2, align 4
  %mul = mul nsw i32 %tmp2, %tmp
  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  store i32 %mul, i32* %arrayidx4, align 4
  %tmp3 = load i32, i32* %arrayidx, align 4
  %tmp4 = load i32, i32* %arrayidx2, align 4
  %add10 = add nsw i32 %tmp4, %tmp3
  %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 %tmp1
  store i32 %add10, i32* %arrayidx13, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on interleaved access groups identified from mixed
; loads/stores.

; void mixed_load3_store3(int *A) {
;   for (unsigned i = 0; i < 1024; i++) {
;     *A++ += i;
;     *A++ += i;
;     *A++ += i;
;   }
; }

; CHECK-LABEL: @mixed_load3_store3(
; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: %interleaved.vec = shufflevector <8 x i32> %{{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* %{{.*}}, align 4

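; Note: this group is identified through a pointer induction variable (*A++)
; rather than an indexed array, and every element is both read and updated in
; place, so one <12 x i32> load and one <12 x i32> store cover four
; iterations' worth of elements.
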
define void @mixed_load3_store3(i32* nocapture %A) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %i.013 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %A.addr.012 = phi i32* [ %A, %entry ], [ %incdec.ptr3, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %A.addr.012, i64 1
  %tmp = load i32, i32* %A.addr.012, align 4
  %add = add i32 %tmp, %i.013
  store i32 %add, i32* %A.addr.012, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %A.addr.012, i64 2
  %tmp1 = load i32, i32* %incdec.ptr, align 4
  %add2 = add i32 %tmp1, %i.013
  store i32 %add2, i32* %incdec.ptr, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %A.addr.012, i64 3
  %tmp2 = load i32, i32* %incdec.ptr1, align 4
  %add4 = add i32 %tmp2, %i.013
  store i32 %add4, i32* %incdec.ptr1, align 4
  %inc = add nuw nsw i32 %i.013, 1
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check vectorization on an interleaved access group whose members have
; different types.

; struct IntFloat {
;   int a;
;   float b;
; };
;
; int SA;
; float SB;
;
; void int_float_struct(struct IntFloat *A) {
;   int SumA;
;   float SumB;
;   for (unsigned i = 0; i < 1024; i++) {
;     SumA += A[i].a;
;     SumB += A[i].b;
;   }
;   SA = SumA;
;   SB = SumB;
; }

; CHECK-LABEL: @int_float_struct(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: bitcast <4 x i32> %[[V1]] to <4 x float>
; CHECK: add nsw <4 x i32>
; CHECK: fadd fast <4 x float>

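; Note: members of different types can share one wide load as long as they
; have the same size: the group is loaded as <8 x i32>, de-interleaved, and
; the float member's lane vector is bitcast from <4 x i32> to <4 x float>
; before the fadd.
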
%struct.IntFloat = type { i32, float }

@SA = common global i32 0, align 4
@SB = common global float 0.000000e+00, align 4

define void @int_float_struct(%struct.IntFloat* nocapture readonly %A) #0 {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  store i32 %add, i32* @SA, align 4
  store float %add3, float* @SB, align 4
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ]
  %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ]
  %a = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %a, align 4
  %add = add nsw i32 %tmp, %SumA.013
  %b = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 1
  %tmp1 = load float, float* %b, align 4
  %add3 = fadd fast float %SumB.014, %tmp1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check vectorization of interleaved access groups in the presence of
; dependences (PR27626). The following tests check that we don't reorder
; dependent loads and stores when generating code for interleaved access
; groups. Stores should be scalarized because the required code motion would
; break dependences, and the remaining interleaved load groups should have
; gaps.

; PR27626_0: Ensure a strided store is not moved after a dependent (zero
; distance) strided load.

; void PR27626_0(struct pair *p, int z, int n) {
;   for (int i = 0; i < n; i++) {
;     p[i].x = z;
;     p[i].y = p[i].x;
;   }
; }

; CHECK-LABEL: @PR27626_0(
; CHECK: min.iters.checked:
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
; CHECK: %n.vec = sub nsw i64 %[[N]], %[[R]]
; CHECK: vector.body:
; CHECK: %[[L1:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
; CHECK: %[[X1:.+]] = extractelement <8 x i32> %[[L1]], i32 0
; CHECK: store i32 %[[X1]], {{.*}}
; CHECK: %[[X2:.+]] = extractelement <8 x i32> %[[L1]], i32 2
; CHECK: store i32 %[[X2]], {{.*}}
; CHECK: %[[X3:.+]] = extractelement <8 x i32> %[[L1]], i32 4
; CHECK: store i32 %[[X3]], {{.*}}
; CHECK: %[[X4:.+]] = extractelement <8 x i32> %[[L1]], i32 6
; CHECK: store i32 %[[X4]], {{.*}}

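; Note: p[i].x is still read by a wide load (with gaps in the odd, y lanes),
; but the dependent stores to p[i].y are scalarized: each x value is extracted
; from lanes 0, 2, 4, 6 and stored individually, so the store of %z to p[i].x
; is never moved below the load that must observe it.
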
%pair.i32 = type { i32, i32 }

define void @PR27626_0(%pair.i32* %p, i32 %z, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
  %p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
  store i32 %z, i32* %p_i.x, align 4
  %0 = load i32, i32* %p_i.x, align 4
  store i32 %0, i32* %p_i.y, align 4
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; PR27626_1: Ensure a strided load is not moved before a dependent (zero
; distance) strided store.

; int PR27626_1(struct pair *p, int n) {
;   int s = 0;
;   for (int i = 0; i < n; i++) {
;     p[i].y = p[i].x;
;     s += p[i].y;
;   }
;   return s;
; }

; CHECK-LABEL: @PR27626_1(
; CHECK: min.iters.checked:
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
; CHECK: %n.vec = sub nsw i64 %[[N]], %[[R]]
; CHECK: vector.body:
; CHECK: %[[Phi:.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ {{.*}}, %vector.body ]
; CHECK: %[[L1:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
; CHECK: %[[X1:.+]] = extractelement <8 x i32> %[[L1]], i32 0
; CHECK: store i32 %[[X1]], {{.*}}
; CHECK: %[[X2:.+]] = extractelement <8 x i32> %[[L1]], i32 2
; CHECK: store i32 %[[X2]], {{.*}}
; CHECK: %[[X3:.+]] = extractelement <8 x i32> %[[L1]], i32 4
; CHECK: store i32 %[[X3]], {{.*}}
; CHECK: %[[X4:.+]] = extractelement <8 x i32> %[[L1]], i32 6
; CHECK: store i32 %[[X4]], {{.*}}
; CHECK: %[[L2:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
; CHECK: %[[S1:.+]] = shufflevector <8 x i32> %[[L2]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: add nsw <4 x i32> %[[S1]], %[[Phi]]

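; Note: the first wide load and the scalarized stores implement
; p[i].y = p[i].x; the reduction then re-loads the pairs (%[[L2]]) and
; shuffles out the y lanes rather than reusing %[[L1]], because the stores in
; the same iteration define the values the sum must observe.
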
define i32 @PR27626_1(%pair.i32* %p, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %s = phi i32 [ %2, %for.body ], [ 0, %entry ]
  %p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
  %p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
  %0 = load i32, i32* %p_i.x, align 4
  store i32 %0, i32* %p_i.y, align 4
  %1 = load i32, i32* %p_i.y, align 4
  %2 = add nsw i32 %1, %s
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %3 = phi i32 [ %2, %for.body ]
  ret i32 %3
}

; PR27626_2: Ensure a strided store is not moved after a dependent (negative
; distance) strided load.

; void PR27626_2(struct pair *p, int z, int n) {
;   for (int i = 0; i < n; i++) {
;     p[i].x = z;
;     p[i].y = p[i - 1].x;
;   }
; }

; CHECK-LABEL: @PR27626_2(
; CHECK: min.iters.checked:
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
; CHECK: %n.vec = sub nsw i64 %[[N]], %[[R]]
; CHECK: vector.body:
; CHECK: %[[L1:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
; CHECK: %[[X1:.+]] = extractelement <8 x i32> %[[L1]], i32 0
; CHECK: store i32 %[[X1]], {{.*}}
; CHECK: %[[X2:.+]] = extractelement <8 x i32> %[[L1]], i32 2
; CHECK: store i32 %[[X2]], {{.*}}
; CHECK: %[[X3:.+]] = extractelement <8 x i32> %[[L1]], i32 4
; CHECK: store i32 %[[X3]], {{.*}}
; CHECK: %[[X4:.+]] = extractelement <8 x i32> %[[L1]], i32 6
; CHECK: store i32 %[[X4]], {{.*}}

define void @PR27626_2(%pair.i32* %p, i64 %n, i32 %z) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %i_minus_1 = add nuw nsw i64 %i, -1
  %p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
  %p_i_minus_1.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i_minus_1, i32 0
  %p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
  store i32 %z, i32* %p_i.x, align 4
  %0 = load i32, i32* %p_i_minus_1.x, align 4
  store i32 %0, i32* %p_i.y, align 4
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; PR27626_3: Ensure a strided load is not moved before a dependent (negative
; distance) strided store.

; int PR27626_3(struct pair *p, int z, int n) {
;   int s = 0;
;   for (int i = 0; i < n; i++) {
;     p[i + 1].y = p[i].x;
;     s += p[i].y;
;   }
;   return s;
; }

; CHECK-LABEL: @PR27626_3(
; CHECK: min.iters.checked:
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
; CHECK: %n.vec = sub nsw i64 %[[N]], %[[R]]
; CHECK: vector.body:
; CHECK: %[[Phi:.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ {{.*}}, %vector.body ]
; CHECK: %[[L1:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
; CHECK: %[[X1:.+]] = extractelement <8 x i32> %[[L1]], i32 0
; CHECK: store i32 %[[X1]], {{.*}}
; CHECK: %[[X2:.+]] = extractelement <8 x i32> %[[L1]], i32 2
; CHECK: store i32 %[[X2]], {{.*}}
; CHECK: %[[X3:.+]] = extractelement <8 x i32> %[[L1]], i32 4
; CHECK: store i32 %[[X3]], {{.*}}
; CHECK: %[[X4:.+]] = extractelement <8 x i32> %[[L1]], i32 6
; CHECK: store i32 %[[X4]], {{.*}}
; CHECK: %[[L2:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
; CHECK: %[[S1:.+]] = shufflevector <8 x i32> %[[L2]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: add nsw <4 x i32> %[[S1]], %[[Phi]]

define i32 @PR27626_3(%pair.i32* %p, i64 %n, i32 %z) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %s = phi i32 [ %2, %for.body ], [ 0, %entry ]
  %i_plus_1 = add nuw nsw i64 %i, 1
  %p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
  %p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
  %p_i_plus_1.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i_plus_1, i32 1
  %0 = load i32, i32* %p_i.x, align 4
  store i32 %0, i32* %p_i_plus_1.y, align 4
  %1 = load i32, i32* %p_i.y, align 4
  %2 = add nsw i32 %1, %s
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %3 = phi i32 [ %2, %for.body ]
  ret i32 %3
}

; PR27626_4: Ensure we form an interleaved group for strided stores in the
; presence of a write-after-write dependence. We create a group for
; (2) and (3) while excluding (1).

; void PR27626_4(int *a, int x, int y, int z, int n) {
;   for (int i = 0; i < n; i += 2) {
;     a[i] = x;      // (1)
;     a[i] = y;      // (2)
;     a[i + 1] = z;  // (3)
;   }
; }

; CHECK-LABEL: @PR27626_4(
; CHECK: vector.ph:
; CHECK: %[[INS_Y:.+]] = insertelement <4 x i32> undef, i32 %y, i32 0
; CHECK: %[[SPLAT_Y:.+]] = shufflevector <4 x i32> %[[INS_Y]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: %[[INS_Z:.+]] = insertelement <4 x i32> undef, i32 %z, i32 0
; CHECK: %[[SPLAT_Z:.+]] = shufflevector <4 x i32> %[[INS_Z]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: vector.body:
; CHECK: store i32 %x, {{.*}}
; CHECK: store i32 %x, {{.*}}
; CHECK: store i32 %x, {{.*}}
; CHECK: store i32 %x, {{.*}}
; CHECK: %[[VEC:.+]] = shufflevector <4 x i32> %[[SPLAT_Y]], <4 x i32> %[[SPLAT_Z]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %[[VEC]], {{.*}}

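; Note: store (1) is excluded from the group and executed as four scalar
; stores of %x, which (2) then overwrites: the splats of %y and %z built in
; the preheader are interleaved with the <0,4,1,5,2,6,3,7> mask and written
; for (2) and (3) by one <8 x i32> store.
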
define void @PR27626_4(i32* %a, i32 %x, i32 %y, i32 %z, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %i_plus_1 = add i64 %i, 1
  %a_i = getelementptr inbounds i32, i32* %a, i64 %i
  %a_i_plus_1 = getelementptr inbounds i32, i32* %a, i64 %i_plus_1
  store i32 %x, i32* %a_i, align 4
  store i32 %y, i32* %a_i, align 4
  store i32 %z, i32* %a_i_plus_1, align 4
  %i.next = add nuw nsw i64 %i, 2
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; PR27626_5: Ensure we do not form an interleaved group for strided stores in
; the presence of a write-after-write dependence.

; void PR27626_5(int *a, int x, int y, int z, int n) {
;   for (int i = 3; i < n; i += 2) {
;     a[i - 1] = x;
;     a[i - 3] = y;
;     a[i] = z;
;   }
; }

; CHECK-LABEL: @PR27626_5(
; CHECK: vector.body:
; CHECK: store i32 %x, {{.*}}
; CHECK: store i32 %x, {{.*}}
; CHECK: store i32 %x, {{.*}}
; CHECK: store i32 %x, {{.*}}
; CHECK: store i32 %y, {{.*}}
; CHECK: store i32 %y, {{.*}}
; CHECK: store i32 %y, {{.*}}
; CHECK: store i32 %y, {{.*}}
; CHECK: store i32 %z, {{.*}}
; CHECK: store i32 %z, {{.*}}
; CHECK: store i32 %z, {{.*}}
; CHECK: store i32 %z, {{.*}}

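; Note: all twelve stores stay scalar. For example, the store to a[i - 1] in
; one iteration and the store to a[i - 3] two iterations later hit the same
; location, and this write-after-write dependence would be broken by sinking
; either store into an interleaved group.
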
define void @PR27626_5(i32* %a, i32 %x, i32 %y, i32 %z, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 3, %entry ]
  %i_minus_1 = sub i64 %i, 1
  %i_minus_3 = sub i64 %i_minus_1, 2
  %a_i = getelementptr inbounds i32, i32* %a, i64 %i
  %a_i_minus_1 = getelementptr inbounds i32, i32* %a, i64 %i_minus_1
  %a_i_minus_3 = getelementptr inbounds i32, i32* %a, i64 %i_minus_3
  store i32 %x, i32* %a_i_minus_1, align 4
  store i32 %y, i32* %a_i_minus_3, align 4
  store i32 %z, i32* %a_i, align 4
  %i.next = add nuw nsw i64 %i, 2
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

attributes #0 = { "unsafe-fp-math"="true" }