blob: 1cce7931936c150d75a51c690520111010493daf [file] [log] [blame]
Hao Liu32c05392015-06-08 06:39:56 +00001; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s
2
3target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
4
5; Check vectorization on an interleaved load group of factor 2 and an interleaved
6; store group of factor 2.
7
8; int AB[1024];
9; int CD[1024];
10; void test_array_load2_store2(int C, int D) {
11; for (int i = 0; i < 1024; i+=2) {
12; int A = AB[i];
13; int B = AB[i+1];
14; CD[i] = A + C;
15; CD[i+1] = B * D;
16; }
17; }
18
19; CHECK-LABEL: @test_array_load2_store2(
20; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
21; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
22; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
23; CHECK: add nsw <4 x i32>
24; CHECK: mul nsw <4 x i32>
25; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
26; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4
27
; Globals used by @test_array_load2_store2: the loop loads from @AB and stores
; to @CD.
28@AB = common global [1024 x i32] zeroinitializer, align 4
29@CD = common global [1024 x i32] zeroinitializer, align 4
30
; Scalar input loop. Each iteration loads AB[i] and AB[i|1], then stores
; AB[i] + %C to CD[i] and AB[i|1] * %D to CD[i|1]. The induction variable
; starts at 0 and advances by 2 while the incremented value is below 1024.
31define void @test_array_load2_store2(i32 %C, i32 %D) {
32entry:
33 br label %for.body
34
35for.body: ; preds = %for.body, %entry
36 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
37 %arrayidx0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv
38 %tmp = load i32, i32* %arrayidx0, align 4 ; AB[i]
39 %tmp1 = or i64 %indvars.iv, 1 ; i is even (0, 2, 4, ...), so i|1 is i + 1
40 %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %tmp1
41 %tmp2 = load i32, i32* %arrayidx1, align 4 ; AB[i|1]
42 %add = add nsw i32 %tmp, %C
43 %mul = mul nsw i32 %tmp2, %D
44 %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv
45 store i32 %add, i32* %arrayidx2, align 4 ; CD[i] = AB[i] + C
46 %arrayidx3 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %tmp1
47 store i32 %mul, i32* %arrayidx3, align 4 ; CD[i|1] = AB[i|1] * D
48 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
49 %cmp = icmp slt i64 %indvars.iv.next, 1024
50 br i1 %cmp, label %for.body, label %for.end
51
52for.end: ; preds = %for.body
53 ret void
54}
55
56; int A[3072];
57; struct ST3 S[1024];
58; void test_struct_array_load3_store3() {
59; int *ptr = A;
60; for (int i = 0; i < 1024; i++) {
61; int X1 = *ptr++;
62; int X2 = *ptr++;
63; int X3 = *ptr++;
64; S[i].x = X1 + 1;
65; S[i].y = X2 + 2;
66; S[i].z = X3 + 3;
67; }
68; }
69
70; CHECK-LABEL: @test_struct_array_load3_store3(
71; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
72; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
73; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
74; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
75; CHECK: add nsw <4 x i32> {{.*}}, <i32 1, i32 1, i32 1, i32 1>
76; CHECK: add nsw <4 x i32> {{.*}}, <i32 2, i32 2, i32 2, i32 2>
77; CHECK: add nsw <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
78; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
79; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
80; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
81; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* {{.*}}, align 4
82
; Three-field struct and the globals used by @test_struct_array_load3_store3:
; the loop reads i32 values from @A and writes the fields of @S.
83%struct.ST3 = type { i32, i32, i32 }
84@A = common global [3072 x i32] zeroinitializer, align 4
85@S = common global [1024 x %struct.ST3] zeroinitializer, align 4
86
; Scalar input loop. Each iteration loads three consecutive i32 values through
; a pointer that starts at @A and advances by 3, then stores value + 1,
; value + 2 and value + 3 into fields 0, 1 and 2 of S[i]. The loop exits when
; the incremented index equals 1024.
87define void @test_struct_array_load3_store3() {
88entry:
89 br label %for.body
90
91for.body: ; preds = %for.body, %entry
92 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
93 %ptr.016 = phi i32* [ getelementptr inbounds ([3072 x i32], [3072 x i32]* @A, i64 0, i64 0), %entry ], [ %incdec.ptr2, %for.body ]
94 %incdec.ptr = getelementptr inbounds i32, i32* %ptr.016, i64 1
95 %tmp = load i32, i32* %ptr.016, align 4 ; element at ptr + 0
96 %incdec.ptr1 = getelementptr inbounds i32, i32* %ptr.016, i64 2
97 %tmp1 = load i32, i32* %incdec.ptr, align 4 ; element at ptr + 1
98 %incdec.ptr2 = getelementptr inbounds i32, i32* %ptr.016, i64 3 ; pointer for the next iteration
99 %tmp2 = load i32, i32* %incdec.ptr1, align 4 ; element at ptr + 2
100 %add = add nsw i32 %tmp, 1
101 %x = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 0
102 store i32 %add, i32* %x, align 4 ; S[i] field 0
103 %add3 = add nsw i32 %tmp1, 2
104 %y = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 1
105 store i32 %add3, i32* %y, align 4 ; S[i] field 1
106 %add6 = add nsw i32 %tmp2, 3
107 %z = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 2
108 store i32 %add6, i32* %z, align 4 ; S[i] field 2
109 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
110 %exitcond = icmp eq i64 %indvars.iv.next, 1024
111 br i1 %exitcond, label %for.end, label %for.body
112
113for.end: ; preds = %for.body
114 ret void
115}
116
117; Check vectorization on an interleaved load group of factor 4.
118
119; struct ST4{
120; int x;
121; int y;
122; int z;
123; int w;
124; };
125; int test_struct_load4(struct ST4 *S) {
126; int r = 0;
127; for (int i = 0; i < 1024; i++) {
128; r += S[i].x;
129; r -= S[i].y;
130; r += S[i].z;
131; r -= S[i].w;
132; }
133; return r;
134; }
135
136; CHECK-LABEL: @test_struct_load4(
137; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* {{.*}}, align 4
138; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
139; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
140; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
141; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
142; CHECK: add nsw <4 x i32>
143; CHECK: sub <4 x i32>
144; CHECK: add nsw <4 x i32>
145; CHECK: sub <4 x i32>
146
; Four-field struct used by @test_struct_load4 and @test_struct_store4.
147%struct.ST4 = type { i32, i32, i32, i32 }
148
; Scalar input loop. Carries an i32 accumulator that starts at 0; each
; iteration adds field 0, subtracts field 1, adds field 2 and subtracts
; field 3 of S[i]. The loop exits when the incremented index equals 1024 and
; the function returns the final accumulator value.
149define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) {
150entry:
151 br label %for.body
152
153for.body: ; preds = %for.body, %entry
154 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
155 %r.022 = phi i32 [ 0, %entry ], [ %sub8, %for.body ]
156 %x = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 0
157 %tmp = load i32, i32* %x, align 4
158 %add = add nsw i32 %tmp, %r.022 ; r += S[i] field 0
159 %y = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 1
160 %tmp1 = load i32, i32* %y, align 4
161 %sub = sub i32 %add, %tmp1 ; r -= S[i] field 1
162 %z = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 2
163 %tmp2 = load i32, i32* %z, align 4
164 %add5 = add nsw i32 %sub, %tmp2 ; r += S[i] field 2
165 %w = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 3
166 %tmp3 = load i32, i32* %w, align 4
167 %sub8 = sub i32 %add5, %tmp3 ; r -= S[i] field 3
168 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
169 %exitcond = icmp eq i64 %indvars.iv.next, 1024
170 br i1 %exitcond, label %for.end, label %for.body
171
172for.end: ; preds = %for.body
173 ret i32 %sub8
174}
175
176; Check vectorization on an interleaved store group of factor 4.
177
178; void test_struct_store4(int *A, struct ST4 *B) {
179; int *ptr = A;
180; for (int i = 0; i < 1024; i++) {
181; int X = *ptr++;
182; B[i].x = X + 1;
183; B[i].y = X * 2;
184; B[i].z = X + 3;
185; B[i].w = X + 4;
186; }
187; }
188
189; CHECK-LABEL: @test_struct_store4(
190; CHECK: %[[LD:.*]] = load <4 x i32>, <4 x i32>*
191; CHECK: add nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
192; CHECK: shl nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
193; CHECK: add nsw <4 x i32> %[[LD]], <i32 3, i32 3, i32 3, i32 3>
194; CHECK: add nsw <4 x i32> %[[LD]], <i32 4, i32 4, i32 4, i32 4>
195; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
196; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
197; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
198; CHECK: store <16 x i32> %interleaved.vec, <16 x i32>* {{.*}}, align 4
199
; Scalar input loop. Each iteration loads one i32 X through a pointer that
; starts at %A and advances by one element, then stores X + 1, X << 1, X + 3
; and X + 4 into fields 0..3 of B[i]. The loop exits when the incremented
; index equals 1024.
200define void @test_struct_store4(i32* noalias nocapture readonly %A, %struct.ST4* noalias nocapture %B) {
201entry:
202 br label %for.body
203
204for.cond.cleanup: ; preds = %for.body
205 ret void
206
207for.body: ; preds = %for.body, %entry
208 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
209 %ptr.024 = phi i32* [ %A, %entry ], [ %incdec.ptr, %for.body ]
210 %incdec.ptr = getelementptr inbounds i32, i32* %ptr.024, i64 1 ; pointer for the next iteration
211 %tmp = load i32, i32* %ptr.024, align 4 ; X
212 %add = add nsw i32 %tmp, 1
213 %x = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 0
214 store i32 %add, i32* %x, align 4 ; B[i] field 0 = X + 1
215 %mul = shl nsw i32 %tmp, 1 ; the "X * 2" of the C source, expressed as a shift
216 %y = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 1
217 store i32 %mul, i32* %y, align 4 ; B[i] field 1 = X << 1
218 %add3 = add nsw i32 %tmp, 3
219 %z = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 2
220 store i32 %add3, i32* %z, align 4 ; B[i] field 2 = X + 3
221 %add6 = add nsw i32 %tmp, 4
222 %w = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 3
223 store i32 %add6, i32* %w, align 4 ; B[i] field 3 = X + 4
224 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
225 %exitcond = icmp eq i64 %indvars.iv.next, 1024
226 br i1 %exitcond, label %for.cond.cleanup, label %for.body
227}
228
229; Check vectorization on a reverse interleaved load group of factor 2 and
230; a reverse interleaved store group of factor 2.
231
232; struct ST2 {
233; int x;
234; int y;
235; };
236;
237; void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) {
238; for (int i = 1023; i >= 0; i--) {
239; int a = A[i].x + i; // interleaved load of index 0
240; int b = A[i].y - i; // interleaved load of index 1
241; B[i].x = a; // interleaved store of index 0
242; B[i].y = b; // interleaved store of index 1
243; }
244; }
245
246; CHECK-LABEL: @test_reversed_load2_store2(
247; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
248; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
249; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
250; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
251; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
252; CHECK: add nsw <4 x i32>
253; CHECK: sub nsw <4 x i32>
254; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
255; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
256; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
257; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4
258
; Two-field struct used by @test_reversed_load2_store2.
259%struct.ST2 = type { i32, i32 }
260
; Scalar input loop that counts down. The index starts at 1023 and is
; decremented by 1; the loop continues while the current (not yet decremented)
; index is greater than 0, so the body also runs for index 0. Each iteration
; stores A[i].field0 + i to B[i].field0 and A[i].field1 - i to B[i].field1,
; with i truncated to i32.
261define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly %A, %struct.ST2* noalias nocapture %B) {
262entry:
263 br label %for.body
264
265for.cond.cleanup: ; preds = %for.body
266 ret void
267
268for.body: ; preds = %for.body, %entry
269 %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
270 %x = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 0
271 %tmp = load i32, i32* %x, align 4
272 %tmp1 = trunc i64 %indvars.iv to i32 ; i as a 32-bit value
273 %add = add nsw i32 %tmp, %tmp1
274 %y = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 1
275 %tmp2 = load i32, i32* %y, align 4
276 %sub = sub nsw i32 %tmp2, %tmp1
277 %x5 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 0
278 store i32 %add, i32* %x5, align 4
279 %y8 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 1
280 store i32 %sub, i32* %y8, align 4
281 %indvars.iv.next = add nsw i64 %indvars.iv, -1
282 %cmp = icmp sgt i64 %indvars.iv, 0 ; tests the current index, not the decremented one
283 br i1 %cmp, label %for.body, label %for.cond.cleanup
284}
285
286; Check vectorization on an interleaved load group of factor 2 with 1 gap
Matthew Simpson622b95b2016-04-27 18:21:36 +0000287; (missing the load of odd elements). Because the vectorized loop would
288; speculatively access memory out-of-bounds, we must execute at least one
289; iteration of the scalar loop.
Hao Liu32c05392015-06-08 06:39:56 +0000290
Matthew Simpson622b95b2016-04-27 18:21:36 +0000291; void even_load_static_tc(int *A, int *B) {
Hao Liu32c05392015-06-08 06:39:56 +0000292; for (unsigned i = 0; i < 1024; i+=2)
293; B[i/2] = A[i] * 2;
294; }
295
Matthew Simpson622b95b2016-04-27 18:21:36 +0000296; CHECK-LABEL: @even_load_static_tc(
297; CHECK: vector.body:
298; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
299; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
300; CHECK: icmp eq i64 %index.next, 508
301; CHECK: middle.block:
302; CHECK: br i1 false, label %for.cond.cleanup, label %scalar.ph
Hao Liu32c05392015-06-08 06:39:56 +0000303
; Scalar input loop with a constant bound. The index starts at 0 and advances
; by 2 while the incremented value is below 1024 (unsigned compare). Each
; iteration loads A[i], shifts it left by 1 and stores the result to B[i >> 1];
; the odd elements of A are never loaded.
Matthew Simpson622b95b2016-04-27 18:21:36 +0000304define void @even_load_static_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
Hao Liu32c05392015-06-08 06:39:56 +0000305entry:
306 br label %for.body
307
308for.cond.cleanup: ; preds = %for.body
309 ret void
310
311for.body: ; preds = %for.body, %entry
312 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
313 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
314 %tmp = load i32, i32* %arrayidx, align 4 ; A[i], i even
315 %mul = shl nsw i32 %tmp, 1 ; the "A[i] * 2" of the C source
316 %tmp1 = lshr exact i64 %indvars.iv, 1 ; i / 2; i is even, so no set bit is shifted out
317 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1
318 store i32 %mul, i32* %arrayidx2, align 4
319 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
320 %cmp = icmp ult i64 %indvars.iv.next, 1024
321 br i1 %cmp, label %for.body, label %for.cond.cleanup
322}
323
Matthew Simpson622b95b2016-04-27 18:21:36 +0000324; Check vectorization on an interleaved load group of factor 2 with 1 gap
325; (missing the load of odd elements). Because the vectorized loop would
326; speculatively access memory out-of-bounds, we must execute at least one
327; iteration of the scalar loop.
328
329; void even_load_dynamic_tc(int *A, int *B, unsigned N) {
330; for (unsigned i = 0; i < N; i+=2)
331; B[i/2] = A[i] * 2;
332; }
333
334; CHECK-LABEL: @even_load_dynamic_tc(
335; CHECK: min.iters.checked:
336; CHECK: %n.mod.vf = and i64 %[[N:[a-zA-Z0-9]+]], 3
337; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
338; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
339; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
340; CHECK: vector.body:
341; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
342; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
343; CHECK: icmp eq i64 %index.next, %n.vec
344; CHECK: middle.block:
345; CHECK: br i1 false, label %for.cond.cleanup, label %scalar.ph
346
; Scalar input loop with a run-time bound. Same body as the constant-bound
; variant: the index starts at 0 and advances by 2, A[i] is loaded, shifted
; left by 1 and stored to B[i >> 1]. The loop continues while the incremented
; index is below %N (unsigned compare).
347define void @even_load_dynamic_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i64 %N) {
348entry:
349 br label %for.body
350
351for.cond.cleanup: ; preds = %for.body
352 ret void
353
354for.body: ; preds = %for.body, %entry
355 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
356 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
357 %tmp = load i32, i32* %arrayidx, align 4 ; A[i], i even
358 %mul = shl nsw i32 %tmp, 1 ; the "A[i] * 2" of the C source
359 %tmp1 = lshr exact i64 %indvars.iv, 1 ; i / 2; i is even, so no set bit is shifted out
360 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1
361 store i32 %mul, i32* %arrayidx2, align 4
362 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
363 %cmp = icmp ult i64 %indvars.iv.next, %N
364 br i1 %cmp, label %for.body, label %for.cond.cleanup
365}
366
367; Check vectorization on a reverse interleaved load group of factor 2 with 1
368; gap and a reverse interleaved store group of factor 2. The interleaved load
369; group should be removed since it has a gap and is reverse.
370
371; struct pair {
372; int x;
373; int y;
374; };
375;
376; void load_gap_reverse(struct pair *P1, struct pair *P2, int X) {
377; for (int i = 1023; i >= 0; i--) {
378; int a = X + i;
379; int b = P2[i].y - i;
380; P1[i].x = a;
381; P2[i].y = b;
382; }
383; }
384
385; CHECK-LABEL: @load_gap_reverse(
386; CHECK-NOT: %wide.vec = load <8 x i64>, <8 x i64>* %{{.*}}, align 8
387; CHECK-NOT: %strided.vec = shufflevector <8 x i64> %wide.vec, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
388
; Two-field struct of 64-bit integers used by @load_gap_reverse.
389%pair = type { i64, i64 }
; Scalar input loop that counts down. The index starts at 1023 and is
; decremented by 1; the loop continues while the current index is greater than
; 0, so the body also runs for index 0. Each iteration stores %X + i to field 0
; of P1[i], and loads field 1 of P2[i], subtracts i and stores the result back
; to field 1 of P2[i]. Field 0 of P2[i] is never accessed.
; NOTE(review): both %P1 and %P2 carry the readonly attribute, yet the two
; stores below write through pointers derived from them - confirm this is
; intended for the test.
390define void @load_gap_reverse(%pair* noalias nocapture readonly %P1, %pair* noalias nocapture readonly %P2, i64 %X) {
391entry:
392 br label %for.body
393
394for.body:
395 %i = phi i64 [ 1023, %entry ], [ %i.next, %for.body ]
396 %0 = add nsw i64 %X, %i
397 %1 = getelementptr inbounds %pair, %pair* %P1, i64 %i, i32 0 ; address of P1[i] field 0
398 %2 = getelementptr inbounds %pair, %pair* %P2, i64 %i, i32 1 ; address of P2[i] field 1
399 %3 = load i64, i64* %2, align 8
400 %4 = sub nsw i64 %3, %i
401 store i64 %0, i64* %1, align 8 ; P1[i] field 0 = X + i
402 store i64 %4, i64* %2, align 8 ; P2[i] field 1 = P2[i] field 1 - i
403 %i.next = add nsw i64 %i, -1
404 %cond = icmp sgt i64 %i, 0 ; tests the current index, not the decremented one
405 br i1 %cond, label %for.body, label %for.exit
406
407for.exit:
408 ret void
409}
410
Hao Liu32c05392015-06-08 06:39:56 +0000411; Check vectorization on interleaved access groups identified from mixed
412; loads/stores.
413; void mixed_load2_store2(int *A, int *B) {
414; for (unsigned i = 0; i < 1024; i+=2) {
415; B[i] = A[i] * A[i+1];
416; B[i+1] = A[i] + A[i+1];
417; }
418; }
419
420; CHECK-LABEL: @mixed_load2_store2(
421; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
422; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
423; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
424; CHECK: %interleaved.vec = shufflevector <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
425; CHECK: store <8 x i32> %interleaved.vec
426
; Scalar input loop. The index starts at 0 and advances by 2 while the
; incremented value is below 1024 (unsigned compare). Each iteration loads A[i]
; and A[i|1] and stores their product to B[i], then loads the same two
; addresses again and stores their sum to B[i|1], so loads and stores are
; interleaved in program order.
427define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
428entry:
429 br label %for.body
430
431for.cond.cleanup: ; preds = %for.body
432 ret void
433
434for.body: ; preds = %for.body, %entry
435 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
436 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
437 %tmp = load i32, i32* %arrayidx, align 4 ; A[i]
438 %tmp1 = or i64 %indvars.iv, 1 ; i is even (0, 2, 4, ...), so i|1 is i + 1
439 %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %tmp1
440 %tmp2 = load i32, i32* %arrayidx2, align 4 ; A[i|1]
441 %mul = mul nsw i32 %tmp2, %tmp
442 %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
443 store i32 %mul, i32* %arrayidx4, align 4 ; B[i] = A[i] * A[i|1]
444 %tmp3 = load i32, i32* %arrayidx, align 4 ; second load of A[i], after the first store
445 %tmp4 = load i32, i32* %arrayidx2, align 4 ; second load of A[i|1]
446 %add10 = add nsw i32 %tmp4, %tmp3
447 %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 %tmp1
448 store i32 %add10, i32* %arrayidx13, align 4 ; B[i|1] = A[i] + A[i|1]
449 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
450 %cmp = icmp ult i64 %indvars.iv.next, 1024
451 br i1 %cmp, label %for.body, label %for.cond.cleanup
452}
453
454; Check vectorization on interleaved access groups identified from mixed
455; loads/stores.
456; void mixed_load3_store3(int *A) {
457; for (unsigned i = 0; i < 1024; i++) {
458; *A++ += i;
459; *A++ += i;
460; *A++ += i;
461; }
462; }
463
464; CHECK-LABEL: @mixed_load3_store3(
465; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
466; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
467; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
468; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
469; CHECK: %interleaved.vec = shufflevector <8 x i32> %{{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
470; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* %{{.*}}, align 4
471
; Scalar input loop. A pointer starts at %A and advances by 3 elements per
; iteration; each of the three consecutive elements is loaded, increased by the
; i32 counter i and stored back to the same address, alternating load and store
; in program order. The counter starts at 0 and the loop exits when the
; incremented counter equals 1024.
472define void @mixed_load3_store3(i32* nocapture %A) {
473entry:
474 br label %for.body
475
476for.cond.cleanup: ; preds = %for.body
477 ret void
478
479for.body: ; preds = %for.body, %entry
480 %i.013 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
481 %A.addr.012 = phi i32* [ %A, %entry ], [ %incdec.ptr3, %for.body ]
482 %incdec.ptr = getelementptr inbounds i32, i32* %A.addr.012, i64 1
483 %tmp = load i32, i32* %A.addr.012, align 4
484 %add = add i32 %tmp, %i.013
485 store i32 %add, i32* %A.addr.012, align 4 ; element 0 += i
486 %incdec.ptr1 = getelementptr inbounds i32, i32* %A.addr.012, i64 2
487 %tmp1 = load i32, i32* %incdec.ptr, align 4
488 %add2 = add i32 %tmp1, %i.013
489 store i32 %add2, i32* %incdec.ptr, align 4 ; element 1 += i
490 %incdec.ptr3 = getelementptr inbounds i32, i32* %A.addr.012, i64 3 ; pointer for the next iteration
491 %tmp2 = load i32, i32* %incdec.ptr1, align 4
492 %add4 = add i32 %tmp2, %i.013
493 store i32 %add4, i32* %incdec.ptr1, align 4 ; element 2 += i
494 %inc = add nuw nsw i32 %i.013, 1
495 %exitcond = icmp eq i32 %inc, 1024
496 br i1 %exitcond, label %for.cond.cleanup, label %for.body
497}
498
499; Check vectorization on interleaved access groups whose members have
500; different types.
501
502; struct IntFloat {
503; int a;
504; float b;
505; };
506;
507; int SA;
508; float SB;
509;
510; void int_float_struct(struct IntFloat *A) {
511; int SumA;
512; float SumB;
513; for (unsigned i = 0; i < 1024; i++) {
514; SumA += A[i].a;
515; SumB += A[i].b;
516; }
517; SA = SumA;
518; SB = SumB;
519; }
520
521; CHECK-LABEL: @int_float_struct(
522; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
523; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
524; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
525; CHECK: bitcast <4 x i32> %[[V1]] to <4 x float>
526; CHECK: add nsw <4 x i32>
527; CHECK: fadd fast <4 x float>
528
; Struct with one i32 and one float member, plus the globals that receive the
; two sums computed by @int_float_struct.
529%struct.IntFloat = type { i32, float }
530
531@SA = common global i32 0, align 4
532@SB = common global float 0.000000e+00, align 4
533
; Scalar input loop. Carries an i32 accumulator and a float accumulator, both
; starting from undef (the C source leaves SumA and SumB uninitialized). Each
; iteration adds field 0 of A[i] to the integer accumulator and field 1 to the
; float accumulator using a fast-math fadd. The loop exits when the incremented
; index equals 1024; the final sums are then stored to @SA and @SB.
534define void @int_float_struct(%struct.IntFloat* nocapture readonly %A) #0 {
535entry:
536 br label %for.body
537
538for.cond.cleanup: ; preds = %for.body
539 store i32 %add, i32* @SA, align 4
540 store float %add3, float* @SB, align 4
541 ret void
542
543for.body: ; preds = %for.body, %entry
544 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
545 %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ]
546 %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ]
547 %a = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 0
548 %tmp = load i32, i32* %a, align 4
549 %add = add nsw i32 %tmp, %SumA.013 ; SumA += A[i] field 0
550 %b = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 1
551 %tmp1 = load float, float* %b, align 4
552 %add3 = fadd fast float %SumB.014, %tmp1 ; SumB += A[i] field 1
553 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
554 %exitcond = icmp eq i64 %indvars.iv.next, 1024
555 br i1 %exitcond, label %for.cond.cleanup, label %for.body
556}
557
; Attribute group #0, referenced by @int_float_struct above.
558attributes #0 = { "unsafe-fp-math"="true" }