blob: 54ce3e29293a295e1fa331aa26af1cbfe27b5bfc [file] [log] [blame]
1; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s
2
; Data layout used by every function in this file: little-endian (e), ELF name
; mangling (m:e), i64 aligned to 64 bits, i128 aligned to 128 bits, native
; integer widths of 32 and 64 bits (n32:64), 128-bit stack alignment (S128).
3target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
4
5; Check vectorization on an interleaved load group of factor 2 and an interleaved
6; store group of factor 2.
7
8; int AB[1024];
9; int CD[1024];
10; void test_array_load2_store2(int C, int D) {
11; for (int i = 0; i < 1024; i+=2) {
12; int A = AB[i];
13; int B = AB[i+1];
14; CD[i] = A + C;
15; CD[i+1] = B * D;
16; }
17; }
18
19; CHECK-LABEL: @test_array_load2_store2(
20; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
21; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
22; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
23; CHECK: add nsw <4 x i32>
24; CHECK: mul nsw <4 x i32>
25; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
26; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4
27
; Source (@AB) and destination (@CD) arrays accessed by @test_array_load2_store2.
28@AB = common global [1024 x i32] zeroinitializer, align 4
29@CD = common global [1024 x i32] zeroinitializer, align 4
30
; Scalar input loop: for each even index i below 1024 it reads the adjacent pair
; AB[i], AB[i+1] and writes CD[i] = AB[i] + %C and CD[i+1] = AB[i+1] * %D.
; The two loads and the two stores each touch consecutive elements with a
; stride of 2 between iterations.
31define void @test_array_load2_store2(i32 %C, i32 %D) {
32entry:
33 br label %for.body
34
35for.body: ; preds = %for.body, %entry
; Induction variable: starts at 0 and is advanced by 2 below, so it stays even.
36 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
37 %arrayidx0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv
38 %tmp = load i32, i32* %arrayidx0, align 4
; With an even %indvars.iv, or-ing in 1 gives the same value as adding 1.
39 %tmp1 = or i64 %indvars.iv, 1
40 %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %tmp1
41 %tmp2 = load i32, i32* %arrayidx1, align 4
42 %add = add nsw i32 %tmp, %C
43 %mul = mul nsw i32 %tmp2, %D
44 %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv
45 store i32 %add, i32* %arrayidx2, align 4
46 %arrayidx3 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %tmp1
47 store i32 %mul, i32* %arrayidx3, align 4
48 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
; Signed compare on the already-incremented index; loop exits once it reaches 1024.
49 %cmp = icmp slt i64 %indvars.iv.next, 1024
50 br i1 %cmp, label %for.body, label %for.end
51
52for.end: ; preds = %for.body
53 ret void
54}
55
56; int A[3072];
57; struct ST3 S[1024];
58; void test_struct_array_load3_store3() {
59; int *ptr = A;
60; for (int i = 0; i < 1024; i++) {
61; int X1 = *ptr++;
62; int X2 = *ptr++;
63; int X3 = *ptr++;
64; S[i].x = X1 + 1;
65; S[i].y = X2 + 2;
66; S[i].z = X3 + 3;
67; }
68; }
69
70; CHECK-LABEL: @test_struct_array_load3_store3(
71; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
72; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
73; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
74; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
75; CHECK: add nsw <4 x i32> {{.*}}, <i32 1, i32 1, i32 1, i32 1>
76; CHECK: add nsw <4 x i32> {{.*}}, <i32 2, i32 2, i32 2, i32 2>
77; CHECK: add nsw <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
78; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
79; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
80; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
81; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* {{.*}}, align 4
82
; Three-field struct plus the flat int array (@A) that is read and the struct
; array (@S) that is written by @test_struct_array_load3_store3.
83%struct.ST3 = type { i32, i32, i32 }
84@A = common global [3072 x i32] zeroinitializer, align 4
85@S = common global [1024 x %struct.ST3] zeroinitializer, align 4
86
; Scalar input loop: each of the 1024 iterations reads three consecutive i32
; values from @A through a moving pointer, adds 1, 2 and 3 to them, and stores
; the results into fields 0, 1 and 2 of S[i].
87define void @test_struct_array_load3_store3() {
88entry:
89 br label %for.body
90
91for.body: ; preds = %for.body, %entry
92 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; Read cursor into @A: starts at &A[0] and is replaced by %incdec.ptr2
; (cursor + 3 elements) on every back edge.
93 %ptr.016 = phi i32* [ getelementptr inbounds ([3072 x i32], [3072 x i32]* @A, i64 0, i64 0), %entry ], [ %incdec.ptr2, %for.body ]
; The address computations are interleaved with the loads: %incdec.ptr is
; cursor+1, %incdec.ptr1 is cursor+2, %incdec.ptr2 is cursor+3.
94 %incdec.ptr = getelementptr inbounds i32, i32* %ptr.016, i64 1
95 %tmp = load i32, i32* %ptr.016, align 4
96 %incdec.ptr1 = getelementptr inbounds i32, i32* %ptr.016, i64 2
97 %tmp1 = load i32, i32* %incdec.ptr, align 4
98 %incdec.ptr2 = getelementptr inbounds i32, i32* %ptr.016, i64 3
99 %tmp2 = load i32, i32* %incdec.ptr1, align 4
100 %add = add nsw i32 %tmp, 1
101 %x = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 0
102 store i32 %add, i32* %x, align 4
103 %add3 = add nsw i32 %tmp1, 2
104 %y = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 1
105 store i32 %add3, i32* %y, align 4
106 %add6 = add nsw i32 %tmp2, 3
107 %z = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 2
108 store i32 %add6, i32* %z, align 4
109 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
110 %exitcond = icmp eq i64 %indvars.iv.next, 1024
111 br i1 %exitcond, label %for.end, label %for.body
112
113for.end: ; preds = %for.body
114 ret void
115}
116
117; Check vectorization on an interleaved load group of factor 4.
118
119; struct ST4{
120; int x;
121; int y;
122; int z;
123; int w;
124; };
125; int test_struct_load4(struct ST4 *S) {
126; int r = 0;
127; for (int i = 0; i < 1024; i++) {
128; r += S[i].x;
129; r -= S[i].y;
130; r += S[i].z;
131; r -= S[i].w;
132; }
133; return r;
134; }
135
136; CHECK-LABEL: @test_struct_load4(
137; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* {{.*}}, align 4
138; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
139; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
140; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
141; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
142; CHECK: add nsw <4 x i32>
143; CHECK: sub <4 x i32>
144; CHECK: add nsw <4 x i32>
145; CHECK: sub <4 x i32>
146
; Four-field struct; also used by @test_struct_store4 later in this file.
147%struct.ST4 = type { i32, i32, i32, i32 }
148
; Scalar input loop: an integer reduction over 1024 elements of %S that
; computes r = r + S[i].x - S[i].y + S[i].z - S[i].w and returns the final r.
; Only loads touch memory; all four fields of each element are read.
149define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) {
150entry:
151 br label %for.body
152
153for.body: ; preds = %for.body, %entry
154 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; Running reduction value: 0 on entry, then the %sub8 of the previous iteration.
155 %r.022 = phi i32 [ 0, %entry ], [ %sub8, %for.body ]
156 %x = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 0
157 %tmp = load i32, i32* %x, align 4
158 %add = add nsw i32 %tmp, %r.022
159 %y = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 1
160 %tmp1 = load i32, i32* %y, align 4
; Note: the subtractions carry no nsw flag, unlike the additions.
161 %sub = sub i32 %add, %tmp1
162 %z = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 2
163 %tmp2 = load i32, i32* %z, align 4
164 %add5 = add nsw i32 %sub, %tmp2
165 %w = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 3
166 %tmp3 = load i32, i32* %w, align 4
167 %sub8 = sub i32 %add5, %tmp3
168 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
169 %exitcond = icmp eq i64 %indvars.iv.next, 1024
170 br i1 %exitcond, label %for.end, label %for.body
171
172for.end: ; preds = %for.body
; The value of the last iteration's %sub8 is the function result.
173 ret i32 %sub8
174}
175
176; Check vectorization on an interleaved store group of factor 4.
177
178; void test_struct_store4(int *A, struct ST4 *B) {
179; int *ptr = A;
180; for (int i = 0; i < 1024; i++) {
181; int X = *ptr++;
182; B[i].x = X + 1;
183; B[i].y = X * 2;
184; B[i].z = X + 3;
185; B[i].w = X + 4;
186; }
187; }
188
189; CHECK-LABEL: @test_struct_store4(
190; CHECK: %[[LD:.*]] = load <4 x i32>, <4 x i32>*
191; CHECK: add nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
192; CHECK: shl nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
193; CHECK: add nsw <4 x i32> %[[LD]], <i32 3, i32 3, i32 3, i32 3>
194; CHECK: add nsw <4 x i32> %[[LD]], <i32 4, i32 4, i32 4, i32 4>
195; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
196; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
197; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
198; CHECK: store <16 x i32> %interleaved.vec, <16 x i32>* {{.*}}, align 4
199
; Scalar input loop: each of the 1024 iterations reads one i32 from %A through a
; moving pointer and writes four derived values (x+1, x<<1, x+3, x+4) into
; fields 0..3 of B[i]. %A and %B are both marked noalias.
200define void @test_struct_store4(i32* noalias nocapture readonly %A, %struct.ST4* noalias nocapture %B) {
201entry:
202 br label %for.body
203
204for.cond.cleanup: ; preds = %for.body
205 ret void
206
207for.body: ; preds = %for.body, %entry
208 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; Read cursor into %A, advanced by one element per iteration.
209 %ptr.024 = phi i32* [ %A, %entry ], [ %incdec.ptr, %for.body ]
210 %incdec.ptr = getelementptr inbounds i32, i32* %ptr.024, i64 1
211 %tmp = load i32, i32* %ptr.024, align 4
212 %add = add nsw i32 %tmp, 1
213 %x = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 0
214 store i32 %add, i32* %x, align 4
; The pseudo-C above writes X * 2; here it is already expressed as a left shift by 1.
215 %mul = shl nsw i32 %tmp, 1
216 %y = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 1
217 store i32 %mul, i32* %y, align 4
218 %add3 = add nsw i32 %tmp, 3
219 %z = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 2
220 store i32 %add3, i32* %z, align 4
221 %add6 = add nsw i32 %tmp, 4
222 %w = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 3
223 store i32 %add6, i32* %w, align 4
224 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
225 %exitcond = icmp eq i64 %indvars.iv.next, 1024
226 br i1 %exitcond, label %for.cond.cleanup, label %for.body
227}
228
229; Check vectorization on a reverse interleaved load group of factor 2 and
230; a reverse interleaved store group of factor 2.
231
232; struct ST2 {
233; int x;
234; int y;
235; };
236;
237; void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) {
238; for (int i = 1023; i >= 0; i--) {
239; int a = A[i].x + i; // interleaved load of index 0
240; int b = A[i].y - i; // interleaved load of index 1
241; B[i].x = a; // interleaved store of index 0
242; B[i].y = b; // interleaved store of index 1
243; }
244; }
245
246; CHECK-LABEL: @test_reversed_load2_store2(
247; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
248; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
249; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
250; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
251; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
252; CHECK: add nsw <4 x i32>
253; CHECK: sub nsw <4 x i32>
254; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
255; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
256; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
257; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4
258
; Two-field struct used by @test_reversed_load2_store2.
259%struct.ST2 = type { i32, i32 }
260
; Scalar input loop that walks the index downwards from 1023 to 0. For each i
; it stores B[i].x = A[i].x + i and B[i].y = A[i].y - i. %A and %B are noalias.
261define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly %A, %struct.ST2* noalias nocapture %B) {
262entry:
263 br label %for.body
264
265for.cond.cleanup: ; preds = %for.body
266 ret void
267
268for.body: ; preds = %for.body, %entry
; Descending induction variable: starts at 1023, decremented by 1 below.
269 %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
270 %x = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 0
271 %tmp = load i32, i32* %x, align 4
; 32-bit copy of the index, used as the addend/subtrahend for both fields.
272 %tmp1 = trunc i64 %indvars.iv to i32
273 %add = add nsw i32 %tmp, %tmp1
274 %y = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 1
275 %tmp2 = load i32, i32* %y, align 4
276 %sub = sub nsw i32 %tmp2, %tmp1
277 %x5 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 0
278 store i32 %add, i32* %x5, align 4
279 %y8 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 1
280 store i32 %sub, i32* %y8, align 4
281 %indvars.iv.next = add nsw i64 %indvars.iv, -1
; The exit test uses the pre-decrement index, so the iteration with index 0
; still executes before the loop is left.
282 %cmp = icmp sgt i64 %indvars.iv, 0
283 br i1 %cmp, label %for.body, label %for.cond.cleanup
284}
285
286; Check that an interleaved load group of factor 2 with 1 gap (missing the
287; load of odd elements) does not produce a wide <8 x i32> load and shuffle.
288
289; void even_load(int *A, int *B) {
290; for (unsigned i = 0; i < 1024; i+=2)
291; B[i/2] = A[i] * 2;
292; }
293
294; CHECK-LABEL: @even_load(
295; CHECK-NOT: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
296; CHECK-NOT: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
297
; Scalar input loop: for each even index i below 1024 it loads A[i], doubles it
; with a left shift, and stores the result to B[i/2]. The odd elements of %A are
; never read. %A and %B are noalias.
298define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
299entry:
300 br label %for.body
301
302for.cond.cleanup: ; preds = %for.body
303 ret void
304
305for.body: ; preds = %for.body, %entry
; Starts at 0 and advances by 2, so only even indices are visited.
306 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
307 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
308 %tmp = load i32, i32* %arrayidx, align 4
309 %mul = shl nsw i32 %tmp, 1
; Store index is i/2; the shift is marked exact, which holds because the
; index is always even.
310 %tmp1 = lshr exact i64 %indvars.iv, 1
311 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1
312 store i32 %mul, i32* %arrayidx2, align 4
313 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
; Unsigned compare on the already-incremented index.
314 %cmp = icmp ult i64 %indvars.iv.next, 1024
315 br i1 %cmp, label %for.body, label %for.cond.cleanup
316}
317
318; Check vectorization on interleaved access groups identified from mixed
319; loads/stores.
320; void mixed_load2_store2(int *A, int *B) {
321; for (unsigned i = 0; i < 1024; i+=2) {
322; B[i] = A[i] * A[i+1];
323; B[i+1] = A[i] + A[i+1];
324; }
325; }
326
327; CHECK-LABEL: @mixed_load2_store2(
328; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
329; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
330; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
331; CHECK: %interleaved.vec = shufflevector <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
332; CHECK: store <8 x i32> %interleaved.vec
333
; Scalar input loop: for each even index i below 1024 it stores
; B[i] = A[i] * A[i+1] and B[i+1] = A[i] + A[i+1]. The pair A[i], A[i+1] is
; loaded twice per iteration, once before and once after the first store, so
; loads and stores are intermixed in program order. %A and %B are noalias.
334define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
335entry:
336 br label %for.body
337
338for.cond.cleanup: ; preds = %for.body
339 ret void
340
341for.body: ; preds = %for.body, %entry
342 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
343 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
344 %tmp = load i32, i32* %arrayidx, align 4
; The index advances by 2 from 0 and is therefore even; or 1 yields i+1.
345 %tmp1 = or i64 %indvars.iv, 1
346 %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %tmp1
347 %tmp2 = load i32, i32* %arrayidx2, align 4
348 %mul = mul nsw i32 %tmp2, %tmp
349 %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
350 store i32 %mul, i32* %arrayidx4, align 4
; Second pair of loads from the same two addresses, placed after the store above.
351 %tmp3 = load i32, i32* %arrayidx, align 4
352 %tmp4 = load i32, i32* %arrayidx2, align 4
353 %add10 = add nsw i32 %tmp4, %tmp3
354 %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 %tmp1
355 store i32 %add10, i32* %arrayidx13, align 4
356 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
357 %cmp = icmp ult i64 %indvars.iv.next, 1024
358 br i1 %cmp, label %for.body, label %for.cond.cleanup
359}
360
361; Check vectorization on interleaved access groups identified from mixed
362; loads/stores.
363; void mixed_load3_store3(int *A) {
364; for (unsigned i = 0; i < 1024; i++) {
365; *A++ += i;
366; *A++ += i;
367; *A++ += i;
368; }
369; }
370
371; CHECK-LABEL: @mixed_load3_store3(
372; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
373; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
374; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
375; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
376; CHECK: %interleaved.vec = shufflevector <8 x i32> %{{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
377; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* %{{.*}}, align 4
378
; Scalar input loop: each of the 1024 iterations performs an in-place
; read-modify-write on three consecutive i32 elements reached through a moving
; pointer into %A, adding the 32-bit loop counter to each of them. Loads and
; stores to the same buffer alternate in program order.
379define void @mixed_load3_store3(i32* nocapture %A) {
380entry:
381 br label %for.body
382
383for.cond.cleanup: ; preds = %for.body
384 ret void
385
386for.body: ; preds = %for.body, %entry
; 32-bit loop counter; it is also the value added to every element.
387 %i.013 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; Cursor into %A, replaced by %incdec.ptr3 (cursor + 3 elements) on each back edge.
388 %A.addr.012 = phi i32* [ %A, %entry ], [ %incdec.ptr3, %for.body ]
389 %incdec.ptr = getelementptr inbounds i32, i32* %A.addr.012, i64 1
390 %tmp = load i32, i32* %A.addr.012, align 4
; The additions carry no nsw/nuw flags.
391 %add = add i32 %tmp, %i.013
392 store i32 %add, i32* %A.addr.012, align 4
393 %incdec.ptr1 = getelementptr inbounds i32, i32* %A.addr.012, i64 2
394 %tmp1 = load i32, i32* %incdec.ptr, align 4
395 %add2 = add i32 %tmp1, %i.013
396 store i32 %add2, i32* %incdec.ptr, align 4
397 %incdec.ptr3 = getelementptr inbounds i32, i32* %A.addr.012, i64 3
398 %tmp2 = load i32, i32* %incdec.ptr1, align 4
399 %add4 = add i32 %tmp2, %i.013
400 store i32 %add4, i32* %incdec.ptr1, align 4
401 %inc = add nuw nsw i32 %i.013, 1
402 %exitcond = icmp eq i32 %inc, 1024
403 br i1 %exitcond, label %for.cond.cleanup, label %for.body
404}
405
406; Check vectorization on interleaved access groups whose members have
407; different types (here i32 and float).
408
409; struct IntFloat {
410; int a;
411; float b;
412; };
413;
414; int SA;
415; float SB;
416;
417; void int_float_struct(struct IntFloat *A) {
418; int SumA;
419; float SumB;
420; for (unsigned i = 0; i < 1024; i++) {
421; SumA += A[i].a;
422; SumB += A[i].b;
423; }
424; SA = SumA;
425; SB = SumB;
426; }
427
428; CHECK-LABEL: @int_float_struct(
429; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
430; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
431; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
432; CHECK: bitcast <4 x i32> %[[V1]] to <4 x float>
433; CHECK: add nsw <4 x i32>
434; CHECK: fadd fast <4 x float>
435
; Struct that pairs an i32 with a float; both fields are 4 bytes wide under the
; types used here.
436%struct.IntFloat = type { i32, float }
437
; Globals that receive the final integer and float sums.
438@SA = common global i32 0, align 4
439@SB = common global float 0.000000e+00, align 4
440
; Scalar input loop: two independent reductions over 1024 elements of %A, an
; integer sum of field 0 and a float sum of field 1. After the loop the final
; sums are stored to @SA and @SB. The function carries attribute group #0.
441define void @int_float_struct(%struct.IntFloat* nocapture readonly %A) #0 {
442entry:
443 br label %for.body
444
445for.cond.cleanup: ; preds = %for.body
; %add and %add3 here are the values produced by the last loop iteration.
446 store i32 %add, i32* @SA, align 4
447 store float %add3, float* @SB, align 4
448 ret void
449
450for.body: ; preds = %for.body, %entry
451 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; Both accumulators enter the loop as undef, mirroring the uninitialized
; SumA/SumB locals in the pseudo-C above.
452 %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ]
453 %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ]
454 %a = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 0
455 %tmp = load i32, i32* %a, align 4
456 %add = add nsw i32 %tmp, %SumA.013
457 %b = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 1
458 %tmp1 = load float, float* %b, align 4
; The float accumulation is marked fast.
459 %add3 = fadd fast float %SumB.014, %tmp1
460 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
461 %exitcond = icmp eq i64 %indvars.iv.next, 1024
462 br i1 %exitcond, label %for.cond.cleanup, label %for.body
463}
464
; Attribute group applied to @int_float_struct.
; NOTE(review): presumably needed so the float reduction may be reassociated
; when vectorized - confirm against the vectorizer's reduction legality rules.
465attributes #0 = { "unsafe-fp-math"="true" }