; blob: 1cfc0dd290172854275901f006eec2d75b1dc41c
; Scalarizer regression test: runs the scalarizer with load/store
; scalarization enabled, cleans up with DCE, and verifies the printed IR.
; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Opaque external function taking and returning a <4 x float>.  Calls to it
; force the scalarized lanes to be reassembled into a vector and split again.
declare <4 x float> @ext(<4 x float>)

; Vector-typed global; no function visible in this file references it.
@g = global <4 x float> zeroinitializer

; Test a loop that carries a <4 x float> accumulator through a PHI.  The loaded
; value and the accumulator are reinterpreted as <2 x double>, shuffled, and
; cast back before an fadd, an opaque call to @ext, an fcmp, a select and a
; store.  The expected output has every vector operation split per lane, with
; the vector rebuilt only around the call.
define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
; CHECK-LABEL: @f1(
; CHECK: entry:
; CHECK:   %init.i0 = extractelement <4 x float> %init, i32 0
; CHECK:   %init.i1 = extractelement <4 x float> %init, i32 1
; CHECK:   %init.i2 = extractelement <4 x float> %init, i32 2
; CHECK:   %init.i3 = extractelement <4 x float> %init, i32 3
; CHECK:   br label %loop
; CHECK: loop:
; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
; CHECK:   %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ]
; CHECK:   %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ]
; CHECK:   %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ]
; CHECK:   %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ]
; CHECK:   %nexti = sub i32 %i, 1
; CHECK:   %ptr = getelementptr <4 x float>* %base, i32 %i
; CHECK:   %ptr.i0 = bitcast <4 x float>* %ptr to float*
; CHECK:   %val.i0 = load float* %ptr.i0, align 16
; CHECK:   %ptr.i1 = getelementptr float* %ptr.i0, i32 1
; CHECK:   %val.i1 = load float* %ptr.i1, align 4
; CHECK:   %ptr.i2 = getelementptr float* %ptr.i0, i32 2
; CHECK:   %val.i2 = load float* %ptr.i2, align 8
; CHECK:   %ptr.i3 = getelementptr float* %ptr.i0, i32 3
; CHECK:   %val.i3 = load float* %ptr.i3, align 4
; CHECK:   %add.i0 = fadd float %val.i0, %val.i2
; CHECK:   %add.i1 = fadd float %val.i1, %val.i3
; CHECK:   %add.i2 = fadd float %acc.i0, %acc.i2
; CHECK:   %add.i3 = fadd float %acc.i1, %acc.i3
; CHECK:   %add.upto0 = insertelement <4 x float> undef, float %add.i0, i32 0
; CHECK:   %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
; CHECK:   %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
; CHECK:   %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
; CHECK:   %call = call <4 x float> @ext(<4 x float> %add)
; CHECK:   %call.i0 = extractelement <4 x float> %call, i32 0
; CHECK:   %cmp.i0 = fcmp ogt float %call.i0, 1.0
; CHECK:   %call.i1 = extractelement <4 x float> %call, i32 1
; CHECK:   %cmp.i1 = fcmp ogt float %call.i1, 2.0
; CHECK:   %call.i2 = extractelement <4 x float> %call, i32 2
; CHECK:   %cmp.i2 = fcmp ogt float %call.i2, 3.0
; CHECK:   %call.i3 = extractelement <4 x float> %call, i32 3
; CHECK:   %cmp.i3 = fcmp ogt float %call.i3, 4.0
; CHECK:   %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0
; CHECK:   %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0
; CHECK:   %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0
; CHECK:   %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0
; The vector store has no explicit alignment, so the per-lane stores are
; expected to carry the same alignments as the per-lane loads above.
; CHECK:   store float %sel.i0, float* %ptr.i0, align 16
; CHECK:   store float %sel.i1, float* %ptr.i1, align 4
; CHECK:   store float %sel.i2, float* %ptr.i2, align 8
; CHECK:   store float %sel.i3, float* %ptr.i3, align 4
; CHECK:   %test = icmp eq i32 %nexti, 0
; CHECK:   br i1 %test, label %loop, label %exit
; CHECK: exit:
; CHECK:   ret void
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float> *%base, i32 %i
  %val = load <4 x float> *%ptr
  ; View both vectors as pairs of doubles so that each shuffle below moves
  ; two adjacent float lanes at a time.
  %dval = bitcast <4 x float> %val to <2 x double>
  %dacc = bitcast <4 x float> %acc to <2 x double>
  %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 0, i32 2>
  %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 1, i32 3>
  %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
  %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
  %add = fadd <4 x float> %f1, %f2
  %call = call <4 x float> @ext(<4 x float> %add)
  %cmp = fcmp ogt <4 x float> %call,
                  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, <4 x float> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Test an integer loop mixing element widths: <4 x i8> values are loaded,
; sign-extended to <4 x i32>, added to a vector accumulator, compared against
; per-lane constants, selected against a splat of the scalar %i, truncated and
; stored back.  The splat is expected to fold to plain uses of %i.
define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) {
; CHECK: entry:
; CHECK:   %init.i0 = extractelement <4 x i32> %init, i32 0
; CHECK:   %init.i1 = extractelement <4 x i32> %init, i32 1
; CHECK:   %init.i2 = extractelement <4 x i32> %init, i32 2
; CHECK:   %init.i3 = extractelement <4 x i32> %init, i32 3
; CHECK:   br label %loop
; CHECK: loop:
; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
; CHECK:   %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ]
; CHECK:   %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ]
; CHECK:   %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ]
; CHECK:   %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ]
; CHECK:   %nexti = sub i32 %i, 1
; CHECK:   %ptr = getelementptr <4 x i8>* %base, i32 %i
; CHECK:   %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
; CHECK:   %val.i0 = load i8* %ptr.i0, align 4
; CHECK:   %ptr.i1 = getelementptr i8* %ptr.i0, i32 1
; CHECK:   %val.i1 = load i8* %ptr.i1, align 1
; CHECK:   %ptr.i2 = getelementptr i8* %ptr.i0, i32 2
; CHECK:   %val.i2 = load i8* %ptr.i2, align 2
; CHECK:   %ptr.i3 = getelementptr i8* %ptr.i0, i32 3
; CHECK:   %val.i3 = load i8* %ptr.i3, align 1
; CHECK:   %ext.i0 = sext i8 %val.i0 to i32
; CHECK:   %ext.i1 = sext i8 %val.i1 to i32
; CHECK:   %ext.i2 = sext i8 %val.i2 to i32
; CHECK:   %ext.i3 = sext i8 %val.i3 to i32
; CHECK:   %add.i0 = add i32 %ext.i0, %acc.i0
; CHECK:   %add.i1 = add i32 %ext.i1, %acc.i1
; CHECK:   %add.i2 = add i32 %ext.i2, %acc.i2
; CHECK:   %add.i3 = add i32 %ext.i3, %acc.i3
; CHECK:   %cmp.i0 = icmp slt i32 %add.i0, -10
; CHECK:   %cmp.i1 = icmp slt i32 %add.i1, -11
; CHECK:   %cmp.i2 = icmp slt i32 %add.i2, -12
; CHECK:   %cmp.i3 = icmp slt i32 %add.i3, -13
; CHECK:   %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i
; CHECK:   %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i
; CHECK:   %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i
; CHECK:   %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i
; CHECK:   %trunc.i0 = trunc i32 %sel.i0 to i8
; CHECK:   %trunc.i1 = trunc i32 %sel.i1 to i8
; CHECK:   %trunc.i2 = trunc i32 %sel.i2 to i8
; CHECK:   %trunc.i3 = trunc i32 %sel.i3 to i8
; CHECK:   store i8 %trunc.i0, i8* %ptr.i0, align 4
; CHECK:   store i8 %trunc.i1, i8* %ptr.i1, align 1
; CHECK:   store i8 %trunc.i2, i8* %ptr.i2, align 2
; CHECK:   store i8 %trunc.i3, i8* %ptr.i3, align 1
; CHECK:   %test = icmp eq i32 %nexti, 0
; CHECK:   br i1 %test, label %loop, label %exit
; CHECK: exit:
; CHECK:   ret void
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x i8> *%base, i32 %i
  %val = load <4 x i8> *%ptr
  %ext = sext <4 x i8> %val to <4 x i32>
  %add = add <4 x i32> %ext, %acc
  %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
  ; Broadcast %i to all four lanes (insert into lane 0, then shuffle with an
  ; all-zero mask).
  %single = insertelement <4 x i32> undef, i32 %i, i32 0
  %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
                         <4 x i32> zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
  %trunc = trunc <4 x i32> %sel to <4 x i8>
  store <4 x i8> %trunc, <4 x i8> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Check that !tbaa information is preserved.
; The load and the store use different tbaa nodes (!1 and !2), so each group of
; scalar accesses is matched against its own pattern variable.
define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f3(
; CHECK: %val.i0 = load i32* %src.i0, align 16, !tbaa ![[TAG1:[0-9]*]]
; CHECK: %val.i1 = load i32* %src.i1, align 4, !tbaa ![[TAG1]]
; CHECK: %val.i2 = load i32* %src.i2, align 8, !tbaa ![[TAG1]]
; CHECK: %val.i3 = load i32* %src.i3, align 4, !tbaa ![[TAG1]]
; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG2:[0-9]*]]
; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG2]]
; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG2]]
; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG2]]
; CHECK: ret void
  %val = load <4 x i32> *%src, !tbaa !1
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
  ret void
}

; Check that !tbaa.struct information is preserved.
; Both the load and the store use the same node (!5), so a single pattern
; variable covers all eight scalar accesses.
define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f4(
; CHECK: %val.i0 = load i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
; CHECK: %val.i1 = load i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
; CHECK: %val.i2 = load i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
; CHECK: %val.i3 = load i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
; CHECK: ret void
  %val = load <4 x i32> *%src, !tbaa.struct !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
  ret void
}

; Check that llvm.mem.parallel_loop_access information is preserved.
; The vector load and store inside the loop are tagged with the loop's own
; identifier (!3, also attached to the back edge as !llvm.loop); every scalar
; access produced from them must keep that tag.
define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f5(
; CHECK: %val.i0 = load i32* %this_src.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG:[0-9]*]]
; CHECK: %val.i1 = load i32* %this_src.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: %val.i2 = load i32* %this_src.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: %val.i3 = load i32* %this_src.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: ret void
entry:
  br label %loop

loop:
  %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
  %this_src = getelementptr <4 x i32> *%src, i32 %index
  %this_dst = getelementptr <4 x i32> *%dst, i32 %index
  %val = load <4 x i32> *%this_src, !llvm.mem.parallel_loop_access !3
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.mem.parallel_loop_access !3
  %next_index = add i32 %index, -1
  %continue = icmp ne i32 %next_index, %count
  br i1 %continue, label %loop, label %end, !llvm.loop !3

end:
  ret void
}

; Check that fpmath information is preserved.
; A single vector fadd tagged with !fpmath is split into four scalar fadds,
; each of which must keep the tag; the result is then rebuilt for the return.
; The {{[e+0]*}} pattern absorbs the exponent suffix of the printed constants.
define <4 x float> @f6(<4 x float> %x) {
; CHECK-LABEL: @f6(
; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0
; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]]
; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1
; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2
; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3
; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %res.upto0 = insertelement <4 x float> undef, float %res.i0, i32 0
; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1
; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2
; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3
; CHECK: ret <4 x float> %res
  %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
    !fpmath !4
  ret <4 x float> %res
}

; Check that random metadata isn't kept.
; The load and store carry an unrecognized !foo attachment; nothing in the
; scalarized function body may mention it.
define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f7(
; CHECK-NOT: !foo
; CHECK: ret void
  %val = load <4 x i32> *%src, !foo !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !foo !5
  ret void
}

; Test GEP with vectors.
; Lanes 0 and 2 of the index vector are overwritten with the constant 100 and
; lane 1 of the pointer vector with %other, so the expected scalar GEPs mix
; extracted elements, the constant and the scalar argument.
define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
                float *%other) {
; CHECK-LABEL: @f8(
; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
; CHECK: %dest.i1 = getelementptr float** %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float** %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float** %dest.i0, i32 3
; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
; CHECK: %val.i0 = getelementptr float* %ptr0.i0, i32 100
; CHECK: %val.i1 = getelementptr float* %other, i32 %i0.i1
; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
; CHECK: %val.i2 = getelementptr float* %ptr0.i2, i32 100
; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
; CHECK: %val.i3 = getelementptr float* %ptr0.i3, i32 %i0.i3
; CHECK: store float* %val.i0, float** %dest.i0, align 32
; CHECK: store float* %val.i1, float** %dest.i1, align 8
; CHECK: store float* %val.i2, float** %dest.i2, align 16
; CHECK: store float* %val.i3, float** %dest.i3, align 8
; CHECK: ret void
  %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
  %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
  %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
  %val = getelementptr <4 x float *> %ptr1, <4 x i32> %i2
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test the handling of unaligned loads.
; The vector load is only 4-byte aligned and the vector store only 8-byte
; aligned, so the expected scalar accesses carry at most those alignments.
define void @f9(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f9(
; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
; CHECK: %dest.i1 = getelementptr float* %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float* %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float* %dest.i0, i32 3
; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
; CHECK: %val.i0 = load float* %src.i0, align 4
; CHECK: %src.i1 = getelementptr float* %src.i0, i32 1
; CHECK: %val.i1 = load float* %src.i1, align 4
; CHECK: %src.i2 = getelementptr float* %src.i0, i32 2
; CHECK: %val.i2 = load float* %src.i2, align 4
; CHECK: %src.i3 = getelementptr float* %src.i0, i32 3
; CHECK: %val.i3 = load float* %src.i3, align 4
; CHECK: store float %val.i0, float* %dest.i0, align 8
; CHECK: store float %val.i1, float* %dest.i1, align 4
; CHECK: store float %val.i2, float* %dest.i2, align 8
; CHECK: store float %val.i3, float* %dest.i3, align 4
; CHECK: ret void
  %val = load <4 x float> *%src, align 4
  store <4 x float> %val, <4 x float> *%dest, align 8
  ret void
}

; ...and again with subelement alignment.
; The vector accesses are aligned to less than one float (1 and 2 bytes), so
; every scalar access is expected to keep that same small alignment.
define void @f10(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f10(
; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
; CHECK: %dest.i1 = getelementptr float* %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float* %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float* %dest.i0, i32 3
; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
; CHECK: %val.i0 = load float* %src.i0, align 1
; CHECK: %src.i1 = getelementptr float* %src.i0, i32 1
; CHECK: %val.i1 = load float* %src.i1, align 1
; CHECK: %src.i2 = getelementptr float* %src.i0, i32 2
; CHECK: %val.i2 = load float* %src.i2, align 1
; CHECK: %src.i3 = getelementptr float* %src.i0, i32 3
; CHECK: %val.i3 = load float* %src.i3, align 1
; CHECK: store float %val.i0, float* %dest.i0, align 2
; CHECK: store float %val.i1, float* %dest.i1, align 2
; CHECK: store float %val.i2, float* %dest.i2, align 2
; CHECK: store float %val.i3, float* %dest.i3, align 2
; CHECK: ret void
  %val = load <4 x float> *%src, align 1
  store <4 x float> %val, <4 x float> *%dest, align 2
  ret void
}

; Test that sub-byte loads aren't scalarized.
; The elements are i1, so the <32 x i1> loads and the store are expected to
; survive as whole-vector memory accesses.
define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
; CHECK-LABEL: @f11(
; CHECK: %val0 = load <32 x i1>* %src0
; CHECK: %val1 = load <32 x i1>* %src1
; CHECK: store <32 x i1> %and, <32 x i1>* %dest
; CHECK: ret void
  %src1 = getelementptr <32 x i1> *%src0, i32 1
  %val0 = load <32 x i1> *%src0
  %val1 = load <32 x i1> *%src1
  %and = and <32 x i1> %val0, %val1
  store <32 x i1> %and, <32 x i1> *%dest
  ret void
}

; Test that variable inserts aren't scalarized.
; The insertelement uses a run-time lane index, so it is expected to stay a
; vector operation; its result is split with extractelements for the shl that
; follows.  The extracts and shifts may appear in any relative order.
define void @f12(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) {
; CHECK-LABEL: @f12(
; CHECK: %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
; CHECK-DAG: %val1.i0 = extractelement <4 x i32> %val1, i32 0
; CHECK-DAG: %val1.i1 = extractelement <4 x i32> %val1, i32 1
; CHECK-DAG: %val1.i2 = extractelement <4 x i32> %val1, i32 2
; CHECK-DAG: %val1.i3 = extractelement <4 x i32> %val1, i32 3
; CHECK-DAG: %val2.i0 = shl i32 1, %val1.i0
; CHECK-DAG: %val2.i1 = shl i32 2, %val1.i1
; CHECK-DAG: %val2.i2 = shl i32 3, %val1.i2
; CHECK-DAG: %val2.i3 = shl i32 4, %val1.i3
; CHECK: ret void
  %val0 = load <4 x i32> *%src
  %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
  %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1
  store <4 x i32> %val2, <4 x i32> *%dest
  ret void
}

; Test vector GEPs with more than one index.
; The first index is a constant vector <0, 1, 2, 3> and the second is the
; variable vector %i; each scalar GEP is expected to pick up the matching
; constant and the matching extracted element, and to keep "inbounds".
define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
                 float *%other) {
; CHECK-LABEL: @f13(
; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
; CHECK: %dest.i1 = getelementptr float** %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float** %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float** %dest.i0, i32 3
; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0
; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0
; CHECK: %val.i0 = getelementptr inbounds [4 x float]* %ptr.i0, i32 0, i32 %i.i0
; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1
; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1
; CHECK: %val.i1 = getelementptr inbounds [4 x float]* %ptr.i1, i32 1, i32 %i.i1
; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2
; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2
; CHECK: %val.i2 = getelementptr inbounds [4 x float]* %ptr.i2, i32 2, i32 %i.i2
; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3
; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3
; CHECK: %val.i3 = getelementptr inbounds [4 x float]* %ptr.i3, i32 3, i32 %i.i3
; CHECK: store float* %val.i0, float** %dest.i0, align 32
; CHECK: store float* %val.i1, float** %dest.i1, align 8
; CHECK: store float* %val.i2, float** %dest.i2, align 16
; CHECK: store float* %val.i3, float** %dest.i3, align 8
; CHECK: ret void
  %val = getelementptr inbounds <4 x [4 x float] *> %ptr,
                                <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
                                <4 x i32> %i
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test combinations of vector and non-vector PHIs.
; The vector PHI is followed by a scalar PHI in the same block.  The four
; scalar PHIs replacing the vector one must stay grouped with the scalar PHI,
; and the vector needed by the call to @ext is rebuilt only after all PHIs.
define <4 x float> @f14(<4 x float> %acc, i32 %count) {
; CHECK-LABEL: @f14(
; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ]
; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ]
; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ]
; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ]
; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
; CHECK: %this_acc.upto0 = insertelement <4 x float> undef, float %this_acc.i0, i32 0
; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1
; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2
; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3
; CHECK: ret <4 x float> %next_acc
entry:
  br label %loop

loop:
  %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
  %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
  %foo = call <4 x float> @ext(<4 x float> %this_acc)
  %next_acc = fadd <4 x float> %this_acc, %foo
  %next_count = sub i32 %this_count, 1
  %cmp = icmp eq i32 %next_count, 0
  br i1 %cmp, label %loop, label %exit

exit:
  ret <4 x float> %next_acc
}

; Metadata nodes referenced by the tests in this file.
; !0 is the root that !1 and !2 point to.
!0 = metadata !{ metadata !"root" }
; !1 and !2 are the two distinct !tbaa nodes used by @f3 (load and store).
!1 = metadata !{ metadata !"set1", metadata !0 }
!2 = metadata !{ metadata !"set2", metadata !0 }
; !3 is a self-referential node used by @f5 both as !llvm.loop and as the
; !llvm.mem.parallel_loop_access tag.
!3 = metadata !{ metadata !3 }
; !4 is the !fpmath operand used by @f6.
!4 = metadata !{ float 4.0 }
; !5 is attached as !tbaa.struct in @f4 and as the unrecognized !foo in @f7.
!5 = metadata !{ i64 0, i64 8, null }