; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s

; Floating-point step value kept in memory. @fp_iv_loop1 and @fp_iv_loop3
; load it once before entering their loops, so the step is loop-invariant
; but not a compile-time constant.
@fp_inc = common global float 0.000000e+00, align 4

;void fp_iv_loop1(float init, float * __restrict__ A, int N) {
;  float x = init;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x -= fp_inc;
;  }
;}

; VEC4_INTERL1-LABEL: @fp_iv_loop1(
; VEC4_INTERL1: vector.ph:
; VEC4_INTERL1-NEXT: br label %vector.body
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = sitofp i64 [[INDEX]] to float
; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast float %fpinc, [[TMP5]]
; VEC4_INTERL1-NEXT: [[FP_OFFSET_IDX:%.*]] = fsub fast float %init, [[TMP6]]
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x float> undef, float [[FP_OFFSET_IDX]], i32 0
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT3]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %fpinc, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL1-NEXT: [[INDUCTION5:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT4]], [[TMP7]]
; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
; VEC4_INTERL1-NEXT: store <4 x float> [[INDUCTION5]], <4 x float>* [[TMP9]], align 4
; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; VEC4_INTERL1: br i1 {{.*}}, label %middle.block, label %vector.body

; VEC4_INTERL2-LABEL: @fp_iv_loop1(
; VEC4_INTERL2: vector.ph:
; VEC4_INTERL2-NEXT: br label %vector.body
; VEC4_INTERL2: vector.body:
; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = sitofp i64 [[INDEX]] to float
; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast float %fpinc, [[TMP5]]
; VEC4_INTERL2-NEXT: [[FP_OFFSET_IDX:%.*]] = fsub fast float %init, [[TMP6]]
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x float> undef, float [[FP_OFFSET_IDX]], i32 0
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT4]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %fpinc, i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL2-NEXT: [[INDUCTION6:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT5]], [[TMP7]]
; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>
; VEC4_INTERL2-NEXT: [[INDUCTION9:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT5]], [[TMP8]]
; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to <4 x float>*
; VEC4_INTERL2-NEXT: store <4 x float> [[INDUCTION6]], <4 x float>* [[TMP10]], align 4
; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = getelementptr float, float* [[TMP9]], i64 4
; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = bitcast float* [[TMP11]] to <4 x float>*
; VEC4_INTERL2-NEXT: store <4 x float> [[INDUCTION9]], <4 x float>* [[TMP12]], align 4
; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; VEC4_INTERL2: br i1 {{.*}}, label %middle.block, label %vector.body

; VEC1_INTERL2-LABEL: @fp_iv_loop1(
; VEC1_INTERL2: vector.ph:
; VEC1_INTERL2-NEXT: br label %vector.body
; VEC1_INTERL2: vector.body:
; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; VEC1_INTERL2-NEXT: [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = sitofp i64 [[INDEX]] to float
; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast float %fpinc, [[TMP6]]
; VEC1_INTERL2-NEXT: [[FP_OFFSET_IDX:%.*]] = fsub fast float %init, [[TMP7]]
; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = fsub fast float [[FP_OFFSET_IDX]], %fpinc
; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
; VEC1_INTERL2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDUCTION2]]
; VEC1_INTERL2-NEXT: store float [[FP_OFFSET_IDX]], float* [[TMP9]], align 4
; VEC1_INTERL2-NEXT: store float [[TMP8]], float* [[TMP10]], align 4
; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; VEC1_INTERL2: br i1 {{.*}}, label %middle.block, label %vector.body

; Scalar input loop with a floating-point induction whose step is a
; loop-invariant value loaded from memory.
;   %init - start value of the induction
;   %A    - destination array (noalias); A[i] receives the value held at the
;           start of iteration i
;   %N    - trip count; the loop is skipped when N <= 0
; Each iteration stores %x.05 and then subtracts %fpinc using a fast-math
; 'fsub'.
; NOTE(review): attribute group #1 is not defined in the visible part of this
; file - confirm it is declared elsewhere or intentionally left empty.
define void @fp_iv_loop1(float %init, float* noalias nocapture %A, i32 %N) #1 {
entry:
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  ; The step is loaded once, outside the loop.
  %fpinc = load float, float* @fp_inc, align 4
  br label %for.body

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.05, float* %arrayidx, align 4
  ; Floating-point induction update: x -= fp_inc.
  %add = fsub fast float %x.05, %fpinc
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares the 64-bit counter, truncated to i32, against %N.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

;void fp_iv_loop2(float init, float * __restrict__ A, int N) {
;  float x = init;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x += 0.5;
;  }
;}

; VEC4_INTERL1-LABEL: @fp_iv_loop2(
; VEC4_INTERL1: vector.ph:
; VEC4_INTERL1-NEXT: br label %vector.body
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = sitofp i64 [[INDEX]] to float
; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP5]], 5.000000e-01
; VEC4_INTERL1-NEXT: [[FP_OFFSET_IDX:%.*]] = fadd fast float [[TMP6]], %init
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x float> undef, float [[FP_OFFSET_IDX]], i32 0
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT3]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[INDUCTION5:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT4]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP7]] to <4 x float>*
; VEC4_INTERL1-NEXT: store <4 x float> [[INDUCTION5]], <4 x float>* [[TMP8]], align 4
; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; VEC4_INTERL1: br i1 {{.*}}, label %middle.block, label %vector.body

; Scalar input loop with a floating-point induction whose step is the
; constant 0.5 and whose start value is the argument %init.
;   %init - start value of the induction
;   %A    - destination array (noalias); A[i] receives the value held at the
;           start of iteration i
;   %N    - trip count; the loop is skipped when N <= 0
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file - confirm it is declared elsewhere or intentionally left empty.
define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
entry:
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %x.06 = phi float [ %conv1, %for.body ], [ %init, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.06, float* %arrayidx, align 4
  ; Floating-point induction update: x += 0.5.
  %conv1 = fadd fast float %x.06, 5.000000e-01
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares the 64-bit counter, truncated to i32, against %N.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

;void fp_iv_loop3(float init, float * __restrict__ A, float * __restrict__ B, float * __restrict__ C, int N) {
;  int i = 0;
;  float x = init;
;  float y = 0.1;
;  for (; i < N; ++i) {
;    A[i] = x;
;    x += fp_inc;
;    y -= 0.5;
;    B[i] = x + y;
;    C[i] = y;
;  }
;}

; VEC4_INTERL1-LABEL: @fp_iv_loop3(
; VEC4_INTERL1: for.body.lr.ph:
; VEC4_INTERL1: [[TMP0:%.*]] = load float, float* @fp_inc, align 4
; VEC4_INTERL1: vector.ph:
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT14]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: br label %vector.body
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = sitofp i64 [[INDEX]] to float
; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP7]], -5.000000e-01
; VEC4_INTERL1-NEXT: [[FP_OFFSET_IDX:%.*]] = fadd fast float [[TMP8]], 0x3FB99999A0000000
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x float> undef, float [[FP_OFFSET_IDX]], i32 0
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT6]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = sitofp i64 [[INDEX]] to float
; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = fmul fast float [[TMP0]], [[TMP9]]
; VEC4_INTERL1-NEXT: [[FP_OFFSET_IDX10:%.*]] = fadd fast float [[TMP10]], %init
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x float> undef, float [[FP_OFFSET_IDX10]], i32 0
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT11]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL1-NEXT: [[INDUCTION13:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT12]], [[TMP11]]
; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP12]] to <4 x float>*
; VEC4_INTERL1-NEXT: store <4 x float> [[INDUCTION13]], <4 x float>* [[TMP13]], align 4
; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[INDUCTION13]], [[BROADCAST_SPLAT15]]
; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT7]], <float -5.000000e-01, float -1.000000e+00, float -1.500000e+00, float -2.000000e+00>
; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[TMP15]], [[TMP14]]
; VEC4_INTERL1-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* %B, i64 [[INDEX]]
; VEC4_INTERL1-NEXT: [[TMP18:%.*]] = bitcast float* [[TMP17]] to <4 x float>*
; VEC4_INTERL1-NEXT: store <4 x float> [[TMP16]], <4 x float>* [[TMP18]], align 4
; VEC4_INTERL1-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* %C, i64 [[INDEX]]
; VEC4_INTERL1-NEXT: [[TMP20:%.*]] = bitcast float* [[TMP19]] to <4 x float>*
; VEC4_INTERL1-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[TMP20]], align 4
; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; VEC4_INTERL1: br i1 {{.*}}, label %middle.block, label %vector.body

; Scalar input loop carrying two floating-point inductions at once:
;   %x.011 - starts at %init, advanced by the value loaded from @fp_inc
;   %y.012 - starts at the constant 0x3FB99999A0000000, advanced by -0.5
; Per iteration it stores:
;   A[i] = x           (value before the update)
;   B[i] = (y - 0.5) + (x + step)
;   C[i] = y - 0.5
;   %N   - trip count; the loop is skipped when N <= 0
; NOTE(review): attribute group #1 is not defined in the visible part of this
; file - confirm it is declared elsewhere or intentionally left empty.
define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalias nocapture %B, float* noalias nocapture %C, i32 %N) #1 {
entry:
  %cmp9 = icmp sgt i32 %N, 0
  br i1 %cmp9, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  ; The step for %x.011 is loaded once, outside the loop.
  %0 = load float, float* @fp_inc, align 4
  br label %for.body

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %y.012 = phi float [ 0x3FB99999A0000000, %for.body.lr.ph ], [ %conv1, %for.body ]
  %x.011 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.011, float* %arrayidx, align 4
  ; Induction updates: x += step, y += -0.5.
  %add = fadd fast float %x.011, %0
  %conv1 = fadd fast float %y.012, -5.000000e-01
  ; B[i] uses both updated induction values.
  %add2 = fadd fast float %conv1, %add
  %arrayidx4 = getelementptr inbounds float, float* %B, i64 %indvars.iv
  store float %add2, float* %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds float, float* %C, i64 %indvars.iv
  store float %conv1, float* %arrayidx6, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares the 64-bit counter, truncated to i32, against %N.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Start and step values are constants. In this case no vector 'fmul' by the
; step is emitted: the per-lane offsets are folded into a constant vector.
; (A scalar 'fmul' of the index by the step is still expected below.)
;void fp_iv_loop4(float * __restrict__ A, int N) {
;  float x = 1.0;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x += 0.5;
;  }
;}

; VEC4_INTERL1-LABEL: @fp_iv_loop4(
; VEC4_INTERL1: vector.ph:
; VEC4_INTERL1-NEXT: br label %vector.body
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = sitofp i64 [[INDEX]] to float
; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP5]], 5.000000e-01
; VEC4_INTERL1-NEXT: [[FP_OFFSET_IDX:%.*]] = fadd fast float [[TMP6]], 1.000000e+00
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x float> undef, float [[FP_OFFSET_IDX]], i32 0
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT3]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[INDUCTION5:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT4]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP7]] to <4 x float>*
; VEC4_INTERL1-NEXT: store <4 x float> [[INDUCTION5]], <4 x float>* [[TMP8]], align 4
; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; VEC4_INTERL1: br i1 {{.*}}, label %middle.block, label %vector.body

; Scalar input loop with a floating-point induction whose start (1.0) and
; step (0.5) are both constants.
;   %A - destination array (noalias); A[i] receives the value held at the
;        start of iteration i
;   %N - trip count; the loop is skipped when N <= 0
define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
entry:
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.06, float* %arrayidx, align 4
  ; Floating-point induction update: x += 0.5.
  %conv1 = fadd fast float %x.06, 5.000000e-01
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares the 64-bit counter, truncated to i32, against %N.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; VEC2_INTERL1_PRED_STORE-LABEL: @non_primary_iv_float_scalar(
; VEC2_INTERL1_PRED_STORE: vector.body:
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ], [ 0, %min.iters.checked ]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = sitofp i64 [[INDEX]] to float
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT3]], <2 x float> undef, <2 x i32> zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION5:%.*]] = fadd fast <2 x float> [[BROADCAST_SPLAT4]], <float 0.000000e+00, float 1.000000e+00>
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
; VEC2_INTERL1_PRED_STORE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fcmp fast oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VEC2_INTERL1_PRED_STORE: [[PRED_STORE_IF]]:
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[INDUCTION5]], i32 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP6]], float* [[TMP7]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VEC2_INTERL1_PRED_STORE: [[PRED_STORE_CONTINUE]]:
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]]
; VEC2_INTERL1_PRED_STORE: [[PRED_STORE_IF6]]:
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[INDUCTION5]], i32 1
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = or i64 [[INDEX]], 1
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* %A, i64 [[TMP10]]
; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP9]], float* [[TMP11]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: br label %[[PRED_STORE_CONTINUE7]]
; VEC2_INTERL1_PRED_STORE: [[PRED_STORE_CONTINUE7]]:
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; VEC2_INTERL1_PRED_STORE: br i1 {{.*}}, label %middle.block, label %vector.body

; Scalar input loop where a floating-point induction (%j, start 0.0, step 1.0)
; runs alongside the integer counter %i and is only used by a conditional
; store.
;   %A - array that is read and conditionally overwritten: when A[i] compares
;        equal to 0.0, the current %j is stored to A[i]
;   %N - loop bound; iteration continues while i + 1 < N (signed compare)
; The entry block branches straight into the loop, so the body runs at least
; once regardless of %N.
define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %i = phi i64 [ %i.next, %for.inc ], [ 0, %entry ]
  %j = phi float [ %j.next, %for.inc ], [ 0.0, %entry ]
  %tmp0 = getelementptr inbounds float, float* %A, i64 %i
  %tmp1 = load float, float* %tmp0, align 4
  ; Guard for the store in %if.pred.
  %tmp2 = fcmp fast oeq float %tmp1, 0.0
  br i1 %tmp2, label %if.pred, label %for.inc

if.pred:                                          ; preds = %for.body
  ; Conditional store of the floating-point induction value.
  store float %j, float* %tmp0, align 4
  br label %for.inc

for.inc:                                          ; preds = %if.pred, %for.body
  %i.next = add nuw nsw i64 %i, 1
  %j.next = fadd fast float %j, 1.0
  %cond = icmp slt i64 %i.next, %N
  br i1 %cond, label %for.body, label %for.end

for.end:                                          ; preds = %for.inc
  ret void
}