; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s

; VEC4_INTERL1-LABEL: @fp_iv_loop1(
; VEC4_INTERL1: %[[FP_INC:.*]] = load float, float* @fp_inc
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1: %[[FP_INDEX:.*]] = sitofp i64 {{.*}} to float
; VEC4_INTERL1: %[[VEC_INCR:.*]] = fmul fast float {{.*}}, %[[FP_INDEX]]
; VEC4_INTERL1: %[[FP_OFFSET_IDX:.*]] = fsub fast float %init, %[[VEC_INCR]]
; VEC4_INTERL1: %[[BRCT_INSERT:.*]] = insertelement <4 x float> undef, float %[[FP_OFFSET_IDX]], i32 0
; VEC4_INTERL1-NEXT: %[[BRCT_SPLAT:.*]] = shufflevector <4 x float> %[[BRCT_INSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1: %[[BRCT_INSERT:.*]] = insertelement {{.*}} %[[FP_INC]]
; VEC4_INTERL1-NEXT: %[[FP_INC_BCST:.*]] = shufflevector <4 x float> %[[BRCT_INSERT]], {{.*}} zeroinitializer
; VEC4_INTERL1: %[[VSTEP:.*]] = fmul fast <4 x float> %[[FP_INC_BCST]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL1-NEXT: %[[VEC_INDUCTION:.*]] = fsub fast <4 x float> %[[BRCT_SPLAT]], %[[VSTEP]]
; VEC4_INTERL1: store <4 x float> %[[VEC_INDUCTION]]

; VEC4_INTERL2-LABEL: @fp_iv_loop1(
; VEC4_INTERL2: %[[FP_INC:.*]] = load float, float* @fp_inc
; VEC4_INTERL2: vector.body:
; VEC4_INTERL2: %[[INDEX:.*]] = sitofp i64 {{.*}} to float
; VEC4_INTERL2: %[[VEC_INCR:.*]] = fmul fast float %{{.*}}, %[[INDEX]]
; VEC4_INTERL2: fsub fast float %init, %[[VEC_INCR]]
; VEC4_INTERL2: %[[VSTEP1:.*]] = fmul fast <4 x float> %{{.*}}, <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL2-NEXT: %[[VEC_INDUCTION1:.*]] = fsub fast <4 x float> {{.*}}, %[[VSTEP1]]
; VEC4_INTERL2: %[[VSTEP2:.*]] = fmul fast <4 x float> %{{.*}}, <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>
; VEC4_INTERL2-NEXT: %[[VEC_INDUCTION2:.*]] = fsub fast <4 x float> {{.*}}, %[[VSTEP2]]
; VEC4_INTERL2: store <4 x float> %[[VEC_INDUCTION1]]
; VEC4_INTERL2: store <4 x float> %[[VEC_INDUCTION2]]

; VEC1_INTERL2-LABEL: @fp_iv_loop1(
; VEC1_INTERL2: %[[FP_INC:.*]] = load float, float* @fp_inc
; VEC1_INTERL2: vector.body:
; VEC1_INTERL2: %[[INDEX:.*]] = sitofp i64 {{.*}} to float
; VEC1_INTERL2: %[[STEP:.*]] = fmul fast float %{{.*}}, %[[INDEX]]
; VEC1_INTERL2: %[[FP_OFFSET_IDX:.*]] = fsub fast float %init, %[[STEP]]
; VEC1_INTERL2: %[[SCALAR_INDUCTION2:.*]] = fsub fast float %[[FP_OFFSET_IDX]], %[[FP_INC]]
; VEC1_INTERL2: store float %[[FP_OFFSET_IDX]]
; VEC1_INTERL2: store float %[[SCALAR_INDUCTION2]]

; Float global (common linkage, zero-initialised) that supplies the
; induction step loaded by @fp_iv_loop1 and @fp_iv_loop3.
@fp_inc = common global float 0.000000e+00, align 4

;void fp_iv_loop1(float init, float * __restrict__ A, int N) {
;  float x = init;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x -= fp_inc;
;  }
;}

; Loop-vectorizer test input: a float induction with a loop-invariant,
; non-constant step.
;   %init - starting value of the induction
;   %A    - destination array (noalias); A[i] receives the value for iteration i
;   %N    - trip count; the loop is skipped entirely when %N <= 0
; The step is read from @fp_inc once, before the loop, and subtracted with
; 'fsub fast' on every iteration.
; Value names and instruction order are intentionally left untouched: the
; FileCheck patterns for this function match on %init and on the shape of the
; generated code.
define void @fp_iv_loop1(float %init, float* noalias nocapture %A, i32 %N) #1 {
entry:
  ; Guard: enter the loop only for a positive trip count.
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  ; The step is loaded outside the loop, so it is invariant inside it.
  %fpinc = load float, float* @fp_inc, align 4
  br label %for.body

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  ; %indvars.iv is the i64 array index; %x.05 is the float induction.
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.05, float* %arrayidx, align 4
  ; x -= fp_inc
  %add = fsub fast float %x.05, %fpinc
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test is done in i32 against %N, hence the truncation.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

;void fp_iv_loop2(float init, float * __restrict__ A, int N) {
;  float x = init;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x += 0.5;
;  }
;}

; VEC4_INTERL1-LABEL: @fp_iv_loop2(
; VEC4_INTERL1: vector.body
; VEC4_INTERL1: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
; VEC4_INTERL1: sitofp i64 %[[index]] to float
; VEC4_INTERL1: %[[VAR1:.*]] = fmul fast float {{.*}}, 5.000000e-01
; VEC4_INTERL1: %[[VAR2:.*]] = fadd fast float %[[VAR1]]
; VEC4_INTERL1: insertelement <4 x float> undef, float %[[VAR2]], i32 0
; VEC4_INTERL1: shufflevector <4 x float> {{.*}}, <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1: fadd fast <4 x float> {{.*}}, <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
; VEC4_INTERL1: store <4 x float>

; Loop-vectorizer test input: a float induction with a non-constant start
; (%init) and a constant step of 0.5 added with 'fadd fast'.
;   %init - starting value of the induction
;   %A    - destination array (noalias); A[i] receives the value for iteration i
;   %N    - trip count; the loop is skipped entirely when %N <= 0
; Value names and instruction order are intentionally left untouched because
; the FileCheck patterns depend on the shape of the generated code.
define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
entry:
  ; Guard: enter the loop only for a positive trip count.
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; %indvars.iv is the i64 array index; %x.06 is the float induction.
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %x.06 = phi float [ %conv1, %for.body ], [ %init, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.06, float* %arrayidx, align 4
  ; x += 0.5
  %conv1 = fadd fast float %x.06, 5.000000e-01
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test is done in i32 against %N, hence the truncation.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

;void fp_iv_loop3(float init, float * __restrict__ A, float * __restrict__ B, float * __restrict__ C, int N) {
;  int i = 0;
;  float x = init;
;  float y = 0.1;
;  for (; i < N; ++i) {
;    A[i] = x;
;    x += fp_inc;
;    y -= 0.5;
;    B[i] = x + y;
;    C[i] = y;
;  }
;}
; VEC4_INTERL1-LABEL: @fp_iv_loop3(
; VEC4_INTERL1: vector.body
; VEC4_INTERL1: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
; VEC4_INTERL1: sitofp i64 %[[index]] to float
; VEC4_INTERL1: %[[VAR1:.*]] = fmul fast float {{.*}}, -5.000000e-01
; VEC4_INTERL1: fadd fast float %[[VAR1]]
; VEC4_INTERL1: fadd fast <4 x float> {{.*}}, <float -5.000000e-01, float -1.000000e+00, float -1.500000e+00, float -2.000000e+00>
; VEC4_INTERL1: store <4 x float>

; Loop-vectorizer test input: two float inductions in the same loop.
;   x: starts at %init, step is the value of @fp_inc (loaded once before the
;      loop), updated with 'fadd fast'.
;   y: starts at the constant 0x3FB99999A0000000 (0.1 rounded to float, per
;      the C reference), step is the constant -0.5.
; Per iteration: A[i] = x (before the update), B[i] = updated x + updated y,
; C[i] = updated y. The loop is skipped entirely when %N <= 0.
; Value names and instruction order are intentionally left untouched because
; the FileCheck patterns depend on the shape of the generated code.
define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalias nocapture %B, float* noalias nocapture %C, i32 %N) #1 {
entry:
  ; Guard: enter the loop only for a positive trip count.
  %cmp9 = icmp sgt i32 %N, 0
  br i1 %cmp9, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  ; Step of the x induction; loaded outside the loop, so loop-invariant.
  %0 = load float, float* @fp_inc, align 4
  br label %for.body

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %y.012 = phi float [ 0x3FB99999A0000000, %for.body.lr.ph ], [ %conv1, %for.body ]
  %x.011 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.011, float* %arrayidx, align 4
  ; x += fp_inc
  %add = fadd fast float %x.011, %0
  ; y -= 0.5
  %conv1 = fadd fast float %y.012, -5.000000e-01
  ; B[i] = x + y, using the already updated values
  %add2 = fadd fast float %conv1, %add
  %arrayidx4 = getelementptr inbounds float, float* %B, i64 %indvars.iv
  store float %add2, float* %arrayidx4, align 4
  ; C[i] = y
  %arrayidx6 = getelementptr inbounds float, float* %C, i64 %indvars.iv
  store float %conv1, float* %arrayidx6, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test is done in i32 against %N, hence the truncation.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Start and step values are constants. There is no 'fmul' operation in this case.
;void fp_iv_loop4(float * __restrict__ A, int N) {
;  float x = 1.0;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x += 0.5;
;  }
;}

; VEC4_INTERL1-LABEL: @fp_iv_loop4(
; VEC4_INTERL1: vector.body
; VEC4_INTERL1-NOT: fmul fast <4 x float>
; VEC4_INTERL1: %[[induction:.*]] = fadd fast <4 x float> %{{.*}}, <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
; VEC4_INTERL1: store <4 x float> %[[induction]]

; Loop-vectorizer test input: a float induction whose start (1.0) and step
; (0.5) are both constants; the update uses 'fadd fast'.
;   %A - destination array (noalias); A[i] receives the value for iteration i
;   %N - trip count; the loop is skipped entirely when %N <= 0
; Value names and instruction order are intentionally left untouched because
; the FileCheck patterns depend on the shape of the generated code.
define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
entry:
  ; Guard: enter the loop only for a positive trip count.
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; %indvars.iv is the i64 array index; %x.06 is the float induction.
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.06, float* %arrayidx, align 4
  ; x += 0.5
  %conv1 = fadd fast float %x.06, 5.000000e-01
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test is done in i32 against %N, hence the truncation.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; VEC2_INTERL1_PRED_STORE-LABEL: @non_primary_iv_float_scalar(
; VEC2_INTERL1_PRED_STORE: vector.body:
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ], [ 0, %min.iters.checked ]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = sitofp i64 [[INDEX]] to float
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT3]], <2 x float> undef, <2 x i32> zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION5:%.*]] = fadd fast <2 x float> [[BROADCAST_SPLAT4]], <float 0.000000e+00, float 1.000000e+00>
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
; VEC2_INTERL1_PRED_STORE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fcmp fast oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VEC2_INTERL1_PRED_STORE: [[PRED_STORE_IF]]:
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[INDUCTION5]], i32 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP6]], float* [[TMP7]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VEC2_INTERL1_PRED_STORE: [[PRED_STORE_CONTINUE]]:
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]]
; VEC2_INTERL1_PRED_STORE: [[PRED_STORE_IF6]]:
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[INDUCTION5]], i32 1
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = or i64 [[INDEX]], 1
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* %A, i64 [[TMP10]]
; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP9]], float* [[TMP11]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: br label %[[PRED_STORE_CONTINUE7]]
; VEC2_INTERL1_PRED_STORE: [[PRED_STORE_CONTINUE7]]:
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; VEC2_INTERL1_PRED_STORE: br i1 {{.*}}, label %middle.block, label %vector.body

; Loop-vectorizer test input: a float induction (%j: start 0.0, step 1.0,
; 'fadd fast') that is not the loop's integer counter and whose only use is a
; conditionally executed store.
;   %A - array that is both read and conditionally overwritten
;   %N - the loop repeats while the incremented counter is < %N (signed);
;        there is no guard before the loop, so the body runs at least once
; For each i, A[i] is loaded and, if it compares equal to 0.0, replaced by the
; current value of %j.
; Value names and instruction order are intentionally left untouched because
; the FileCheck patterns match on %A and on the shape of the generated code.
define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
entry:
  br label %for.body

for.body:
  ; %i is the integer counter; %j is the float induction under test.
  %i = phi i64 [ %i.next, %for.inc ], [ 0, %entry ]
  %j = phi float [ %j.next, %for.inc ], [ 0.0, %entry ]
  %tmp0 = getelementptr inbounds float, float* %A, i64 %i
  %tmp1 = load float, float* %tmp0, align 4
  ; Only elements equal to 0.0 take the store path.
  %tmp2 = fcmp fast oeq float %tmp1, 0.0
  br i1 %tmp2, label %if.pred, label %for.inc

if.pred:
  ; Conditional store of the float induction back into A[i].
  store float %j, float* %tmp0, align 4
  br label %for.inc

for.inc:
  %i.next = add nuw nsw i64 %i, 1
  %j.next = fadd fast float %j, 1.0
  %cond = icmp slt i64 %i.next, %N
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}