blob: 72b698613c6e9d59b4369a676d452db045e9721e [file] [log] [blame]
Warren Ristowd27b0c62019-05-08 18:50:07 +00001; RUN: opt -loop-vectorize -force-vector-width=2 -S < %s 2>&1 | FileCheck %s
2; RUN: opt -indvars -S < %s 2>&1 | FileCheck %s -check-prefix=INDVARCHECK
3
4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-unknown-linux-gnu"
6
7; Produced from test-case:
8;
9; void testCountIncrLoop(unsigned char *ptr, int lim, int count, int val)
10; {
11; int inx = 0;
12; for (int outer_i = 0; outer_i < lim; ++outer_i) {
13; if (count > 0) { // At runtime, 'count' is 0, so the following code is dead.
14; int result = val;
15; int tmp = count;
16;
17; while (tmp < 8) {
18; result += val >> tmp;
19; tmp += count;
20; }
21;
22; ptr[inx++] = (unsigned char) result;
23; }
24; }
25; }
26;
27; No explicit division appears in the input, but a division is generated during
28; vectorization, and that division is a division-by-0 when the input 'count'
29; is 0, so it cannot be hoisted above the guard of 'count > 0'.
30
31; Verify that a 'udiv' does not appear in the 'loop1.preheader' block, and that
32; a 'udiv' has been inserted at the top of the 'while.body.preheader' block.
33define void @testCountIncrLoop(i8* %ptr, i32 %lim, i32 %count, i32 %val) {
34; CHECK-LABEL: @testCountIncrLoop(
35; CHECK-NEXT: entry:
36; CHECK: loop1.preheader:
37; CHECK-NOT: udiv
38; CHECK: loop1.body:
39; CHECK: while.cond.preheader:
40; CHECK: while.body.preheader:
41; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0:%.*]], [[COUNT:%.*]]
42; CHECK: vector.ph:
43; CHECK: exit:
44; CHECK: ret void
45;
46entry:
47 %cmp1 = icmp sgt i32 %lim, 0
48 br i1 %cmp1, label %loop1.preheader, label %exit
49
50loop1.preheader: ; preds = %entry
51 %cmp2 = icmp sgt i32 %count, 0
52 %cmp4 = icmp slt i32 %count, 8
53 br label %loop1.body
54
55loop1.body: ; preds = %loop1.inc, %loop1.preheader
56 %outer_i = phi i32 [ 0, %loop1.preheader ], [ %outer_i.1, %loop1.inc ]
57 %inx.1 = phi i32 [ 0, %loop1.preheader ], [ %inx.2, %loop1.inc ]
58 br i1 %cmp2, label %while.cond.preheader, label %loop1.inc
59
60while.cond.preheader: ; preds = %loop1.body
61 br i1 %cmp4, label %while.body, label %while.end
62
63while.body: ; preds = %while.body, %while.cond.preheader
64 %tmp = phi i32 [ %add3, %while.body ], [ %count, %while.cond.preheader ]
65 %result.1 = phi i32 [ %add, %while.body ], [ %val, %while.cond.preheader ]
66 %shr = ashr i32 %val, %tmp
67 %add = add nsw i32 %shr, %result.1
68 %add3 = add nsw i32 %tmp, %count
69 %cmp3 = icmp slt i32 %add3, 8
70 br i1 %cmp3, label %while.body, label %while.end
71
72while.end: ; preds = %while.body, %while.cond.preheader
73 %result.0.lcssa = phi i32 [ %val, %while.cond.preheader ], [ %add, %while.body ]
74 %conv = trunc i32 %result.0.lcssa to i8
75 %inc = add nsw i32 %inx.1, 1
76 %idxprom = sext i32 %inx.1 to i64
77 %arrayidx = getelementptr inbounds i8, i8* %ptr, i64 %idxprom
78 store i8 %conv, i8* %arrayidx, align 1
79 br label %loop1.inc
80
81loop1.inc: ; preds = %while.end, %loop1.body
82 %inx.2 = phi i32 [ %inc, %while.end ], [ %inx.1, %loop1.body ]
83 %outer_i.1 = add nuw nsw i32 %outer_i, 1
84 %exitcond = icmp eq i32 %outer_i.1, %lim
85 br i1 %exitcond, label %exit, label %loop1.body
86
87exit: ; preds = %loop1.inc, %entry
88 ret void
89}
90
91; These next tests are all based on the following source code, with slight
92; variations on the calculation of 'incr' (all of which are loop-invariant
93; divisions, but only some of which can be safely hoisted):
94;
95; uint32_t foo(uint32_t *ptr, uint32_t start1, uint32_t start2) {
96; uint32_t counter1, counter2;
97; uint32_t val = start1;
98; for (counter1 = 1; counter1 < 100; ++counter1) {
99; uint32_t index = 0;
100; val += ptr[index];
101; for (counter2 = start2; counter2 < 10; ++counter2) {
102; // Division is loop invariant, and denominator is guaranteed non-zero:
103; // Safe to hoist it out of the inner loop.
104; uint32_t incr = 16 / counter1;
105; index += incr;
106; val += ptr[index];
107; }
108; }
109; return val;
110; }
111
112; This version is as written above, where 'incr' is '16/counter1', and it is
113; guaranted that 'counter1' is always non-zero. So it is safe to hoist the
114; division from the inner loop to the preheader.
115;
116; Verify that the 'udiv' is hoisted to the preheader, and is not in the loop body.
117define i32 @NonZeroDivHoist(i32* nocapture readonly %ptr, i32 %start1, i32 %start2) {
118; INDVARCHECK-LABEL: @NonZeroDivHoist(
119; INDVARCHECK-NEXT: entry:
120; INDVARCHECK: for.body3.lr.ph:
121; INDVARCHECK-NEXT: [[TMP0:%.*]] = udiv i64 16, [[INDVARS_IV:%.*]]
122; INDVARCHECK-NEXT: br label [[FOR_BODY3:%.*]]
123; INDVARCHECK: for.body3:
124; INDVARCHECK-NOT: udiv
125; INDVARCHECK: for.end10:
126;
127entry:
128 br label %for.cond
129
130for.cond: ; preds = %for.end, %entry
131 %val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ]
132 %counter1.0 = phi i32 [ 1, %entry ], [ %inc9, %for.end ]
133 %cmp = icmp ult i32 %counter1.0, 100
134 br i1 %cmp, label %for.body, label %for.end10
135
136for.body: ; preds = %for.cond
137 %tmp = load i32, i32* %ptr, align 4
138 %add = add i32 %tmp, %val.0
139 %cmp224 = icmp ult i32 %start2, 10
140 br i1 %cmp224, label %for.body3.lr.ph, label %for.end
141
142for.body3.lr.ph: ; preds = %for.body
143 br label %for.body3
144
145for.body3: ; preds = %for.body3, %for.body3.lr.ph
146 %index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ]
147 %val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ]
148 %counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ]
149 %div = udiv i32 16, %counter1.0
150 %add4 = add i32 %div, %index.027
151 %idxprom5 = zext i32 %add4 to i64
152 %arrayidx6 = getelementptr inbounds i32, i32* %ptr, i64 %idxprom5
153 %tmp1 = load i32, i32* %arrayidx6, align 4
154 %add7 = add i32 %tmp1, %val.126
155 %inc = add i32 %counter2.025, 1
156 %cmp2 = icmp ult i32 %inc, 10
157 br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge
158
159for.cond1.for.end_crit_edge: ; preds = %for.body3
160 %split = phi i32 [ %add7, %for.body3 ]
161 br label %for.end
162
163for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body
164 %val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ]
165 %inc9 = add i32 %counter1.0, 1
166 br label %for.cond
167
168for.end10: ; preds = %for.cond
169 %val.0.lcssa = phi i32 [ %val.0, %for.cond ]
170 ret i32 %val.0.lcssa
171}
172
173; This version is identical to the above 'NonZeroDivHoist' case, except the
174; outer ('counter1') loop starts at the unknown value of 'start1' rather than 1,
175; and so it is illegal to hoist the division because if 'start1' is 0, hoisting
176; it would incorrectly cause a divide-by-zero trap.
177;
178; Verify that the 'udiv' is not hoisted to the preheader, and it remains in the
179; loop body.
180define i32 @ZeroDivNoHoist(i32* nocapture readonly %ptr, i32 %start1, i32 %start2) {
181; INDVARCHECK-LABEL: @ZeroDivNoHoist(
182; INDVARCHECK-NEXT: entry:
183; INDVARCHECK-NOT: udiv
184; INDVARCHECK: for.body3:
185; INDVARCHECK: [[TMP1:%.*]] = udiv i64 16, [[INDVARS_IV:%.*]]
186; INDVARCHECK: for.cond1.for.end_crit_edge:
187;
188entry:
189 br label %for.cond
190
191for.cond: ; preds = %for.end, %entry
192 %val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ]
193 %counter1.0 = phi i32 [ %start1, %entry ], [ %inc9, %for.end ]
194 %cmp = icmp ult i32 %counter1.0, 100
195 br i1 %cmp, label %for.body, label %for.end10
196
197for.body: ; preds = %for.cond
198 %tmp = load i32, i32* %ptr, align 4
199 %add = add i32 %tmp, %val.0
200 %cmp224 = icmp ult i32 %start2, 10
201 br i1 %cmp224, label %for.body3.lr.ph, label %for.end
202
203for.body3.lr.ph: ; preds = %for.body
204 br label %for.body3
205
206for.body3: ; preds = %for.body3, %for.body3.lr.ph
207 %index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ]
208 %val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ]
209 %counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ]
210 %div = udiv i32 16, %counter1.0
211 %add4 = add i32 %div, %index.027
212 %idxprom5 = zext i32 %add4 to i64
213 %arrayidx6 = getelementptr inbounds i32, i32* %ptr, i64 %idxprom5
214 %tmp1 = load i32, i32* %arrayidx6, align 4
215 %add7 = add i32 %tmp1, %val.126
216 %inc = add i32 %counter2.025, 1
217 %cmp2 = icmp ult i32 %inc, 10
218 br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge
219
220for.cond1.for.end_crit_edge: ; preds = %for.body3
221 %split = phi i32 [ %add7, %for.body3 ]
222 br label %for.end
223
224for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body
225 %val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ]
226 %inc9 = add i32 %counter1.0, 1
227 br label %for.cond
228
229for.end10: ; preds = %for.cond
230 %val.0.lcssa = phi i32 [ %val.0, %for.cond ]
231 ret i32 %val.0.lcssa
232}
233
234; This version is has a clearly safe division by a non-zero constant (16). The
235; division is transformed to a logical-shift-right of 4, and it is safely
236; hoisted.
237;
238; Verify that the division-operation is hoisted, and that it appears as a
239; right-shift ('lshr') rather than an explicit division.
240define i32 @DivBy16Hoist(i32* nocapture readonly %ptr, i32 %start1, i32 %start2) {
241; INDVARCHECK-LABEL: @DivBy16Hoist(
242; INDVARCHECK-NEXT: entry:
243; INDVARCHECK: for.cond:
244; INDVARCHECK: [[TMP1:%.*]] = lshr i64 [[INDVARS_IV:%.*]], 4
245; INDVARCHECK: for.body:
246; INDVARCHECK-NOT: lshr
247; INDVARCHECK-NOT: udiv
248; INDVARCHECK: for.end10:
249;
250entry:
251 br label %for.cond
252
253for.cond: ; preds = %for.end, %entry
254 %val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ]
255 %counter1.0 = phi i32 [ %start1, %entry ], [ %inc9, %for.end ]
256 %cmp = icmp ult i32 %counter1.0, 100
257 br i1 %cmp, label %for.body, label %for.end10
258
259for.body: ; preds = %for.cond
260 %tmp = load i32, i32* %ptr, align 4
261 %add = add i32 %tmp, %val.0
262 %cmp224 = icmp ult i32 %start2, 10
263 br i1 %cmp224, label %for.body3.lr.ph, label %for.end
264
265for.body3.lr.ph: ; preds = %for.body
266 br label %for.body3
267
268for.body3: ; preds = %for.body3, %for.body3.lr.ph
269 %index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ]
270 %val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ]
271 %counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ]
272 %div = udiv i32 %counter1.0, 16
273 %add4 = add i32 %div, %index.027
274 %idxprom5 = zext i32 %add4 to i64
275 %arrayidx6 = getelementptr inbounds i32, i32* %ptr, i64 %idxprom5
276 %tmp1 = load i32, i32* %arrayidx6, align 4
277 %add7 = add i32 %tmp1, %val.126
278 %inc = add i32 %counter2.025, 1
279 %cmp2 = icmp ult i32 %inc, 10
280 br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge
281
282for.cond1.for.end_crit_edge: ; preds = %for.body3
283 %split = phi i32 [ %add7, %for.body3 ]
284 br label %for.end
285
286for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body
287 %val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ]
288 %inc9 = add i32 %counter1.0, 1
289 br label %for.cond
290
291for.end10: ; preds = %for.cond
292 %val.0.lcssa = phi i32 [ %val.0, %for.cond ]
293 ret i32 %val.0.lcssa
294}
295
296; This version is has a clearly safe division by a non-zero constant (17). The
297; division is safely hoisted, as it was in the 'DivBy16Hoist' verison, but here
298; it remains a division, rather than being transformed to a right-shift.
299;
300; Verify that the division-operation is hoisted.
301define i32 @DivBy17Hoist(i32* nocapture readonly %ptr, i32 %start1, i32 %start2) {
302; INDVARCHECK-LABEL: @DivBy17Hoist(
303; INDVARCHECK-NEXT: entry:
304; INDVARCHECK: for.cond:
305; INDVARCHECK: [[TMP1:%.*]] = udiv i64 [[INDVARS_IV:%.*]], 17
306; INDVARCHECK: for.body:
307; INDVARCHECK-NOT: udiv
308; INDVARCHECK: for.end10:
309;
310entry:
311 br label %for.cond
312
313for.cond: ; preds = %for.end, %entry
314 %val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ]
315 %counter1.0 = phi i32 [ %start1, %entry ], [ %inc9, %for.end ]
316 %cmp = icmp ult i32 %counter1.0, 100
317 br i1 %cmp, label %for.body, label %for.end10
318
319for.body: ; preds = %for.cond
320 %tmp = load i32, i32* %ptr, align 4
321 %add = add i32 %tmp, %val.0
322 %cmp224 = icmp ult i32 %start2, 10
323 br i1 %cmp224, label %for.body3.lr.ph, label %for.end
324
325for.body3.lr.ph: ; preds = %for.body
326 br label %for.body3
327
328for.body3: ; preds = %for.body3, %for.body3.lr.ph
329 %index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ]
330 %val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ]
331 %counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ]
332 %div = udiv i32 %counter1.0, 17
333 %add4 = add i32 %div, %index.027
334 %idxprom5 = zext i32 %add4 to i64
335 %arrayidx6 = getelementptr inbounds i32, i32* %ptr, i64 %idxprom5
336 %tmp1 = load i32, i32* %arrayidx6, align 4
337 %add7 = add i32 %tmp1, %val.126
338 %inc = add i32 %counter2.025, 1
339 %cmp2 = icmp ult i32 %inc, 10
340 br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge
341
342for.cond1.for.end_crit_edge: ; preds = %for.body3
343 %split = phi i32 [ %add7, %for.body3 ]
344 br label %for.end
345
346for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body
347 %val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ]
348 %inc9 = add i32 %counter1.0, 1
349 br label %for.cond
350
351for.end10: ; preds = %for.cond
352 %val.0.lcssa = phi i32 [ %val.0, %for.cond ]
353 ret i32 %val.0.lcssa
354}