; blob: 95c23ade5130950a547804cb837955412293c8d5 [file] [log] [blame]
; RUN: llc -O2 < %s | FileCheck %s
; Module-level target description: the datalayout string starts with "e"
; (little-endian) and the triple selects powerpc64le, so the expectations in
; this file are written against PowerPC assembly output.
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-grtev4-linux-gnu"

; Intended layout:
; The chain-based outlining produces the layout
; test1
; test2
; test3
; test4
; optional1
; optional2
; optional3
; optional4
; exit
; Tail duplication puts test n+1 at the end of optional n
; so optional1 includes a copy of test2 at the end, and branches
; to test3 (at the top) or falls through to optional 2.
; The CHECK statements check for the whole string of tests
; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.
;CHECK-LABEL: straight_test:
; test1 may have been merged with entry
;CHECK: mr [[TAGREG:[0-9]+]], 3
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: # %test2
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
;CHECK: blr
;CHECK-NEXT: .[[OPT1LABEL]]:
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-NEXT: .[[OPT2LABEL]]:
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
;CHECK-NEXT: .[[OPT3LABEL]]:
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
;CHECK-NEXT: beq 0, .[[EXITLABEL]]
;CHECK-NEXT: .[[OPT4LABEL]]:
;CHECK: b .[[EXITLABEL]]

; Straight-line chain of four bit tests on %tag (masks 1, 2, 4, 8). When a
; bit is set, the matching optionalN block makes four calls and then rejoins
; the next test. Every conditional branch uses !prof !1, whose first weight
; belongs to the "bit is clear" successor, i.e. the next test (or exit).
define void @straight_test(i32 %tag) {
entry:
  br label %test1
test1:
  ; Bit 0 clear -> test2, bit 0 set -> optional1.
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
optional1:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %test2
test2:
  ; Bit 1 clear -> test3, bit 1 set -> optional2.
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
optional2:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %test3
test3:
  ; Bit 2 clear -> test4, bit 2 set -> optional3.
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
optional3:
  call void @c()
  call void @c()
  call void @c()
  call void @c()
  br label %test4
test4:
  ; Bit 3 clear -> exit, bit 3 set -> optional4.
  %tagbit4 = and i32 %tag, 8
  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
  br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1
optional4:
  call void @d()
  call void @d()
  call void @d()
  call void @d()
  br label %exit
exit:
  ret void
}

; Intended layout:
; The chain-based outlining produces the layout
; entry
; --- Begin loop ---
; for.latch
; for.check
; test1
; test2
; test3
; test4
; optional1
; optional2
; optional3
; optional4
; --- End loop ---
; exit
; The CHECK statements check for the whole string of tests and exit block,
; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.
;CHECK-LABEL: loop_test:
;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4
;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch
;CHECK: addi
;CHECK: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]])
;CHECK: # %test1
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: # %test2
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}}
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]]
;CHECK: [[OPT1LABEL]]
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-NEXT: .[[OPT2LABEL]]
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
;CHECK-NEXT: .[[OPT3LABEL]]
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
;CHECK: [[OPT4LABEL]]:
;CHECK: b .[[LATCHLABEL]]
; Same chain of four bit tests as the straight-line case, but wrapped in a
; loop: for.check loads a tag and decides whether to run the tests or leave,
; and for.latch decrements the counter before branching back to for.check.
define void @loop_test(i32* %tags, i32 %count) {
entry:
  br label %for.check
for.check:
  %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
  %done.count = icmp ugt i32 %count.loop, 0
  ; NOTE(review): the address is indexed by %count, not %count.loop, so it is
  ; the same on every iteration. The expected `add` ahead of the for.latch
  ; label in the patterns above appears to rely on that; confirm before
  ; changing the index.
  %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count
  %tag = load i32, i32* %tag_ptr
  %done.tag = icmp eq i32 %tag, 0
  ; NOTE(review): despite its name, a true %done enters the loop body (test1)
  ; and a false %done leaves through exit.
  %done = and i1 %done.count, %done.tag
  br i1 %done, label %test1, label %exit, !prof !1
test1:
  ; Bit 0 clear -> test2, bit 0 set -> optional1.
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
optional1:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %test2
test2:
  ; Bit 1 clear -> test3, bit 1 set -> optional2.
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
optional2:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %test3
test3:
  ; Bit 2 clear -> test4, bit 2 set -> optional3.
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
optional3:
  call void @c()
  call void @c()
  call void @c()
  call void @c()
  br label %test4
test4:
  ; Bit 3 clear -> for.latch, bit 3 set -> optional4.
  %tagbit4 = and i32 %tag, 8
  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
  br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1
optional4:
  call void @d()
  call void @d()
  call void @d()
  call void @d()
  br label %for.latch
for.latch:
  ; Feeds the phi in for.check on the back edge.
  %count.sub = sub i32 %count.loop, 1
  br label %for.check
exit:
  ret void
}

; The block then2 is not unavoidable, meaning it does not dominate the exit.
; But since it can be tail-duplicated, it should be placed as a fallthrough from
; test2 and copied. The purpose here is to make sure that the tail-duplication
; code is independent of the outlining code, which works by choosing the
; "unavoidable" blocks.
; CHECK-LABEL: avoidable_test:
; CHECK: # %entry
; CHECK: andi.
; CHECK: # %test2
; Make sure then2 falls through from test2
; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}}
; CHECK: # %then2
; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
; CHECK: # %else1
; CHECK: bl a
; CHECK: bl a
; Make sure then2 was copied into else1
; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
; CHECK: # %end1
; CHECK: bl d
; CHECK: # %else2
; CHECK: bl c
; CHECK: # %end2
; then2 is reached from both else1 and test2, and there are two returning
; blocks (end2 and end1). All three conditional branches use !prof !1.
define void @avoidable_test(i32 %tag) {
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %else1, !prof !1 ; %test2 more likely
else1:
  ; Two calls, then an unconditional branch into then2.
  call void @a()
  call void @a()
  br label %then2
test2:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %then2, label %else2, !prof !1 ; %then2 more likely
then2:
  ; Tests mask 4; this is the block the expectations above look for twice.
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %end2, label %end1, !prof !1 ; %end2 more likely
else2:
  call void @c()
  br label %end2
end2:
  ret void
end1:
  call void @d()
  ret void
}

; CHECK-LABEL: trellis_test
; The number in the block labels is the expected block frequency given the
; probabilities annotated. There is a conflict in the b;c->d;e trellis that
; should be resolved as c->e;b->d.
; The d;e->f;g trellis should be resolved as e->g;d->f.
; The f;g->h;i trellis should be resolved as f->i;g->h.
; The h;i->j;ret trellis contains a triangle edge, and should be resolved as
; h->j->ret
; CHECK: # %entry
; CHECK: # %c10
; CHECK: # %e9
; CHECK: # %g10
; CHECK: # %h10
; CHECK: # %j8
; CHECK: # %ret
; CHECK: # %b6
; CHECK: # %d7
; CHECK: # %f6
; CHECK: # %i6
; Each non-trivial block makes two calls to the function named after its
; letter and then branches on a masked field of %tag. The trailing comment on
; each branch restates the weights of the !prof node it references.
define void @trellis_test(i32 %tag) {
entry:
  br label %a16
a16:
  call void @a()
  call void @a()
  %tagbits.a = and i32 %tag, 3
  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
  br i1 %tagbits.a.eq0, label %c10, label %b6, !prof !1 ; 10 to 6
c10:
  call void @c()
  call void @c()
  %tagbits.c = and i32 %tag, 12
  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
  ; Both of these edges should be hotter than the other incoming edge
  ; for e9 or d7
  br i1 %tagbits.c.eq0, label %e9, label %d7, !prof !3 ; 6 to 4
e9:
  call void @e()
  call void @e()
  %tagbits.e = and i32 %tag, 48
  %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
  br i1 %tagbits.e.eq0, label %g10, label %f6, !prof !4 ; 7 to 2
g10:
  call void @g()
  call void @g()
  %tagbits.g = and i32 %tag, 192
  %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
  br i1 %tagbits.g.eq0, label %i6, label %h10, !prof !5 ; 2 to 8
i6:
  call void @i()
  call void @i()
  %tagbits.i = and i32 %tag, 768
  %tagbits.i.eq0 = icmp eq i32 %tagbits.i, 0
  br i1 %tagbits.i.eq0, label %ret, label %j8, !prof !2 ; balanced (3 to 3)
b6:
  call void @b()
  call void @b()
  %tagbits.b = and i32 %tag, 12
  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
  br i1 %tagbits.b.eq1, label %e9, label %d7, !prof !2 ; balanced (3 to 3)
d7:
  call void @d()
  call void @d()
  %tagbits.d = and i32 %tag, 48
  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
  br i1 %tagbits.d.eq1, label %g10, label %f6, !prof !6 ; 3 to 4
f6:
  call void @f()
  call void @f()
  %tagbits.f = and i32 %tag, 192
  %tagbits.f.eq1 = icmp eq i32 %tagbits.f, 128
  br i1 %tagbits.f.eq1, label %i6, label %h10, !prof !7 ; 4 to 2
h10:
  call void @h()
  call void @h()
  %tagbits.h = and i32 %tag, 768
  %tagbits.h.eq1 = icmp eq i32 %tagbits.h, 512
  br i1 %tagbits.h.eq1, label %ret, label %j8, !prof !2 ; balanced (5 to 5)
j8:
  call void @j()
  call void @j()
  br label %ret
ret:
  ret void
}

; Verify that we still consider tail-duplication opportunities if we find a
; triangle trellis. Here D->F->G is the triangle, and D;E are both predecessors
; of both F and G. The basic trellis algorithm picks the F->G edge, but after
; checking, it's profitable to duplicate G into F. The weights here are not
; really important. They are there to help make the test stable.
; CHECK-LABEL: trellis_then_dup_test
; CHECK: # %entry
; CHECK: # %b
; CHECK: # %d
; CHECK: # %g
; CHECK: # %ret1
; CHECK: # %c
; CHECK: # %e
; CHECK: # %f
; CHECK: # %ret2
; CHECK: # %ret
; Blocks a, b, c, d, e and f each make two calls to the same-named function;
; g only tests and branches. d and e both branch to g or f, and f branches
; unconditionally to g, which forms the triangle described above.
define void @trellis_then_dup_test(i32 %tag) {
entry:
  br label %a
a:
  call void @a()
  call void @a()
  %tagbits.a = and i32 %tag, 3
  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
  br i1 %tagbits.a.eq0, label %b, label %c, !prof !1 ; 5 to 3
b:
  call void @b()
  call void @b()
  %tagbits.b = and i32 %tag, 12
  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
  br i1 %tagbits.b.eq1, label %d, label %e, !prof !1 ; 5 to 3
d:
  call void @d()
  call void @d()
  %tagbits.d = and i32 %tag, 48
  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
  br i1 %tagbits.d.eq1, label %g, label %f, !prof !1 ; 5 to 3
f:
  call void @f()
  call void @f()
  br label %g
g:
  ; No calls here: just the mask-192 test and a balanced two-way branch.
  %tagbits.g = and i32 %tag, 192
  %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
  br i1 %tagbits.g.eq0, label %ret1, label %ret2, !prof !2 ; balanced
c:
  call void @c()
  call void @c()
  %tagbits.c = and i32 %tag, 12
  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
  br i1 %tagbits.c.eq0, label %d, label %e, !prof !1 ; 5 to 3
e:
  call void @e()
  call void @e()
  %tagbits.e = and i32 %tag, 48
  %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
  br i1 %tagbits.e.eq0, label %g, label %f, !prof !1 ; 5 to 3
ret1:
  call void @a()
  br label %ret
ret2:
  call void @b()
  br label %ret
ret:
  ret void
}

; External functions called by the test bodies. They are declared only, so
; every call to them remains a real call in the generated code.
declare void @a()
declare void @b()
declare void @c()
declare void @d()
declare void @e()
declare void @f()
declare void @g()
declare void @h()
declare void @i()
declare void @j()

; Branch-weight nodes referenced through !prof by the conditional branches in
; this file. The first weight applies to the first (true) successor of the
; branch, the second weight to the second (false) successor.
!1 = !{!"branch_weights", i32 5, i32 3}
!2 = !{!"branch_weights", i32 50, i32 50}
!3 = !{!"branch_weights", i32 6, i32 4}
!4 = !{!"branch_weights", i32 7, i32 2}
!5 = !{!"branch_weights", i32 2, i32 8}
!6 = !{!"branch_weights", i32 3, i32 4}
!7 = !{!"branch_weights", i32 4, i32 2}