blob: c9019983e933c360ffbbdeb34457ef06db8915be [file] [log] [blame]
Nemanja Ivanovic6f590bf2017-12-13 14:47:35 +00001; RUN: llc -O2 -ppc-reduce-cr-logicals -o - %s | FileCheck \
2; RUN: --check-prefix=CHECK --check-prefix=CHECK-O2 %s
3; RUN: llc -O3 -ppc-reduce-cr-logicals -o - %s | FileCheck \
4; RUN: --check-prefix=CHECK --check-prefix=CHECK-O3 %s
; Module-wide target settings shared by every function in this file. The
; powerpc64le triple is why the expected-output patterns use PowerPC mnemonics
; such as andi., rlwinm. and blr.
Kyle Butt0846e562016-10-11 20:36:43 +00005target datalayout = "e-m:e-i64:64-n32:64"
6target triple = "powerpc64le-grtev4-linux-gnu"
7
8; Intended layout:
Kyle Butt7fbec9b2017-02-15 19:49:14 +00009; The chain-based outlining produces the layout
Kyle Butt0846e562016-10-11 20:36:43 +000010; test1
11; test2
12; test3
13; test4
Kyle Butt0846e562016-10-11 20:36:43 +000014; optional1
15; optional2
16; optional3
17; optional4
Kyle Butt7fbec9b2017-02-15 19:49:14 +000018; exit
Kyle Butt0846e562016-10-11 20:36:43 +000019; Tail duplication puts test n+1 at the end of optional n
20; so optional1 includes a copy of test2 at the end, and branches
21; to test3 (at the top) or falls through to optional 2.
Kyle Butt7fbec9b2017-02-15 19:49:14 +000022; The CHECK statements check for the whole string of tests
Kyle Butt0846e562016-10-11 20:36:43 +000023; and then check that the correct test has been duplicated into the end of
24; the optional blocks and that the optional blocks are in the correct order.
Kyle Butt7fbec9b2017-02-15 19:49:14 +000025;CHECK-LABEL: straight_test:
Kyle Butt0846e562016-10-11 20:36:43 +000026; test1 may have been merged with entry
27;CHECK: mr [[TAGREG:[0-9]+]], 3
Quentin Colombet48abac82018-02-17 03:05:33 +000028;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
Kyle Butt7fbec9b2017-02-15 19:49:14 +000029;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
30;CHECK-NEXT: # %test2
Kyle Butt0846e562016-10-11 20:36:43 +000031;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
Kyle Butt7fbec9b2017-02-15 19:49:14 +000032;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
33;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
Kyle Butt0846e562016-10-11 20:36:43 +000034;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
Kyle Butt7fbec9b2017-02-15 19:49:14 +000035;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
36;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4
Kyle Butt0846e562016-10-11 20:36:43 +000037;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
Kyle Butt7fbec9b2017-02-15 19:49:14 +000038;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]]
39;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
Kyle Butt0846e562016-10-11 20:36:43 +000040;CHECK: blr
Kyle Butt7fbec9b2017-02-15 19:49:14 +000041;CHECK-NEXT: .[[OPT1LABEL]]:
Kyle Butt0846e562016-10-11 20:36:43 +000042;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
Kyle Butt7fbec9b2017-02-15 19:49:14 +000043;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
44;CHECK-NEXT: .[[OPT2LABEL]]:
Kyle Butt0846e562016-10-11 20:36:43 +000045;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
Kyle Butt7fbec9b2017-02-15 19:49:14 +000046;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
47;CHECK-NEXT: .[[OPT3LABEL]]:
Kyle Butt0846e562016-10-11 20:36:43 +000048;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
Kyle Butt7fbec9b2017-02-15 19:49:14 +000049;CHECK-NEXT: beq 0, .[[EXITLABEL]]
50;CHECK-NEXT: .[[OPT4LABEL]]:
51;CHECK: b .[[EXITLABEL]]
Kyle Butt0846e562016-10-11 20:36:43 +000052
; straight_test: a straight-line chain of four single-bit tests on %tag
; (masks 1, 2, 4, 8). When a bit is clear, control goes directly to the next
; test; when it is set, the matching optionalN block makes four calls and then
; rejoins the next test. Every conditional branch carries !prof !1, whose first
; (larger) weight belongs to the first listed successor, i.e. the "bit clear"
; edge that skips the optional block.
; The block names (test2..test4, optional1..optional4, exit) are referenced by
; the expected-output patterns for this function, so they must not be renamed.
Kyle Butt7fbec9b2017-02-15 19:49:14 +000053define void @straight_test(i32 %tag) {
Kyle Butt0846e562016-10-11 20:36:43 +000054entry:
55 br label %test1
; Bit 0 (mask 1).
56test1:
57 %tagbit1 = and i32 %tag, 1
58 %tagbit1eq0 = icmp eq i32 %tagbit1, 0
Kyle Butt7fbec9b2017-02-15 19:49:14 +000059 br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
Kyle Butt0846e562016-10-11 20:36:43 +000060optional1:
61 call void @a()
62 call void @a()
63 call void @a()
64 call void @a()
65 br label %test2
; Bit 1 (mask 2).
66test2:
67 %tagbit2 = and i32 %tag, 2
68 %tagbit2eq0 = icmp eq i32 %tagbit2, 0
Kyle Butt7fbec9b2017-02-15 19:49:14 +000069 br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
Kyle Butt0846e562016-10-11 20:36:43 +000070optional2:
71 call void @b()
72 call void @b()
73 call void @b()
74 call void @b()
75 br label %test3
; Bit 2 (mask 4).
76test3:
77 %tagbit3 = and i32 %tag, 4
78 %tagbit3eq0 = icmp eq i32 %tagbit3, 0
Kyle Butt7fbec9b2017-02-15 19:49:14 +000079 br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
Kyle Butt0846e562016-10-11 20:36:43 +000080optional3:
81 call void @c()
82 call void @c()
83 call void @c()
84 call void @c()
85 br label %test4
; Bit 3 (mask 8); the last test branches to %exit instead of another test.
86test4:
87 %tagbit4 = and i32 %tag, 8
88 %tagbit4eq0 = icmp eq i32 %tagbit4, 0
Kyle Butt7fbec9b2017-02-15 19:49:14 +000089 br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1
Kyle Butt0846e562016-10-11 20:36:43 +000090optional4:
91 call void @d()
92 call void @d()
93 call void @d()
94 call void @d()
95 br label %exit
96exit:
97 ret void
98}
99
Kyle Butt7fbec9b2017-02-15 19:49:14 +0000100; Intended layout:
Kyle Butt1fa60302017-03-03 01:00:22 +0000101; The chain-of-triangles based duplicating produces the layout
102; test1
103; test2
104; test3
Kyle Butt1fa60302017-03-03 01:00:22 +0000105; optional1
106; optional2
107; optional3
Kyle Butt1fa60302017-03-03 01:00:22 +0000108; exit
109; even for 50/50 branches.
110; Tail duplication puts test n+1 at the end of optional n
111; so optional1 includes a copy of test2 at the end, and branches
112; to test3 (at the top) or falls through to optional 2.
113; The CHECK statements check for the whole string of tests
114; and then check that the correct test has been duplicated into the end of
115; the optional blocks and that the optional blocks are in the correct order.
116;CHECK-LABEL: straight_test_50:
117; test1 may have been merged with entry
118;CHECK: mr [[TAGREG:[0-9]+]], 3
119;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
120;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
121;CHECK-NEXT: # %test2
122;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
123;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
124;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
125;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
126;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
Kyle Butt1fa60302017-03-03 01:00:22 +0000127;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
128;CHECK: blr
129;CHECK-NEXT: .[[OPT1LABEL]]:
130;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
131;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
132;CHECK-NEXT: .[[OPT2LABEL]]:
133;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
Kyle Butt1fa60302017-03-03 01:00:22 +0000134;CHECK-NEXT: beq 0, .[[EXITLABEL]]
Kyle Buttd609d6e2017-03-16 21:33:29 +0000135;CHECK-NEXT: .[[OPT3LABEL]]:
Kyle Butt1fa60302017-03-03 01:00:22 +0000136;CHECK: b .[[EXITLABEL]]
137
; straight_test_50: three single-bit tests on %tag (masks 1, 2, 4), each
; guarding an optional block that makes exactly one call before rejoining the
; chain. The branches in test1 and test2 use !prof !2 (equal weights); the
; branch in test3 uses !prof !1 (unequal weights), so only the first two
; branches are actually 50/50.
; NOTE(review): the !prof !1 on test3 looks deliberate (that line has its own
; later revision in the blame data) - confirm before "normalizing" it to !2.
138define void @straight_test_50(i32 %tag) {
139entry:
140 br label %test1
141test1:
142 %tagbit1 = and i32 %tag, 1
143 %tagbit1eq0 = icmp eq i32 %tagbit1, 0
144 br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
145optional1:
146 call void @a()
147 br label %test2
148test2:
149 %tagbit2 = and i32 %tag, 2
150 %tagbit2eq0 = icmp eq i32 %tagbit2, 0
151 br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
152optional2:
153 call void @b()
154 br label %test3
; Last test: exits directly when the bit is clear, and uses !1 rather than !2.
155test3:
156 %tagbit3 = and i32 %tag, 4
157 %tagbit3eq0 = icmp eq i32 %tagbit3, 0
Kyle Buttd609d6e2017-03-16 21:33:29 +0000158 br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
Kyle Butt1fa60302017-03-03 01:00:22 +0000159optional3:
160 call void @c()
Kyle Butt1fa60302017-03-03 01:00:22 +0000161 br label %exit
162exit:
163 ret void
164}
165
166; Intended layout:
Kyle Butt7d531da2017-05-15 17:30:47 +0000167; The chain-of-triangles based duplicating produces the layout when 3
168; instructions are allowed for tail-duplication.
169; test1
170; test2
171; test3
172; optional1
173; optional2
174; optional3
175; exit
176;
177; Otherwise it produces the layout:
178; test1
179; optional1
180; test2
181; optional2
182; test3
183; optional3
184; exit
185
186;CHECK-LABEL: straight_test_3_instr_test:
187; test1 may have been merged with entry
188;CHECK: mr [[TAGREG:[0-9]+]], 3
189;CHECK: clrlwi {{[0-9]+}}, [[TAGREG]], 30
190;CHECK-NEXT: cmplwi {{[0-9]+}}, 2
191
192;CHECK-O3-NEXT: bne 0, .[[OPT1LABEL:[_0-9A-Za-z]+]]
193;CHECK-O3-NEXT: # %test2
194;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
195;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
196;CHECK-O3-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
197;CHECK-O3-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
198;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
199;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
200;CHECK-O3-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
201;CHECK-O3-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
202;CHECK-O3: blr
203;CHECK-O3-NEXT: .[[OPT1LABEL]]:
204;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
205;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
206;CHECK-O3-NEXT: beq 0, .[[TEST3LABEL]]
207;CHECK-O3-NEXT: .[[OPT2LABEL]]:
208;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
209;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
210;CHECK-O3-NEXT: beq 0, .[[EXITLABEL]]
211;CHECK-O3-NEXT: .[[OPT3LABEL]]:
212;CHECK-O3: b .[[EXITLABEL]]
213
214;CHECK-O2-NEXT: beq 0, .[[TEST2LABEL:[_0-9A-Za-z]+]]
215;CHECK-O2-NEXT: # %optional1
216;CHECK-O2: .[[TEST2LABEL]]: # %test2
217;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
218;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 8
219;CHECK-O2-NEXT: beq 0, .[[TEST3LABEL:[_0-9A-Za-z]+]]
220;CHECK-O2-NEXT: # %optional2
221;CHECK-O2: .[[TEST3LABEL]]: # %test3
222;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
223;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 32
224;CHECK-O2-NEXT: beq 0, .[[EXITLABEL:[_0-9A-Za-z]+]]
225;CHECK-O2-NEXT: # %optional3
226;CHECK-O2: .[[EXITLABEL]]: # %exit
227;CHECK-O2: blr
228
229
; straight_test_3_instr_test: same three-test shape as straight_test_50, but
; each test masks a two-bit field of %tag (masks 3, 12, 48) and compares it for
; equality with a nonzero constant (2, 8, 32) instead of zero.
; The value names still end in "eq0" although the comparison is against a
; nonzero constant. As in straight_test_50, test1 and test2 use !prof !2 and
; test3 uses !prof !1.
230define void @straight_test_3_instr_test(i32 %tag) {
231entry:
232 br label %test1
; Field in bits 0-1, expected value 2.
233test1:
234 %tagbit1 = and i32 %tag, 3
235 %tagbit1eq0 = icmp eq i32 %tagbit1, 2
236 br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
237optional1:
238 call void @a()
239 br label %test2
; Field in bits 2-3, expected value 8.
240test2:
241 %tagbit2 = and i32 %tag, 12
242 %tagbit2eq0 = icmp eq i32 %tagbit2, 8
243 br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
244optional2:
245 call void @b()
246 br label %test3
; Field in bits 4-5, expected value 32.
247test3:
248 %tagbit3 = and i32 %tag, 48
249 %tagbit3eq0 = icmp eq i32 %tagbit3, 32
250 br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
251optional3:
252 call void @c()
253 br label %exit
254exit:
255 ret void
256}
257
258; Intended layout:
Kyle Butt7fbec9b2017-02-15 19:49:14 +0000259; The chain-based outlining produces the layout
260; entry
261; --- Begin loop ---
262; for.latch
263; for.check
264; test1
265; test2
266; test3
267; test4
268; optional1
269; optional2
270; optional3
271; optional4
272; --- End loop ---
273; exit
274; The CHECK statements check for the whole string of tests and exit block,
275; and then check that the correct test has been duplicated into the end of
276; the optional blocks and that the optional blocks are in the correct order.
277;CHECK-LABEL: loop_test:
278;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4
279;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch
280;CHECK: addi
Nemanja Ivanovic6f590bf2017-12-13 14:47:35 +0000281;CHECK-O2: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
Kyle Butt7fbec9b2017-02-15 19:49:14 +0000282;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]])
Nemanja Ivanovic6f590bf2017-12-13 14:47:35 +0000283;CHECK-O3: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000284;CHECK: # %bb.{{[0-9]+}}: # %test1
Kyle Butt7fbec9b2017-02-15 19:49:14 +0000285;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
286;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]]
287;CHECK-NEXT: # %test2
288;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
289;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]]
290;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
291;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
292;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
293;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}}
294;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
295;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
296;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]]
297;CHECK: [[OPT1LABEL]]
298;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
299;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
300;CHECK-NEXT: .[[OPT2LABEL]]
301;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
302;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
303;CHECK-NEXT: .[[OPT3LABEL]]
304;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
305;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
306;CHECK: [[OPT4LABEL]]:
307;CHECK: b .[[LATCHLABEL]]
; loop_test: the four-test chain of straight_test placed inside a loop.
; for.check is the loop header: %count.loop is a phi of the incoming %count and
; the decremented value from for.latch. The body (test1..optional4) is entered
; when %done is true, i.e. when %count.loop is unsigned-greater-than 0 AND the
; loaded %tag equals 0; otherwise the function branches to %exit. Despite its
; name, a true %done therefore continues the loop rather than leaving it.
; NOTE(review): the address of %tag is computed from the function argument
; %count, not from the phi %count.loop, so the same element is loaded on every
; iteration. This looks intentional for the test (the expected output has the
; address add before the loop labels) - confirm before changing.
; NOTE(review): because the body is only reached when %tag == 0, every bit
; test below is zero on that path; the test presumably relies on the bit tests
; still being emitted - confirm.
308define void @loop_test(i32* %tags, i32 %count) {
309entry:
310 br label %for.check
311for.check:
312 %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
313 %done.count = icmp ugt i32 %count.loop, 0
314 %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count
315 %tag = load i32, i32* %tag_ptr
316 %done.tag = icmp eq i32 %tag, 0
317 %done = and i1 %done.count, %done.tag
318 br i1 %done, label %test1, label %exit, !prof !1
; Bit tests with masks 1, 2, 4, 8; each optional block makes four calls.
319test1:
320 %tagbit1 = and i32 %tag, 1
321 %tagbit1eq0 = icmp eq i32 %tagbit1, 0
322 br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
323optional1:
324 call void @a()
325 call void @a()
326 call void @a()
327 call void @a()
328 br label %test2
329test2:
330 %tagbit2 = and i32 %tag, 2
331 %tagbit2eq0 = icmp eq i32 %tagbit2, 0
332 br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
333optional2:
334 call void @b()
335 call void @b()
336 call void @b()
337 call void @b()
338 br label %test3
339test3:
340 %tagbit3 = and i32 %tag, 4
341 %tagbit3eq0 = icmp eq i32 %tagbit3, 0
342 br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
343optional3:
344 call void @c()
345 call void @c()
346 call void @c()
347 call void @c()
348 br label %test4
; The last test continues to the latch (not to %exit) when its bit is clear.
349test4:
350 %tagbit4 = and i32 %tag, 8
351 %tagbit4eq0 = icmp eq i32 %tagbit4, 0
352 br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1
353optional4:
354 call void @d()
355 call void @d()
356 call void @d()
357 call void @d()
358 br label %for.latch
; Latch: decrement the counter and go back to the header.
359for.latch:
360 %count.sub = sub i32 %count.loop, 1
361 br label %for.check
362exit:
363 ret void
364}
365
366; The block then2 is not unavoidable, meaning it does not dominate the exit.
367; But since it can be tail-duplicated, it should be placed as a fallthrough from
368; test2 and copied. The purpose here is to make sure that the tail-duplication
369; code is independent of the outlining code, which works by choosing the
370; "unavoidable" blocks.
371; CHECK-LABEL: avoidable_test:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000372; CHECK: # %bb.{{[0-9]+}}: # %entry
Kyle Butt7fbec9b2017-02-15 19:49:14 +0000373; CHECK: andi.
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000374; CHECK: # %bb.{{[0-9]+}}: # %test2
Kyle Butt7fbec9b2017-02-15 19:49:14 +0000375; Make sure then2 falls through from test2
376; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}}
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000377; CHECK: # %bb.{{[0-9]+}}: # %then2
Kyle Butt7fbec9b2017-02-15 19:49:14 +0000378; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
379; CHECK: # %else1
380; CHECK: bl a
381; CHECK: bl a
382; Make sure then2 was copied into else1
383; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
384; CHECK: # %end1
385; CHECK: bl d
386; CHECK: # %else2
387; CHECK: bl c
388; CHECK: # %end2
; avoidable_test: control-flow graph in which then2 has two predecessors
; (else1 and test2) and can be bypassed via test2 -> else2 -> end2.
;   test1 -> test2 | else1
;   else1 -> then2            (after two calls to @a)
;   test2 -> then2 | else2
;   then2 -> end2  | end1
;   else2 -> end2             (after one call to @c)
;   end1  calls @d and returns; end2 returns directly.
; All three conditional branches use !prof !1; the existing end-of-line
; comments name the more likely successor of each.
389define void @avoidable_test(i32 %tag) {
390entry:
391 br label %test1
392test1:
393 %tagbit1 = and i32 %tag, 1
394 %tagbit1eq0 = icmp eq i32 %tagbit1, 0
395 br i1 %tagbit1eq0, label %test2, label %else1, !prof !1 ; %test2 more likely
396else1:
397 call void @a()
398 call void @a()
399 br label %then2
400test2:
401 %tagbit2 = and i32 %tag, 2
402 %tagbit2eq0 = icmp eq i32 %tagbit2, 0
403 br i1 %tagbit2eq0, label %then2, label %else2, !prof !1 ; %then2 more likely
404then2:
405 %tagbit3 = and i32 %tag, 4
406 %tagbit3eq0 = icmp eq i32 %tagbit3, 0
407 br i1 %tagbit3eq0, label %end2, label %end1, !prof !1 ; %end2 more likely
408else2:
409 call void @c()
410 br label %end2
411end2:
412 ret void
413end1:
414 call void @d()
415 ret void
416}
417
418; CHECK-LABEL: trellis_test
419; The number in the block labels is the expected block frequency given the
420; probabilities annotated. There is a conflict in the b;c->d;e trellis that
421; should be resolved as c->e;b->d.
422; The d;e->f;g trellis should be resolved as e->g;d->f.
423; The f;g->h;i trellis should be resolved as f->i;g->h.
424; The h;i->j;ret trellis contains a triangle edge, and should be resolved as
425; h->j->ret
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000426; CHECK: # %bb.{{[0-9]+}}: # %entry
427; CHECK: # %bb.{{[0-9]+}}: # %c10
Kyle Butt7fbec9b2017-02-15 19:49:14 +0000428; CHECK: # %e9
429; CHECK: # %g10
430; CHECK: # %h10
431; CHECK: # %j8
432; CHECK: # %ret
433; CHECK: # %b6
434; CHECK: # %d7
435; CHECK: # %f6
436; CHECK: # %i6
; trellis_test: a ladder of two-predecessor / two-successor block pairs.
;   a16      -> c10 | b6
;   c10, b6  -> e9  | d7
;   e9,  d7  -> g10 | f6
;   g10, f6  -> i6  | h10
;   i6,  h10 -> ret | j8
;   j8       -> ret
; Every block except entry and ret makes two calls to the function named after
; its letter; every block that branches conditionally then tests a two-bit
; field of %tag. The numeric suffix of each block name is its expected block
; frequency, as stated in the comment that precedes this function, and the
; end-of-line comments give the outgoing edge frequencies implied by the !prof
; weights.
437define void @trellis_test(i32 %tag) {
438entry:
439 br label %a16
440a16:
441 call void @a()
442 call void @a()
443 %tagbits.a = and i32 %tag, 3
444 %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
445 br i1 %tagbits.a.eq0, label %c10, label %b6, !prof !1 ; 10 to 6
446c10:
447 call void @c()
448 call void @c()
449 %tagbits.c = and i32 %tag, 12
450 %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
451 ; Both of these edges should be hotter than the other incoming edge
452 ; for e9 or d7
453 br i1 %tagbits.c.eq0, label %e9, label %d7, !prof !3 ; 6 to 4
454e9:
455 call void @e()
456 call void @e()
457 %tagbits.e = and i32 %tag, 48
458 %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
459 br i1 %tagbits.e.eq0, label %g10, label %f6, !prof !4 ; 7 to 2
460g10:
461 call void @g()
462 call void @g()
463 %tagbits.g = and i32 %tag, 192
464 %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
465 br i1 %tagbits.g.eq0, label %i6, label %h10, !prof !5 ; 2 to 8
466i6:
467 call void @i()
468 call void @i()
469 %tagbits.i = and i32 %tag, 768
470 %tagbits.i.eq0 = icmp eq i32 %tagbits.i, 0
471 br i1 %tagbits.i.eq0, label %ret, label %j8, !prof !2 ; balanced (3 to 3)
; Second rail of the ladder: b6, d7, f6, h10 compare the same bit fields as
; c10, e9, g10, i6 but against nonzero constants (8, 32, 128, 512).
472b6:
473 call void @b()
474 call void @b()
475 %tagbits.b = and i32 %tag, 12
476 %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
477 br i1 %tagbits.b.eq1, label %e9, label %d7, !prof !2 ; balanced (3 to 3)
478d7:
479 call void @d()
480 call void @d()
481 %tagbits.d = and i32 %tag, 48
482 %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
483 br i1 %tagbits.d.eq1, label %g10, label %f6, !prof !6 ; 3 to 4
484f6:
485 call void @f()
486 call void @f()
487 %tagbits.f = and i32 %tag, 192
488 %tagbits.f.eq1 = icmp eq i32 %tagbits.f, 128
489 br i1 %tagbits.f.eq1, label %i6, label %h10, !prof !7 ; 4 to 2
490h10:
491 call void @h()
492 call void @h()
493 %tagbits.h = and i32 %tag, 768
494 %tagbits.h.eq1 = icmp eq i32 %tagbits.h, 512
495 br i1 %tagbits.h.eq1, label %ret, label %j8, !prof !2 ; balanced (5 to 5)
496j8:
497 call void @j()
498 call void @j()
499 br label %ret
500ret:
501 ret void
502}
503
504; Verify that we still consider tail-duplication opportunities if we find a
505; triangle trellis. Here D->F->G is the triangle, and D;E are both predecessors
506; of both F and G. The basic trellis algorithm picks the F->G edge, but after
507; checking, it's profitable to duplicate G into F. The weights here are not
508; really important. They are there to help make the test stable.
509; CHECK-LABEL: trellis_then_dup_test
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000510; CHECK: # %bb.{{[0-9]+}}: # %entry
511; CHECK: # %bb.{{[0-9]+}}: # %b
Kyle Butt7fbec9b2017-02-15 19:49:14 +0000512; CHECK: # %d
513; CHECK: # %g
514; CHECK: # %ret1
515; CHECK: # %c
516; CHECK: # %e
517; CHECK: # %f
518; CHECK: # %ret2
519; CHECK: # %ret
; trellis_then_dup_test: trellis whose last rung contains a triangle.
;   a    -> b | c
;   b, c -> d | e
;   d, e -> g | f
;   f    -> g                 (unconditional, after two calls to @f)
;   g    -> ret1 | ret2       (no calls of its own, only a bit-field test)
;   ret1 calls @a, ret2 calls @b, and both branch to ret.
; All conditional branches use !prof !1 except the one in g, which uses the
; balanced !prof !2.
520define void @trellis_then_dup_test(i32 %tag) {
521entry:
522 br label %a
523a:
524 call void @a()
525 call void @a()
526 %tagbits.a = and i32 %tag, 3
527 %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
528 br i1 %tagbits.a.eq0, label %b, label %c, !prof !1 ; 5 to 3
529b:
530 call void @b()
531 call void @b()
532 %tagbits.b = and i32 %tag, 12
533 %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
534 br i1 %tagbits.b.eq1, label %d, label %e, !prof !1 ; 5 to 3
535d:
536 call void @d()
537 call void @d()
538 %tagbits.d = and i32 %tag, 48
539 %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
540 br i1 %tagbits.d.eq1, label %g, label %f, !prof !1 ; 5 to 3
; f is the middle vertex of the triangle: its only successor is g.
541f:
542 call void @f()
543 call void @f()
544 br label %g
545g:
546 %tagbits.g = and i32 %tag, 192
547 %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
548 br i1 %tagbits.g.eq0, label %ret1, label %ret2, !prof !2 ; balanced
549c:
550 call void @c()
551 call void @c()
552 %tagbits.c = and i32 %tag, 12
553 %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
554 br i1 %tagbits.c.eq0, label %d, label %e, !prof !1 ; 5 to 3
555e:
556 call void @e()
557 call void @e()
558 %tagbits.e = and i32 %tag, 48
559 %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
560 br i1 %tagbits.e.eq0, label %g, label %f, !prof !1 ; 5 to 3
561ret1:
562 call void @a()
563 br label %ret
564ret2:
565 call void @b()
566 br label %ret
567ret:
568 ret void
569}
570
Dehao Chenb197d5b2017-03-23 23:28:09 +0000571; Verify that we do not misidentify a trellis as a triangle trellis when it
572; is not really a triangle.
573; CHECK-LABEL: trellis_no_triangle
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000574; CHECK: # %bb.{{[0-9]+}}: # %entry
575; CHECK: # %bb.{{[0-9]+}}: # %b
Dehao Chenb197d5b2017-03-23 23:28:09 +0000576; CHECK: # %d
577; CHECK: # %ret
578; CHECK: # %c
579; CHECK: # %e
; trellis_no_triangle: b and c both branch to d or e, but the next level is not
; a trellis rung: d branches to ret or e, and e branches only to ret.
;   a    -> b | c
;   b, c -> d | e
;   d    -> ret | e
;   e    -> ret               (unconditional, after two calls to @e)
;   ret  calls @f and returns.
; The branches in a, b and d use the heavily skewed weights !8, !9 and !10;
; the branch in c uses the balanced !2.
580define void @trellis_no_triangle(i32 %tag) {
581entry:
582 br label %a
583a:
584 call void @a()
585 call void @a()
586 %tagbits.a = and i32 %tag, 3
587 %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
588 br i1 %tagbits.a.eq0, label %b, label %c, !prof !8 ; 98 to 2
589b:
590 call void @b()
591 call void @b()
592 %tagbits.b = and i32 %tag, 12
593 %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
594 br i1 %tagbits.b.eq1, label %d, label %e, !prof !9 ; 97 to 1
595d:
596 call void @d()
597 call void @d()
598 %tagbits.d = and i32 %tag, 48
599 %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
600 br i1 %tagbits.d.eq1, label %ret, label %e, !prof !10 ; 96 to 2
601c:
602 call void @c()
603 call void @c()
604 %tagbits.c = and i32 %tag, 12
605 %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
606 br i1 %tagbits.c.eq0, label %d, label %e, !prof !2 ; 1 to 1
607e:
608 call void @e()
609 call void @e()
610 br label %ret
611ret:
612 call void @f()
613 ret void
614}
615
; External callees used by the test functions above. Each takes no arguments
; and returns void; only declarations are given here. The calls serve as the
; bodies of the optional / trellis blocks.
Kyle Butt0846e562016-10-11 20:36:43 +0000616declare void @a()
617declare void @b()
618declare void @c()
619declare void @d()
Kyle Butt7fbec9b2017-02-15 19:49:14 +0000620declare void @e()
621declare void @f()
622declare void @g()
623declare void @h()
624declare void @i()
625declare void @j()
626
; Branch-weight profiles attached to conditional branches through !prof.
; The first weight belongs to the first successor listed on the branch, the
; second weight to the second successor. !2 is the balanced (50/50) profile;
; !8, !9 and !10 are the heavily skewed profiles used by trellis_no_triangle.
627!1 = !{!"branch_weights", i32 5, i32 3}
628!2 = !{!"branch_weights", i32 50, i32 50}
629!3 = !{!"branch_weights", i32 6, i32 4}
630!4 = !{!"branch_weights", i32 7, i32 2}
631!5 = !{!"branch_weights", i32 2, i32 8}
632!6 = !{!"branch_weights", i32 3, i32 4}
633!7 = !{!"branch_weights", i32 4, i32 2}
Dehao Chenb197d5b2017-03-23 23:28:09 +0000634!8 = !{!"branch_weights", i32 98, i32 2}
635!9 = !{!"branch_weights", i32 97, i32 1}
636!10 = !{!"branch_weights", i32 96, i32 2}