Easwaran Raman | 5a12f23 | 2017-02-14 22:49:28 +0000 | [diff] [blame] | 1 | ; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S -inline-threshold=50 -inline-cold-callsite-threshold=0 -hot-callsite-threshold=50 | FileCheck %s |
| 2 | ; This tests incremental updates to caller's BFI as a callee gets inlined. |
| 3 | ; In bottom-up inlining, first c->e inlining is considered and fails because |
| 4 | ; e's size exceeds the threshold of 50. Then a->c inlining is considered and it |
| 5 | ; succeeds. a's BFI is updated incrementally. As c's blocks get pruned, the |
| 6 | ; block with label cond_false is removed and, since the remaining code is |
| 7 | ; straight-line, a single block gets cloned into a. This block should get the |
| 8 | ; maximum block frequency among the original blocks in c. If it gets the |
| 9 | ; frequency of the block with label cond_true in @c, its frequency will be |
| 10 | ; 1/10th of function a's entry block frequency, resulting in a callsite count of |
| 11 | ; 2 (since a's entry count is 20) which means that a->e callsite will be |
| 12 | ; considered cold and not inlined. |
| 13 | |
; External i32 global; the only use in this file is the load in @e's
; cond_false block.
| 14 | @data = external global i32 |
; Top-level caller, annotated with a profile entry count of 20 (!21). Its only
; call is to @c with the constant argument 1. The FileCheck directives inside
; the body require that no call to @c and no call to @e remains before the
; ret, i.e. that both callees end up inlined into @a.
| 15 | ; CHECK-LABEL: define i32 @a( |
| 16 | define i32 @a(i32 %a1) !prof !21 { |
| 17 | ; CHECK-NOT: call i32 @c |
| 18 | ; CHECK-NOT: call i32 @e |
| 19 | ; CHECK: ret |
| 20 | entry: |
; %cond is computed but never used in @a; the branch to exit is unconditional.
| 21 | %cond = icmp sle i32 %a1, 1 |
; Constant argument 1 makes @c's "%c1 <= 1" comparison true at this call site.
| 22 | %a2 = call i32 @c(i32 1) |
| 23 | br label %exit |
| 24 | exit: |
| 25 | ret i32 %a2 |
| 26 | } |
| 27 | |
; Externally defined function with no body here; called from @c and @e.
| 28 | declare void @ext(); |
| 29 | |
; Intermediate callee: called from @a and itself calling @e. It starts with an
; entry count of 500 (!23). The FileCheck directive below captures the !prof
; node attached to @c in the output, which is later required to hold 480.
; NOTE(review): 480 looks like 500 minus @a's entry count of 20 being moved to
; the inlined copy -- confirm against the inliner's entry-count update.
| 30 | ; CHECK: @c(i32 %c1) !prof [[COUNT1:![0-9]+]] |
| 31 | define i32 @c(i32 %c1) !prof !23 { |
| 32 | call void @ext() |
| 33 | %cond = icmp sle i32 %c1, 1 |
; !25 carries branch weights 1 and 9: cond_true is the 1-in-10 path and
; cond_false the 9-in-10 path, matching the "1/10th" in the file header.
| 34 | br i1 %cond, label %cond_true, label %cond_false, !prof !25 |
| 35 | |
; Does no work of its own; it only feeds the constant 0 into the phi in exit.
| 36 | cond_false: |
| 37 | br label %exit |
| 38 | |
; The path that calls @e; this is the call site the header comment refers to.
| 39 | cond_true: |
| 40 | %c11 = call i32 @e(i32 %c1) |
| 41 | br label %exit |
| 42 | exit: |
; Result is 0 when coming from cond_false, otherwise @e's return value.
| 43 | %c12 = phi i32 [ 0, %cond_false], [ %c11, %cond_true ] |
| 44 | ret i32 %c12 |
| 45 | } |
| 46 | |
| 47 | |
; Innermost callee, called from @c's cond_true block. It starts with an entry
; count of 100 (!24). The FileCheck directive below captures the !prof node
; attached to @e in the output, which is later required to hold 80.
; The file header states that @e is too large to be inlined into @c under the
; threshold of 50.
| 48 | ; CHECK: @e(i32 %c1) !prof [[COUNT2:![0-9]+]] |
| 49 | define i32 @e(i32 %c1) !prof !24 { |
| 50 | call void @ext() |
| 51 | call void @ext() |
| 52 | %cond = icmp sle i32 %c1, 1 |
; Unlike the branch in @c, this one has no branch-weight metadata attached.
| 53 | br i1 %cond, label %cond_true, label %cond_false |
| 54 | |
; One more call to @ext, a load of @data, then an alternating add/mul chain.
; NOTE(review): presumably this chain exists only to raise @e's inline cost
; above the threshold -- confirm before shortening it.
| 55 | cond_false: |
| 56 | call void @ext() |
| 57 | %c2 = load i32, i32* @data, align 4 |
| 58 | %c3 = add i32 %c1, %c2 |
| 59 | %c4 = mul i32 %c3, %c2 |
| 60 | %c5 = add i32 %c4, %c2 |
| 61 | %c6 = mul i32 %c5, %c2 |
| 62 | %c7 = add i32 %c6, %c2 |
| 63 | %c8 = mul i32 %c7, %c2 |
| 64 | %c9 = add i32 %c8, %c2 |
| 65 | %c10 = mul i32 %c9, %c2 |
| 66 | ret i32 %c10 |
| 67 | |
; The "%c1 <= 1" path returns the constant 0 without further work.
| 68 | cond_true: |
| 69 | ret i32 0 |
| 70 | } |
| 71 | |
; Expected entry counts in the output for the two !prof nodes captured at the
; definitions of @c (COUNT1) and @e (COUNT2).
| 72 | ; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 480} |
| 73 | ; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 80} |
; Input profile data: entry counts for @a (!21), @c (!23) and @e (!24), and
; the branch weights used by the conditional branch in @c (!25).
| 74 | !21 = !{!"function_entry_count", i64 20} |
| 75 | !23 = !{!"function_entry_count", i64 500} |
| 76 | !24 = !{!"function_entry_count", i64 100} |
| 77 | !25 = !{!"branch_weights", i32 1, i32 9} |
| 78 | |
; Module-level "ProfileSummary" flag. !2 lists the summary fields (!3 to !10).
| 79 | !llvm.module.flags = !{!1} |
| 80 | !1 = !{i32 1, !"ProfileSummary", !2} |
| 81 | !2 = !{!3, !4, !5, !6, !7, !8, !9, !10} |
| 82 | !3 = !{!"ProfileFormat", !"InstrProf"} |
| 83 | !4 = !{!"TotalCount", i64 10000} |
| 84 | !5 = !{!"MaxCount", i64 1000} |
| 85 | !6 = !{!"MaxInternalCount", i64 1} |
| 86 | !7 = !{!"MaxFunctionCount", i64 1000} |
| 87 | !8 = !{!"NumCounts", i64 3} |
| 88 | !9 = !{!"NumFunctions", i64 3} |
; Detailed summary: three entries, each a triple of integers.
; NOTE(review): the triples look like (cutoff, minimum count, number of
; counts) -- confirm against the ProfileSummary metadata documentation.
| 89 | !10 = !{!"DetailedSummary", !11} |
| 90 | !11 = !{!12, !13, !14} |
| 91 | !12 = !{i32 10000, i64 1000, i32 1} |
| 92 | !13 = !{i32 999000, i64 1000, i32 1} |
| 93 | !14 = !{i32 999999, i64 5, i32 2} |