| Duncan P. N. Exon Smith | 10be9a8 | 2014-04-21 17:57:07 +0000 | [diff] [blame] | 1 | ; RUN: opt < %s -analyze -block-freq | FileCheck %s |
| 2 | |
| 3 | ; A loop with multiple exits should be handled correctly. |
| 4 | ; |
| 5 | ; CHECK-LABEL: Printing analysis {{.*}} for function 'multiexit': |
| 6 | ; CHECK-NEXT: block-frequency-info: multiexit |
| 7 | define void @multiexit(i32 %a) { |
| 8 | ; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] |
| 9 | entry: |
| 10 | br label %loop.1 |
| 11 | |
| 12 | ; CHECK-NEXT: loop.1: float = 1.333{{3*}}, |
| 13 | loop.1: |
| 14 | %i = phi i32 [ 0, %entry ], [ %inc.2, %loop.2 ] |
| 15 | call void @f(i32 %i) |
| 16 | %inc.1 = add i32 %i, 1 |
| 17 | %cmp.1 = icmp ugt i32 %inc.1, %a |
| 18 | br i1 %cmp.1, label %exit.1, label %loop.2, !prof !0 |
| 19 | |
| 20 | ; CHECK-NEXT: loop.2: float = 0.666{{6*7}}, |
| 21 | loop.2: |
| 22 | call void @g(i32 %inc.1) |
| 23 | %inc.2 = add i32 %inc.1, 1 |
| 24 | %cmp.2 = icmp ugt i32 %inc.2, %a |
| 25 | br i1 %cmp.2, label %exit.2, label %loop.1, !prof !1 |
| 26 | |
| 27 | ; CHECK-NEXT: exit.1: float = 0.666{{6*7}}, |
| 28 | exit.1: |
| 29 | call void @h(i32 %inc.1) |
| 30 | br label %return |
| 31 | |
| 32 | ; CHECK-NEXT: exit.2: float = 0.333{{3*}}, |
| 33 | exit.2: |
| 34 | call void @i(i32 %inc.2) |
| 35 | br label %return |
| 36 | |
| 37 | ; CHECK-NEXT: return: float = 1.0, int = [[ENTRY]] |
| 38 | return: |
| 39 | ret void |
| 40 | } |
| 41 | |
| 42 | declare void @f(i32 %x) |
| 43 | declare void @g(i32 %x) |
| 44 | declare void @h(i32 %x) |
| 45 | declare void @i(i32 %x) |
| 46 | |
| 47 | !0 = metadata !{metadata !"branch_weights", i32 3, i32 3} |
| 48 | !1 = metadata !{metadata !"branch_weights", i32 5, i32 5} |
| 49 | |
| 50 | ; The current BlockFrequencyInfo algorithm doesn't handle multiple entrances |
| 51 | ; into a loop very well. The frequencies assigned to blocks in the loop are |
| 52 | ; predictable (and not absurd), but also not correct and therefore not worth |
| 53 | ; testing. |
| 54 | ; |
| 55 | ; There are two testcases below. |
| 56 | ; |
| 57 | ; For each testcase, I use a CHECK-NEXT/NOT combo like an XFAIL with the |
| 58 | ; granularity of a single check. If/when this behaviour is fixed, we'll know |
| 59 | ; about it, and the test should be updated. |
| 60 | ; |
| 61 | ; Testcase #1 |
| 62 | ; =========== |
| 63 | ; |
| 64 | ; In this case c1 and c2 should have frequencies of 15/7 and 13/7, |
| 65 | ; respectively. To calculate this, consider assigning 1.0 to entry, and |
| 66 | ; distributing frequency iteratively (to infinity). At the first iteration, |
| 67 | ; entry gives 3/4 to c1 and 1/4 to c2. At every step after, c1 and c2 give 3/4 |
| 68 | ; of what they have to each other. Somehow, all of it comes out to exit. |
| 69 | ; |
| 70 | ; c1 = 3/4 + 1/4*3/4 + 3/4*3^2/4^2 + 1/4*3^3/4^3 + 3/4*3^3/4^3 + ... |
| 71 | ; c2 = 1/4 + 3/4*3/4 + 1/4*3^2/4^2 + 3/4*3^3/4^3 + 1/4*3^3/4^3 + ... |
| 72 | ; |
| 73 | ; Simplify by splitting up the odd and even terms of the series and taking out |
| 74 | ; factors so that the infite series matches: |
| 75 | ; |
| 76 | ; c1 = 3/4 *(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...) |
| 77 | ; + 3/16*(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...) |
| 78 | ; c2 = 1/4 *(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...) |
| 79 | ; + 9/16*(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...) |
| 80 | ; |
| 81 | ; c1 = 15/16*(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...) |
| 82 | ; c2 = 13/16*(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...) |
| 83 | ; |
| 84 | ; Since this geometric series sums to 16/7: |
| 85 | ; |
| 86 | ; c1 = 15/7 |
| 87 | ; c2 = 13/7 |
| 88 | ; |
| 89 | ; If we treat c1 and c2 as members of the same loop, the exit frequency of the |
| 90 | ; loop as a whole is 1/4, so the loop scale should be 4. Summing c1 and c2 |
| 91 | ; gives 28/7, or 4.0, which is nice confirmation of the math above. |
| 92 | ; |
| 93 | ; However, assuming c1 precedes c2 in reverse post-order, the current algorithm |
| 94 | ; returns 3/4 and 13/16, respectively. LoopInfo ignores edges between loops |
| 95 | ; (and doesn't see any loops here at all), and -block-freq ignores the |
| 96 | ; irreducible edge from c2 to c1. |
| 97 | ; |
| 98 | ; CHECK-LABEL: Printing analysis {{.*}} for function 'multientry': |
| 99 | ; CHECK-NEXT: block-frequency-info: multientry |
| 100 | define void @multientry(i32 %a) { |
| 101 | ; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] |
| 102 | entry: |
| 103 | %choose = call i32 @choose(i32 %a) |
| 104 | %compare = icmp ugt i32 %choose, %a |
| 105 | br i1 %compare, label %c1, label %c2, !prof !2 |
| 106 | |
| 107 | ; This is like a single-line XFAIL (see above). |
| 108 | ; CHECK-NEXT: c1: |
| 109 | ; CHECK-NOT: float = 2.142857{{[0-9]*}}, |
| 110 | c1: |
| 111 | %i1 = phi i32 [ %a, %entry ], [ %i2.inc, %c2 ] |
| 112 | %i1.inc = add i32 %i1, 1 |
| 113 | %choose1 = call i32 @choose(i32 %i1) |
| 114 | %compare1 = icmp ugt i32 %choose1, %a |
| 115 | br i1 %compare1, label %c2, label %exit, !prof !2 |
| 116 | |
| 117 | ; This is like a single-line XFAIL (see above). |
| 118 | ; CHECK-NEXT: c2: |
| 119 | ; CHECK-NOT: float = 1.857142{{[0-9]*}}, |
| 120 | c2: |
| 121 | %i2 = phi i32 [ %a, %entry ], [ %i1.inc, %c1 ] |
| 122 | %i2.inc = add i32 %i2, 1 |
| 123 | %choose2 = call i32 @choose(i32 %i2) |
| 124 | %compare2 = icmp ugt i32 %choose2, %a |
| 125 | br i1 %compare2, label %c1, label %exit, !prof !2 |
| 126 | |
| 127 | ; We still shouldn't lose any frequency. |
| 128 | ; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] |
| 129 | exit: |
| 130 | ret void |
| 131 | } |
| 132 | |
| 133 | ; Testcase #2 |
| 134 | ; =========== |
| 135 | ; |
| 136 | ; In this case c1 and c2 should be treated as equals in a single loop. The |
| 137 | ; exit frequency is 1/3, so the scaling factor for the loop should be 3.0. The |
| 138 | ; loop is entered 2/3 of the time, and c1 and c2 split the total loop frequency |
| 139 | ; evenly (1/2), so they should each have frequencies of 1.0 (3.0*2/3*1/2). |
| 140 | ; Another way of computing this result is by assigning 1.0 to entry and showing |
| 141 | ; that c1 and c2 should accumulate frequencies of: |
| 142 | ; |
| 143 | ; 1/3 + 2/9 + 4/27 + 8/81 + ... |
| 144 | ; 2^0/3^1 + 2^1/3^2 + 2^2/3^3 + 2^3/3^4 + ... |
| 145 | ; |
| 146 | ; At the first step, c1 and c2 each get 1/3 of the entry. At each subsequent |
| 147 | ; step, c1 and c2 each get 1/3 of what's left in c1 and c2 combined. This |
| 148 | ; infinite series sums to 1. |
| 149 | ; |
| 150 | ; However, assuming c1 precedes c2 in reverse post-order, the current algorithm |
| 151 | ; returns 1/2 and 3/4, respectively. LoopInfo ignores edges between loops (and |
| 152 | ; treats c1 and c2 as self-loops only), and -block-freq ignores the irreducible |
| 153 | ; edge from c2 to c1. |
| 154 | ; |
| 155 | ; Below I use a CHECK-NEXT/NOT combo like an XFAIL with the granularity of a |
| 156 | ; single check. If/when this behaviour is fixed, we'll know about it, and the |
| 157 | ; test should be updated. |
| 158 | ; |
| 159 | ; CHECK-LABEL: Printing analysis {{.*}} for function 'crossloops': |
| 160 | ; CHECK-NEXT: block-frequency-info: crossloops |
| 161 | define void @crossloops(i32 %a) { |
| 162 | ; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] |
| 163 | entry: |
| 164 | %choose = call i32 @choose(i32 %a) |
| 165 | switch i32 %choose, label %exit [ i32 1, label %c1 |
| 166 | i32 2, label %c2 ], !prof !3 |
| 167 | |
| 168 | ; This is like a single-line XFAIL (see above). |
| 169 | ; CHECK-NEXT: c1: |
| 170 | ; CHECK-NOT: float = 1.0, |
| 171 | c1: |
| 172 | %i1 = phi i32 [ %a, %entry ], [ %i1.inc, %c1 ], [ %i2.inc, %c2 ] |
| 173 | %i1.inc = add i32 %i1, 1 |
| 174 | %choose1 = call i32 @choose(i32 %i1) |
| 175 | switch i32 %choose1, label %exit [ i32 1, label %c1 |
| 176 | i32 2, label %c2 ], !prof !3 |
| 177 | |
| 178 | ; This is like a single-line XFAIL (see above). |
| 179 | ; CHECK-NEXT: c2: |
| 180 | ; CHECK-NOT: float = 1.0, |
| 181 | c2: |
| 182 | %i2 = phi i32 [ %a, %entry ], [ %i1.inc, %c1 ], [ %i2.inc, %c2 ] |
| 183 | %i2.inc = add i32 %i2, 1 |
| 184 | %choose2 = call i32 @choose(i32 %i2) |
| 185 | switch i32 %choose2, label %exit [ i32 1, label %c1 |
| 186 | i32 2, label %c2 ], !prof !3 |
| 187 | |
| 188 | ; We still shouldn't lose any frequency. |
| 189 | ; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] |
| 190 | exit: |
| 191 | ret void |
| 192 | } |
| 193 | |
| 194 | declare i32 @choose(i32) |
| 195 | |
| 196 | !2 = metadata !{metadata !"branch_weights", i32 3, i32 1} |
| 197 | !3 = metadata !{metadata !"branch_weights", i32 2, i32 2, i32 2} |