Eric Christopher | cee313d | 2019-04-17 04:52:47 +0000 | [diff] [blame] | 1 | ; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s |
| 2 | ; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=20 -S | FileCheck %s |
| 3 | |
| | ; Returns the signed quotient of %A and %B. Internal linkage; the only call |
| | ; visible in this file is from @caller1, with the constants 10 and 3. |
| 4 | define internal i32 @callee1(i32 %A, i32 %B) { |
| 5 | %C = sdiv i32 %A, %B |
| 6 | ret i32 %C |
| 7 | } |
| 8 | |
| 9 | define i32 @caller1() { |
| | ; Calls @callee1 with two constants. The directives expect the call to be |
| | ; inlined and the sdiv folded, leaving the return of 3 as the first |
| | ; instruction of the function. |
| 10 | ; CHECK-LABEL: define i32 @caller1( |
| 11 | ; CHECK-NEXT: ret i32 3 |
| 12 | |
| 13 | %X = call i32 @callee1( i32 10, i32 3 ) |
| 14 | ret i32 %X |
| 15 | } |
| 16 | |
| 17 | define i32 @caller2() { |
| 18 | ; Check that we can constant-prop through instructions after inlining callee21 |
| 19 | ; to get constants in the inlined callsite to callee22. |
| 20 | ; FIXME: Currently, the threshold is fixed at 20 because we don't perform |
| 21 | ; *recursive* cost analysis to realize that the nested call site will definitely |
| 22 | ; inline and be cheap. We should eventually do that and lower the threshold here |
| 23 | ; to 1. |
| 24 | ; |
| | ; Neither the call to callee21 nor the nested call to callee22 may survive. |
| | ; Both callees return i32, so the negative pattern must not name a void call. |
| 25 | ; CHECK-LABEL: @caller2( |
| 26 | ; CHECK-NOT: call |
| 27 | ; CHECK: ret |
| 28 | |
| 29 | %x = call i32 @callee21(i32 42, i32 48) |
| 30 | ret i32 %x |
| 31 | } |
| 32 | |
| | ; Computes %y - %x and forwards the difference to @callee22, returning its |
| | ; result. @caller2 passes (42, 48), so the forwarded value is 6 once the |
| | ; arguments are known. |
| 33 | define i32 @callee21(i32 %x, i32 %y) { |
| 34 | %sub = sub i32 %y, %x |
| 35 | %result = call i32 @callee22(i32 %sub) |
| 36 | ret i32 %result |
| 37 | } |
| 38 | |
| 39 | declare i8* @getptr() |
| 40 | |
| | ; Returns %x + 8 (eight chained adds of 1) when %x is unsigned-greater than |
| | ; 42, otherwise returns %x unchanged. |
| 41 | define i32 @callee22(i32 %x) { |
| 42 | %icmp = icmp ugt i32 %x, 42 |
| 43 | br i1 %icmp, label %bb.true, label %bb.false |
| 44 | bb.true: |
| 45 | ; This block mustn't be counted in the inline cost. |
| 46 | %x1 = add i32 %x, 1 |
| 47 | %x2 = add i32 %x1, 1 |
| 48 | %x3 = add i32 %x2, 1 |
| 49 | %x4 = add i32 %x3, 1 |
| 50 | %x5 = add i32 %x4, 1 |
| 51 | %x6 = add i32 %x5, 1 |
| 52 | %x7 = add i32 %x6, 1 |
| 53 | %x8 = add i32 %x7, 1 |
| 54 | |
| 55 | ret i32 %x8 |
| 56 | bb.false: |
| 57 | ret i32 %x |
| 58 | } |
| 59 | |
| 60 | define i32 @caller3() { |
| 61 | ; Check that even if the expensive path is hidden behind several basic blocks, |
| 62 | ; it doesn't count toward the inline cost when constant-prop proves those paths |
| 63 | ; dead. |
| 64 | ; |
| | ; With the arguments (42, 48), callee3 computes 48 - 42 = 6, fails the |
| | ; "ugt 42" test, and returns 6 from bb.false. |
| 65 | ; CHECK-LABEL: @caller3( |
| 66 | ; CHECK-NOT: call |
| 67 | ; CHECK: ret i32 6 |
| 68 | |
| 69 | entry: |
| 70 | %x = call i32 @callee3(i32 42, i32 48) |
| 71 | ret i32 %x |
| 72 | } |
| 73 | |
| | ; Computes %sub = %y - %x. If %sub is not unsigned-greater than 42, returns |
| | ; %sub. Otherwise returns %x + 8 when %sub is unsigned-less than 64, else |
| | ; %y + 8; the two long chains merge through a phi in bb.merge. |
| 74 | define i32 @callee3(i32 %x, i32 %y) { |
| 75 | %sub = sub i32 %y, %x |
| 76 | %icmp = icmp ugt i32 %sub, 42 |
| 77 | br i1 %icmp, label %bb.true, label %bb.false |
| 78 | |
| 79 | bb.true: |
| 80 | %icmp2 = icmp ult i32 %sub, 64 |
| 81 | br i1 %icmp2, label %bb.true.true, label %bb.true.false |
| 82 | |
| 83 | bb.true.true: |
| 84 | ; This block mustn't be counted in the inline cost. |
| 85 | %x1 = add i32 %x, 1 |
| 86 | %x2 = add i32 %x1, 1 |
| 87 | %x3 = add i32 %x2, 1 |
| 88 | %x4 = add i32 %x3, 1 |
| 89 | %x5 = add i32 %x4, 1 |
| 90 | %x6 = add i32 %x5, 1 |
| 91 | %x7 = add i32 %x6, 1 |
| 92 | %x8 = add i32 %x7, 1 |
| 93 | br label %bb.merge |
| 94 | |
| 95 | bb.true.false: |
| 96 | ; This block mustn't be counted in the inline cost. |
| 97 | %y1 = add i32 %y, 1 |
| 98 | %y2 = add i32 %y1, 1 |
| 99 | %y3 = add i32 %y2, 1 |
| 100 | %y4 = add i32 %y3, 1 |
| 101 | %y5 = add i32 %y4, 1 |
| 102 | %y6 = add i32 %y5, 1 |
| 103 | %y7 = add i32 %y6, 1 |
| 104 | %y8 = add i32 %y7, 1 |
| 105 | br label %bb.merge |
| 106 | |
| 107 | bb.merge: |
| 108 | %result = phi i32 [ %x8, %bb.true.true ], [ %y8, %bb.true.false ] |
| 109 | ret i32 %result |
| 110 | |
| 111 | bb.false: |
| 112 | ret i32 %sub |
| 113 | } |
| 114 | |
| 115 | declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b) |
| 116 | |
| 117 | define i8 @caller4(i8 %z) { |
| 118 | ; Check that we can constant fold through intrinsics such as the |
| 119 | ; overflow-detecting arithmetic intrinsics. These are particularly important |
| 120 | ; as they are used heavily in standard library code and generic C++ code where |
| 121 | ; the arguments are often constant but complete generality is required. |
| 122 | ; |
| | ; 254 + 14 does not fit in an unsigned i8, so the overflow bit is true and |
| | ; callee4 returns -1 from bb.true regardless of %z. |
| 123 | ; CHECK-LABEL: @caller4( |
| 124 | ; CHECK-NOT: call |
| 125 | ; CHECK: ret i8 -1 |
| 126 | |
| 127 | entry: |
| 128 | %x = call i8 @callee4(i8 254, i8 14, i8 %z) |
| 129 | ret i8 %x |
| 130 | } |
| 131 | |
| | ; Returns -1 when the unsigned i8 addition %x + %y overflows; otherwise |
| | ; returns %z + 8 (eight chained adds of 1). The sum itself is never used, |
| | ; only the overflow bit extracted from the intrinsic's result. |
| 132 | define i8 @callee4(i8 %x, i8 %y, i8 %z) { |
| 133 | %uadd = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y) |
| 134 | %o = extractvalue {i8, i1} %uadd, 1 |
| 135 | br i1 %o, label %bb.true, label %bb.false |
| 136 | |
| 137 | bb.true: |
| 138 | ret i8 -1 |
| 139 | |
| 140 | bb.false: |
| 141 | ; This block mustn't be counted in the inline cost. |
| 142 | %z1 = add i8 %z, 1 |
| 143 | %z2 = add i8 %z1, 1 |
| 144 | %z3 = add i8 %z2, 1 |
| 145 | %z4 = add i8 %z3, 1 |
| 146 | %z5 = add i8 %z4, 1 |
| 147 | %z6 = add i8 %z5, 1 |
| 148 | %z7 = add i8 %z6, 1 |
| 149 | %z8 = add i8 %z7, 1 |
| 150 | ret i8 %z8 |
| 151 | } |
| 152 | |
| 153 | define i64 @caller5(i64 %y) { |
| 154 | ; Check that we can round trip constants through various kinds of casts etc. |
| 155 | ; without losing track of the constant prop in the inline cost analysis. |
| 156 | ; |
| | ; The constant 42 survives callee5's cast chain unchanged, so the equality |
| | ; test is true and -1 is returned regardless of %y. |
| 157 | ; CHECK-LABEL: @caller5( |
| 158 | ; CHECK-NOT: call |
| 159 | ; CHECK: ret i64 -1 |
| 160 | |
| 161 | entry: |
| 162 | %x = call i64 @callee5(i64 42, i64 %y) |
| 163 | ret i64 %x |
| 164 | } |
| 165 | |
| | ; Sends %x through inttoptr, bitcast, ptrtoint, trunc to i32 and zext back to |
| | ; i64, then compares the result with 42. Returns -1 on equality, otherwise |
| | ; %y + 8 (eight chained adds of 1). |
| 166 | define i64 @callee5(i64 %x, i64 %y) { |
| 167 | %inttoptr = inttoptr i64 %x to i8* |
| 168 | %bitcast = bitcast i8* %inttoptr to i32* |
| 169 | %ptrtoint = ptrtoint i32* %bitcast to i64 |
| 170 | %trunc = trunc i64 %ptrtoint to i32 |
| 171 | %zext = zext i32 %trunc to i64 |
| 172 | %cmp = icmp eq i64 %zext, 42 |
| 173 | br i1 %cmp, label %bb.true, label %bb.false |
| 174 | |
| 175 | bb.true: |
| 176 | ret i64 -1 |
| 177 | |
| 178 | bb.false: |
| 179 | ; This block mustn't be counted in the inline cost. |
| 180 | %y1 = add i64 %y, 1 |
| 181 | %y2 = add i64 %y1, 1 |
| 182 | %y3 = add i64 %y2, 1 |
| 183 | %y4 = add i64 %y3, 1 |
| 184 | %y5 = add i64 %y4, 1 |
| 185 | %y6 = add i64 %y5, 1 |
| 186 | %y7 = add i64 %y6, 1 |
| 187 | %y8 = add i64 %y7, 1 |
| 188 | ret i64 %y8 |
| 189 | } |
| 190 | |
| 191 | define float @caller6() { |
| 192 | ; Check that we can constant-prop through fcmp instructions |
| 193 | ; |
| | ; callee6 compares its argument against 42.0 with "ugt"; for the argument |
| | ; 42.0 that is false, so the long fadd chain in bb.true is dead. |
| 194 | ; CHECK-LABEL: @caller6( |
| 195 | ; CHECK-NOT: call |
| 196 | ; CHECK: ret |
| 197 | %x = call float @callee6(float 42.0) |
| 198 | ret float %x |
| 199 | } |
| 200 | |
| | ; Returns %x + 8.0 (eight chained fadds of 1.0) when "fcmp ugt %x, 42.0" |
| | ; holds, otherwise returns %x unchanged. Note that %icmp holds an fcmp result |
| | ; despite its name. |
| 201 | define float @callee6(float %x) { |
| 202 | %icmp = fcmp ugt float %x, 42.0 |
| 203 | br i1 %icmp, label %bb.true, label %bb.false |
| 204 | |
| 205 | bb.true: |
| 206 | ; This block mustn't be counted in the inline cost. |
| 207 | %x1 = fadd float %x, 1.0 |
| 208 | %x2 = fadd float %x1, 1.0 |
| 209 | %x3 = fadd float %x2, 1.0 |
| 210 | %x4 = fadd float %x3, 1.0 |
| 211 | %x5 = fadd float %x4, 1.0 |
| 212 | %x6 = fadd float %x5, 1.0 |
| 213 | %x7 = fadd float %x6, 1.0 |
| 214 | %x8 = fadd float %x7, 1.0 |
| 215 | ret float %x8 |
| 216 | |
| 217 | bb.false: |
| 218 | ret float %x |
| 219 | } |
| 220 | |
| 221 | |
| 222 | |
| 223 | define i32 @PR13412.main() { |
| 224 | ; This is a somewhat complicated three layer subprogram that was reported to |
| 225 | ; compute the wrong value for a branch due to assuming that an argument |
| 226 | ; mid-inline couldn't be equal to another pointer. |
| 227 | ; |
| 228 | ; After inlining, the branch should point directly to the exit block, not to |
| 229 | ; the intermediate block. |
| 230 | ; CHECK: @PR13412.main |
| 231 | ; CHECK: br i1 true, label %[[TRUE_DEST:.*]], label %[[FALSE_DEST:.*]] |
| 232 | ; CHECK: [[FALSE_DEST]]: |
| 233 | ; CHECK-NEXT: call void @PR13412.fail() |
| 234 | ; CHECK: [[TRUE_DEST]]: |
| 235 | ; CHECK-NEXT: ret i32 0 |
| 236 | |
| 237 | entry: |
| 238 | %i1 = alloca i64 |
| 239 | store i64 0, i64* %i1 |
| 240 | %arraydecay = bitcast i64* %i1 to i32* |
| | ; The same pointer is passed for both arguments, so whichever argument |
| | ; @PR13412.second returns compares equal to %b inside @PR13412.first. |
| 241 | %call = call i1 @PR13412.first(i32* %arraydecay, i32* %arraydecay) |
| 242 | br i1 %call, label %cond.end, label %cond.false |
| 243 | |
| 244 | cond.false: |
| 245 | call void @PR13412.fail() |
| 246 | br label %cond.end |
| 247 | |
| 248 | cond.end: |
| 249 | ret i32 0 |
| 250 | } |
| 251 | |
| | ; Middle layer of the PR13412 reproducer: returns true when the pointer |
| | ; produced by @PR13412.second(%a, %b) is equal to %b. |
| 252 | define internal i1 @PR13412.first(i32* %a, i32* %b) { |
| 253 | entry: |
| 254 | %call = call i32* @PR13412.second(i32* %a, i32* %b) |
| 255 | %cmp = icmp eq i32* %call, %b |
| 256 | ret i1 %cmp |
| 257 | } |
| 258 | |
| 259 | declare void @PR13412.fail() |
| 260 | |
| | ; Innermost layer of the PR13412 reproducer. Returns %a only when the byte |
| | ; distance %b - %a, shifted right by 2, is unsigned-greater than 1 and the two |
| | ; loaded i32 values are equal; on every other path it returns %b. |
| 261 | define internal i32* @PR13412.second(i32* %a, i32* %b) { |
| 262 | entry: |
| 263 | %sub.ptr.lhs.cast = ptrtoint i32* %b to i64 |
| 264 | %sub.ptr.rhs.cast = ptrtoint i32* %a to i64 |
| 265 | %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast |
| 266 | %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 2 |
| 267 | %cmp = icmp ugt i64 %sub.ptr.div, 1 |
| 268 | br i1 %cmp, label %if.then, label %if.end3 |
| 269 | |
| 270 | if.then: |
| 271 | %0 = load i32, i32* %a |
| 272 | %1 = load i32, i32* %b |
| 273 | %cmp1 = icmp eq i32 %0, %1 |
| 274 | br i1 %cmp1, label %return, label %if.end3 |
| 275 | |
| 276 | if.end3: |
| 277 | br label %return |
| 278 | |
| 279 | return: |
| 280 | %retval.0 = phi i32* [ %b, %if.end3 ], [ %a, %if.then ] |
| 281 | ret i32* %retval.0 |
| 282 | } |
| 283 | |
| 284 | declare i32 @PR28802.external(i32 returned %p1) |
| 285 | |
| | ; Passes a single-entry phi of the constant 0 to @PR28802.external, which is |
| | ; declared with a "returned" parameter, and returns the call's result. |
| 286 | define internal i32 @PR28802.callee() { |
| 287 | entry: |
| 288 | br label %cont |
| 289 | |
| 290 | cont: |
| 291 | %0 = phi i32 [ 0, %entry ] |
| 292 | %call = call i32 @PR28802.external(i32 %0) |
| 293 | ret i32 %call |
| 294 | } |
| 295 | |
| | ; Caller for the PR28802 case. The directives after this function expect the |
| | ; inlined body to call the external function with the literal 0 and to return |
| | ; that call's result. |
| 296 | define i32 @PR28802() { |
| 297 | entry: |
| 298 | %call = call i32 @PR28802.callee() |
| 299 | ret i32 %call |
| 300 | } |
| 301 | |
| 302 | ; CHECK-LABEL: define i32 @PR28802( |
| 303 | ; CHECK: %[[call:.*]] = call i32 @PR28802.external(i32 0) |
| 304 | ; CHECK: ret i32 %[[call]] |
| 305 | |
| | ; Returns the bitwise or of %p2 with a phi that is %p2 when %c is true (edge |
| | ; straight from entry) and 0 when %c is false (edge through cond.true). |
| 306 | define internal i32 @PR28848.callee(i32 %p2, i1 %c) { |
| 307 | entry: |
| 308 | br i1 %c, label %cond.end, label %cond.true |
| 309 | |
| 310 | cond.true: |
| 311 | br label %cond.end |
| 312 | |
| 313 | cond.end: |
| 314 | %cond = phi i32 [ 0, %cond.true ], [ %p2, %entry ] |
| 315 | %or = or i32 %cond, %p2 |
| 316 | ret i32 %or |
| 317 | } |
| 318 | |
| | ; Caller for the PR28848 case. With %p2 = 0 and %c = false the callee goes |
| | ; through cond.true, the phi is 0, and the or of 0 with 0 is 0. |
| 319 | define i32 @PR28848() { |
| 320 | entry: |
| 321 | %call = call i32 @PR28848.callee(i32 0, i1 false) |
| 322 | ret i32 %call |
| 323 | } |
| 324 | ; CHECK-LABEL: define i32 @PR28848( |
| 325 | ; CHECK: ret i32 0 |
| 326 | |
| | ; Adds %param1 to a single-entry phi of %param2. The sum is never used and |
| | ; the function returns void. |
| 327 | define internal void @callee7(i16 %param1, i16 %param2) { |
| 328 | entry: |
| 329 | br label %bb |
| 330 | |
| 331 | bb: |
| 332 | %phi = phi i16 [ %param2, %entry ] |
| 333 | %add = add i16 %phi, %param1 |
| 334 | ret void |
| 335 | } |
| 336 | |
| 337 | declare i16 @caller7.external(i16 returned) |
| 338 | |
| | ; Feeds the result of @caller7.external (declared with a "returned" |
| | ; parameter) into @callee7 as %param2. The directives after this function |
| | ; expect only the external call followed by the return to remain. |
| 339 | define void @caller7() { |
| 340 | bb1: |
| 341 | %call = call i16 @caller7.external(i16 1) |
| 342 | call void @callee7(i16 0, i16 %call) |
| 343 | ret void |
| 344 | } |
| 345 | ; CHECK-LABEL: define void @caller7( |
| 346 | ; CHECK: %call = call i16 @caller7.external(i16 1) |
| 347 | ; CHECK-NEXT: ret void |
Craig Topper | 6cda33ba | 2019-06-06 19:02:18 +0000 | [diff] [blame] | 348 | |
| 349 | define float @caller8(float %y) { |
| 350 | ; Check that we can constant-prop through fneg instructions |
| 351 | ; |
| | ; callee8 negates -42.0 to 42.0 and tests it with "ugt 42.0", which is |
| | ; false, so the long fadd chain on %y in bb.true is dead. |
| 352 | ; CHECK-LABEL: @caller8( |
| 353 | ; CHECK-NOT: call |
| 354 | ; CHECK: ret |
| 355 | %x = call float @callee8(float -42.0, float %y) |
| 356 | ret float %x |
| 357 | } |
| 358 | |
| | ; Negates %x and tests the result with "fcmp ugt 42.0". When the test holds, |
| | ; returns %y + 8.0 (eight chained fadds of 1.0); otherwise returns the |
| | ; original %x, not the negated value. |
| 359 | define float @callee8(float %x, float %y) { |
| 360 | %neg = fneg float %x |
| 361 | %icmp = fcmp ugt float %neg, 42.0 |
| 362 | br i1 %icmp, label %bb.true, label %bb.false |
| 363 | |
| 364 | bb.true: |
| 365 | ; This block mustn't be counted in the inline cost. |
| 366 | %y1 = fadd float %y, 1.0 |
| 367 | %y2 = fadd float %y1, 1.0 |
| 368 | %y3 = fadd float %y2, 1.0 |
| 369 | %y4 = fadd float %y3, 1.0 |
| 370 | %y5 = fadd float %y4, 1.0 |
| 371 | %y6 = fadd float %y5, 1.0 |
| 372 | %y7 = fadd float %y6, 1.0 |
| 373 | %y8 = fadd float %y7, 1.0 |
| 374 | ret float %y8 |
| 375 | |
| 376 | bb.false: |
| 377 | ret float %x |
| 378 | } |