Fedor Sergeev | 2e3e224 | 2018-11-16 21:16:43 +0000 | [diff] [blame] | 1 | ; |
| 2 | ; Here all the branches we unswitch are exiting from the inner loop. |
| 3 | ; That means we should not be getting exponential behavior on inner-loop |
| 4 | ; unswitch. In fact there should be just a single version of inner-loop, |
| 5 | ; with possibly some outer loop copies. |
| 6 | ; |
| 7 | ; There should be just a single copy of each loop when the strictest multiplier |
| 8 | ; candidates formula (unscaled candidates == 0) is enforced: |
| 9 | |
| 10 | ; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \ |
| 11 | ; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \ |
| 12 | ; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1 |
| 13 | ; |
| 14 | ; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \ |
| 15 | ; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \ |
| 16 | ; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1 |
| 17 | ; |
| 18 | ; |
| 19 | ; When we relax the candidates part of the multiplier formula |
| 20 | ; (unscaled candidates == 3) we start getting some unswitches in outer loops, |
| 21 | ; which leads to the siblings multiplier kicking in. |
| 22 | ; |
| 23 | ; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \ |
| 24 | ; RUN: -unswitch-num-initial-unscaled-candidates=3 -unswitch-siblings-toplevel-div=1 \ |
| 25 | ; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \ |
Michal Gorny | ff13c24 | 2018-12-02 16:49:33 +0000 | [diff] [blame] | 26 | ; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE3-DIV1 |
Fedor Sergeev | 2e3e224 | 2018-11-16 21:16:43 +0000 | [diff] [blame] | 27 | ; |
| 28 | ; NB: sort -b is essential here and below, otherwise blanks might lead to different |
| 29 | ; order depending on locale. |
| 30 | ; |
| 31 | ; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \ |
| 32 | ; RUN: -unswitch-num-initial-unscaled-candidates=3 -unswitch-siblings-toplevel-div=2 \ |
| 33 | ; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \ |
Michal Gorny | ff13c24 | 2018-12-02 16:49:33 +0000 | [diff] [blame] | 34 | ; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE3-DIV2 |
Fedor Sergeev | 2e3e224 | 2018-11-16 21:16:43 +0000 | [diff] [blame] | 35 | ; |
| 36 | ; With the cost multiplier disabled we get the maximal possible number of unswitches. |
| 37 | ; |
| 38 | ; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=false \ |
| 39 | ; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \ |
Michal Gorny | ff13c24 | 2018-12-02 16:49:33 +0000 | [diff] [blame] | 40 | ; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-MAX |
Fedor Sergeev | 2e3e224 | 2018-11-16 21:16:43 +0000 | [diff] [blame] | 41 | ; |
| 42 | ; Single loop nest, not unswitched |
| 43 | ; LOOP1: Loop at depth 1 containing: |
| 44 | ; LOOP1-NOT: Loop at depth 1 containing: |
| 45 | ; LOOP1: Loop at depth 2 containing: |
| 46 | ; LOOP1-NOT: Loop at depth 2 containing: |
| 47 | ; LOOP1: Loop at depth 3 containing: |
| 48 | ; LOOP1-NOT: Loop at depth 3 containing: |
| 49 | ; |
| 50 | ; Half unswitched loop nests, with unscaled3 and div1 it gets fewer depth1 loops unswitched |
| 51 | ; since they have a higher cost. |
| 52 | ; LOOP-UNSCALE3-DIV1-COUNT-4: Loop at depth 1 containing: |
| 53 | ; LOOP-UNSCALE3-DIV1-NOT: Loop at depth 1 containing: |
| 54 | ; LOOP-UNSCALE3-DIV1-COUNT-1: Loop at depth 2 containing: |
| 55 | ; LOOP-UNSCALE3-DIV1-NOT: Loop at depth 2 containing: |
| 56 | ; LOOP-UNSCALE3-DIV1-COUNT-1: Loop at depth 3 containing: |
| 57 | ; LOOP-UNSCALE3-DIV1-NOT: Loop at depth 3 containing: |
| 58 | ; |
| 59 | ; Half unswitched loop nests, with unscaled3 and div2 it gets more depth1 loops unswitched |
| 60 | ; as div2 kicks in. |
| 61 | ; LOOP-UNSCALE3-DIV2-COUNT-6: Loop at depth 1 containing: |
| 62 | ; LOOP-UNSCALE3-DIV2-NOT: Loop at depth 1 containing: |
| 63 | ; LOOP-UNSCALE3-DIV2-COUNT-1: Loop at depth 2 containing: |
| 64 | ; LOOP-UNSCALE3-DIV2-NOT: Loop at depth 2 containing: |
| 65 | ; LOOP-UNSCALE3-DIV2-COUNT-1: Loop at depth 3 containing: |
| 66 | ; LOOP-UNSCALE3-DIV2-NOT: Loop at depth 3 containing: |
| 67 | ; |
| 68 | ; Maximally unswitched (copy of the outer loop per each condition) |
| 69 | ; LOOP-MAX-COUNT-6: Loop at depth 1 containing: |
| 70 | ; LOOP-MAX-NOT: Loop at depth 1 containing: |
| 71 | ; LOOP-MAX-COUNT-1: Loop at depth 2 containing: |
| 72 | ; LOOP-MAX-NOT: Loop at depth 2 containing: |
| 73 | ; LOOP-MAX-COUNT-1: Loop at depth 3 containing: |
| 74 | ; LOOP-MAX-NOT: Loop at depth 3 containing: |
| 75 | |
; External function with no body in this file; it takes no arguments and
; returns nothing. It is called from the header of each of the three loops in
; @loop_nested3_conds5.
| 76 | declare void @bar() |
| 77 | |
; Test input: a three-deep loop nest (outer -> middle -> loop).
;
; The innermost loop contains five conditional branches, one on each of the
; function arguments %c1..%c5 (never redefined, hence invariant in every loop).
; The false edge of each of those branches goes straight to %outer_latch, so
; taking it leaves both the inner and the middle loop.
;
;   %addr    - base pointer; elements 0, 1 and 2 receive a volatile store of 0
;              in loop_latch, middle_latch and outer_latch respectively.
;   %c1..%c5 - conditions of the five branches inside the inner loop.
;
; NOTE(review): each loop header calls the external @bar under a
; ";; skip nontrivial unswitch" remark; presumably the call is there to
; influence which form of unswitching applies to that loop -- confirm against
; the pass before relying on this.
| 78 | define void @loop_nested3_conds5(i32* %addr, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) { |
| 79 | entry: |
; Addresses of elements 0, 1 and 2 of %addr, computed once before the nest.
| 80 | %addr1 = getelementptr i32, i32* %addr, i64 0 |
| 81 | %addr2 = getelementptr i32, i32* %addr, i64 1 |
| 82 | %addr3 = getelementptr i32, i32* %addr, i64 2 |
| 83 | br label %outer |
; Outer loop header: %iv1 starts at 0 and advances by 1 per iteration.
| 84 | outer: |
| 85 | %iv1 = phi i32 [0, %entry], [%iv1.next, %outer_latch] |
| 86 | %iv1.next = add i32 %iv1, 1 |
| 87 | ;; skip nontrivial unswitch |
| 88 | call void @bar() |
| 89 | br label %middle |
; Middle loop header: %iv2 restarts at 0 each time it is entered from %outer.
| 90 | middle: |
| 91 | %iv2 = phi i32 [0, %outer], [%iv2.next, %middle_latch] |
| 92 | %iv2.next = add i32 %iv2, 1 |
| 93 | ;; skip nontrivial unswitch |
| 94 | call void @bar() |
| 95 | br label %loop |
; Inner loop header: %iv3 restarts at 0 each time it is entered from %middle.
| 96 | loop: |
| 97 | %iv3 = phi i32 [0, %middle], [%iv3.next, %loop_latch] |
| 98 | %iv3.next = add i32 %iv3, 1 |
| 99 | ;; skip nontrivial unswitch |
| 100 | call void @bar() |
; First of the five branches on a function argument; false goes to %outer_latch.
| 101 | br i1 %c1, label %loop_next1_left, label %outer_latch |
| 102 | loop_next1_left: |
| 103 | br label %loop_next1 |
; No branch in this function targets loop_next1_right, loop_next2_right,
; loop_next3_right, loop_next4_right or loop_latch_right; these five blocks
; have no predecessors.
| 104 | loop_next1_right: |
| 105 | br label %loop_next1 |
| 106 | |
| 107 | loop_next1: |
| 108 | br i1 %c2, label %loop_next2_left, label %outer_latch |
| 109 | loop_next2_left: |
| 110 | br label %loop_next2 |
| 111 | loop_next2_right: |
| 112 | br label %loop_next2 |
| 113 | |
| 114 | loop_next2: |
| 115 | br i1 %c3, label %loop_next3_left, label %outer_latch |
| 116 | loop_next3_left: |
| 117 | br label %loop_next3 |
| 118 | loop_next3_right: |
| 119 | br label %loop_next3 |
| 120 | |
| 121 | loop_next3: |
| 122 | br i1 %c4, label %loop_next4_left, label %outer_latch |
| 123 | loop_next4_left: |
| 124 | br label %loop_next4 |
| 125 | loop_next4_right: |
| 126 | br label %loop_next4 |
| 127 | |
| 128 | loop_next4: |
| 129 | br i1 %c5, label %loop_latch_left, label %outer_latch |
| 130 | loop_latch_left: |
| 131 | br label %loop_latch |
| 132 | loop_latch_right: |
| 133 | br label %loop_latch |
| 134 | |
; Inner latch: volatile store to element 0, then repeat the inner loop while
; the pre-increment value %iv3 is < 50 (signed compare).
| 135 | loop_latch: |
| 136 | store volatile i32 0, i32* %addr1 |
| 137 | %test_loop = icmp slt i32 %iv3, 50 |
| 138 | br i1 %test_loop, label %loop, label %middle_latch |
; Middle latch: volatile store to element 1, then repeat the middle loop while
; %iv2 is < 50 (signed compare).
| 139 | middle_latch: |
| 140 | store volatile i32 0, i32* %addr2 |
| 141 | %test_middle = icmp slt i32 %iv2, 50 |
| 142 | br i1 %test_middle, label %middle, label %outer_latch |
; Outer latch: reached from middle_latch and from the false edge of each of the
; five argument branches. Volatile store to element 2, then repeat the outer
; loop while %iv1 is < 50 (signed compare).
| 143 | outer_latch: |
| 144 | store volatile i32 0, i32* %addr3 |
| 145 | %test_outer = icmp slt i32 %iv1, 50 |
| 146 | br i1 %test_outer, label %outer, label %exit |
| 147 | exit: |
| 148 | ret void |
| 149 | } |