Silviu Baranga | 6f444df | 2016-04-08 14:29:09 +0000 | [diff] [blame] | 1 | ; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s |
| 2 | |
| 3 | target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |
| 4 | |
| 5 | @A = weak global [1000 x i32] zeroinitializer, align 32 |
| 6 | |
| 7 | ; The resulting predicate is i16 {0,+,1} <nssw>, meaning |
| 8 | ; that the resulting backedge expression will be valid for: |
| 9 | ; (1 + (-1 smax %M)) <= MAX_INT16 |
| 10 | ; |
| 11 | ; At the limit condition for M (MAX_INT16 - 1) we have in the |
| 12 | ; last iteration: |
| 13 | ; i0 <- MAX_INT16 |
| 14 | ; i0.ext <- MAX_INT16 |
| 15 | ; |
| 16 | ; and therefore no wrapping happened for i0 or i0.ext |
| 17 | ; throughout the execution of the loop. The resulting predicated |
| 18 | ; backedge taken count is correct. |
| 19 | |
| 20 | ; CHECK: Classifying expressions for: @test1 |
| 21 | ; CHECK: %i.0.ext = sext i16 %i.0 to i32 |
| 22 | ; CHECK-NEXT: --> (sext i16 {0,+,1}<%bb3> to i32) |
| 23 | ; CHECK: Loop %bb3: Unpredictable backedge-taken count. |
| 24 | ; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. |
| 25 | ; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (-1 smax %M)) |
| 26 | ; CHECK-NEXT: Predicates: |
| 27 | ; CHECK-NEXT: {0,+,1}<%bb3> Added Flags: <nssw> |
| 28 | define void @test1(i32 %N, i32 %M) { |
| 29 | entry: |
| 30 | br label %bb3 |
| 31 | |
| 32 | bb: ; preds = %bb3 -- loop body: store 123 to @A[%i.0], then step the i16 counter by +1 |
| 33 | %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0 ; <i32*> [#uses=1] |
| 34 | store i32 123, i32* %tmp |
| 35 | %tmp2 = add i16 %i.0, 1 ; <i16> [#uses=1] plain add (no nsw/nuw flags), so the i16 counter may wrap |
| 36 | br label %bb3 |
| 37 | |
| 38 | bb3: ; preds = %bb, %entry -- loop header: holds the counter phi and the exit test |
| 39 | %i.0 = phi i16 [ 0, %entry ], [ %tmp2, %bb ] ; <i16> [#uses=3] i16 counter, starts at 0 |
| 40 | %i.0.ext = sext i16 %i.0 to i32 |
| 41 | %tmp3 = icmp sle i32 %i.0.ext, %M ; <i1> [#uses=1] stay in the loop while sext(%i.0) <= %M (signed) |
| 42 | br i1 %tmp3, label %bb, label %bb5 |
| 43 | |
| 44 | bb5: ; preds = %bb3 -- loop exit |
| 45 | br label %return |
| 46 | |
| 47 | return: ; preds = %bb5 |
| 48 | ret void |
| 49 | } |
| 50 | |
| 51 | ; The predicated backedge taken count is: |
| 52 | ; (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) |
| 53 | ; smax (-1 + (-1 * %M))) |
| 54 | ; ) |
| 55 | |
| 56 | ; * -1 + (-1 * %M) <= (-2 + (-1 * (sext i16 %Start to i32))) |
| 57 | ; The predicated backedge taken count is 0. |
| 58 | ; From the IR, this is correct since we will bail out at the |
| 59 | ; first iteration. |
| 60 | |
| 61 | |
| 62 | ; * -1 + (-1 * %M) > (-2 + (-1 * (sext i16 %Start to i32))) |
| 63 | ; or: %M < 1 + (sext i16 %Start to i32) |
| 64 | ; |
| 65 | ; The predicated backedge taken count is 1 + (sext i16 %Start to i32) - %M |
| 66 | ; |
| 67 | ; If %M >= MIN_INT + 1, this predicated backedge taken count would be correct (even |
| 68 | ; without predicates). However, for %M < MIN_INT this would be an infinite loop. |
| 69 | ; In these cases, the {%Start,+,-1} <nssw> predicate would be false, as the |
| 70 | ; final value of the {%Start,+,-1} expression (%M - 1) would not be |
| 71 | ; representable as an i16. |
| 72 | |
| 73 | ; There is also a limit case here where the value of %M is MIN_INT. In this case |
| 74 | ; we still have an infinite loop, since icmp sge %x, MIN_INT will always return |
| 75 | ; true. |
| 76 | |
| 77 | ; CHECK: Classifying expressions for: @test2 |
| 78 | |
| 79 | ; CHECK: %i.0.ext = sext i16 %i.0 to i32 |
| 80 | ; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32) |
| 81 | ; CHECK: Loop %bb3: Unpredictable backedge-taken count. |
| 82 | ; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. |
Tim Shen | a064622 | 2018-07-13 23:58:46 +0000 | [diff] [blame] | 83 | ; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))<nsw>) smax (-1 + (-1 * %M)))) |
Silviu Baranga | 6f444df | 2016-04-08 14:29:09 +0000 | [diff] [blame] | 84 | ; CHECK-NEXT: Predicates: |
| 85 | ; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: <nssw> |
| 86 | |
| 87 | define void @test2(i32 %N, i32 %M, i16 %Start) { |
| 88 | entry: |
| 89 | br label %bb3 |
| 90 | |
| 91 | bb: ; preds = %bb3 -- loop body: store 123 to @A[%i.0], then step the i16 counter by -1 |
| 92 | %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0 ; <i32*> [#uses=1] |
| 93 | store i32 123, i32* %tmp |
| 94 | %tmp2 = sub i16 %i.0, 1 ; <i16> [#uses=1] plain sub (no nsw/nuw flags), so the i16 counter may wrap |
| 95 | br label %bb3 |
| 96 | |
| 97 | bb3: ; preds = %bb, %entry -- loop header: holds the counter phi and the exit test |
| 98 | %i.0 = phi i16 [ %Start, %entry ], [ %tmp2, %bb ] ; <i16> [#uses=3] i16 counter, starts at %Start |
| 99 | %i.0.ext = sext i16 %i.0 to i32 |
| 100 | %tmp3 = icmp sge i32 %i.0.ext, %M ; <i1> [#uses=1] stay in the loop while sext(%i.0) >= %M (signed) |
| 101 | br i1 %tmp3, label %bb, label %bb5 |
| 102 | |
| 103 | bb5: ; preds = %bb3 -- loop exit |
| 104 | br label %return |
| 105 | |
| 106 | return: ; preds = %bb5 |
| 107 | ret void |
| 108 | } |
| 109 | |