[PM] Teach LoopUnroll to update the LPM infrastructure as it unrolls
loops.
We do this by reconstructing the newly added loops after the unroll
completes to avoid threading pass manager details through all the mess
of the unrolling infrastructure.
I've enabled some extra assertions in the LPM to try and catch issues
here and enabled a bunch of unroller tests to try and make sure this is
sane.
Currently, I'm manually running loop-simplify when needed. That should
go away once it is folded into the LPM infrastructure.
Differential Revision: https://reviews.llvm.org/D28848
llvm-svn: 293011
diff --git a/llvm/test/Transforms/LoopUnroll/basic.ll b/llvm/test/Transforms/LoopUnroll/basic.ll
index 2bfd3e6..e965f2a 100644
--- a/llvm/test/Transforms/LoopUnroll/basic.ll
+++ b/llvm/test/Transforms/LoopUnroll/basic.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -loop-unroll -S | FileCheck %s
+; RUN: opt < %s -passes='require<opt-remark-emit>,loop(unroll)' -S | FileCheck %s
; This should not unroll since the address of the loop header is taken.
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll
index e5694fb..9b6da8b 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -loop-unroll < %s | FileCheck %s
+; RUN: opt < %s -passes='require<opt-remark-emit>,loop(unroll)' -S | FileCheck %s
; LLVM should not try to fully unroll this loop.
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll
index 9f15291..64a01a7 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll
@@ -1,5 +1,6 @@
; Check that we don't crash on corner cases.
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-max-percent-threshold-boost=200 -o /dev/null
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-max-percent-threshold-boost=200 -o /dev/null
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
index 26124fb..b83913b 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@unknown_global = internal unnamed_addr global [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
index 8bddb1b..31b9197 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll
index 83c105c..5a3123d 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=12 -unroll-max-percent-threshold-boost=400 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=12 -unroll-max-percent-threshold-boost=400 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll
index 2309125..3e5fe1d 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; When examining gep-instructions we shouldn't consider them simplified if the
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
index a1fab3c..bfcd920 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
define i64 @propagate_loop_phis() {
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
index e70ff41..27cef36 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -loop-unroll < %s | FileCheck %s
+; RUN: opt -S -passes='require<opt-remark-emit>,loop(unroll)' < %s | FileCheck %s
; Unroll twice, with first loop exit kept
; CHECK-LABEL: @s32_max1
diff --git a/llvm/test/Transforms/LoopUnroll/revisit.ll b/llvm/test/Transforms/LoopUnroll/revisit.ll
new file mode 100644
index 0000000..18ae3658
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/revisit.ll
@@ -0,0 +1,150 @@
+; This test checks that nested loops are revisited in various scenarios when
+; unrolling. Note that if we ever start doing outer loop peeling a test case
+; for that should be added here that will look essentially like a hybrid of the
+; current two cases.
+;
+; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 \
+; RUN: -passes='require<opt-remark-emit>,loop(unroll)' \
+; RUN: | FileCheck %s
+;
+; Also run in a special mode that visits children.
+; RUN: opt < %s -disable-output -debug-pass-manager -unroll-revisit-child-loops 2>&1 \
+; RUN: -passes='require<opt-remark-emit>,loop(unroll)' \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-CHILDREN
+
+; Basic test is fully unrolled and we revisit the post-unroll new sibling
+; loops, including the ones that used to be child loops.
+define void @full_unroll(i1* %ptr) {
+; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on full_unroll
+; CHECK-NOT: LoopUnrollPass
+
+entry:
+ br label %l0
+
+l0:
+ %cond.0 = load volatile i1, i1* %ptr
+ br i1 %cond.0, label %l0.0.ph, label %exit
+
+l0.0.ph:
+ br label %l0.0
+
+l0.0:
+ %iv = phi i32 [ %iv.next, %l0.0.latch ], [ 0, %l0.0.ph ]
+ %iv.next = add i32 %iv, 1
+ br label %l0.0.0.ph
+
+l0.0.0.ph:
+ br label %l0.0.0
+
+l0.0.0:
+ %cond.0.0.0 = load volatile i1, i1* %ptr
+ br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph
+; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0<header>
+; CHECK-NOT: LoopUnrollPass
+
+l0.0.1.ph:
+ br label %l0.0.1
+
+l0.0.1:
+ %cond.0.0.1 = load volatile i1, i1* %ptr
+ br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch
+; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1<header>
+; CHECK-NOT: LoopUnrollPass
+
+l0.0.latch:
+ %cmp = icmp slt i32 %iv.next, 2
+ br i1 %cmp, label %l0.0, label %l0.latch
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0
+; CHECK-NOT: LoopUnrollPass
+;
+; Unrolling occurs, so we visit what were the inner loops twice over. First we
+; visit their clones, and then we visit the original loops re-parented.
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.1<header>
+; CHECK-NOT: LoopUnrollPass
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.1<header>
+; CHECK-NOT: LoopUnrollPass
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1<header>
+; CHECK-NOT: LoopUnrollPass
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0<header>
+; CHECK-NOT: LoopUnrollPass
+
+l0.latch:
+ br label %l0
+; CHECK: LoopUnrollPass on Loop at depth 1 containing: %l0<header>
+; CHECK-NOT: LoopUnrollPass
+
+exit:
+ ret void
+}
+
+; Now we test forced runtime partial unrolling with metadata. Here we end up
+; duplicating child loops without changing their structure and so they aren't by
+; default visited, but will be visited with a special parameter.
+define void @partial_unroll(i32 %count, i1* %ptr) {
+; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on partial_unroll
+; CHECK-NOT: LoopUnrollPass
+
+entry:
+ br label %l0
+
+l0:
+ %cond.0 = load volatile i1, i1* %ptr
+ br i1 %cond.0, label %l0.0.ph, label %exit
+
+l0.0.ph:
+ br label %l0.0
+
+l0.0:
+ %iv = phi i32 [ %iv.next, %l0.0.latch ], [ 0, %l0.0.ph ]
+ %iv.next = add i32 %iv, 1
+ br label %l0.0.0.ph
+
+l0.0.0.ph:
+ br label %l0.0.0
+
+l0.0.0:
+ %cond.0.0.0 = load volatile i1, i1* %ptr
+ br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph
+; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0<header>
+; CHECK-NOT: LoopUnrollPass
+
+l0.0.1.ph:
+ br label %l0.0.1
+
+l0.0.1:
+ %cond.0.0.1 = load volatile i1, i1* %ptr
+ br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch
+; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1<header>
+; CHECK-NOT: LoopUnrollPass
+
+l0.0.latch:
+ %cmp = icmp slt i32 %iv.next, %count
+ br i1 %cmp, label %l0.0, label %l0.latch, !llvm.loop !1
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0
+; CHECK-NOT: LoopUnrollPass
+;
+; Partial unrolling occurs which introduces new child loops but not new sibling
+; loops. We only visit the child loops in a special mode, not by default.
+; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0<header>
+; CHECK-CHILDREN-NOT: LoopUnrollPass
+; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1<header>
+; CHECK-CHILDREN-NOT: LoopUnrollPass
+; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0.1<header>
+; CHECK-CHILDREN-NOT: LoopUnrollPass
+; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1.1<header>
+; CHECK-CHILDREN-NOT: LoopUnrollPass
+;
+; When we revisit children, we also revisit the current loop.
+; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 2 containing: %l0.0<header>
+; CHECK-CHILDREN-NOT: LoopUnrollPass
+
+l0.latch:
+ br label %l0
+; CHECK: LoopUnrollPass on Loop at depth 1 containing: %l0<header>
+; CHECK-NOT: LoopUnrollPass
+
+exit:
+ ret void
+}
+!1 = !{!1, !2}
+!2 = !{!"llvm.loop.unroll.count", i32 2}
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
index b5299bb..0466131 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -1,6 +1,9 @@
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Tests for unrolling loops with run-time trip counts
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
index 5d7c648..d32c835 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -1,6 +1,9 @@
; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
; This tests that setting the unroll count works
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll
index 704ecca..7e7fb97 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll
@@ -1,6 +1,9 @@
; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
; Choose a smaller, power-of-two, unroll count if the loop is too large.
; This test makes sure we're not unrolling 'odd' counts
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop3.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop3.ll
index fd13ebfa..ef39a29 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop3.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop3.ll
@@ -1,5 +1,6 @@
; REQUIRES: asserts
; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
+; RUN: opt < %s -disable-output -stats -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
; Test that nested loops can be unrolled. We need to increase threshold to do it
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
index e8d5177..6340058 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
@@ -1,6 +1,9 @@
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
+
; Given that the trip-count of this loop is a 3-bit value, we cannot
; safely unroll it with a count of anything more than 8.
diff --git a/llvm/test/Transforms/LoopUnroll/unloop.ll b/llvm/test/Transforms/LoopUnroll/unloop.ll
index db7bad5..6af13a5 100644
--- a/llvm/test/Transforms/LoopUnroll/unloop.ll
+++ b/llvm/test/Transforms/LoopUnroll/unloop.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -S -loop-unroll -verify-loop-info | FileCheck %s
-; RUN: opt < %s -S -passes='function(require<scalar-evolution>,require<targetir>,require<opt-remark-emit>,loop(unroll),verify<loops>)' | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll),verify<loops>' | FileCheck %s
;
; Unit tests for LoopInfo::markAsRemoved.
diff --git a/llvm/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll b/llvm/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
index adbf47d..6748ebe 100644
--- a/llvm/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
+++ b/llvm/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
@@ -1,4 +1,5 @@
; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s
+; RUN: opt -S < %s -passes='require<opt-remark-emit>,loop(unroll),require<block-freq>' | FileCheck %s
; Crasher from PR20987.
; CHECK: define void @update_loop_info_in_subloops