[HotColdSplitting] Outline more than once per function

Algorithm: Identify maximal cold regions and put them in a worklist. If
a candidate region overlaps with another, discard it. While the worklist
is non-empty, remove a single-entry sub-region from the worklist and
attempt to outline it. By the non-overlap property, this should not
invalidate parts of the domtree pertaining to other outlining regions.

Testing: LNT results on X86 are clean. With test-suite + externals, LLVM
outlines 134KB pre-patch and 352KB post-patch (a ~2.6x increase). The
file 483.xalancbmk/src/Constants.cpp stands out as an extreme case where
LLVM outlines over 100 times in some functions (mostly EH paths). There
was no significant performance difference between pre- and post-patch.

Differential Revision: https://reviews.llvm.org/D53887

llvm-svn: 348639
diff --git a/llvm/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll b/llvm/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll
new file mode 100644
index 0000000..f8cff9e
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@exit_block_with_same_incoming_vals
+; CHECK: call {{.*}}@exit_block_with_same_incoming_vals.cold.1(
+; CHECK-NOT: br i1 undef
+; CHECK: phi i32 [ 0, %entry ], [ %p.ce.reload, %codeRepl ]
+define void @exit_block_with_same_incoming_vals(i32 %cond) { ; variant where both cold edges into %if.end carry the same phi value
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end, label %coldbb ; %cond == 0 goes straight to %if.end; any other value enters %coldbb
+
+coldbb:
+  call void @sink() ; @sink is declared with the cold attribute at the end of this file
+  call void @sideeffect()
+  call void @sideeffect()
+  br i1 undef, label %if.end, label %coldbb2 ; either exit to %if.end or continue into the self-looping block
+
+coldbb2:
+  %p2 = phi i32 [0, %coldbb], [1, %coldbb2] ; second incoming edge comes from %coldbb2 itself; %p2 has no users
+  br i1 undef, label %if.end, label %coldbb2 ; self-edge: %coldbb2 is one of its own successors
+
+if.end:
+  %p = phi i32 [0, %entry], [1, %coldbb], [1, %coldbb2] ; same value (1) on the edges from %coldbb and %coldbb2
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}}@exit_block_with_distinct_incoming_vals
+; CHECK: call {{.*}}@exit_block_with_distinct_incoming_vals.cold.1(
+; CHECK-NOT: br i1 undef
+; CHECK: phi i32 [ 0, %entry ], [ %p.ce.reload, %codeRepl ]
+define void @exit_block_with_distinct_incoming_vals(i32 %cond) { ; variant where the two cold edges into %if.end carry different phi values
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end, label %coldbb ; %cond == 0 goes straight to %if.end; any other value enters %coldbb
+
+coldbb:
+  call void @sink() ; @sink is declared with the cold attribute at the end of this file
+  call void @sideeffect()
+  call void @sideeffect()
+  br i1 undef, label %if.end, label %coldbb2 ; either exit to %if.end or continue into the self-looping block
+
+coldbb2:
+  %p2 = phi i32 [0, %coldbb], [1, %coldbb2] ; second incoming edge comes from %coldbb2 itself; %p2 has no users
+  br i1 undef, label %if.end, label %coldbb2 ; self-edge: %coldbb2 is one of its own successors
+
+if.end:
+  %p = phi i32 [0, %entry], [1, %coldbb], [2, %coldbb2] ; distinct values: 1 from %coldbb, 2 from %coldbb2
+  ret void
+}
+
+declare void @sink() cold ; external callee carrying the cold attribute; presumably what marks %coldbb as an outlining candidate (confirm against the pass)
+
+declare void @sideeffect() ; external callee with no attributes; called twice in each %coldbb