[PM] Introduce a devirtualization iteration layer for the new PM.

This is an orthogonal and separated layer instead of being embedded
inside the pass manager. While it adds a small amount of complexity, it
is fairly minimal and the composability and control seems worth the
cost.

The logic for this ends up being nicely isolated and targeted. It should
be easy to experiment with different iteration strategies wrapped around
the CGSCC bottom-up walk using this kind of facility.

The mechanism used to track devirtualization is the simplest one I came
up with. I think it handles most of the cases the existing iteration
machinery handles, but I haven't done a *very* in depth analysis. It
does however match the basic intended semantics, and we can tweak or
tune its exact behavior incrementally as necessary. One thing that we
may want to revisit is freshly building the value handle set on each
iteration. While I don't think this will be a significant cost (it is
strictly fewer value handles but more churn of value handles than the old
call graph), it is conceivable that we'll want a somewhat more clever
tracking mechanism. My hope is to layer that on as a follow up patch
with data supporting any implementation complexity it adds.

This code also provides for a basic count heuristic: if the number of
indirect calls decreases and the number of direct calls increases for
a given function in the SCC, we assume devirtualization is responsible.
This matches the heuristics currently used in the legacy pass manager.

Differential Revision: https://reviews.llvm.org/D23114

llvm-svn: 290665
diff --git a/llvm/test/Other/cgscc-devirt-iteration.ll b/llvm/test/Other/cgscc-devirt-iteration.ll
new file mode 100644
index 0000000..df5ea29
--- /dev/null
+++ b/llvm/test/Other/cgscc-devirt-iteration.ll
@@ -0,0 +1,114 @@
+; The CGSCC pass manager includes an SCC iteration utility that tracks indirect
+; calls that are turned into direct calls (devirtualization) and re-visits the
+; SCC to expose those calls to the SCC-based IPO passes. We trigger
+; devirtualization here with GVN which forwards a store through a load and to
+; an indirect call.
+;
+; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn,instcombine))' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=BEFORE
+; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(devirt<1>(function-attrs,function(gvn,instcombine)))' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AFTER --check-prefix=AFTER1
+; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(devirt<2>(function-attrs,function(gvn,instcombine)))' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AFTER --check-prefix=AFTER2
+
+declare void @readnone() readnone
+; CHECK: Function Attrs: readnone
+; CHECK: declare void @readnone()
+
+declare void @unknown()
+; CHECK-NOT: Function Attrs
+; CHECK: declare void @unknown()
+
+; The @test1 function checks that when we refine an indirect call to a direct
+; call we revisit the SCC passes to reflect the more precise information. This
+; is the basic functionality.
+
+define void @test1() {
+; BEFORE-NOT: Function Attrs
+; AFTER: Function Attrs: readnone
+; CHECK: define void @test1()
+entry:
+  %fptr = alloca void ()*
+  store void ()* @readnone, void ()** %fptr
+  %f = load void ()*, void ()** %fptr
+  call void %f()
+  ret void
+}
+
+; The @test2_* functions check that when we need multiple (in this case 2)
+; repetitions to compute some state that is incrementally exposed with each
+; one, the limit on repetitions is enforced. So we make progress with
+; one repetition but not as much as with two.
+;
+; This is somewhat awkward to test because we have to contrive to have a state
+; repetition triggered and observed with very few passes. The technique here
+; is to have one indirect call that can only be resolved when the entire SCC is
+; deduced as readonly, and mark that indirect call at the call site as readonly
+; to make that possible. This forces us to first deduce readonly, then
+; devirtualize again, and then deduce readnone.
+
+declare void @readnone_with_arg(void ()**) readnone
+; CHECK: Function Attrs: readnone
+; CHECK: declare void @readnone_with_arg(void ()**)
+
+define void @test2_a(void ()** %ignore) {
+; BEFORE-NOT: Function Attrs
+; AFTER1: Function Attrs: readonly
+; AFTER2: Function Attrs: readnone
+; BEFORE: define void @test2_a(void ()** %ignore)
+; AFTER: define void @test2_a(void ()** readnone %ignore)
+entry:
+  %f1ptr = alloca void (void ()**)*
+  store void (void ()**)* @readnone_with_arg, void (void ()**)** %f1ptr
+  %f1 = load void (void ()**)*, void (void ()**)** %f1ptr
+  ; This indirect call is the first to be resolved, allowing us to deduce
+  ; readonly but not (yet) readnone.
+  call void %f1(void ()** %ignore)
+; CHECK: call void @readnone_with_arg(void ()** %ignore)
+
+  ; Bogus call to test2_b to make this a cycle.
+  call void @test2_b()
+
+  ret void
+}
+
+define void @test2_b() {
+; BEFORE-NOT: Function Attrs
+; AFTER1: Function Attrs: readonly
+; AFTER2: Function Attrs: readnone
+; CHECK: define void @test2_b()
+entry:
+  %f2ptr = alloca void ()*
+  store void ()* @readnone, void ()** %f2ptr
+  ; Call the other function here to prevent forwarding until the SCC has had
+  ; function attrs deduced.
+  call void @test2_a(void ()** %f2ptr)
+
+  %f2 = load void ()*, void ()** %f2ptr
+  ; This is the second indirect call to be resolved, and can only be resolved
+  ; after we deduce 'readonly' for the rest of the SCC. Once it is
+  ; devirtualized, we can deduce readnone for the SCC.
+  call void %f2() readonly
+; BEFORE: call void %f2()
+; AFTER: call void @readnone()
+
+  ret void
+}
+
+declare i8* @memcpy(i8*, i8*, i64)
+; CHECK-NOT: Function Attrs
+; CHECK: declare i8* @memcpy(i8*, i8*, i64)
+
+; The @test3 function checks that when we refine an indirect call to an
+; intrinsic we still revisit the SCC pass. This also covers cases where the
+; value handle itself doesn't persist due to the nature of how instcombine
+; creates the memcpy intrinsic call, and we rely on the count of indirect calls
+; decreasing and the count of direct calls increasing.
+define void @test3(i8* %src, i8* %dest, i64 %size) {
+; CHECK-NOT: Function Attrs
+; BEFORE: define void @test3(i8* %src, i8* %dest, i64 %size)
+; AFTER: define void @test3(i8* nocapture readonly %src, i8* nocapture %dest, i64 %size)
+  %fptr = alloca i8* (i8*, i8*, i64)*
+  store i8* (i8*, i8*, i64)* @memcpy, i8* (i8*, i8*, i64)** %fptr
+  %f = load i8* (i8*, i8*, i64)*, i8* (i8*, i8*, i64)** %fptr
+  call i8* %f(i8* %dest, i8* %src, i64 %size)
+; CHECK: call void @llvm.memcpy
+  ret void
+}
diff --git a/llvm/test/Other/cgscc-observe-devirt.ll b/llvm/test/Other/cgscc-observe-devirt.ll
index 2b1e7d8..3b35f0e 100644
--- a/llvm/test/Other/cgscc-observe-devirt.ll
+++ b/llvm/test/Other/cgscc-observe-devirt.ll
@@ -1,130 +1,103 @@
+; Make sure that even without some external devirtualization iteration tool,
+; the CGSCC pass manager correctly observes and re-visits SCCs that change
+; structure due to devirtualization. We trigger devirtualization here with GVN
+; which forwards a store through a load and to an indirect call.
+;
 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S < %s | FileCheck %s --check-prefix=BEFORE
 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn))' -S < %s | FileCheck %s --check-prefix=AFTER
 ;
 ; Also check that adding an extra CGSCC pass after the function update but
 ; without requiring the outer manager to iterate doesn't break any invariant.
-; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn),function-attrs)' -S < %s | FileCheck %s --check-prefix=AFTER2
+; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn),function-attrs)' -S < %s | FileCheck %s --check-prefix=AFTER
 
 declare void @readnone() readnone
 declare void @unknown()
 
-; The @test1_* functions check that when we refine an indirect call to a direct
-; call, even if it doesn't change the call graph structure, we revisit the SCC
-; passes to reflect the more precise information.
-; FIXME: Currently, this isn't implemented in the new pass manager and so we
-; only get this with AFTER2, not with AFTER.
-
-; BEFORE: define void @test1_a() {
-; AFTER: define void @test1_a() {
-; AFTER2: define void @test1_a() {
-define void @test1_a() {
-  %fptr = alloca void()*
-  store void()* @unknown, void()** %fptr
-  %f = load void()*, void()** %fptr
-  call void %f()
-  ret void
-}
-
-; BEFORE: define void @test1_b() {
-; AFTER: define void @test1_b() {
-; AFTER2: define void @test1_b() #0 {
-define void @test1_b() {
-  %fptr = alloca void()*
-  store void()* @readnone, void()** %fptr
-  %f = load void()*, void()** %fptr
-  call void %f()
-  ret void
-}
-
-; The @test2_* checks that if we refine an indirect call to a direct call and
+; The @test1_* functions check that if we refine an indirect call to a direct call and
 ; in the process change the very structure of the call graph we also revisit
 ; that component of the graph and do so in an up-to-date fashion.
 
-; BEFORE: define void @test2_a1() {
-; AFTER: define void @test2_a1() {
-; AFTER2: define void @test2_a1() {
-define void @test2_a1() {
+; BEFORE: define void @test1_a1() {
+; AFTER: define void @test1_a1() {
+define void @test1_a1() {
   %fptr = alloca void()*
-  store void()* @test2_b2, void()** %fptr
-  store void()* @test2_b1, void()** %fptr
+  store void()* @test1_b2, void()** %fptr
+  store void()* @test1_b1, void()** %fptr
   %f = load void()*, void()** %fptr
   call void %f()
   ret void
 }
 
-; BEFORE: define void @test2_b1() {
-; AFTER: define void @test2_b1() {
-; AFTER2: define void @test2_b1() {
-define void @test2_b1() {
+; BEFORE: define void @test1_b1() {
+; AFTER: define void @test1_b1() {
+define void @test1_b1() {
   call void @unknown()
-  call void @test2_a1()
+  call void @test1_a1()
   ret void
 }
 
-; BEFORE: define void @test2_a2() {
-; AFTER: define void @test2_a2() #0 {
-; AFTER2: define void @test2_a2() #0 {
-define void @test2_a2() {
+; BEFORE: define void @test1_a2() {
+; AFTER: define void @test1_a2() #0 {
+define void @test1_a2() {
   %fptr = alloca void()*
-  store void()* @test2_b1, void()** %fptr
-  store void()* @test2_b2, void()** %fptr
+  store void()* @test1_b1, void()** %fptr
+  store void()* @test1_b2, void()** %fptr
   %f = load void()*, void()** %fptr
   call void %f()
   ret void
 }
 
-; BEFORE: define void @test2_b2() {
-; AFTER: define void @test2_b2() #0 {
-; AFTER2: define void @test2_b2() #0 {
-define void @test2_b2() {
+; BEFORE: define void @test1_b2() {
+; AFTER: define void @test1_b2() #0 {
+define void @test1_b2() {
   call void @readnone()
-  call void @test2_a2()
+  call void @test1_a2()
   ret void
 }
 
 
-; The @test3_* set of functions exercise a case where running function passes
+; The @test2_* set of functions exercise a case where running function passes
 ; introduces a new post-order relationship that was not present originally and
 ; makes sure we walk across the SCCs in that order.
 
-; CHECK: define void @test3_a() {
-define void @test3_a() {
-  call void @test3_b1()
-  call void @test3_b2()
-  call void @test3_b3()
+; CHECK: define void @test2_a() {
+define void @test2_a() {
+  call void @test2_b1()
+  call void @test2_b2()
+  call void @test2_b3()
   call void @unknown()
   ret void
 }
 
-; CHECK: define void @test3_b1() #0 {
-define void @test3_b1() {
+; CHECK: define void @test2_b1() #0 {
+define void @test2_b1() {
   %fptr = alloca void()*
-  store void()* @test3_a, void()** %fptr
+  store void()* @test2_a, void()** %fptr
   store void()* @readnone, void()** %fptr
   %f = load void()*, void()** %fptr
   call void %f()
   ret void
 }
 
-; CHECK: define void @test3_b2() #0 {
-define void @test3_b2() {
+; CHECK: define void @test2_b2() #0 {
+define void @test2_b2() {
   %fptr = alloca void()*
-  store void()* @test3_a, void()** %fptr
-  store void()* @test3_b2, void()** %fptr
-  store void()* @test3_b3, void()** %fptr
-  store void()* @test3_b1, void()** %fptr
+  store void()* @test2_a, void()** %fptr
+  store void()* @test2_b2, void()** %fptr
+  store void()* @test2_b3, void()** %fptr
+  store void()* @test2_b1, void()** %fptr
   %f = load void()*, void()** %fptr
   call void %f()
   ret void
 }
 
-; CHECK: define void @test3_b3() #0 {
-define void @test3_b3() {
+; CHECK: define void @test2_b3() #0 {
+define void @test2_b3() {
   %fptr = alloca void()*
-  store void()* @test3_a, void()** %fptr
-  store void()* @test3_b2, void()** %fptr
-  store void()* @test3_b3, void()** %fptr
-  store void()* @test3_b1, void()** %fptr
+  store void()* @test2_a, void()** %fptr
+  store void()* @test2_b2, void()** %fptr
+  store void()* @test2_b3, void()** %fptr
+  store void()* @test2_b1, void()** %fptr
   %f = load void()*, void()** %fptr
   call void %f()
   ret void
diff --git a/llvm/test/Transforms/Inline/devirtualize-2.ll b/llvm/test/Transforms/Inline/devirtualize-2.ll
index cca4e75..e2c1e7c 100644
--- a/llvm/test/Transforms/Inline/devirtualize-2.ll
+++ b/llvm/test/Transforms/Inline/devirtualize-2.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -inline -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(devirt<4>(inline))' -S | FileCheck %s
 ; PR4834
 
 define i32 @test1() {