WholeProgramDevirt: introduce.

This pass implements whole program optimization of virtual calls in cases
where we know (via bitset information) that the list of callees is fixed. This
includes the following:

- Single implementation devirtualization: if a virtual call has a single
  possible callee, replace all calls with a direct call to that callee.

- Virtual constant propagation: if the virtual function's return type is an
  integer <=64 bits and all possible callees are readnone, for each class and
  each list of constant arguments: evaluate the function, store the return
  value alongside the virtual table, and rewrite each virtual call as a load
  from the virtual table.

- Uniform return value optimization: if the conditions for virtual constant
  propagation hold and each function returns the same constant value, replace
  each virtual call with that constant.

- Unique return value optimization for i1 return values: if the conditions
  for virtual constant propagation hold and a single vtable's function
  returns 0, or a single vtable's function returns 1, replace each virtual
  call with a comparison of the vptr against that vtable's address.

Differential Revision: http://reviews.llvm.org/D16795

llvm-svn: 260312
diff --git a/llvm/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll
new file mode 100644
index 0000000..9402076
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll
@@ -0,0 +1,64 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt = constant [2 x i8*] [i8* zeroinitializer, i8* bitcast (void (i8*)* @vf to i8*)] ; constant, so only the bad slot reads in the callers keep the calls indirect
+
+define void @vf(i8* %this) { ; the function stored in slot 1 of @vt
+  ret void
+}
+
+; CHECK: define void @unaligned
+define void @unaligned(i8* %obj) { ; loads a function pointer from byte offset 1 of the vtable
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr i8, i8* %vtablei8, i32 1 ; offset 1 is not a multiple of the 8-byte pointer size (p:64:64)
+  %fptrptr_casted = bitcast i8* %fptrptr to i8**
+  %fptr = load i8*, i8** %fptrptr_casted
+  %fptr_casted = bitcast i8* %fptr to void (i8*)*
+  ; CHECK: call void %
+  call void %fptr_casted(i8* %obj) ; the pattern above expects this to remain an indirect call
+  ret void
+}
+
+; CHECK: define void @outofbounds
+define void @outofbounds(i8* %obj) { ; loads a function pointer from byte offset 16 of the vtable
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr i8, i8* %vtablei8, i32 16 ; @vt has two 8-byte slots, so offset 16 is one past its end
+  %fptrptr_casted = bitcast i8* %fptrptr to i8**
+  %fptr = load i8*, i8** %fptrptr_casted
+  %fptr_casted = bitcast i8* %fptr to void (i8*)*
+  ; CHECK: call void %
+  call void %fptr_casted(i8* %obj) ; the pattern above expects this to remain an indirect call
+  ret void
+}
+
+; CHECK: define void @nonfunction
+define void @nonfunction(i8* %obj) { ; loads a function pointer from slot 0 of the vtable
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr i8, i8* %vtablei8, i32 0 ; slot 0 of @vt holds a zeroinitializer pointer, not a function
+  %fptrptr_casted = bitcast i8* %fptrptr to i8**
+  %fptr = load i8*, i8** %fptrptr_casted
+  %fptr_casted = bitcast i8* %fptr to void (i8*)*
+  ; CHECK: call void %
+  call void %fptr_casted(i8* %obj) ; the pattern above expects this to remain an indirect call
+  ret void
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [2 x i8*]* @vt, i32 0}
+!llvm.bitsets = !{!0}
diff --git a/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll b/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll
new file mode 100644
index 0000000..ea72d16
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll
@@ -0,0 +1,79 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: private constant { [8 x i8], [1 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\01", [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf1 to i8*)], [0 x i8] zeroinitializer }
+; CHECK: private constant { [8 x i8], [1 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\02", [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf2 to i8*)], [0 x i8] zeroinitializer }
+; CHECK: private constant { [8 x i8], [1 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\01", [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf4 to i8*)], [0 x i8] zeroinitializer }
+; CHECK: private constant { [8 x i8], [1 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\02", [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf8 to i8*)], [0 x i8] zeroinitializer }
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf1 to i8*)] ; four single-slot vtables, one per implementation
+@vt2 = constant [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf2 to i8*)]
+@vt4 = constant [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf4 to i8*)]
+@vt8 = constant [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf8 to i8*)]
+
+define i1 @vf1(i8* %this, i32 %arg) readnone { ; true iff (%arg & 1) != 0; %this is unused
+  %and = and i32 %arg, 1
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @vf2(i8* %this, i32 %arg) readnone { ; true iff (%arg & 2) != 0; %this is unused
+  %and = and i32 %arg, 2
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @vf4(i8* %this, i32 %arg) readnone { ; true iff (%arg & 4) != 0; %this is unused
+  %and = and i32 %arg, 4
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @vf8(i8* %this, i32 %arg) readnone { ; true iff (%arg & 8) != 0; %this is unused
+  %and = and i32 %arg, 8
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+}
+
+; CHECK: define i1 @call1
+define i1 @call1(i8* %obj) { ; virtual call through slot 0 with the constant argument 5
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i1 (i8*, i32)*
+  ; CHECK: getelementptr {{.*}} -1
+  ; CHECK: and {{.*}}, 1
+  %result = call i1 %fptr_casted(i8* %obj, i32 5) ; patterns above expect a getelementptr with offset -1 and an `and` with mask 1
+  ret i1 %result
+}
+
+; CHECK: define i1 @call2
+define i1 @call2(i8* %obj) { ; virtual call through slot 0 with the constant argument 10
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i1 (i8*, i32)*
+  ; CHECK: getelementptr {{.*}} -1
+  ; CHECK: and {{.*}}, 2
+  %result = call i1 %fptr_casted(i8* %obj, i32 10) ; patterns above expect a getelementptr with offset -1 and an `and` with mask 2
+  ret i1 %result
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [1 x i8*]* @vt1, i32 0}
+!1 = !{!"bitset", [1 x i8*]* @vt2, i32 0}
+!2 = !{!"bitset", [1 x i8*]* @vt4, i32 0}
+!3 = !{!"bitset", [1 x i8*]* @vt8, i32 0}
+!llvm.bitsets = !{!0, !1, !2, !3}
diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll
new file mode 100644
index 0000000..8504155
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)] ; both vtables store the same function in slot 0
+@vt2 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)]
+
+define void @vf(i8* %this) { ; the only function referenced by the vtables
+  ret void
+}
+
+; CHECK: define void @call
+define void @call(i8* %obj) { ; virtual call through slot 0 of a vtable in "bitset"
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to void (i8*)*
+  ; CHECK: call void @vf(
+  call void %fptr_casted(i8* %obj) ; the pattern above expects a direct call to @vf here
+  ret void
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [1 x i8*]* @vt1, i32 0}
+!1 = !{!"bitset", [1 x i8*]* @vt2, i32 0}
+!llvm.bitsets = !{!0, !1}
diff --git a/llvm/test/Transforms/WholeProgramDevirt/non-array-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/non-array-vtable.ll
new file mode 100644
index 0000000..d6befd5
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/non-array-vtable.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt = constant i8* bitcast (void (i8*)* @vf to i8*) ; initializer is a bare pointer, not an array of pointers
+
+define void @vf(i8* %this) { ; the function @vt points at
+  ret void
+}
+
+; CHECK: define void @call
+define void @call(i8* %obj) { ; virtual call through slot 0 of a vtable in "bitset"
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to void (i8*)*
+  ; CHECK: call void %
+  call void %fptr_casted(i8* %obj) ; the pattern above expects this to remain an indirect call
+  ret void
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", i8** @vt, i32 0}
+!llvm.bitsets = !{!0}
diff --git a/llvm/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll
new file mode 100644
index 0000000..394f5e6
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt = global [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)] ; declared `global` rather than `constant`, which is the case under test
+
+define void @vf(i8* %this) { ; the function stored in slot 0 of @vt
+  ret void
+}
+
+; CHECK: define void @call
+define void @call(i8* %obj) { ; virtual call through slot 0 of a vtable in "bitset"
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to void (i8*)*
+  ; CHECK: call void %
+  call void %fptr_casted(i8* %obj) ; the pattern above expects this to remain an indirect call
+  ret void
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [1 x i8*]* @vt, i32 0}
+!llvm.bitsets = !{!0}
diff --git a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll
new file mode 100644
index 0000000..d34a529
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i32 (i8*)* @vf1 to i8*)] ; two vtables with different functions in slot 0
+@vt2 = constant [1 x i8*] [i8* bitcast (i32 (i8*)* @vf2 to i8*)]
+
+define i32 @vf1(i8* %this) readnone { ; returns the constant 123
+  ret i32 123
+}
+
+define i32 @vf2(i8* %this) readnone { ; returns the same constant 123 as @vf1
+  ret i32 123
+}
+
+; CHECK: define i32 @call
+define i32 @call(i8* %obj) personality i8* undef { ; same virtual call as uniform-retval.ll, but made with `invoke`
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i32 (i8*)*
+  ; CHECK: br label %[[RET:[0-9A-Za-z]*]]
+  %result = invoke i32 %fptr_casted(i8* %obj) to label %ret unwind label %unwind ; the pattern above expects a plain branch to the normal destination instead
+
+unwind:
+  %x = landingpad i32 cleanup
+  unreachable
+
+ret:
+  ; CHECK: [[RET]]:
+  ; CHECK-NEXT: ret i32 123
+  ret i32 %result ; expected to become a return of the constant 123
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [1 x i8*]* @vt1, i32 0}
+!1 = !{!"bitset", [1 x i8*]* @vt2, i32 0}
+!llvm.bitsets = !{!0, !1}
diff --git a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll
new file mode 100644
index 0000000..f6433c3
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i32 (i8*)* @vf1 to i8*)] ; two vtables with different functions in slot 0
+@vt2 = constant [1 x i8*] [i8* bitcast (i32 (i8*)* @vf2 to i8*)]
+
+define i32 @vf1(i8* %this) readnone { ; returns the constant 123
+  ret i32 123
+}
+
+define i32 @vf2(i8* %this) readnone { ; returns the same constant 123 as @vf1
+  ret i32 123
+}
+
+; CHECK: define i32 @call
+define i32 @call(i8* %obj) { ; virtual call whose two possible callees both return 123
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i32 (i8*)*
+  %result = call i32 %fptr_casted(i8* %obj) ; the patterns below expect no call to be left in the function
+  ; CHECK-NOT: call
+  ; CHECK: ret i32 123
+  ret i32 %result ; expected to become a return of the constant 123
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [1 x i8*]* @vt1, i32 0}
+!1 = !{!"bitset", [1 x i8*]* @vt2, i32 0}
+!llvm.bitsets = !{!0, !1}
diff --git a/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll b/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll
new file mode 100644
index 0000000..41aa3e5
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll
@@ -0,0 +1,61 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i1 (i8*)* @vf0 to i8*)] ; slot 0 returns 0
+@vt2 = constant [1 x i8*] [i8* bitcast (i1 (i8*)* @vf0 to i8*)] ; slot 0 returns 0
+@vt3 = constant [1 x i8*] [i8* bitcast (i1 (i8*)* @vf1 to i8*)] ; slot 0 returns 1
+@vt4 = constant [1 x i8*] [i8* bitcast (i1 (i8*)* @vf1 to i8*)] ; slot 0 returns 1
+
+define i1 @vf0(i8* %this) readnone { ; always returns 0
+  ret i1 0
+}
+
+define i1 @vf1(i8* %this) readnone { ; always returns 1
+  ret i1 1
+}
+
+; CHECK: define i1 @call1
+define i1 @call1(i8* %obj) { ; virtual call on "bitset1" (vt1, vt2, vt3), where only @vt3 holds a function returning 1
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  ; CHECK: [[VT1:%[^ ]*]] = bitcast [1 x i8*]* {{.*}} to i8*
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset1")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset1" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i1 (i8*)*
+  ; CHECK: [[RES1:%[^ ]*]] = icmp eq i8* [[VT1]], bitcast ([1 x i8*]* @vt3 to i8*)
+  %result = call i1 %fptr_casted(i8* %obj) ; expected to become an equality comparison of the vtable pointer with @vt3
+  ; CHECK: ret i1 [[RES1]]
+  ret i1 %result
+}
+
+; CHECK: define i1 @call2
+define i1 @call2(i8* %obj) { ; virtual call on "bitset2" (vt2, vt3, vt4), where only @vt2 holds a function returning 0
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  ; CHECK: [[VT2:%[^ ]*]] = bitcast [1 x i8*]* {{.*}} to i8*
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset2")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset2" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i1 (i8*)*
+  ; CHECK: [[RES2:%[^ ]*]] = icmp ne i8* [[VT2]], bitcast ([1 x i8*]* @vt2 to i8*)
+  %result = call i1 %fptr_casted(i8* %obj) ; expected to become an inequality comparison of this function's vtable pointer with @vt2
+  ret i1 %result
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset1", [1 x i8*]* @vt1, i32 0}
+!1 = !{!"bitset1", [1 x i8*]* @vt2, i32 0}
+!2 = !{!"bitset1", [1 x i8*]* @vt3, i32 0}
+!3 = !{!"bitset2", [1 x i8*]* @vt2, i32 0}
+!4 = !{!"bitset2", [1 x i8*]* @vt3, i32 0}
+!5 = !{!"bitset2", [1 x i8*]* @vt4, i32 0}
+!llvm.bitsets = !{!0, !1, !2, !3, !4, !5}
diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll
new file mode 100644
index 0000000..f963d18
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf1 to i8*)] ; constant vtables, so the missing readnone is what blocks the optimization
+@vt2 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2 to i8*)]
+
+define i32 @vf1(i8* %this, i32 %arg) { ; deliberately lacks the readnone attribute
+  ret i32 %arg
+}
+
+define i32 @vf2(i8* %this, i32 %arg) { ; second, distinct callee; also lacks readnone
+  ret i32 %arg
+}
+
+; CHECK: define i32 @call
+define i32 @call(i8* %obj) { ; virtual call with a constant argument to callees that are not readnone
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i32 (i8*, i32)*
+  ; CHECK: call i32 %
+  %result = call i32 %fptr_casted(i8* %obj, i32 1) ; the pattern above expects this to remain an indirect call
+  ret i32 %result
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [1 x i8*]* @vt1, i32 0}
+!1 = !{!"bitset", [1 x i8*]* @vt2, i32 0}
+!llvm.bitsets = !{!0, !1} ; list both members so the call has two distinct candidate callees
diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-no-this.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-no-this.ll
new file mode 100644
index 0000000..28d39e1
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-no-this.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i32 ()* @vf1 to i8*)] ; constant vtables, so the missing `this` parameter is what blocks the optimization
+@vt2 = constant [1 x i8*] [i8* bitcast (i32 ()* @vf2 to i8*)]
+
+define i32 @vf1() readnone { ; takes no parameters at all; returns 1
+  ret i32 1
+}
+
+define i32 @vf2() readnone { ; takes no parameters at all; returns 2
+  ret i32 2
+}
+
+; CHECK: define i32 @call
+define i32 @call(i8* %obj) { ; virtual call to callees that have no first (`this`) parameter
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i32 ()*
+  ; CHECK: call i32 %
+  %result = call i32 %fptr_casted() ; the pattern above expects this to remain an indirect call
+  ret i32 %result
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [1 x i8*]* @vt1, i32 0}
+!1 = !{!"bitset", [1 x i8*]* @vt2, i32 0}
+!llvm.bitsets = !{!0, !1} ; list both members so the call has two distinct candidate callees
diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll
new file mode 100644
index 0000000..c056832
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf1 to i8*)] ; constant vtables, so the non-constant call argument is what blocks the optimization
+@vt2 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2 to i8*)]
+
+define i32 @vf1(i8* %this, i32 %arg) readnone { ; returns its second argument; %this is unused
+  ret i32 %arg
+}
+
+define i32 @vf2(i8* %this, i32 %arg) readnone { ; second, distinct callee with the same body
+  ret i32 %arg
+}
+
+; CHECK: define void @call
+define void @call(i8* %obj, i32 %arg) { ; virtual call whose argument is the caller's own parameter, not a constant
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i32 (i8*, i32)*
+  ; CHECK: call i32 %
+  %result = call i32 %fptr_casted(i8* %obj, i32 %arg) ; the pattern above expects this to remain an indirect call
+  ret void
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [1 x i8*]* @vt1, i32 0}
+!1 = !{!"bitset", [1 x i8*]* @vt2, i32 0}
+!llvm.bitsets = !{!0, !1} ; list both members so the call has two distinct candidate callees
diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll
new file mode 100644
index 0000000..4dbbc1b
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i128 (i8*, i128)* @vf1 to i8*)] ; constant vtables, so the 128-bit return type is what blocks the optimization
+@vt2 = constant [1 x i8*] [i8* bitcast (i128 (i8*, i128)* @vf2 to i8*)]
+
+define i128 @vf1(i8* %this, i128 %arg) readnone { ; returns its i128 argument; wider than the 64-bit limit in the commit message
+  ret i128 %arg
+}
+
+define i128 @vf2(i8* %this, i128 %arg) readnone { ; second, distinct callee with the same body
+  ret i128 %arg
+}
+
+; CHECK: define i128 @call
+define i128 @call(i8* %obj) { ; virtual call with a constant i128 argument and an i128 result
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i128 (i8*, i128)*
+  ; CHECK: call i128 %
+  %result = call i128 %fptr_casted(i8* %obj, i128 1) ; the pattern above expects this to remain an indirect call
+  ret i128 %result
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [1 x i8*]* @vt1, i32 0}
+!1 = !{!"bitset", [1 x i8*]* @vt2, i32 0}
+!llvm.bitsets = !{!0, !1} ; list both members so the call has two distinct candidate callees
diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll
new file mode 100644
index 0000000..6dff4d1
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll
@@ -0,0 +1,67 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf1 to i8*)] ; constant vtables, so the call/callee type mismatches are what block the optimization
+@vt2 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2 to i8*)]
+
+define i32 @vf1(i8* %this, i32 %arg) readnone { ; real signature: i32 (i8*, i32)
+  ret i32 %arg
+}
+
+define i32 @vf2(i8* %this, i32 %arg) readnone { ; second, distinct callee with the same signature
+  ret i32 %arg
+}
+
+; CHECK: define i32 @bad_arg_type
+define i32 @bad_arg_type(i8* %obj) { ; calls through slot 0 passing an i64 where the callees take an i32
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i32 (i8*, i64)*
+  ; CHECK: call i32 %
+  %result = call i32 %fptr_casted(i8* %obj, i64 1) ; the pattern above expects this to remain an indirect call
+  ret i32 %result
+}
+
+; CHECK: define i32 @bad_arg_count
+define i32 @bad_arg_count(i8* %obj) { ; calls through slot 0 passing two extra arguments where the callees take one
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i32 (i8*, i64, i64)*
+  ; CHECK: call i32 %
+  %result = call i32 %fptr_casted(i8* %obj, i64 1, i64 2) ; the pattern above expects this to remain an indirect call
+  ret i32 %result
+}
+
+; CHECK: define i64 @bad_return_type
+define i64 @bad_return_type(i8* %obj) { ; calls through slot 0 expecting an i64 result where the callees return i32
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i64 (i8*, i32)*
+  ; CHECK: call i64 %
+  %result = call i64 %fptr_casted(i8* %obj, i32 1) ; the pattern above expects this to remain an indirect call
+  ret i64 %result
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [1 x i8*]* @vt1, i32 0}
+!1 = !{!"bitset", [1 x i8*]* @vt2, i32 0}
+!llvm.bitsets = !{!0, !1} ; list both members so each call has two distinct candidate callees
diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll
new file mode 100644
index 0000000..39a63c6
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i32 (i8*)* @vf1 to i8*)] ; constant vtables, so the use of %this is what blocks the optimization
+@vt2 = constant [1 x i8*] [i8* bitcast (i32 (i8*)* @vf2 to i8*)]
+
+define i32 @vf1(i8* %this) readnone { ; result depends on the %this pointer value
+  %this_int = ptrtoint i8* %this to i32
+  ret i32 %this_int
+}
+
+define i32 @vf2(i8* %this) readnone { ; second, distinct callee with the same body
+  %this_int = ptrtoint i8* %this to i32
+  ret i32 %this_int
+}
+
+; CHECK: define i32 @call
+define i32 @call(i8* %obj) { ; virtual call to callees that read their first (`this`) argument
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i32 (i8*)*
+  ; CHECK: call i32 %
+  %result = call i32 %fptr_casted(i8* %obj) ; the pattern above expects this to remain an indirect call
+  ret i32 %result
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [1 x i8*]* @vt1, i32 0}
+!1 = !{!"bitset", [1 x i8*]* @vt2, i32 0}
+!llvm.bitsets = !{!0, !1} ; list both members so the call has two distinct candidate callees
diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll
new file mode 100644
index 0000000..3686f8a
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll
@@ -0,0 +1,137 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\01\01\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*)], [0 x i8] zeroinitializer }, section "vt1sec"
+@vt1 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*), ; slot 0: i1 function returning 0
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*), ; slot 1: i1 function returning 1
+i8* bitcast (i32 (i8*)* @vf1i32 to i8*) ; slot 2: i32 function returning 1
+], section "vt1sec" ; explicit section; the expected rewritten global above carries the same section
+
+; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\02\02\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*)], [0 x i8] zeroinitializer }{{$}}
+@vt2 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*), ; slot 0: i1 function returning 1
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*), ; slot 1: i1 function returning 0
+i8* bitcast (i32 (i8*)* @vf2i32 to i8*) ; slot 2: i32 function returning 2
+]
+
+; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\01\03\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*)], [0 x i8] zeroinitializer }{{$}}
+@vt3 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*), ; slot 0: i1 function returning 0
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*), ; slot 1: i1 function returning 1
+i8* bitcast (i32 (i8*)* @vf3i32 to i8*) ; slot 2: i32 function returning 3
+]
+
+; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\02\04\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf4i32 to i8*)], [0 x i8] zeroinitializer }{{$}}
+@vt4 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*), ; slot 0: i1 function returning 1
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*), ; slot 1: i1 function returning 0
+i8* bitcast (i32 (i8*)* @vf4i32 to i8*) ; slot 2: i32 function returning 4
+]
+
+@vt5 = constant [3 x i8*] [ ; member of "bitset" whose slots all hold @__cxa_pure_virtual; this file has no expected-output pattern for it
+i8* bitcast (void ()* @__cxa_pure_virtual to i8*),
+i8* bitcast (void ()* @__cxa_pure_virtual to i8*),
+i8* bitcast (void ()* @__cxa_pure_virtual to i8*)
+]
+
+; CHECK: @vt1 = alias [3 x i8*], getelementptr inbounds ({ [8 x i8], [3 x i8*], [0 x i8] }, { [8 x i8], [3 x i8*], [0 x i8] }* [[VT1DATA]], i32 0, i32 1)
+; CHECK: @vt2 = alias [3 x i8*], getelementptr inbounds ({ [8 x i8], [3 x i8*], [0 x i8] }, { [8 x i8], [3 x i8*], [0 x i8] }* [[VT2DATA]], i32 0, i32 1)
+; CHECK: @vt3 = alias [3 x i8*], getelementptr inbounds ({ [8 x i8], [3 x i8*], [0 x i8] }, { [8 x i8], [3 x i8*], [0 x i8] }* [[VT3DATA]], i32 0, i32 1)
+; CHECK: @vt4 = alias [3 x i8*], getelementptr inbounds ({ [8 x i8], [3 x i8*], [0 x i8] }, { [8 x i8], [3 x i8*], [0 x i8] }* [[VT4DATA]], i32 0, i32 1)
+
+define i1 @vf0i1(i8* %this) readnone { ; i1 callee returning 0; %this is unused
+  ret i1 0
+}
+
+define i1 @vf1i1(i8* %this) readnone { ; i1 callee returning 1; %this is unused
+  ret i1 1
+}
+
+define i32 @vf1i32(i8* %this) readnone { ; i32 callee returning 1 (slot 2 of @vt1)
+  ret i32 1
+}
+
+define i32 @vf2i32(i8* %this) readnone { ; i32 callee returning 2 (slot 2 of @vt2)
+  ret i32 2
+}
+
+define i32 @vf3i32(i8* %this) readnone { ; i32 callee returning 3 (slot 2 of @vt3)
+  ret i32 3
+}
+
+define i32 @vf4i32(i8* %this) readnone { ; i32 callee returning 4 (slot 2 of @vt4)
+  ret i32 4
+}
+
+; CHECK: define i1 @call1(
+define i1 @call1(i8* %obj) { ; virtual call through slot 0, which returns i1
+  %vtableptr = bitcast i8* %obj to [3 x i8*]**
+  %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  ; CHECK: [[VT1:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+  %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i1 (i8*)*
+  ; CHECK: [[VTGEP1:%[^ ]*]] = getelementptr i8, i8* [[VT1]], i64 -5
+  ; CHECK: [[VTLOAD1:%[^ ]*]] = load i8, i8* [[VTGEP1]]
+  ; CHECK: [[VTAND1:%[^ ]*]] = and i8 [[VTLOAD1]], 2
+  ; CHECK: [[VTCMP1:%[^ ]*]] = icmp ne i8 [[VTAND1]], 0
+  %result = call i1 %fptr_casted(i8* %obj) ; expected to become a test of mask 2 in the byte 5 bytes before the vtable
+  ; CHECK: ret i1 [[VTCMP1]]
+  ret i1 %result
+}
+
+; CHECK: define i1 @call2(
+define i1 @call2(i8* %obj) { ; virtual call through slot 1, which returns i1
+  %vtableptr = bitcast i8* %obj to [3 x i8*]**
+  %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  ; CHECK: [[VT2:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+  %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 1 ; address of slot 1
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i1 (i8*)*
+  ; CHECK: [[VTGEP2:%[^ ]*]] = getelementptr i8, i8* [[VT2]], i64 -5
+  ; CHECK: [[VTLOAD2:%[^ ]*]] = load i8, i8* [[VTGEP2]]
+  ; CHECK: [[VTAND2:%[^ ]*]] = and i8 [[VTLOAD2]], 1
+  ; CHECK: [[VTCMP2:%[^ ]*]] = icmp ne i8 [[VTAND2]], 0
+  %result = call i1 %fptr_casted(i8* %obj) ; expected to become a test of mask 1 in the byte 5 bytes before the vtable
+  ; CHECK: ret i1 [[VTCMP2]]
+  ret i1 %result
+}
+
+; CHECK: define i32 @call3(
+define i32 @call3(i8* %obj) { ; virtual call through slot 2, which returns i32
+  %vtableptr = bitcast i8* %obj to [3 x i8*]**
+  %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr ; vtable pointer read from the start of %obj
+  ; CHECK: [[VT3:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+  %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset")
+  call void @llvm.assume(i1 %p) ; assume the vtable address passes the "bitset" test
+  %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 2 ; address of slot 2
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i32 (i8*)*
+  ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, i8* [[VT3]], i64 -4
+  ; CHECK: [[VTBC3:%[^ ]*]] = bitcast i8* [[VTGEP3]] to i32*
+  ; CHECK: [[VTLOAD3:%[^ ]*]] = load i32, i32* [[VTBC3]]
+  %result = call i32 %fptr_casted(i8* %obj) ; expected to become an i32 load from 4 bytes before the vtable
+  ; CHECK: ret i32 [[VTLOAD3]]
+  ret i32 %result
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+declare void @__cxa_pure_virtual()
+
+!0 = !{!"bitset", [3 x i8*]* @vt1, i32 0}
+!1 = !{!"bitset", [3 x i8*]* @vt2, i32 0}
+!2 = !{!"bitset", [3 x i8*]* @vt3, i32 0}
+!3 = !{!"bitset", [3 x i8*]* @vt4, i32 0}
+!4 = !{!"bitset", [3 x i8*]* @vt5, i32 0}
+!llvm.bitsets = !{!0, !1, !2, !3, !4}
diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll
new file mode 100644
index 0000000..44608cf
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll
@@ -0,0 +1,131 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [0 x i8], [4 x i8*], [8 x i8] } { [0 x i8] zeroinitializer, [4 x i8*] [i8* null, i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*)], [8 x i8] c"\01\00\00\00\01\00\00\00" }
+@vt1 = constant [4 x i8*] [ ; four-slot table: one null entry followed by three function pointers
+i8* null, ; slot 0: null; the bitset entry for @vt1 (!0) has offset operand 8, presumably the byte offset just past this 8-byte slot (TODO confirm)
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*), ; slot 1: i1 function that returns 0
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*), ; slot 2: i1 function that returns 1
+i8* bitcast (i32 (i8*)* @vf1i32 to i8*) ; slot 3: i32 function that returns 1
+]
+
+; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [0 x i8], [3 x i8*], [8 x i8] } { [0 x i8] zeroinitializer, [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*)], [8 x i8] c"\02\00\00\00\02\00\00\00" }
+@vt2 = constant [3 x i8*] [ ; three-slot table with no leading null entry
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*), ; slot 0: i1 function that returns 1
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*), ; slot 1: i1 function that returns 0
+i8* bitcast (i32 (i8*)* @vf2i32 to i8*) ; slot 2: i32 function that returns 2
+]
+
+; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [0 x i8], [4 x i8*], [8 x i8] } { [0 x i8] zeroinitializer, [4 x i8*] [i8* null, i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*)], [8 x i8] c"\03\00\00\00\01\00\00\00" }
+@vt3 = constant [4 x i8*] [ ; four-slot table: one null entry followed by three function pointers
+i8* null, ; slot 0: null; the bitset entry for @vt3 (!2) has offset operand 8, presumably the byte offset just past this 8-byte slot (TODO confirm)
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*), ; slot 1: i1 function that returns 0
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*), ; slot 2: i1 function that returns 1
+i8* bitcast (i32 (i8*)* @vf3i32 to i8*) ; slot 3: i32 function that returns 3
+]
+
+; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [0 x i8], [3 x i8*], [8 x i8] } { [0 x i8] zeroinitializer, [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf4i32 to i8*)], [8 x i8] c"\04\00\00\00\02\00\00\00" }
+@vt4 = constant [3 x i8*] [ ; three-slot table with no leading null entry
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*), ; slot 0: i1 function that returns 1
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*), ; slot 1: i1 function that returns 0
+i8* bitcast (i32 (i8*)* @vf4i32 to i8*) ; slot 2: i32 function that returns 4
+]
+
+; CHECK: @vt1 = alias [4 x i8*], getelementptr inbounds ({ [0 x i8], [4 x i8*], [8 x i8] }, { [0 x i8], [4 x i8*], [8 x i8] }* [[VT1DATA]], i32 0, i32 1)
+; CHECK: @vt2 = alias [3 x i8*], getelementptr inbounds ({ [0 x i8], [3 x i8*], [8 x i8] }, { [0 x i8], [3 x i8*], [8 x i8] }* [[VT2DATA]], i32 0, i32 1)
+; CHECK: @vt3 = alias [4 x i8*], getelementptr inbounds ({ [0 x i8], [4 x i8*], [8 x i8] }, { [0 x i8], [4 x i8*], [8 x i8] }* [[VT3DATA]], i32 0, i32 1)
+; CHECK: @vt4 = alias [3 x i8*], getelementptr inbounds ({ [0 x i8], [3 x i8*], [8 x i8] }, { [0 x i8], [3 x i8*], [8 x i8] }* [[VT4DATA]], i32 0, i32 1)
+
+define i1 @vf0i1(i8* %this) readnone { ; readnone function; %this is not used
+  ret i1 0 ; always returns 0
+}
+
+define i1 @vf1i1(i8* %this) readnone { ; readnone function; %this is not used
+  ret i1 1 ; always returns 1
+}
+
+define i32 @vf1i32(i8* %this) readnone { ; readnone function; %this is not used
+  ret i32 1 ; always returns 1
+}
+
+define i32 @vf2i32(i8* %this) readnone { ; readnone function; %this is not used
+  ret i32 2 ; always returns 2
+}
+
+define i32 @vf3i32(i8* %this) readnone { ; readnone function; %this is not used
+  ret i32 3 ; always returns 3
+}
+
+define i32 @vf4i32(i8* %this) readnone { ; readnone function; %this is not used
+  ret i32 4 ; always returns 4
+}
+
+; CHECK: define i1 @call1(
+define i1 @call1(i8* %obj) { ; indirect i1 call through slot 0 of the table that %obj points to
+  %vtableptr = bitcast i8* %obj to [3 x i8*]** ; reinterpret %obj as a pointer to a pointer to a 3-slot table
+  %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr ; load the table pointer from the start of %obj
+  ; CHECK: [[VT1:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+  %vtablei8 = bitcast [3 x i8*]* %vtable to i8* ; i8* view of the table address (captured as VT1 by the directive above)
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset") ; test the table address against the bitset named "bitset"
+  call void @llvm.assume(i1 %p) ; assume the bitset test is true
+  %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 0 ; address of slot 0
+  %fptr = load i8*, i8** %fptrptr ; function pointer stored in slot 0
+  %fptr_casted = bitcast i8* %fptr to i1 (i8*)* ; give it the callee signature i1 (i8*)
+  ; CHECK: [[VTGEP1:%[^ ]*]] = getelementptr i8, i8* [[VT1]], i64 28
+  ; CHECK: [[VTLOAD1:%[^ ]*]] = load i8, i8* [[VTGEP1]]
+  ; CHECK: [[VTAND1:%[^ ]*]] = and i8 [[VTLOAD1]], 2
+  ; CHECK: [[VTCMP1:%[^ ]*]] = icmp ne i8 [[VTAND1]], 0
+  %result = call i1 %fptr_casted(i8* %obj) ; the directives above expect a test of mask 2 on the byte at offset 28 from the table address in the output
+  ; CHECK: ret i1 [[VTCMP1]]
+  ret i1 %result ; expected output returns the bit-test result rather than a call result
+}
+
+; CHECK: define i1 @call2(
+define i1 @call2(i8* %obj) { ; indirect i1 call through slot 1 of the table that %obj points to
+  %vtableptr = bitcast i8* %obj to [3 x i8*]** ; reinterpret %obj as a pointer to a pointer to a 3-slot table
+  %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr ; load the table pointer from the start of %obj
+  ; CHECK: [[VT2:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+  %vtablei8 = bitcast [3 x i8*]* %vtable to i8* ; i8* view of the table address (captured as VT2 by the directive above)
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset") ; test the table address against the bitset named "bitset"
+  call void @llvm.assume(i1 %p) ; assume the bitset test is true
+  %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 1 ; address of slot 1
+  %fptr = load i8*, i8** %fptrptr ; function pointer stored in slot 1
+  %fptr_casted = bitcast i8* %fptr to i1 (i8*)* ; give it the callee signature i1 (i8*)
+  ; CHECK: [[VTGEP2:%[^ ]*]] = getelementptr i8, i8* [[VT2]], i64 28
+  ; CHECK: [[VTLOAD2:%[^ ]*]] = load i8, i8* [[VTGEP2]]
+  ; CHECK: [[VTAND2:%[^ ]*]] = and i8 [[VTLOAD2]], 1
+  ; CHECK: [[VTCMP2:%[^ ]*]] = icmp ne i8 [[VTAND2]], 0
+  %result = call i1 %fptr_casted(i8* %obj) ; the directives above expect a test of mask 1 on the byte at offset 28 from the table address in the output
+  ; CHECK: ret i1 [[VTCMP2]]
+  ret i1 %result ; expected output returns the bit-test result rather than a call result
+}
+
+; CHECK: define i32 @call3(
+define i32 @call3(i8* %obj) { ; indirect i32 call through slot 2 of the table that %obj points to
+  %vtableptr = bitcast i8* %obj to [3 x i8*]** ; reinterpret %obj as a pointer to a pointer to a 3-slot table
+  %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr ; load the table pointer from the start of %obj
+  ; CHECK: [[VT3:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+  %vtablei8 = bitcast [3 x i8*]* %vtable to i8* ; i8* view of the table address (captured as VT3 by the directive above)
+  %p = call i1 @llvm.bitset.test(i8* %vtablei8, metadata !"bitset") ; test the table address against the bitset named "bitset"
+  call void @llvm.assume(i1 %p) ; assume the bitset test is true
+  %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 2 ; address of slot 2
+  %fptr = load i8*, i8** %fptrptr ; function pointer stored in slot 2
+  %fptr_casted = bitcast i8* %fptr to i32 (i8*)* ; give it the callee signature i32 (i8*)
+  ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, i8* [[VT3]], i64 24
+  ; CHECK: [[VTBC3:%[^ ]*]] = bitcast i8* [[VTGEP3]] to i32*
+  ; CHECK: [[VTLOAD3:%[^ ]*]] = load i32, i32* [[VTBC3]]
+  %result = call i32 %fptr_casted(i8* %obj) ; the directives above expect an i32 load at offset 24 (just past three 8-byte slots) in the output
+  ; CHECK: ret i32 [[VTLOAD3]]
+  ret i32 %result ; expected output returns the loaded i32 rather than a call result
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{!"bitset", [4 x i8*]* @vt1, i32 8} ; entry for @vt1 (four slots, leading null) under the name "bitset", offset operand 8
+!1 = !{!"bitset", [3 x i8*]* @vt2, i32 0} ; entry for @vt2 (three slots), offset operand 0
+!2 = !{!"bitset", [4 x i8*]* @vt3, i32 8} ; entry for @vt3 (four slots, leading null), offset operand 8
+!3 = !{!"bitset", [3 x i8*]* @vt4, i32 0} ; entry for @vt4 (three slots), offset operand 0
+!llvm.bitsets = !{!0, !1, !2, !3} ; named metadata node listing all four entries