Split the return adjustment in thunks out from the 'this' adjustment, so the
optimizer can tail call into the return value adjustment thunk. This improves
code size for complex hierarchies.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@85988 91177308-0d34-0410-b5e6-96231b3b80d8
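
For context, a hedged sketch of the situation this change targets. All names
below are made up for illustration; they are not from the test case, and the
two functions only model what the generated thunks do, not the code Clang
actually emits.

  // A hierarchy whose secondary-base override needs both adjustments.
  struct R1 { virtual ~R1() {} long r1; };
  struct R2 { virtual ~R2() {} long r2; };
  struct RD : R1, R2 { };                   // R2 sits at a nonzero offset in RD

  struct B1 { virtual R1 *foo(); long b1; };
  struct B2 { virtual R2 *foo(); long b2; };
  struct D  : B1, B2 { virtual RD *foo(); };

  RD *real_foo(D *self);                    // stands in for the real D::foo body

  // Return-adjustment-only thunk: call the real function, then shift the
  // result from RD* to its R2 subobject. This is the piece that is now built
  // as its own thunk (BuildCovariantThunk with no 'this' adjustment).
  R2 *foo_return_thunk(D *self) {
    RD *r = real_foo(self);
    return r ? static_cast<R2 *>(r) : 0;    // return adjustment
  }

  // Entry thunk in B2's vtable slot for D::foo: previously it did both
  // adjustments itself; now only the 'this' adjustment remains, and the call
  // into the return-adjustment thunk can become a tail call, so the return
  // adjustment is emitted once instead of in every such thunk.
  R2 *foo_entry_thunk(B2 *self) {
    D *adjusted = static_cast<D *>(self);   // 'this' adjustment
    return foo_return_thunk(adjusted);      // tail-callable
  }
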
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index e22670c..6dbc53d 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -779,9 +779,17 @@
 
   QualType ArgType = MD->getThisType(getContext());
   llvm::Value *Arg = Builder.CreateLoad(LocalDeclMap[ThisDecl], "this");
-  if (nv_t || v_t)
+  if (nv_t || v_t) {
     // Do the this adjustment.
+    const llvm::Type *OrigTy = Callee->getType();
     Arg = DynamicTypeAdjust(Arg, nv_t, v_t);
+    if (nv_r || v_r) {
+      Callee = CGM.BuildCovariantThunk(MD, Extern, 0, 0, nv_r, v_r);
+      Callee = Builder.CreateBitCast(Callee, OrigTy);
+      nv_r = v_r = 0;
+    }
+  }
+
   CallArgs.push_back(std::make_pair(RValue::get(Arg), ArgType));
 
   for (FunctionDecl::param_const_iterator i = MD->param_begin(),
@@ -795,7 +803,6 @@
     CallArgs.push_back(std::make_pair(EmitCallArg(Arg, ArgType), ArgType));
   }
 
-  // FIXME: be sure to call the right function when we thunk to a thunk
   RValue RV = EmitCall(CGM.getTypes().getFunctionInfo(ResultType, CallArgs),
                        Callee, CallArgs, MD);
   if (nv_r || v_r) {
diff --git a/test/CodeGenCXX/virt.cpp b/test/CodeGenCXX/virt.cpp
index cd0d24f..7911940 100644
--- a/test/CodeGenCXX/virt.cpp
+++ b/test/CodeGenCXX/virt.cpp
@@ -93,10 +93,10 @@
 
 // FIXME: This is the wrong thunk, but until these issues are fixed, better
 // than nothing.
-// CHECK-LP64:     __ZTcvn16_n72_v16_n32_N8test16_D4foo1Ev27:
-// CHECK-LP64-NEXT:Leh_func_begin33:
+// CHECK-LP64:     __ZTcvn16_n72_v16_n32_N8test16_D4foo1Ev:
+// CHECK-LP64-NEXT:Leh_func_begin43:
 // CHECK-LP64-NEXT:    subq    $24, %rsp
-// CHECK-LP64-NEXT:Llabel33:
+// CHECK-LP64-NEXT:Llabel43:
 // CHECK-LP64-NEXT:    movq    %rdi, %rax
 // CHECK-LP64-NEXT:    movq    %rax, 8(%rsp)
 // CHECK-LP64-NEXT:    movq    8(%rsp), %rax
@@ -108,6 +108,20 @@
 // CHECK-LP64-NEXT:    addq    %rax, %rcx
 // CHECK-LP64-NEXT:    movq    %rcx, %rax
 // CHECK-LP64-NEXT:    movq    %rax, %rdi
+// CHECK-LP64-NEXT:    call    __ZTch0_v16_n32_N8test16_D4foo1Ev
+// CHECK-LP64-NEXT:    movq    %rax, 16(%rsp)
+// CHECK-LP64-NEXT:    movq    16(%rsp), %rax
+// CHECK-LP64-NEXT:    addq    $24, %rsp
+// CHECK-LP64-NEXT:    ret
+
+// CHECK-LP64:     __ZTch0_v16_n32_N8test16_D4foo1Ev:
+// CHECK-LP64-NEXT:Leh_func_begin44:
+// CHECK-LP64-NEXT:    subq    $24, %rsp
+// CHECK-LP64-NEXT:Llabel44:
+// CHECK-LP64-NEXT:    movq    %rdi, %rax
+// CHECK-LP64-NEXT:    movq    %rax, 8(%rsp)
+// CHECK-LP64-NEXT:    movq    8(%rsp), %rax
+// CHECK-LP64-NEXT:    movq    %rax, %rdi
 // CHECK-LP64-NEXT:    call    __ZN8test16_D4foo1Ev
 // CHECK-LP64-NEXT:    movq    %rax, %rcx
 // CHECK-LP64-NEXT:    movabsq $16, %rdx