MS ABI x64: Pass small objects with dtors but trivial copy ctors directly

Passing objects directly (in registers or memory) creates a second copy
of the object in the callee.  The callee always destroys its copy, but
we also have to destroy any temporary created in the caller.  In other
words, copy elision of these kinds of objects is impossible.

Objects larger than 8 bytes with non-trivial dtors and trivial copy
ctors are still passed indirectly, and we can still elide copies of
them.

Fixes PR19640.

llvm-svn: 207889
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 4428b96..da50473 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2289,20 +2289,25 @@
   if (HasAggregateEvalKind &&
       CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
     // If we're using inalloca, use the argument memory.  Otherwise, use a
-    // temporary.  Either way, the aggregate is destroyed externally in the
-    // callee.
+    // temporary.
     AggValueSlot Slot;
     if (args.isUsingInAlloca())
       Slot = createPlaceholderSlot(*this, type);
     else
       Slot = CreateAggTemp(type, "agg.tmp");
-    Slot.setExternallyDestructed();
+
+    const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
+    bool DestroyedInCallee =
+        RD && RD->hasNonTrivialDestructor() &&
+        CGM.getCXXABI().getRecordArgABI(RD) != CGCXXABI::RAA_Default;
+    if (DestroyedInCallee)
+      Slot.setExternallyDestructed();
+
     EmitAggExpr(E, Slot);
     RValue RV = Slot.asRValue();
     args.add(RV, type);
 
-    const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
-    if (RD && RD->hasNonTrivialDestructor()) {
+    if (DestroyedInCallee) {
       // Create a no-op GEP between the placeholder and the cleanup so we can
       // RAUW it successfully.  It also serves as a marker of the first
       // instruction where the cleanup is active.
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index 869734a..f22b96a 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -44,17 +44,7 @@
     return !RD->isPOD();
   }
 
-  RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override {
-    if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialDestructor()) {
-      llvm::Triple::ArchType Arch = CGM.getTarget().getTriple().getArch();
-      if (Arch == llvm::Triple::x86)
-        return RAA_DirectInMemory;
-      // On x64, pass non-trivial records indirectly.
-      // FIXME: Test other Windows architectures.
-      return RAA_Indirect;
-    }
-    return RAA_Default;
-  }
+  RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override;
 
   StringRef GetPureVirtualCallName() override { return "_purecall"; }
   // No known support for deleted functions in MSVC yet, so this choice is
@@ -407,6 +397,33 @@
 
 }
 
+CGCXXABI::RecordArgABI
+MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const {
+  switch (CGM.getTarget().getTriple().getArch()) {
+  default:
+    // FIXME: Implement for other architectures.
+    return RAA_Default;
+
+  case llvm::Triple::x86:
+    // 32-bit x86 constructs non-trivial objects directly in outgoing argument
+    // slots.  LLVM uses the inalloca attribute to implement this.
+    if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialDestructor())
+      return RAA_DirectInMemory;
+    return RAA_Default;
+
+  case llvm::Triple::x86_64:
+    // Win64 passes objects with non-trivial copy ctors indirectly.
+    if (RD->hasNonTrivialCopyConstructor())
+      return RAA_Indirect;
+    // Win64 passes objects larger than 8 bytes (64 bits) indirectly.
+    if (getContext().getTypeSize(RD->getTypeForDecl()) > 64)
+      return RAA_Indirect;
+    return RAA_Default;
+  }
+
+  llvm_unreachable("invalid enum");
+}
+
 llvm::Value *MicrosoftCXXABI::adjustToCompleteObject(CodeGenFunction &CGF,
                                                      llvm::Value *ptr,
                                                      QualType type) {