[CodeGen]: don't treat structures returned in registers as memory inputs

Summary:
The "=r" output constraint for a structure variable passed to inline asm
shouldn't be converted to "=*r", as this changes the semantics of the asm
directive and prevents dead store elimination (DSE).
Instead, preserve the constraint and return such structures as integers
of the corresponding size, which are converted back to structures when
storing the result.

Fixes PR42672.

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D65234

llvm-svn: 370335
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 40ab795..9097434 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -1984,6 +1984,7 @@
   std::vector<llvm::Type *> ResultTruncRegTypes;
   std::vector<llvm::Type *> ArgTypes;
   std::vector<llvm::Value*> Args;
+  llvm::BitVector ResultTypeRequiresCast;
 
   // Keep track of inout constraints.
   std::string InOutConstraints;
@@ -2022,13 +2023,23 @@
 
     // If this is a register output, then make the inline asm return it
     // by-value.  If this is a memory result, return the value by-reference.
-    if (!Info.allowsMemory() && hasScalarEvaluationKind(OutExpr->getType())) {
+    bool isScalarizableAggregate =
+        hasAggregateEvaluationKind(OutExpr->getType());
+    if (!Info.allowsMemory() && (hasScalarEvaluationKind(OutExpr->getType()) ||
+                                 isScalarizableAggregate)) {
       Constraints += "=" + OutputConstraint;
       ResultRegQualTys.push_back(OutExpr->getType());
       ResultRegDests.push_back(Dest);
-      ResultRegTypes.push_back(ConvertTypeForMem(OutExpr->getType()));
-      ResultTruncRegTypes.push_back(ResultRegTypes.back());
-
+      ResultTruncRegTypes.push_back(ConvertTypeForMem(OutExpr->getType()));
+      if (Info.allowsRegister() && isScalarizableAggregate) {
+        ResultTypeRequiresCast.push_back(true);
+        unsigned Size = getContext().getTypeSize(OutExpr->getType());
+        llvm::Type *ConvTy = llvm::IntegerType::get(getLLVMContext(), Size);
+        ResultRegTypes.push_back(ConvTy);
+      } else {
+        ResultTypeRequiresCast.push_back(false);
+        ResultRegTypes.push_back(ResultTruncRegTypes.back());
+      }
       // If this output is tied to an input, and if the input is larger, then
       // we need to set the actual result type of the inline asm node to be the
       // same as the input type.
@@ -2271,6 +2282,9 @@
   assert(RegResults.size() == ResultRegTypes.size());
   assert(RegResults.size() == ResultTruncRegTypes.size());
   assert(RegResults.size() == ResultRegDests.size());
+  // ResultRegDests can also be populated by addReturnRegisterOutputs() above,
+  // in which case its size may grow.
+  assert(ResultTypeRequiresCast.size() <= ResultRegDests.size());
   for (unsigned i = 0, e = RegResults.size(); i != e; ++i) {
     llvm::Value *Tmp = RegResults[i];
 
@@ -2300,7 +2314,24 @@
       }
     }
 
-    EmitStoreThroughLValue(RValue::get(Tmp), ResultRegDests[i]);
+    LValue Dest = ResultRegDests[i];
+    // ResultTypeRequiresCast elements correspond to the first
+    // ResultTypeRequiresCast.size() elements of RegResults.
+    if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
+      unsigned Size = getContext().getTypeSize(ResultRegQualTys[i]);
+      Address A = Builder.CreateBitCast(Dest.getAddress(),
+                                        ResultRegTypes[i]->getPointerTo());
+      QualType Ty = getContext().getIntTypeForBitwidth(Size, /*Signed*/ false);
+      if (Ty.isNull()) {
+        const Expr *OutExpr = S.getOutputExpr(i);
+        CGM.Error(
+            OutExpr->getExprLoc(),
+            "impossible constraint in asm: can't store struct into a register");
+        return;
+      }
+      Dest = MakeAddrLValue(A, Ty);
+    }
+    EmitStoreThroughLValue(RValue::get(Tmp), Dest);
   }
 }