Skip the extra copy from an aggregate where it isn't necessary (rdar://problem/8139919). This shouldn't make much of a difference at -O3, but should substantially reduce the number of generated memcpys at -O0.
Originally committed as r130717, but it was backed out due to an ObjC regression.
llvm-svn: 132102
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 4c9f3d4..712ae89 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1189,6 +1189,15 @@
return args.add(EmitReferenceBindingToExpr(E, /*InitializedDecl=*/0),
type);
+ if (hasAggregateLLVMType(type) && isa<ImplicitCastExpr>(E) &&
+ cast<CastExpr>(E)->getCastKind() == CK_LValueToRValue) {
+ LValue L = EmitLValue(cast<CastExpr>(E)->getSubExpr());
+ assert(L.isSimple());
+ args.add(RValue::getAggregate(L.getAddress(), L.isVolatileQualified()),
+ type, /*NeedsCopy*/true);
+ return;
+ }
+
args.add(EmitAnyExprToTemp(E), type);
}
@@ -1254,6 +1263,10 @@
Alignment, I->Ty);
else
StoreComplexToAddr(RV.getComplexVal(), Args.back(), false);
+ } else if (I->NeedsCopy && !ArgInfo.getIndirectByVal()) {
+ Args.push_back(CreateMemTemp(I->Ty));
+ EmitAggregateCopy(Args.back(), RV.getAggregateAddr(), I->Ty,
+ RV.isVolatileQualified());
} else {
Args.push_back(RV.getAggregateAddr());
}