AArch64: simplify PCS mapping.

Now that LLVM can count the registers needed to implement AAPCS rules, we don't
need to duplicate that logic here. This means we can drop the explicit padding
and also use more natural types in many cases (e.g. "struct { float arr[3]; }"
used to end up as "[2 x double]" to avoid holes on the stack.

The one wrinkle is that AAPCS va_arg was also using the register counting
machinery. But the local replacement isn't too bad.

llvm-svn: 222904
diff --git a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
index 4800aac..77c6b3a 100644
--- a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
+++ b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
@@ -46,7 +46,7 @@
 
 // PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
 // ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce)
-// ARM64: define %struct.D2 @_Z7func_D22D2(double %x.0, double %x.1, double %x.2)
+// ARM64: define %struct.D2 @_Z7func_D22D2([3 x double] %x.coerce)
 // X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(double %x.0, double %x.1, double %x.2)
 D2 CC func_D2(D2 x) { return x; }
 
@@ -57,7 +57,7 @@
 
 // PPC: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce)
 // ARM32: define arm_aapcs_vfpcc %struct.D4 @_Z7func_D42D4(%struct.D4 %x.coerce)
-// ARM64: define %struct.D4 @_Z7func_D42D4(double %x.0, double %x.1, double %x.2, double %x.3)
+// ARM64: define %struct.D4 @_Z7func_D42D4([4 x double] %x.coerce)
 D4 CC func_D4(D4 x) { return x; }
 
 D5 CC func_D5(D5 x) { return x; }
@@ -67,17 +67,9 @@
 // The C++ multiple inheritance expansion case is a little more complicated, so
 // do some extra checking.
 //
-// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5(double %x.0, double %x.1, double %x.2)
-// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1*
-// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.0, double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.1, double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.2, double*
+// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5([3 x double] %x.coerce)
+// ARM64: bitcast %struct.D5* %{{.*}} to [3 x double]*
+// ARM64: store [3 x double] %x.coerce, [3 x double]*
 
 void call_D5(D5 *p) {
   func_D5(*p);
@@ -86,21 +78,8 @@
 // Check the call site.
 //
 // ARM64-LABEL: define void @_Z7call_D5P2D5(%struct.D5* %p)
-// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1*
-// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8
-// ARM64: bitcast i8* %{{.*}} to %struct.I2*
-// ARM64: bitcast %struct.I2* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16
-// ARM64: bitcast i8* %{{.*}} to %struct.I3*
-// ARM64: bitcast %struct.I3* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: call %struct.D5 @_Z7func_D52D5(double %{{.*}}, double %{{.*}}, double %{{.*}})
+// ARM64: load [3 x double]*
+// ARM64: call %struct.D5 @_Z7func_D52D5([3 x double] %{{.*}})
 
 struct Empty { };
 struct Float1 { float x; };
@@ -108,7 +87,7 @@
 struct HVAWithEmptyBase : Float1, Empty, Float2 { float z; };
 
 // PPC: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
-// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase(float %a.0, float %a.1, float %a.2)
+// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
 // ARM32: define arm_aapcs_vfpcc void @_Z15with_empty_base16HVAWithEmptyBase(%struct.HVAWithEmptyBase %a.coerce)
 void CC with_empty_base(HVAWithEmptyBase a) {}
 
@@ -121,7 +100,7 @@
 };
 
 // PPC: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
-// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField(float %a.0, float %a.1, float %a.2)
+// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
 // ARM32: define arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce)
 // X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(float %a.0, float %a.1, float %a.2)
 void CC with_empty_bitfield(HVAWithEmptyBitField a) {}