Use the vAny type to get rid of NEON intrinsics that differed only in whether
the overloaded vector types allowed floating-point or integer vector elements.
Most of these operations actually depend on the element type, so collapsing
the variants by bitcasting between integer and floating-point vectors was not
an option.

If you include the vpadd intrinsics that I updated earlier, this gets rid
of 20 intrinsics.
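
As an illustration of why the element type has to stay visible to the
intrinsic (rather than being hidden behind a bitcast), consider vpadd under
the unified naming: the .v2i32 and .v2f32 overloads of the same base name
select different instructions (vpadd.i32 vs. vpadd.f32), so a single
integer-only intrinsic plus bitcasts would compute the wrong operation for
the float case. The IR below is only a sketch, not part of this patch, and
the function names are made up for the example:

define <2 x i32> @pairwise_add_i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; The overloaded .v2i32 suffix tells instruction selection to emit vpadd.i32.
	%tmp = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
	ret <2 x i32> %tmp
}

define <2 x float> @pairwise_add_f32(<2 x float> %a, <2 x float> %b) nounwind {
; Same base name, but the .v2f32 suffix selects the floating-point vpadd.f32.
	%tmp = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %a, <2 x float> %b)
	ret <2 x float> %tmp
}

declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone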


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78646 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index 2c16ac1..168b62b 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -8,7 +8,7 @@
 define <8 x i8> @vld2i8(i8* %A) nounwind {
 ;CHECK: vld2i8:
 ;CHECK: vld2.8
-	%tmp1 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2i.v8i8(i8* %A)
+	%tmp1 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2.v8i8(i8* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v8qi2 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp1, 1
         %tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -18,7 +18,7 @@
 define <4 x i16> @vld2i16(i16* %A) nounwind {
 ;CHECK: vld2i16:
 ;CHECK: vld2.16
-	%tmp1 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2i.v4i16(i16* %A)
+	%tmp1 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2.v4i16(i16* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v4hi2 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v4hi2 %tmp1, 1
         %tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -28,7 +28,7 @@
 define <2 x i32> @vld2i32(i32* %A) nounwind {
 ;CHECK: vld2i32:
 ;CHECK: vld2.32
-	%tmp1 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2i.v2i32(i32* %A)
+	%tmp1 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2.v2i32(i32* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v2si2 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v2si2 %tmp1, 1
         %tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -38,14 +38,14 @@
 define <2 x float> @vld2f(float* %A) nounwind {
 ;CHECK: vld2f:
 ;CHECK: vld2.32
-	%tmp1 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2f.v2f32(float* %A)
+	%tmp1 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2.v2f32(float* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v2sf2 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v2sf2 %tmp1, 1
         %tmp4 = add <2 x float> %tmp2, %tmp3
 	ret <2 x float> %tmp4
 }
 
-declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2i.v8i8(i8*) nounwind readonly
-declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2i.v4i16(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2i.v2i32(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2f.v2f32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly
+declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly
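
For reference, the CHECK lines in this test are matched by FileCheck against
llc output. The RUN line lives in the file header, which is outside the
context shown in these hunks; a typical driver line for an ARM NEON codegen
test of this era would look roughly like the following (an assumption, not a
quote from the file):

; Assumed RUN line; the actual header of vld2.ll is not part of this diff.
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s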