Now that we have a canonical way to handle 256-bit splats
(vinsertf128 $1 + vpermilps $0), remove the old code that first did
the splat in a 128-bit vector and then inserted it into the larger
one. This is better because the handling code gets simpler, and it
also makes room for the upcoming vbroadcast!
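
For illustration, a sketch of the new lowering for splatting element 0
of a <8 x float> (registers are hypothetical, not taken from actual
codegen output):

    # xmm0[0] holds the scalar to splat across ymm0
    vinsertf128 $1, %xmm0, %ymm0, %ymm0   # copy the low 128-bit lane
                                          # into the high lane
    vpermilps   $0, %ymm0, %ymm0          # replicate element 0 within
                                          # each 128-bit lane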

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137807 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index ca35b7f..417d7b0 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -24,8 +24,8 @@
 }
 
 ; CHECK: vmovd
-; CHECK-NEXT: movlhps
 ; CHECK-NEXT: vinsertf128 $1
+; CHECK-NEXT: vpermilps $0
 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
 entry:
   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
@@ -35,8 +35,8 @@
   ret <4 x i64> %vecinit6.i
 }
 
-; CHECK: vshufpd
-; CHECK-NEXT: vinsertf128 $1
+; CHECK: vinsertf128 $1
+; CHECK-NEXT: vpermilps $0
 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
 entry:
   %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
@@ -78,8 +78,8 @@
   ret <8 x float> %load_broadcast12281250
 }
 
-; CHECK: vpshufd  $0
-; CHECK-NEXT: vinsertf128 $1
+; CHECK: vinsertf128 $1
+; CHECK-NEXT: vpermilps $0
 define <8 x float> @funcF(i32* %ptr) nounwind {
   %val = load i32* %ptr, align 4
   %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6