Now that we have a canonical way to handle 256-bit splats
(vinsertf128 $1 + vpermilps $0), remove the old code that first did
the splat in a 128-bit vector and then inserted it into the larger one.
This is better because the handling code gets simpler, and it also
makes room for the upcoming vbroadcast!
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137807 91177308-0d34-0410-b5e6-96231b3b80d8
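For illustration only (not part of the original patch), a minimal LLVM IR
sketch of the splat pattern this lowering targets; the function name
@splat_f32 is hypothetical, and the expected AVX sequence is noted in
comments:

; Splat a scalar float across all eight elements of a <8 x float>.
; The scalar lands in element 0 of an xmm register; vinsertf128 $1
; copies the low 128 bits into the high half of the ymm register, and
; vpermilps $0 then broadcasts element 0 within each 128-bit lane.
define <8 x float> @splat_f32(float %q) nounwind {
entry:
  %vecinit = insertelement <8 x float> undef, float %q, i32 0
  %splat = shufflevector <8 x float> %vecinit, <8 x float> undef,
                         <8 x i32> zeroinitializer
  ; expected codegen (AVX):
  ;   vinsertf128 $1, %xmm0, %ymm0, %ymm0
  ;   vpermilps   $0, %ymm0, %ymm0
  ret <8 x float> %splat
}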
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index ca35b7f..417d7b0 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -24,8 +24,8 @@
}
; CHECK: vmovd
-; CHECK-NEXT: movlhps
; CHECK-NEXT: vinsertf128 $1
+; CHECK-NEXT: vpermilps $0
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
entry:
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
@@ -35,8 +35,8 @@
ret <4 x i64> %vecinit6.i
}
-; CHECK: vshufpd
-; CHECK-NEXT: vinsertf128 $1
+; CHECK: vinsertf128 $1
+; CHECK-NEXT: vpermilps $0
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
entry:
%vecinit.i = insertelement <4 x double> undef, double %q, i32 0
@@ -78,8 +78,8 @@
ret <8 x float> %load_broadcast12281250
}
-; CHECK: vpshufd $0
-; CHECK-NEXT: vinsertf128 $1
+; CHECK: vinsertf128 $1
+; CHECK-NEXT: vpermilps $0
define <8 x float> @funcF(i32* %ptr) nounwind {
%val = load i32* %ptr, align 4
%ret6 = insertelement <8 x i32> undef, i32 %val, i32 6