- Handle special scalar_to_vector case: splats. Use a native 128-bit shuffle before inserting into a 256-bit vector. - Add AVX versions of movd/movq instructions. - Introduce a few COPY patterns to match insert_subvector instructions. This turns a trivial insert_subvector instruction into a register copy, coalescing the xmm into a ymm and avoiding emitting one more instruction. llvm-svn: 136002

commit: 123dff0f5822f2f6d2adcf37ec4ec0caea51d739 [log] [tgz]
author: Bruno Cardoso Lopes <bruno.cardoso@gmail.com> Mon Jul 25 23:05:25 2011 +0000
committer: Bruno Cardoso Lopes <bruno.cardoso@gmail.com> Mon Jul 25 23:05:25 2011 +0000
tree: 68901fabbdbec9be6c44b64e800a0039b66a7362
parent: 276eb8debf0321da3e67cc95fc78c06d8023bed0 [diff]
diff --git a/llvm/test/CodeGen/X86/avx-256-splat.ll b/llvm/test/CodeGen/X86/avx-256-splat.ll
index 39230fe..edc17b7 100644
--- a/llvm/test/CodeGen/X86/avx-256-splat.ll
+++ b/llvm/test/CodeGen/X86/avx-256-splat.ll

@@ -5,7 +5,6 @@
 ; CHECK: vextractf128 $0
 ; CHECK-NEXT: punpcklbw
 ; CHECK-NEXT: punpckhbw
-; CHECK-NEXT: vinsertf128 $0
 ; CHECK-NEXT: vinsertf128 $1
 ; CHECK-NEXT: vpermilps $85
 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
@@ -16,7 +15,6 @@
 
 ; CHECK: vextractf128 $0
 ; CHECK-NEXT: punpckhwd
-; CHECK-NEXT: vinsertf128 $0
 ; CHECK-NEXT: vinsertf128 $1
 ; CHECK-NEXT: vpermilps $85
 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
@@ -25,3 +23,25 @@
   ret <16 x i16> %shuffle
 }
 
+; CHECK: vmovd
+; CHECK-NEXT: movlhps
+; CHECK-NEXT: vinsertf128 $1
+define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
+entry:
+  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
+  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
+  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
+  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
+  ret <4 x i64> %vecinit6.i
+}
+
+; CHECK: vshufpd
+; CHECK-NEXT: vinsertf128 $1
+define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
+entry:
+  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
+  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
+  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
+  ret <4 x double> %vecinit6.i
+}

diff --git a/llvm/test/CodeGen/X86/avx-256.ll b/llvm/test/CodeGen/X86/avx-256.ll
index 20d31e7..a6d1450 100644
--- a/llvm/test/CodeGen/X86/avx-256.ll
+++ b/llvm/test/CodeGen/X86/avx-256.ll

@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 
 @x = common global <8 x float> zeroinitializer, align 32
 @y = common global <4 x double> zeroinitializer, align 32
@@ -12,4 +12,3 @@
   store <4 x double> zeroinitializer, <4 x double>* @y, align 32
   ret void
 }
-
commit	123dff0f5822f2f6d2adcf37ec4ec0caea51d739	[log] [tgz]
author	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>	Mon Jul 25 23:05:25 2011 +0000
committer	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>	Mon Jul 25 23:05:25 2011 +0000
tree	68901fabbdbec9be6c44b64e800a0039b66a7362
parent	276eb8debf0321da3e67cc95fc78c06d8023bed0 [diff]