Implement "punpckldq  %xmm0, %xmm0" as "pshufd  $0x50, %xmm0, %xmm0" unless optimizing for code size.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@56711 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll
index eddba87..b45f939 100644
--- a/test/CodeGen/X86/vec_extract.ll
+++ b/test/CodeGen/X86/vec_extract.ll
@@ -4,7 +4,7 @@
 ; RUN: grep pshufd   %t | count 1
 ; RUN: grep unpckhpd %t | count 1
 
-define void @test1(<4 x float>* %F, float* %f) {
+define void @test1(<4 x float>* %F, float* %f) nounwind {
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp7 = add <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	%tmp2 = extractelement <4 x float> %tmp7, i32 0		; <float> [#uses=1]
@@ -12,21 +12,21 @@
 	ret void
 }
 
-define float @test2(<4 x float>* %F, float* %f) {
+define float @test2(<4 x float>* %F, float* %f) nounwind {
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp7 = add <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	%tmp2 = extractelement <4 x float> %tmp7, i32 2		; <float> [#uses=1]
 	ret float %tmp2
 }
 
-define void @test3(float* %R, <4 x float>* %P1) {
+define void @test3(float* %R, <4 x float>* %P1) nounwind {
 	%X = load <4 x float>* %P1		; <<4 x float>> [#uses=1]
 	%tmp = extractelement <4 x float> %X, i32 3		; <float> [#uses=1]
 	store float %tmp, float* %R
 	ret void
 }
 
-define double @test4(double %A) {
+define double @test4(double %A) nounwind {
 	%tmp1 = call <2 x double> @foo( )		; <<2 x double>> [#uses=1]
 	%tmp2 = extractelement <2 x double> %tmp1, i32 1		; <double> [#uses=1]
 	%tmp3 = add double %tmp2, %A		; <double> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-23.ll b/test/CodeGen/X86/vec_shuffle-23.ll
new file mode 100644
index 0000000..34d84ef
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-23.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2                | not grep punpck
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2                |     grep pshufd
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -optimize-size |     grep punpck
+
+define i32 @t() nounwind {		; shuffles a vector with itself using the <0,4,1,5> interleave mask and returns lane 0
+entry:
+	%a = alloca <4 x i32>		; <<4 x i32>*> [#uses=2] -- slot holding the input vector
+	%b = alloca <4 x i32>		; <<4 x i32>*> [#uses=5] -- working slot; every intermediate value goes through it
+	volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a		; presumably volatile so the input is not constant-folded away -- TODO confirm
+	%tmp = load <4 x i32>* %a		; <<4 x i32>> [#uses=1]
+	store <4 x i32> %tmp, <4 x i32>* %b
+	%tmp1 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1] -- first shuffle operand
+	%tmp2 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1] -- loaded from %b like %tmp1, so both shuffle operands hold the same value
+	%punpckldq = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x i32>> [#uses=1] -- mask picks tmp1[0], tmp2[0], tmp1[1], tmp2[1] (low halves interleaved)
+	store <4 x i32> %punpckldq, <4 x i32>* %b
+	%tmp3 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1] -- shuffled vector read back from %b
+	%result = extractelement <4 x i32> %tmp3, i32 0		; <i32> [#uses=1] -- lane 0 of the shuffled vector
+	ret i32 %result
+}