Add a pattern to move the low element of a v4f32 and zero-extend the rest.

llvm-svn: 50922
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 12e8b70..adb6399 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2768,6 +2768,8 @@
           (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
           (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), // X86vzmovl whose input is already a v4f32 register (the pattern above handles the scalar_to_vector FR32 case)
+          (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE2]>; // select MOVLPSrr with V_SET0 as first operand; NOTE(review): HasSSE2 mirrors the v4f32 pattern above - confirm SSE1 is not sufficient
 }
 
 // Splat v2f64 / v2i64
diff --git a/llvm/test/CodeGen/X86/vec_set-G.ll b/llvm/test/CodeGen/X86/vec_set-G.ll
new file mode 100644
index 0000000..f81907c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec_set-G.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss
+
+define fastcc void @t(<4 x float> %A) nounwind  { ; builds < 0.0, %A[0], 0.0, undef > from %A and stores it; the RUN line only checks that llc output contains movss
+	%tmp41896 = extractelement <4 x float> %A, i32 0		; <float> [#uses=1]
+	%tmp14082 = insertelement <4 x float> < float 0.000000e+00, float undef, float undef, float undef >, float %tmp41896, i32 1		; <<4 x float>> [#uses=1]
+	%tmp14083 = insertelement <4 x float> %tmp14082, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp14083, <4 x float>* null, align 16 ; presumably the store just keeps the built vector in use - destination is a null pointer
+        ret void
+}