Use movaps to load a v4f32 build_vector of all-constant values into a
register instead of loading each element individually.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40478 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a25710b..23f9e95 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2495,6 +2495,7 @@
   unsigned NumZero  = 0;
   unsigned NumNonZero = 0;
   unsigned NonZeros = 0;
+  unsigned NumNonZeroImms = 0;
   std::set<SDOperand> Values;
   for (unsigned i = 0; i < NumElems; ++i) {
     SDOperand Elt = Op.getOperand(i);
@@ -2505,6 +2506,9 @@
       else {
         NonZeros |= (1 << i);
         NumNonZero++;
+        if (Elt.getOpcode() == ISD::Constant ||
+            Elt.getOpcode() == ISD::ConstantFP)
+          NumNonZeroImms++;
       }
     }
   }
@@ -2548,6 +2552,11 @@
     }
   }
 
+  // A vector full of immediates; various special cases are already
+  // handled, so this is best done with a single constant-pool load.
+  if (NumNonZero == NumNonZeroImms)
+    return SDOperand();
+
   // Let legalizer expand 2-wide build_vectors.
   if (EVTBits == 64)
     return SDOperand();
diff --git a/test/CodeGen/X86/v4f32-immediate.ll b/test/CodeGen/X86/v4f32-immediate.ll
new file mode 100644
index 0000000..67b5e79
--- /dev/null
+++ b/test/CodeGen/X86/v4f32-immediate.ll
@@ -0,0 +1,5 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse | grep movaps
+
+define <4 x float> @foo() {
+  ret <4 x float> <float 3.223542354, float 2.3, float 1.2, float 0.1>
+}