Slightly generalize the code that handles shuffles of consecutive loads on x86 to handle more cases. Fix a bug in said code that would cause it to read past the end of an object. Rewrite the code in SelectionDAGLegalize::ExpandBUILD_VECTOR to be a bit more general. Remove PerformBuildVectorCombine, which is no longer necessary with these changes. In addition to simplifying the code, with this change, we can now catch a few more cases of consecutive loads.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73012 91177308-0d34-0410-b5e6-96231b3b80d8

commit: e6bb1e57c7e9e84d6ad12baa89e7f6119b6c46a7 [log] [tgz]
author: Eli Friedman <eli.friedman@gmail.com> Sun Jun 07 06:52:44 2009 +0000
committer: Eli Friedman <eli.friedman@gmail.com> Sun Jun 07 06:52:44 2009 +0000
tree: a75cc13d22340b807d3939f42886d26cf476aaf9
parent: 334e72553fd46ccc1d35cad006117936004814a0 [diff] [blame]
diff --git a/test/CodeGen/X86/vec_loadsingles.ll b/test/CodeGen/X86/vec_loadsingles.ll
new file mode 100644
index 0000000..6712276
--- /dev/null
+++ b/test/CodeGen/X86/vec_loadsingles.ll

@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+
+define <4 x float> @a(<4 x float> %a, float* nocapture %p) nounwind readonly {
+entry:
+	%tmp1 = load float* %p
+	%vecins = insertelement <4 x float> undef, float %tmp1, i32 0
+	%add.ptr = getelementptr float* %p, i32 1
+	%tmp5 = load float* %add.ptr
+	%vecins7 = insertelement <4 x float> %vecins, float %tmp5, i32 1
+	ret <4 x float> %vecins7
+}
+
commit	e6bb1e57c7e9e84d6ad12baa89e7f6119b6c46a7	[log] [tgz]
author	Eli Friedman <eli.friedman@gmail.com>	Sun Jun 07 06:52:44 2009 +0000
committer	Eli Friedman <eli.friedman@gmail.com>	Sun Jun 07 06:52:44 2009 +0000
tree	a75cc13d22340b807d3939f42886d26cf476aaf9
parent	334e72553fd46ccc1d35cad006117936004814a0 [diff] [blame]