- Improved v8i16 shuffle lowering. It now uses pshuflw and pshufhw as much as
  possible before resorting to pextrw and pinsrw (see the IR sketches after
  this list).
- Better codegen for v4i32 shuffles masquerading as v8i16 or v16i8 shuffles.
- Improves (i16 extract_vector_element 0) codegen by recognizing that
  (i32 extract_vector_element 0) does not require a pextrw (second sketch
  below).
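
As an illustration (not one of the tests in this patch; the function names and
masks below are made up), a single-input v8i16 shuffle whose mask only permutes
elements within one half of the vector can now be lowered with pshuflw/pshufhw
instead of a pextrw/pinsrw chain:

define <8 x i16> @low_half_only(<8 x i16> %A) {
	; mask permutes only elements 0-3; roughly lowers to: pshuflw $27, %xmm0, %xmm0
	%tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7 >
	ret <8 x i16> %tmp
}

define <8 x i16> @both_halves(<8 x i16> %A) {
	; each half is permuted within itself; roughly lowers to pshuflw followed by pshufhw
	%tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4 >
	ret <8 x i16> %tmp
}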
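
Similarly for the extract case (again a made-up sketch, not a test from this
patch): element 0 of a v8i16 can go through the same lowering as an i32 extract
of element 0, i.e. a movd of the low dword whose low 16 bits are the result, so
no pextrw is needed:

define i16 @extract_elt0(<8 x i16> %A) {
	; roughly lowers to: movd %xmm0, %eax  (the i16 value is in %ax)
	%tmp = extractelement <8 x i16> %A, i32 0
	ret i16 %tmp
}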

llvm-svn: 44836
diff --git a/llvm/test/CodeGen/X86/vec_shuffle-12.ll b/llvm/test/CodeGen/X86/vec_shuffle-12.ll
index f66f3bb..70c6a56 100644
--- a/llvm/test/CodeGen/X86/vec_shuffle-12.ll
+++ b/llvm/test/CodeGen/X86/vec_shuffle-12.ll
@@ -1,37 +1,28 @@
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpck
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 7
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 7
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuf | count 2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 4
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 6
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuflw | count 3
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufhw | count 2
 
-define void @t1(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
+define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) {
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
-	store <8 x i16> %tmp3, <8 x i16>* %res
-	ret void
+	ret <8 x i16> %tmp3
 }
 
-define void @t2(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
-	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 13, i32 4, i32 5, i32 6, i32 7 >
-	store <8 x i16> %tmp3, <8 x i16>* %res
-	ret void
+define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
+	ret <8 x i16> %tmp
 }
 
-define void @t3(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
-	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
-	store <8 x i16> %tmp3, <8 x i16>* %res
-	ret void
+define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
+	ret <8 x i16> %tmp
 }
 
-define void @t4(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
-	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
-	store <8 x i16> %tmp3, <8 x i16>* %res
-	ret void
+define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
+	ret <8 x i16> %tmp
 }