Optimize the "truncate" operation on AVX.
Truncating v4i64 -> v4i32 and v8i32 -> v8i16 can be done with a set of shuffles.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@149485 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll
new file mode 100755
index 0000000..dd0e268
--- /dev/null
+++ b/test/CodeGen/X86/avx-trunc.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s

+

+define <4 x i32> @trunc_64_32(<4 x i64> %A) nounwind uwtable readnone ssp{ ; test case: truncate <4 x i64> to <4 x i32>; the directives below require "pshufd" to appear in the llc output after this function's name

+; CHECK: trunc_64_32

+; CHECK: pshufd

+  %B = trunc <4 x i64> %A to <4 x i32> ; the operation under test: drops the high 32 bits of every 64-bit element

+  ret <4 x i32>%B ; returning the narrowed vector keeps the truncation live in the generated code

+}

+define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp{ ; test case: truncate <8 x i32> to <8 x i16>; the directives below require "pshufb" to appear in the llc output after this function's name

+; CHECK: trunc_32_16

+; CHECK: pshufb

+  %B = trunc <8 x i32> %A to <8 x i16> ; the operation under test: drops the high 16 bits of every 32-bit element

+  ret <8 x i16>%B ; returning the narrowed vector keeps the truncation live in the generated code

+}

+