Second attempt, fixing SSE4.1 issues and implementing feedback from Duncan.

PR2957

ISD::VECTOR_SHUFFLE now stores its shuffle mask as an array of integers inside
the node itself, rather than taking a BUILD_VECTOR of ConstantSDNodes as the
shuffle mask.  A mask value of -1 represents UNDEF.

In addition to eliminating the creation of illegal BUILD_VECTORs just to
represent shuffle masks, this change canonicalizes shuffle masks more
thoroughly, resulting in substantially better code for some classes of
shuffles.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@70225 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/Generic/vector-casts.ll b/test/CodeGen/Generic/vector-casts.ll
index 12104a3..e661e84 100644
--- a/test/CodeGen/Generic/vector-casts.ll
+++ b/test/CodeGen/Generic/vector-casts.ll
@@ -1,45 +1,9 @@
 ; RUN: llvm-as < %s | llc
 ; PR2671
 
-define void @a(<2 x double>* %p, <2 x i8>* %q) {
-        %t = load <2 x double>* %p
-	%r = fptosi <2 x double> %t to <2 x i8>
-        store <2 x i8> %r, <2 x i8>* %q
-	ret void
-}
-define void @b(<2 x double>* %p, <2 x i8>* %q) {
-        %t = load <2 x double>* %p
-	%r = fptoui <2 x double> %t to <2 x i8>
-        store <2 x i8> %r, <2 x i8>* %q
-	ret void
-}
-define void @c(<2 x i8>* %p, <2 x double>* %q) {
-        %t = load <2 x i8>* %p
-	%r = sitofp <2 x i8> %t to <2 x double>
-        store <2 x double> %r, <2 x double>* %q
-	ret void
-}
-define void @d(<2 x i8>* %p, <2 x double>* %q) {
-        %t = load <2 x i8>* %p
-	%r = uitofp <2 x i8> %t to <2 x double>
-        store <2 x double> %r, <2 x double>* %q
-	ret void
-}
-define void @e(<2 x i8>* %p, <2 x i16>* %q) {
-        %t = load <2 x i8>* %p
-	%r = sext <2 x i8> %t to <2 x i16>
-        store <2 x i16> %r, <2 x i16>* %q
-	ret void
-}
-define void @f(<2 x i8>* %p, <2 x i16>* %q) {
-        %t = load <2 x i8>* %p
-	%r = zext <2 x i8> %t to <2 x i16>
-        store <2 x i16> %r, <2 x i16>* %q
-	ret void
-}
 define void @g(<2 x i16>* %p, <2 x i8>* %q) {
-        %t = load <2 x i16>* %p
-	%r = trunc <2 x i16> %t to <2 x i8>
-        store <2 x i8> %r, <2 x i8>* %q
-	ret void
+  %t = load <2 x i16>* %p
+  %r = trunc <2 x i16> %t to <2 x i8>
+  store <2 x i8> %r, <2 x i8>* %q
+  ret void
 }