Optimization for the following SIGN_EXTEND pairs:
v8i8  -> v8i64, 
v8i8  -> v8i32, 
v4i8  -> v4i64, 
v4i16 -> v4i64 
for AVX and AVX2.

Bug 14865.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172708 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a82410a..3e5a446 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4298,11 +4298,19 @@
   if (isa<ConstantSDNode>(N0))
     return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
 
-  // fold (sext (sext x)) -> (sext x)
-  // fold (sext (aext x)) -> (sext x)
-  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
-    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
-                       N0.getOperand(0));
+  // Folding (sext (sext x)) is obvious, but we do it only after the type
+  // legalization phase. When the sequence is like {(T1->T2), (T2->T3)} and
+  // T1 or T3 (or both) are illegal types, the TypeLegalizer may not give a
+  // good sequence for the (T1->T3) pair. So we give the target-specific
+  // combiner a chance to optimize T1->T2 and T2->T3 separately and possibly
+  // fold them into a preceding or subsequent instruction.
+  if (Level >= AfterLegalizeTypes) {
+    // fold (sext (sext x)) -> (sext x)
+    // fold (sext (aext x)) -> (sext x)
+    if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+      return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+                         N0.getOperand(0));
+  }
 
   if (N0.getOpcode() == ISD::TRUNCATE) {
     // fold (sext (truncate (load x))) -> (sext (smaller load x))