[x86] Revert r212324 which was too aggressive w.r.t. allowing undef lanes in vector splats. The core problem here is that undef lanes can't *unilaterally* be considered to contribute to splats. Their handling needs to be more cautious. There is also a reported failure of the nightly testers (thanks Tobias!) that may well stem from the same core issue. I'm going to fix this theoretical issue, factor the APIs a bit better, and then verify that I don't see anything bad with Tobias's reduction from the test suite before recommitting. Original commit message for r212324: [x86] Generalize BuildVectorSDNode::getConstantSplatValue to work for any constant, constant FP, or undef splat and to tolerate any undef lanes in a splat, then replace all uses of isSplatVector in X86's lowering with it. This fixes issues where undef lanes in an otherwise splat vector would prevent the splat logic from firing. It is a touch more awkward to use this interface, but it is much more accurate. Suggestions for better interface structuring welcome. With this fix, the code generated with the widening legalization strategy for widen_cast-4.ll is *dramatically* improved as the special lowering strategies for a v16i8 SRA kick in even though the high lanes are undef. We also get a slightly different choice for broadcasting an aligned memory location, and use vpshufd instead of vbroadcastss. This looks like a minor win for pipelining and domain crossing, but a minor loss for the number of micro-ops. I suspect it's a wash, but folks can easily tweak the lowering if they want. llvm-svn: 212475

commit: beeacac0b32a94abd813148ff142152f74d0f78a [log] [tgz]
author: Chandler Carruth <chandlerc@gmail.com> Mon Jul 07 19:03:32 2014 +0000
committer: Chandler Carruth <chandlerc@gmail.com> Mon Jul 07 19:03:32 2014 +0000
tree: 57b7cca702e05aa63c01fd301a14ce82cb39edc1
parent: c1856835d13e938c838bc66851db0351a6db4b66 [diff]
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9a91dcc..7198203 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

@@ -654,12 +654,13 @@
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
     return CN;
 
-  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N))
-    if (SDValue Splat = BV->getConstantSplatValue())
-      if (auto *CN = dyn_cast<ConstantSDNode>(Splat))
-        // BuildVectors can truncate their operands. Ignore that case here.
-        if (CN->getValueType(0) == N.getValueType().getScalarType())
-          return CN;
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+    ConstantSDNode *CN = BV->getConstantSplatValue();
+
+    // BuildVectors can truncate their operands. Ignore that case here.
+    if (CN && CN->getValueType(0) == N.getValueType().getScalarType())
+      return CN;
+  }
 
   return nullptr;
 }

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index fb7d1b1..3a8a5f9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

@@ -6603,28 +6603,16 @@
   return true;
 }
 
-SDValue BuildVectorSDNode::getConstantSplatValue() const {
-  SDValue Splatted;
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
-    SDValue Op = getOperand(i);
-    if (Op.getOpcode() == ISD::UNDEF)
-      continue;
-    if (Op.getOpcode() != ISD::Constant && Op.getOpcode() != ISD::ConstantFP)
-      return SDValue();
+ConstantSDNode *BuildVectorSDNode::getConstantSplatValue() const {
+  SDValue Op0 = getOperand(0);
+  if (Op0.getOpcode() != ISD::Constant)
+    return nullptr;
 
-    if (!Splatted)
-      Splatted = Op;
-    else if (Splatted != Op)
-      return SDValue();
-  }
+  for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
+    if (getOperand(i) != Op0)
+      return nullptr;
 
-  if (!Splatted) {
-    assert(getOperand(0).getOpcode() == ISD::UNDEF &&
-           "Can only have a splat without a constant for all undefs.");
-    return getOperand(0);
-  }
-
-  return Splatted;
+  return cast<ConstantSDNode>(Op0);
 }
 
 bool BuildVectorSDNode::isConstant() const {

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1b3e428..ad91d4a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

@@ -1152,15 +1152,14 @@
 
   bool IsVec = false;
   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
-  if (!CN)
-    if (auto *BV = dyn_cast<BuildVectorSDNode>(N))
-      if (SDValue Splat = BV->getConstantSplatValue())
-        if (auto *SplatCN = dyn_cast<ConstantSDNode>(Splat)) {
-          IsVec = true;
-          CN = SplatCN;
-        }
-  if (!CN)
-    return false;
+  if (!CN) {
+    const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
+    if (!BV)
+      return false;
+
+    IsVec = true;
+    CN = BV->getConstantSplatValue();
+  }
 
   switch (getBooleanContents(IsVec)) {
   case UndefinedBooleanContent:
@@ -1180,15 +1179,14 @@
 
   bool IsVec = false;
   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
-  if (!CN)
-    if (auto *BV = dyn_cast<BuildVectorSDNode>(N))
-      if (SDValue Splat = BV->getConstantSplatValue())
-        if (auto *SplatCN = dyn_cast<ConstantSDNode>(Splat)) {
-          IsVec = true;
-          CN = SplatCN;
-        }
-  if (!CN)
-    return false;
+  if (!CN) {
+    const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
+    if (!BV)
+      return false;
+
+    IsVec = true;
+    CN = BV->getConstantSplatValue();
+  }
 
   if (getBooleanContents(IsVec) == UndefinedBooleanContent)
     return !CN->getAPIntValue()[0];

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 61accea..4748daa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp

@@ -4858,6 +4858,19 @@
   return true;
 }
 
+/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
+/// all the same.
+static bool isSplatVector(SDNode *N) {
+  if (N->getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+
+  SDValue SplatValue = N->getOperand(0);
+  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
+    if (N->getOperand(i) != SplatValue)
+      return false;
+  return true;
+}
+
 /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
 /// to an zero vector.
 /// FIXME: move to dag combiner / method on ShuffleVectorSDNode
@@ -5766,20 +5779,17 @@
       return SDValue();
 
     case ISD::BUILD_VECTOR: {
-      auto *BVOp = cast<BuildVectorSDNode>(Op.getNode());
       // The BUILD_VECTOR node must be a splat.
-      SDValue Splat = BVOp->getConstantSplatValue();
-      if (!Splat)
+      if (!isSplatVector(Op.getNode()))
         return SDValue();
 
-      Ld = Splat;
+      Ld = Op.getOperand(0);
       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
                      Ld.getOpcode() == ISD::ConstantFP);
 
       // The suspected load node has several users. Make sure that all
       // of its users are from the BUILD_VECTOR node.
       // Constants may have multiple users.
-      // FIXME: This doesn't make sense if the build vector contains undefs.
       if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0))
         return SDValue();
       break;
@@ -9406,12 +9416,8 @@
   bool Commuted = false;
   // FIXME: This should also accept a bitcast of a splat?  Be careful, not
   // 1,1,1,1 -> v8i16 though.
-  if (auto *BVOp = dyn_cast<BuildVectorSDNode>(V1.getNode()))
-    if (BVOp->getConstantSplatValue())
-      V1IsSplat = true;
-  if (auto *BVOp = dyn_cast<BuildVectorSDNode>(V2.getNode()))
-    if (BVOp->getConstantSplatValue())
-      V2IsSplat = true;
+  V1IsSplat = isSplatVector(V1.getNode());
+  V2IsSplat = isSplatVector(V2.getNode());
 
   // Canonicalize the splat or undef, if present, to be on the RHS.
   if (!V2IsUndef && V1IsSplat && !V2IsSplat) {
@@ -15206,11 +15212,10 @@
   SDValue Amt = Op.getOperand(1);
 
   // Optimize shl/srl/sra with constant shift amount.
-  if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
-    if (SDValue Splat = BVAmt->getConstantSplatValue()) {
-      uint64_t ShiftAmt = Splat.getOpcode() == ISD::UNDEF
-                              ? 0
-                              : cast<ConstantSDNode>(Splat)->getZExtValue();
+  if (isSplatVector(Amt.getNode())) {
+    SDValue SclrAmt = Amt->getOperand(0);
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
+      uint64_t ShiftAmt = C->getZExtValue();
 
       if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
           (Subtarget->hasInt256() &&
@@ -19459,35 +19464,27 @@
           Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
         return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
 
-      if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
-        SDValue OpRHSSplat = OpRHSBV->getConstantSplatValue();
-        auto *OpRHSSplatConst = dyn_cast<ConstantSDNode>(OpRHSSplat);
-        if (auto *CondRHSBV = dyn_cast<BuildVectorSDNode>(CondRHS)) {
-          // If the RHS is a constant we have to reverse the const
-          // canonicalization.
-          // x > C-1 ? x+-C : 0 --> subus x, C
-          SDValue CondRHSSplat = CondRHSBV->getConstantSplatValue();
-          auto *CondRHSSplatConst = dyn_cast<ConstantSDNode>(CondRHSSplat);
-          if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
-              CondRHSSplatConst && OpRHSSplatConst) {
-            APInt A = OpRHSSplatConst->getAPIntValue();
-            if (CondRHSSplatConst->getAPIntValue() == -A - 1)
-              return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
-                                 DAG.getConstant(-A, VT));
-          }
-        }
+      // If the RHS is a constant we have to reverse the const canonicalization.
+      // x > C-1 ? x+-C : 0 --> subus x, C
+      if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
+          isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
+        APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
+        if (CondRHS.getConstantOperandVal(0) == -A-1)
+          return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
+                             DAG.getConstant(-A, VT));
+      }
 
-        // Another special case: If C was a sign bit, the sub has been
-        // canonicalized into a xor.
-        // FIXME: Would it be better to use computeKnownBits to determine
-        //        whether it's safe to decanonicalize the xor?
-        // x s< 0 ? x^C : 0 --> subus x, C
-        if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
-            ISD::isBuildVectorAllZeros(CondRHS.getNode()) && OpRHSSplatConst) {
-          APInt A = OpRHSSplatConst->getAPIntValue();
-          if (A.isSignBit())
-            return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
-        }
+      // Another special case: If C was a sign bit, the sub has been
+      // canonicalized into a xor.
+      // FIXME: Would it be better to use computeKnownBits to determine whether
+      //        it's safe to decanonicalize the xor?
+      // x s< 0 ? x^C : 0 --> subus x, C
+      if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
+          ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
+          isSplatVector(OpRHS.getNode())) {
+        APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
+        if (A.isSignBit())
+          return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
       }
     }
   }
@@ -20196,16 +20193,16 @@
   // vector operations in many cases. Also, on sandybridge ADD is faster than
   // shl.
   // (shl V, 1) -> add V,V
-  if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
-    if (SDValue N1Splat = N1BV->getConstantSplatValue()) {
-      assert(N0.getValueType().isVector() && "Invalid vector shift type");
-      // We shift all of the values by one. In many cases we do not have
-      // hardware support for this operation. This is better expressed as an ADD
-      // of two values.
-      if (N1Splat.getOpcode() == ISD::Constant &&
-          cast<ConstantSDNode>(N1Splat)->getZExtValue() == 1)
-        return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
+  if (isSplatVector(N1.getNode())) {
+    assert(N0.getValueType().isVector() && "Invalid vector shift type");
+    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(0));
+    // We shift all of the values by one. In many cases we do not have
+    // hardware support for this operation. This is better expressed as an ADD
+    // of two values.
+    if (N1C && (1 == N1C->getZExtValue())) {
+      return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
     }
+  }
 
   return SDValue();
 }
@@ -20224,19 +20221,20 @@
 
   SDValue Amt = N->getOperand(1);
   SDLoc DL(N);
-  if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Amt))
-    if (SDValue AmtSplat = AmtBV->getConstantSplatValue())
-      if (auto *AmtConst = dyn_cast<ConstantSDNode>(AmtSplat)) {
-        APInt ShiftAmt = AmtConst->getAPIntValue();
-        unsigned MaxAmount = VT.getVectorElementType().getSizeInBits();
+  if (isSplatVector(Amt.getNode())) {
+    SDValue SclrAmt = Amt->getOperand(0);
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
+      APInt ShiftAmt = C->getAPIntValue();
+      unsigned MaxAmount = VT.getVectorElementType().getSizeInBits();
 
-        // SSE2/AVX2 logical shifts always return a vector of 0s
-        // if the shift amount is bigger than or equal to
-        // the element size. The constant shift amount will be
-        // encoded as a 8-bit immediate.
-        if (ShiftAmt.trunc(8).uge(MaxAmount))
-          return getZeroVector(VT, Subtarget, DAG, DL);
-      }
+      // SSE2/AVX2 logical shifts always return a vector of 0s
+      // if the shift amount is bigger than or equal to
+      // the element size. The constant shift amount will be
+      // encoded as a 8-bit immediate.
+      if (ShiftAmt.trunc(8).uge(MaxAmount))
+        return getZeroVector(VT, Subtarget, DAG, DL);
+    }
+  }
 
   return SDValue();
 }
@@ -20430,10 +20428,9 @@
 
   // The right side has to be a 'trunc' or a constant vector.
   bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
-  SDValue RHSConstSplat;
-  if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1))
-    RHSConstSplat = RHSBV->getConstantSplatValue();
-  if (!RHSTrunc && !RHSConstSplat)
+  bool RHSConst = (isSplatVector(N1.getNode()) &&
+                   isa<ConstantSDNode>(N1->getOperand(0)));
+  if (!RHSTrunc && !RHSConst)
     return SDValue();
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -20443,9 +20440,9 @@
 
   // Set N0 and N1 to hold the inputs to the new wide operation.
   N0 = N0->getOperand(0);
-  if (RHSConstSplat) {
+  if (RHSConst) {
     N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
-                     RHSConstSplat);
+                     N1->getOperand(0));
     SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
     N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C);
   } else if (RHSTrunc) {
@@ -20591,10 +20588,12 @@
       unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
       unsigned SraAmt = ~0;
       if (Mask.getOpcode() == ISD::SRA) {
-        if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Mask.getOperand(1)))
-          if (SDValue AmtSplat = AmtBV->getConstantSplatValue())
-            if (auto *AmtConst = dyn_cast<ConstantSDNode>(AmtSplat))
-              SraAmt = AmtConst->getZExtValue();
+        SDValue Amt = Mask.getOperand(1);
+        if (isSplatVector(Amt.getNode())) {
+          SDValue SclrAmt = Amt->getOperand(0);
+          if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt))
+            SraAmt = C->getZExtValue();
+        }
       } else if (Mask.getOpcode() == X86ISD::VSRAI) {
         SDValue SraC = Mask.getOperand(1);
         SraAmt  = cast<ConstantSDNode>(SraC)->getZExtValue();
commit	beeacac0b32a94abd813148ff142152f74d0f78a	[log] [tgz]
author	Chandler Carruth <chandlerc@gmail.com>	Mon Jul 07 19:03:32 2014 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	Mon Jul 07 19:03:32 2014 +0000
tree	57b7cca702e05aa63c01fd301a14ce82cb39edc1
parent	c1856835d13e938c838bc66851db0351a6db4b66 [diff]