[InstCombine][x86] try even harder to convert blendv intrinsic to generic IR (PR38814)

Follow-up to rL342324 (D52059):

Missing optimizations with blendv are shown in:
https://bugs.llvm.org/show_bug.cgi?id=38814

This is an easier and more powerful solution than adding pattern matching for a few 
special cases in the backend. The potential danger with this transform in IR is that 
the condition value can get separated from the select, and the backend might not be 
able to make a blendv out of it again.

llvm-svn: 342806
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bff4659..efe41b4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2949,14 +2949,27 @@
     // Convert to a vector select if we can bypass casts and find a boolean
     // vector condition value.
     Value *BoolVec;
-    if (match(peekThroughBitcast(Mask), m_SExt(m_Value(BoolVec)))) {
-      auto *VTy = dyn_cast<VectorType>(BoolVec->getType());
-      if (VTy && VTy->getScalarSizeInBits() == 1 &&
-          VTy->getVectorNumElements() == II->getType()->getVectorNumElements())
+    Mask = peekThroughBitcast(Mask);
+    if (match(Mask, m_SExt(m_Value(BoolVec))) &&
+        BoolVec->getType()->isVectorTy() &&
+        BoolVec->getType()->getScalarSizeInBits() == 1) {
+      assert(Mask->getType()->getPrimitiveSizeInBits() ==
+             II->getType()->getPrimitiveSizeInBits() &&
+             "Not expecting mask and operands with different sizes");
+
+      unsigned NumMaskElts = Mask->getType()->getVectorNumElements();
+      unsigned NumOperandElts = II->getType()->getVectorNumElements();
+      if (NumMaskElts == NumOperandElts)
         return SelectInst::Create(BoolVec, Op1, Op0);
-      // TODO: If we can find a boolean vector condition with less elements,
-      //       then we can form a vector select by bitcasting Op0/Op1 to a
-      //       vector type with wider elements and bitcasting the result.
+
+      // If the mask has less elements than the operands, each mask bit maps to
+      // multiple elements of the operands. Bitcast back and forth.
+      if (NumMaskElts < NumOperandElts) {
+        Value *CastOp0 = Builder.CreateBitCast(Op0, Mask->getType());
+        Value *CastOp1 = Builder.CreateBitCast(Op1, Mask->getType());
+        Value *Sel = Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
+        return new BitCastInst(Sel, II->getType());
+      }
     }
 
     break;