[DagCombine] Improve the DAGCombiner BUILD_VECTOR combine when its elements come from two source vectors
This partially fixes PR21943.
For AVX, we go from:
vmovq (%rsi), %xmm0
vmovq (%rdi), %xmm1
vpermilps $-27, %xmm1, %xmm2 ## xmm2 = xmm1[1,1,2,3]
vinsertps $16, %xmm2, %xmm1, %xmm1 ## xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
vinsertps $32, %xmm0, %xmm1, %xmm1 ## xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
vpermilps $-27, %xmm0, %xmm0 ## xmm0 = xmm0[1,1,2,3]
vinsertps $48, %xmm0, %xmm1, %xmm0 ## xmm0 = xmm1[0,1,2],xmm0[0]
To the expected:
vmovq (%rdi), %xmm0
vmovhpd (%rsi), %xmm0, %xmm0
retq
Fixing this for AVX2 is still open.
Differential Revision: http://reviews.llvm.org/D6749
llvm-svn: 224759
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dc556fa..7a91aae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10832,6 +10832,7 @@
// If everything is good, we can make a shuffle operation.
if (VecIn1.getNode()) {
+ unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
unsigned Opcode = N->getOperand(i).getOpcode();
@@ -10858,8 +10859,8 @@
continue;
}
- // Otherwise, use InIdx + VecSize
- Mask.push_back(NumInScalars+ExtIndex);
+ // Otherwise, use InIdx + InputVecSize
+ Mask.push_back(InNumElements + ExtIndex);
}
// Avoid introducing illegal shuffles with zero.
@@ -10869,14 +10870,12 @@
// We can't generate a shuffle node with mismatched input and output types.
// Attempt to transform a single input vector to the correct type.
if ((VT != VecIn1.getValueType())) {
- // We don't support shuffeling between TWO values of different types.
- if (VecIn2.getNode())
- return SDValue();
-
// If the input vector type has a different base type to the output
// vector type, bail out.
- if (VecIn1.getValueType().getVectorElementType() !=
- VT.getVectorElementType())
+ EVT VTElemType = VT.getVectorElementType();
+ if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
+ (VecIn2.getNode() &&
+ (VecIn2.getValueType().getVectorElementType() != VTElemType)))
return SDValue();
// If the input vector is too small, widen it.
@@ -10884,11 +10883,22 @@
// output registers. For example XMM->YMM widening on X86 with AVX.
EVT VecInT = VecIn1.getValueType();
if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
- // Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ // If we only have one small input, widen it by adding undef values.
+ if (!VecIn2.getNode())
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
+ DAG.getUNDEF(VecIn1.getValueType()));
+ else if (VecIn1.getValueType() == VecIn2.getValueType()) {
+ // If we have two small inputs of the same type, try to concat them.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
+ VecIn2 = SDValue(nullptr, 0);
+ } else
+ return SDValue();
} else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
// If the input vector is too large, try to split it.
+ // We don't support having two input vectors that are too large.
+ if (VecIn2.getNode())
+ return SDValue();
+
if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
return SDValue();
@@ -10899,7 +10909,7 @@
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(0, TLI.getVectorIdxTy()));
UsesZeroVector = false;
- } else
+ } else
return SDValue();
}