Implement vector shift up / down with zero insertion using psllq / psrlq (64-bit vectors) and pslldq / psrldq (128-bit vectors).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51667 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 44c72b6..24954d7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1853,10 +1853,17 @@
unsigned NumElems = PermMask.getNumOperands();
SDOperand V = (Idx < NumElems) ? N->getOperand(0) : N->getOperand(1);
Idx %= NumElems;
- if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- return (Idx == 0)
- ? V.getOperand(0) : getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
+
+ if (V.getOpcode() == ISD::BIT_CONVERT) {
+ V = V.getOperand(0);
+ if (MVT::getVectorNumElements(V.getValueType()) != NumElems)
+ return SDOperand();
}
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Idx == 0) ? V.getOperand(0)
+ : getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Idx);
if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
SDOperand Elt = PermMask.getOperand(Idx);
if (Elt.getOpcode() == ISD::UNDEF)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5343971..d194d38 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2923,6 +2923,70 @@
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
+/// getNumOfConsecutiveZeros - Count the run of zero or undef elements at one
+/// end of a shuffle result: from element 0 up if Low, else from the last down.
+static
+unsigned getNumOfConsecutiveZeros(SDOperand Op, SDOperand Mask,
+ unsigned NumElems, bool Low,
+ SelectionDAG &DAG) {
+ unsigned NumZeros = 0;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDOperand Idx = Mask.getOperand(Low ? i : NumElems-i-1);  // next lane in scan order
+ if (Idx.getOpcode() == ISD::UNDEF) {  // undef lanes are counted like zeros
+ ++NumZeros;
+ continue;
+ }
+ unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
+ SDOperand Elt = DAG.getShuffleScalarElt(Op.Val, Index);
+ if (Elt.Val && isZeroNode(Elt))  // a null Elt is treated as non-zero
+ ++NumZeros;
+ else
+ break;  // the run ends at the first lane that is not zero / undef
+ }
+ return NumZeros;
+}
+
+/// isVectorShift - Returns true if the shuffle is a logical shift of a single
+/// input (ShVal) by ShAmt elements. isLeft is clobbered even on failure.
+static bool isVectorShift(SDOperand Op, SDOperand Mask, SelectionDAG &DAG,
+ bool &isLeft, SDOperand &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = Mask.getNumOperands();
+
+ isLeft = true;  // first look for zeros at the low end (left shift)
+ unsigned NumZeros= getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG);
+ if (!NumZeros) {
+ isLeft = false;  // otherwise look for zeros at the high end (right shift)
+ NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG);
+ if (!NumZeros)
+ return false;
+ }
+
+ bool SeenV1 = false;
+ bool SeenV2 = false;
+ for (unsigned i = NumZeros; i < NumElems; ++i) {
+ unsigned Val = isLeft ? (i - NumZeros) : i;  // source lane a pure shift would place here
+ SDOperand Idx = Mask.getOperand(isLeft ? i : (i - NumZeros));  // result lane being checked
+ if (Idx.getOpcode() == ISD::UNDEF)  // undef lanes match anything
+ continue;
+ unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
+ if (Index < NumElems)
+ SeenV1 = true;
+ else {
+ Index -= NumElems;  // rebase second-operand indices to 0..NumElems-1
+ SeenV2 = true;
+ }
+ if (Index != Val)
+ return false;
+ }
+ if (SeenV1 && SeenV2)  // all defined lanes must come from the same operand
+ return false;
+
+ ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1);  // operand 1 also when no defined lane was seen
+ ShAmt = NumZeros;  // in elements, not bits
+ return true;
+}
+
+
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
@@ -2995,6 +3059,20 @@
return V;
}
+/// getVShift - Build an X86ISD::VSHL (isLeft) or VSRL of SrcOp by NumBits bits,
+/// performed on v1i64 / v2i64 and bit-converted back to VT.
+static SDOperand getVShift(bool isLeft, MVT::ValueType VT, SDOperand SrcOp,
+ unsigned NumBits, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ bool isMMX = MVT::getSizeInBits(VT) == 64;  // any other size is shifted as v2i64
+ MVT::ValueType ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+ unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
+ SrcOp = DAG.getNode(ISD::BIT_CONVERT, ShVT, SrcOp);  // reinterpret the source in the shift type
+ return DAG.getNode(ISD::BIT_CONVERT, VT,
+ DAG.getNode(Opc, ShVT, SrcOp,
+ DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
+}
+
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3091,6 +3169,15 @@
return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
Subtarget->hasSSE2(), DAG);
}
+
+ // Is it a vector logical left shift?
+ if (NumElems == 2 && Idx == 1 &&
+ isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
+ unsigned NumBits = MVT::getSizeInBits(VT);
+ return getVShift(true, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(1)),
+ NumBits/2, DAG, *this);
+ }
if (IsAllConstants) // Otherwise, it's better to do a constpool load.
return SDOperand();
@@ -3615,6 +3702,19 @@
}
}
+ // Check if this can be converted into a logical shift.
+ bool isLeft = false;
+ unsigned ShAmt = 0;
+ SDOperand ShVal;
+ bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt);
+ if (isShift && ShVal.hasOneUse()) {
+ // If the shifted value has multiple uses, it may be cheaper to use
+ // v_set0 + movlhps or movhlps, etc.
+ MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ ShAmt *= MVT::getSizeInBits(EVT);
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
+ }
+
if (X86::isMOVLMask(PermMask.Val)) {
if (V1IsUndef)
return V2;
@@ -3634,6 +3734,13 @@
ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
+ if (isShift) {
+ // No better options. Use a vshl / vsrl.
+ MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ ShAmt *= MVT::getSizeInBits(EVT);
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
+ }
+
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
@@ -5729,6 +5836,8 @@
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VSHL: return "X86ISD::VSHL";
+ case X86ISD::VSRL: return "X86ISD::VSRL";
}
}
@@ -6296,8 +6405,10 @@
static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget,
const TargetLowering &TLI) {
+ unsigned NumOps = N->getNumOperands();
+
// Ignore single operand BUILD_VECTOR.
- if (N->getNumOperands() == 1)
+ if (NumOps == 1)
return SDOperand();
MVT::ValueType VT = N->getValueType(0);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index b99a09b..0c67794 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -205,7 +205,10 @@
VZEXT_MOVL,
// VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
- VZEXT_LOAD
+ VZEXT_LOAD,
+
+ // VSHL, VSRL - Vector logical left / right shift.
+ VSHL, VSRL
};
}
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 42f19af..b167a7a 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -294,6 +294,12 @@
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
+// Shift up / down and insert zeros: v1i64 X86vshl / X86vshr select MMX_PSLLQri / MMX_PSRLQri.
+def : Pat<(v1i64 (X86vshl VR64:$src, (i8 imm:$amt))),
+ (v1i64 (MMX_PSLLQri VR64:$src, imm:$amt))>;
+def : Pat<(v1i64 (X86vshr VR64:$src, (i8 imm:$amt))),
+ (v1i64 (MMX_PSRLQri VR64:$src, imm:$amt))>;
+
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1ea4bfd..3d5959a 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -51,6 +51,8 @@
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad]>;
+def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;  // vector logical shift left
+def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;  // vector logical shift right; named vshr but maps to VSRL
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
@@ -1957,6 +1959,12 @@
(v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
(v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+
+ // Shift up / down and insert zeros. The amount goes through PSxLDQ_imm (presumably bits -> bytes; confirm).
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSLLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSRLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
}
// Logical