[AVX] Optimize x86 VSELECT instructions using SimplifyDemandedBits.
We know that the blend instructions only use the MSB, so if the mask is
sign-extended then we can convert it into a SHL instruction. This is a
common pattern because the type-legalizer sign-extends the i1 type which
is used by the LLVM-IR for the condition.
Added a new optimization in SimplifyDemandedBits for SIGN_EXTEND_INREG -> SHL.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148225 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 68cd441..b5198c8 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -12868,6 +12868,7 @@
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
DebugLoc DL = N->getDebugLoc();
SDValue Cond = N->getOperand(0);
@@ -13144,6 +13145,26 @@
}
}
+ // If we know that this node is legal then we know that it is going to be
+ // matched by one of the SSE/AVX BLEND instructions. These instructions only
+ // depend on the highest bit in each word. Try to use SimplifyDemandedBits
+ // to simplify previous instructions.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
+ !DCI.isBeforeLegalize() &&
+ TLI.isOperationLegal(ISD::VSELECT, VT)) {
+ unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
+ assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
+ APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
+ DCI.isBeforeLegalizeOps());
+ if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
+ TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO))
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+
return SDValue();
}
@@ -14609,7 +14630,7 @@
case ISD::EXTRACT_VECTOR_ELT:
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
case ISD::VSELECT:
- case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
+ case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);