AMDGPU: Use generic bitreverse intrinsic
Also fix bug in vector legalization for bitreverse.
llvm-svn: 255512
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index eddf666..4bd0b6b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -105,6 +105,7 @@
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
+ SDValue ExpandBITREVERSE(SDValue Op);
/// \brief Implements vector promotion.
///
@@ -280,6 +281,7 @@
case ISD::ROTL:
case ISD::ROTR:
case ISD::BSWAP:
+ case ISD::BITREVERSE:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
@@ -417,7 +419,7 @@
else
Operands[j] = Op.getOperand(j);
}
-
+
Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
@@ -715,6 +717,8 @@
return ExpandFNEG(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
+ case ISD::BITREVERSE:
+ return ExpandBITREVERSE(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
@@ -900,6 +904,25 @@
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
+SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // If we have the scalar operation, it's probably cheaper to unroll it.
+ if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // If we have the appropriate vector bit operations, it is better to use them
+ // than unrolling and expanding each component.
+ if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::OR, VT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Let LegalizeDAG handle this later.
+ return Op;
+}
+
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.