AMDGPU/SI: Add d16 support for buffer intrinsics.
Differential Revision:
https://reviews.llvm.org/D38906
Reviewers:
Matt and Brian.
llvm-svn: 322402
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 415d8a5..461f5f8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -207,11 +207,14 @@
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
@@ -3501,6 +3504,78 @@
return SDValue();
}
+static SDValue adjustLoadValueType(SDValue Result, EVT LoadVT, SDLoc DL,
+ SelectionDAG &DAG, bool Unpacked) {
+ if (Unpacked) { // From v2i32/v4i32 back to v2f16/v4f16.
+ // Truncate to v2i16/v4i16.
+ EVT IntLoadVT = LoadVT.changeTypeToInteger();
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, IntLoadVT, Result);
+ // Bitcast to original type (v2f16/v4f16).
+ return DAG.getNode(ISD::BITCAST, DL, LoadVT, Trunc);
+ }
+ // Cast back to the original packed type.
+ return DAG.getNode(ISD::BITCAST, DL, LoadVT, Result);
+}
+
+// This is to lower INTRINSIC_W_CHAIN with illegal result types.
+SDValue SITargetLowering::lowerIntrinsicWChain_IllegalReturnType(SDValue Op,
+ SDValue &Chain, SelectionDAG &DAG) const {
+ EVT LoadVT = Op.getValueType();
+ // TODO: handle v3f16.
+ if (LoadVT != MVT::v2f16 && LoadVT != MVT::v4f16)
+ return SDValue();
+
+ bool Unpacked = Subtarget->hasUnpackedD16VMem();
+ EVT UnpackedLoadVT = (LoadVT == MVT::v2f16) ? MVT::v2i32 : MVT::v4i32;
+ EVT EquivLoadVT = Unpacked ? UnpackedLoadVT :
+ getEquivalentMemType(*DAG.getContext(), LoadVT);
+ // Change from v4f16/v2f16 to EquivLoadVT.
+ SDVTList VTList = DAG.getVTList(EquivLoadVT, MVT::Other);
+
+ SDValue Res;
+ SDLoc DL(Op);
+ MemSDNode *M = cast<MemSDNode>(Op);
+ unsigned IID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (IID) {
+ case Intrinsic::amdgcn_tbuffer_load: {
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Op.getOperand(4), // voffset
+ Op.getOperand(5), // soffset
+ Op.getOperand(6), // offset
+ Op.getOperand(7), // dfmt
+ Op.getOperand(8), // nfmt
+ Op.getOperand(9), // glc
+ Op.getOperand(10) // slc
+ };
+ Res = DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16, DL,
+ VTList, Ops, M->getMemoryVT(),
+ M->getMemOperand());
+ Chain = Res.getValue(1);
+ return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked);
+ }
+ case Intrinsic::amdgcn_buffer_load_format: {
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Op.getOperand(4), // offset
+ Op.getOperand(5), // glc
+ Op.getOperand(6) // slc
+ };
+ Res = DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
+ DL, VTList, Ops, M->getMemoryVT(),
+ M->getMemOperand());
+ Chain = Res.getValue(1);
+ return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked);
+ }
+ default:
+ return SDValue();
+ }
+}
+
void SITargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -3528,6 +3603,16 @@
}
break;
}
+ case ISD::INTRINSIC_W_CHAIN: {
+ SDValue Chain;
+ if (SDValue Res = lowerIntrinsicWChain_IllegalReturnType(SDValue(N, 0),
+ Chain, DAG)) {
+ Results.push_back(Res);
+ Results.push_back(Chain);
+ return;
+ }
+ break;
+ }
case ISD::SELECT: {
SDLoc SL(N);
EVT VT = N->getValueType(0);
@@ -4656,6 +4741,31 @@
}
}
+SDValue SITargetLowering::handleD16VData(SDValue VData,
+ SelectionDAG &DAG) const {
+ EVT StoreVT = VData.getValueType();
+ SDLoc DL(VData);
+
+ if (StoreVT.isVector()) {
+ assert ((StoreVT.getVectorNumElements() != 3) && "Handle v3f16");
+ if (!Subtarget->hasUnpackedD16VMem()) {
+ if (!isTypeLegal(StoreVT)) {
+ // If Target supports packed vmem, we just need to workaround
+ // the illegal type by casting to an equivalent one.
+ EVT EquivStoreVT = getEquivalentMemType(*DAG.getContext(), StoreVT);
+ return DAG.getNode(ISD::BITCAST, DL, EquivStoreVT, VData);
+ }
+ } else { // We need to unpack the packed data to store.
+ EVT IntStoreVT = StoreVT.changeTypeToInteger();
+ SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
+ EVT EquivStoreVT = (StoreVT == MVT::v2f16) ? MVT::v2i32 : MVT::v4i32;
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, EquivStoreVT, IntVData);
+ }
+ }
+ // No change for f16 and legal vector D16 types.
+ return VData;
+}
+
SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -4798,9 +4908,13 @@
}
case Intrinsic::amdgcn_tbuffer_store: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
SDValue Ops[] = {
Chain,
- Op.getOperand(2), // vdata
+ VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
Op.getOperand(5), // voffset
@@ -4811,37 +4925,34 @@
Op.getOperand(10), // glc
Op.getOperand(11) // slc
};
- EVT VT = Op.getOperand(3).getValueType();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOStore,
- VT.getStoreSize(), 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
- Op->getVTList(), Ops, VT, MMO);
+ unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
+ AMDGPUISD::TBUFFER_STORE_FORMAT;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_buffer_store:
case Intrinsic::amdgcn_buffer_store_format: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
SDValue Ops[] = {
Chain,
- Op.getOperand(2), // vdata
+ VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
Op.getOperand(5), // offset
Op.getOperand(6), // glc
Op.getOperand(7) // slc
};
- EVT VT = Op.getOperand(3).getValueType();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOStore |
- MachineMemOperand::MODereferenceable,
- VT.getStoreSize(), 4);
-
- unsigned Opcode = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
- AMDGPUISD::BUFFER_STORE :
- AMDGPUISD::BUFFER_STORE_FORMAT;
- return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO);
+ unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
+ AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
+ Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
}
default: