Reapply "AMDGPU: Add ds_gws_init / ds_gws_barrier intrinsics"
This reapplies r363678, using the correct chain for the CopyToReg for
v0. glueCopyToM0 counterintuitively changes the operands of the
original node.
llvm-svn: 363870
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 0c880a3..eb1e1be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -218,7 +218,9 @@
void SelectFMAD_FMA(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
+ void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
+ void SelectINTRINSIC_VOID(SDNode *N);
protected:
// Include the pieces autogenerated from the target description.
@@ -832,6 +834,10 @@
SelectINTRINSIC_W_CHAIN(N);
return;
}
+ case ISD::INTRINSIC_VOID: {
+ SelectINTRINSIC_VOID(N);
+ return;
+ }
}
SelectCode(N);
@@ -2034,6 +2040,72 @@
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
+void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { // Selects N to DS_GWS_INIT or DS_GWS_BARRIER.
+ SDLoc SL(N);
+ SDValue VSrc0 = N->getOperand(2); // Value that is later copied into v0.
+ SDValue BaseOffset = N->getOperand(3); // Resource id offset; split between m0 and the offset field.
+ int ImmOffset = 0; // Becomes the instruction's offset field operand.
+ MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+ MachineMemOperand *MMO = M->getMemOperand();
+
+ // Don't worry if the offset ends up in a VGPR. Only one lane will have
+ // effect, so SIFixSGPRCopies will validly insert readfirstlane.
+
+ // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
+ // offset field) % 64. Some versions of the programming guide omit the m0
+ // part, or claim it's from offset 0.
+ if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
+ // If we have a constant offset, try to use the default value for m0 as a
+ // base to possibly avoid setting it up.
+ glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32));
+ ImmOffset = ConstOffset->getZExtValue() + 1; // m0 = -1 has M0[21:16] = 63; the +1 cancels it modulo 64.
+ } else {
+ if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
+ ImmOffset = BaseOffset.getConstantOperandVal(1); // Constant part goes in the offset field.
+ BaseOffset = BaseOffset.getOperand(0); // Only the remaining base is routed through m0.
+ }
+
+ // Prefer to do the shift in an SGPR since it should be possible to use m0
+ // as the result directly. If it's already an SGPR, it will be eliminated
+ // later.
+ SDNode *SGPROffset
+ = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
+ BaseOffset);
+ // Shift the base left by 16 so it lands in M0[21:16] (see the formula above).
+ SDNode *M0Base
+ = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
+ SDValue(SGPROffset, 0),
+ CurDAG->getTargetConstant(16, SL, MVT::i32));
+ glueCopyToM0(N, SDValue(M0Base, 0));
+ }
+
+ // The manual doesn't mention this, but it seems only v0 works.
+ SDValue V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
+ // glueCopyToM0 changes N's operands, so read the chain (operand 0) from N only after it.
+ SDValue CopyToV0 = CurDAG->getCopyToReg(
+ N->getOperand(0), SL, V0, VSrc0,
+ N->getOperand(N->getNumOperands() - 1)); // Presumably the glue glueCopyToM0 appended -- TODO confirm.
+
+ SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
+
+ // TODO: Can this just be removed from the instruction?
+ SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
+
+ unsigned Opc = IntrID == Intrinsic::amdgcn_ds_gws_init ?
+ AMDGPU::DS_GWS_INIT : AMDGPU::DS_GWS_BARRIER; // Any IntrID other than ds_gws_init is treated as ds_gws_barrier.
+
+ SDValue Ops[] = {
+ V0,
+ OffsetField,
+ GDS,
+ CopyToV0, // Chain
+ CopyToV0.getValue(1) // Glue
+ };
+
+ SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); // Keep the intrinsic's memory operand on the selected node.
+}
+
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntrID) {
@@ -2044,6 +2116,18 @@
SelectDSAppendConsume(N, IntrID);
return;
}
+ }
+
+ SelectCode(N);
+}
+
+void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
+ unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntrID) {
+ case Intrinsic::amdgcn_ds_gws_init:
+ case Intrinsic::amdgcn_ds_gws_barrier:
+ SelectDS_GWS(N, IntrID);
+ return;
default:
break;
}