AMDGPU: Lower buffer store and atomic intrinsics manually
Summary:
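Without this, SIMemoryLegalizer inserts s_waitcnt vmcnt(0) before every
buffer store and atomic instruction. This change lowers those intrinsics in
SIISelLowering to AMDGPUISD memory-intrinsic nodes that carry an explicit
MachineMemOperand.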
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D39060
llvm-svn: 317754
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d1120f5..4428b7c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4238,6 +4238,95 @@
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, VT, MMO);
}
+ case Intrinsic::amdgcn_buffer_atomic_swap: // Lower the non-cmpswap buffer atomics to BUFFER_ATOMIC_* memory-intrinsic nodes.
+ case Intrinsic::amdgcn_buffer_atomic_add:
+ case Intrinsic::amdgcn_buffer_atomic_sub:
+ case Intrinsic::amdgcn_buffer_atomic_smin:
+ case Intrinsic::amdgcn_buffer_atomic_umin:
+ case Intrinsic::amdgcn_buffer_atomic_smax:
+ case Intrinsic::amdgcn_buffer_atomic_umax:
+ case Intrinsic::amdgcn_buffer_atomic_and:
+ case Intrinsic::amdgcn_buffer_atomic_or:
+ case Intrinsic::amdgcn_buffer_atomic_xor: { // All ten share the operand layout below; only the opcode differs.
+ SDValue Ops[] = { // Operand 1 is not forwarded (presumably the intrinsic ID - confirm).
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Op.getOperand(5), // offset
+ Op.getOperand(6) // slc
+ };
+ EVT VT = Op.getOperand(3).getValueType(); // NOTE(review): this is the type of rsrc (operand 3), not vdata - confirm intended.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), // Default-constructed pointer info.
+ MachineMemOperand::MOLoad | // Flagged as both a load and a store.
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOVolatile,
+ VT.getStoreSize(), 4); // Store size of VT, then the literal 4 (presumably the alignment - confirm).
+ unsigned Opcode = 0; // Assigned by the switch below.
+
+ switch (IntrID) { // Map each intrinsic ID to the like-named AMDGPUISD::BUFFER_ATOMIC_* opcode.
+ case Intrinsic::amdgcn_buffer_atomic_swap:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_add:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_sub:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_smin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_umin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_smax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_umax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_and:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_AND;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_or:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_OR;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_xor:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
+ break;
+ default:
+ llvm_unreachable("unhandled atomic opcode"); // The ten case labels above are all handled by this switch.
+ }
+
+ return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO); // Result types are taken from Op's own VT list.
+ }
+
+ case Intrinsic::amdgcn_buffer_atomic_cmpswap: { // Lower buffer cmpswap to a BUFFER_ATOMIC_CMPSWAP memory-intrinsic node.
+ SDValue Ops[] = { // Operand 1 is not forwarded (presumably the intrinsic ID - confirm).
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // src
+ Op.getOperand(3), // cmp
+ Op.getOperand(4), // rsrc
+ Op.getOperand(5), // vindex
+ Op.getOperand(6), // offset
+ Op.getOperand(7) // slc
+ };
+ EVT VT = Op.getOperand(4).getValueType(); // NOTE(review): this is the type of rsrc (operand 4), not src/cmp - confirm intended.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), // Default-constructed pointer info.
+ MachineMemOperand::MOLoad | // Flagged as both a load and a store.
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOVolatile,
+ VT.getStoreSize(), 4); // Store size of VT, then the literal 4 (presumably the alignment - confirm).
+
+ return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, // Fixed opcode; no per-intrinsic dispatch needed.
+ Op->getVTList(), Ops, VT, MMO); // Result types are taken from Op's own VT list.
+ }
+
// Basic sample.
case Intrinsic::amdgcn_image_sample:
case Intrinsic::amdgcn_image_sample_cl:
@@ -4465,6 +4554,30 @@
Op->getVTList(), Ops, VT, MMO);
}
+ case Intrinsic::amdgcn_buffer_store: // Lower buffer stores to BUFFER_STORE / BUFFER_STORE_FORMAT memory-intrinsic nodes.
+ case Intrinsic::amdgcn_buffer_store_format: { // Both share the operand layout below; only the opcode differs.
+ SDValue Ops[] = { // Operand 1 is not forwarded (presumably the intrinsic ID - confirm).
+ Chain, // Defined outside this hunk.
+ Op.getOperand(2), // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Op.getOperand(5), // offset
+ Op.getOperand(6), // glc
+ Op.getOperand(7) // slc
+ };
+ EVT VT = Op.getOperand(3).getValueType(); // NOTE(review): this is the type of rsrc (operand 3), not vdata - confirm intended.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), // Default-constructed pointer info.
+ MachineMemOperand::MOStore | // Store only; unlike the atomic lowerings, no MOLoad or MOVolatile.
+ MachineMemOperand::MODereferenceable,
+ VT.getStoreSize(), 4); // Store size of VT, then the literal 4 (presumably the alignment - confirm).
+
+ unsigned Opcode = IntrinsicID == Intrinsic::amdgcn_buffer_store ? // Plain store vs. format store.
+ AMDGPUISD::BUFFER_STORE :
+ AMDGPUISD::BUFFER_STORE_FORMAT;
+ return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO); // Result types are taken from Op's own VT list.
+ }
+
default:
return Op;
}