Reapply "AMDGPU: Add ds_gws_init / ds_gws_barrier intrinsics"
This reapplies r363678, now using the correct chain for the CopyToReg for
v0. Counterintuitively, glueCopyToM0 changes the operands of the
original node.
llvm-svn: 363870
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 3269423..29c891c 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -536,15 +536,19 @@
// Put score on the source vgprs. If this is a store, just use those
// specific register(s).
if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
+ int AddrOpIdx =
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr);
// All GDS operations must protect their address register (same as
// export.)
- if (Inst.getOpcode() != AMDGPU::DS_APPEND &&
- Inst.getOpcode() != AMDGPU::DS_CONSUME) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr),
- CurrScore);
+ if (AddrOpIdx != -1) {
+ setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore);
+ } else {
+ assert(Inst.getOpcode() == AMDGPU::DS_APPEND ||
+ Inst.getOpcode() == AMDGPU::DS_CONSUME ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER);
}
+
if (Inst.mayStore()) {
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::data0) != -1) {
@@ -1407,18 +1411,6 @@
ScoreBrackets.dump();
});
- // Check to see if this is a GWS instruction. If so, and if this is CI or
- // VI, then the generated code sequence will include an S_WAITCNT 0.
- // TODO: Are these the only GWS instructions?
- if (Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
- Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_V ||
- Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
- Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
- Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
- // TODO: && context->target_info->GwsRequiresMemViolTest() ) {
- ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt());
- }
-
// TODO: Remove this work-around after fixing the scheduler and enable the
// assert above.
if (VCCZBugWorkAround) {