AMDGPU: Move hazard avoidance out of waitcnt pass. This is mostly moving VMEM clause breaking into the hazard recognizer. Also move another hazard currently handled in the waitcnt pass. Also stops breaking clauses unless xnack is enabled. llvm-svn: 318557

commit: a41351e37c72c86c0547a74166712d023dd076b0 [log] [tgz]
author: Matt Arsenault <Matthew.Arsenault@amd.com> Fri Nov 17 21:35:32 2017 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> Fri Nov 17 21:35:32 2017 +0000
tree: 838f31d0d67836cebe3f12ee9415bbe314011fe0
parent: c0a81071d3d89c4e07f0ef39f5bb96eb9e6df76b [diff]
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index f9b400c..63634f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

@@ -806,10 +806,14 @@
     return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
-  bool hasReadM0Hazard() const {
+  bool hasReadM0MovRelInterpHazard() const {
     return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
+  bool hasReadM0SendMsgHazard() const {
+    return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+  }
+
   unsigned getKernArgSegmentSize(const MachineFunction &MF,
                                  unsigned ExplictArgBytes) const;
 

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 42bd202..be0588b 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

@@ -87,6 +87,18 @@
   }
 }
 
+static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case AMDGPU::S_SENDMSG:
+  case AMDGPU::S_SENDMSGHALT:
+  case AMDGPU::S_TTRACEDATA:
+    return true;
+  default:
+    // TODO: GDS
+    return false;
+  }
+}
+
 static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                      AMDGPU::OpName::simm16);
@@ -100,7 +112,10 @@
   if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
     return NoopHazard;
 
-  if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
+  // FIXME: Should flat be considered vmem?
+  if ((SIInstrInfo::isVMEM(*MI) ||
+       SIInstrInfo::isFLAT(*MI))
+      && checkVMEMHazards(MI) > 0)
     return NoopHazard;
 
   if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
@@ -124,7 +139,12 @@
   if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
     return NoopHazard;
 
-  if ((TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
+  if (ST.hasReadM0MovRelInterpHazard() &&
+      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
+      checkReadM0Hazards(MI) > 0)
+    return NoopHazard;
+
+  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
       checkReadM0Hazards(MI) > 0)
     return NoopHazard;
 
@@ -144,26 +164,20 @@
   if (SIInstrInfo::isSMRD(*MI))
     return std::max(WaitStates, checkSMRDHazards(MI));
 
-  if (SIInstrInfo::isVALU(*MI)) {
-      WaitStates = std::max(WaitStates, checkVALUHazards(MI));
+  if (SIInstrInfo::isVALU(*MI))
+    WaitStates = std::max(WaitStates, checkVALUHazards(MI));
 
-    if (SIInstrInfo::isVMEM(*MI))
-      WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
+  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
+    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
 
-    if (SIInstrInfo::isDPP(*MI))
-      WaitStates = std::max(WaitStates, checkDPPHazards(MI));
+  if (SIInstrInfo::isDPP(*MI))
+    WaitStates = std::max(WaitStates, checkDPPHazards(MI));
 
-    if (isDivFMas(MI->getOpcode()))
-      WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
+  if (isDivFMas(MI->getOpcode()))
+    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
 
-    if (isRWLane(MI->getOpcode()))
-      WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
-
-    if (TII.isVINTRP(*MI))
-      WaitStates = std::max(WaitStates, checkReadM0Hazards(MI));
-
-    return WaitStates;
-  }
+  if (isRWLane(MI->getOpcode()))
+    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
 
   if (isSGetReg(MI->getOpcode()))
     return std::max(WaitStates, checkGetRegHazards(MI));
@@ -174,7 +188,11 @@
   if (isRFE(MI->getOpcode()))
     return std::max(WaitStates, checkRFEHazards(MI));
 
-  if (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))
+  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
+                                           isSMovRel(MI->getOpcode())))
+    return std::max(WaitStates, checkReadM0Hazards(MI));
+
+  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
     return std::max(WaitStates, checkReadM0Hazards(MI));
 
   return WaitStates;
@@ -282,12 +300,14 @@
   addRegsToSet(TRI, MI.uses(), ClauseUses);
 }
 
-int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
+int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
   // SMEM soft clause are only present on VI+, and only matter if xnack is
   // enabled.
   if (!ST.isXNACKEnabled())
     return 0;
 
+  bool IsSMRD = TII.isSMRD(*MEM);
+
   resetClause();
 
   // A soft-clause is any group of consecutive SMEM instructions.  The
@@ -303,7 +323,10 @@
   for (MachineInstr *MI : EmittedInstrs) {
     // When we hit a non-SMEM instruction then we have passed the start of the
     // clause and we can stop.
-    if (!MI || !SIInstrInfo::isSMRD(*MI))
+    if (!MI)
+      break;
+
+    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
       break;
 
     addClauseInst(*MI);
@@ -312,13 +335,13 @@
   if (ClauseDefs.none())
     return 0;
 
-  // FIXME: When we support stores, we need to make sure not to put loads and
-  // stores in the same clause if they use the same address.  For now, just
-  // start a new clause whenever we see a store.
-  if (SMEM->mayStore())
+  // We need to make sure not to put loads and stores in the same clause if they
+  // use the same address. For now, just start a new clause whenever we see a
+  // store.
+  if (MEM->mayStore())
     return 1;
 
-  addClauseInst(*SMEM);
+  addClauseInst(*MEM);
 
   // If the set of defs and uses intersect then we cannot add this instruction
   // to the clause, so we have a hazard.
@@ -329,7 +352,7 @@
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   int WaitStatesNeeded = 0;
 
-  WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD);
+  WaitStatesNeeded = checkSoftClauseHazards(SMRD);
 
   // This SMRD hazard only affects SI.
   if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
@@ -369,18 +392,15 @@
 }
 
 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
-  const SIInstrInfo *TII = ST.getInstrInfo();
-
   if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
     return 0;
 
-  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
 
   // A read of an SGPR by a VMEM instruction requires 5 wait states when the
   // SGPR was written by a VALU Instruction.
-  int VmemSgprWaitStates = 5;
-  int WaitStatesNeeded = 0;
-  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+  const int VmemSgprWaitStates = 5;
+  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
 
   for (const MachineOperand &Use : VMEM->uses()) {
     if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
@@ -598,11 +618,8 @@
 }
 
 int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
-  if (!ST.hasReadM0Hazard())
-    return 0;
-
   const SIInstrInfo *TII = ST.getInstrInfo();
-  int SMovRelWaitStates = 1;
+  const int SMovRelWaitStates = 1;
   auto IsHazardFn = [TII] (MachineInstr *MI) {
     return TII->isSALU(*MI);
   };

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index eb382cc..01682ac 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h

@@ -58,7 +58,7 @@
                                 [](MachineInstr *) { return true; });
   int getWaitStatesSinceSetReg(function_ref<bool(MachineInstr *)> IsHazard);
 
-  int checkSMEMSoftClauseHazards(MachineInstr *SMEM);
+  int checkSoftClauseHazards(MachineInstr *SMEM);
   int checkSMRDHazards(MachineInstr *SMRD);
   int checkVMEMHazards(MachineInstr* VMEM);
   int checkDPPHazards(MachineInstr *DPP);

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c41757d..2d41d89 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

@@ -1522,8 +1522,6 @@
     ScoreBrackets->dump();
   });
 
-  bool InsertNOP = false;
-
   // Walk over the instructions.
   for (MachineBasicBlock::iterator Iter = Block.begin(), E = Block.end();
        Iter != E;) {
@@ -1624,58 +1622,6 @@
       VCCZBugHandledSet.insert(&Inst);
     }
 
-    if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
-
-      // This avoids a s_nop after a waitcnt has just been inserted.
-      if (!SWaitInst && InsertNOP) {
-        BuildMI(Block, Inst, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
-      }
-      InsertNOP = false;
-
-      // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
-      // or SMEM clause, respectively.
-      //
-      // The temporary workaround is to break the clauses with S_NOP.
-      //
-      // The proper solution would be to allocate registers such that all source
-      // and destination registers don't overlap, e.g. this is illegal:
-      //   r0 = load r2
-      //   r2 = load r0
-      bool IsSMEM = false;
-      bool IsVMEM = false;
-      if (TII->isSMRD(Inst))
-        IsSMEM = true;
-      else if (TII->usesVM_CNT(Inst))
-        IsVMEM = true;
-
-      ++Iter;
-      if (Iter == E)
-        break;
-
-      MachineInstr &Next = *Iter;
-
-      // TODO: How about consecutive SMEM instructions?
-      //       The comments above says break the clause but the code does not.
-      // if ((TII->isSMRD(next) && isSMEM) ||
-      if (!IsSMEM && TII->usesVM_CNT(Next) && IsVMEM &&
-          // TODO: Enable this check when hasSoftClause is upstreamed.
-          // ST->hasSoftClauses() &&
-          ST->isXNACKEnabled()) {
-        // Insert a NOP to break the clause.
-        InsertNOP = true;
-        continue;
-      }
-
-      // There must be "S_NOP 0" between an instruction writing M0 and
-      // S_SENDMSG.
-      if ((Next.getOpcode() == AMDGPU::S_SENDMSG ||
-           Next.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
-          Inst.definesRegister(AMDGPU::M0))
-        InsertNOP = true;
-
-      continue;
-    }
-
     ++Iter;
   }
commit	a41351e37c72c86c0547a74166712d023dd076b0	[log] [tgz]
author	Matt Arsenault <Matthew.Arsenault@amd.com>	Fri Nov 17 21:35:32 2017 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	Fri Nov 17 21:35:32 2017 +0000
tree	838f31d0d67836cebe3f12ee9415bbe314011fe0
parent	c0a81071d3d89c4e07f0ef39f5bb96eb9e6df76b [diff]