[AMDGPU] Check MI bundles for hazards

Summary: GCNHazardRecognizer fails to identify hazards that are in and around bundles. This patch allows the hazard recognizer to consider bundled instructions in both scheduler and hazard recognizer mode. We ignore “bundledness” for the purpose of detecting hazards and examine the instructions individually.

Reviewers: arsenm, msearles, rampitec

Reviewed By: rampitec

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61564

llvm-svn: 360199
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c2af348..3bf9691 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -124,6 +124,8 @@
 ScheduleHazardRecognizer::HazardType
 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   MachineInstr *MI = SU->getInstr();
+  if (MI->isBundle())
+   return NoHazard;
 
   if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
     return NoopHazard;
@@ -179,6 +181,37 @@
   return NoHazard;
 }
 
+static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
+  MachineBasicBlock &MBB = *MI->getParent();
+  BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP)).addImm(0);
+}
+
+void GCNHazardRecognizer::processBundle() {
+  MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
+  MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
+  // Walk the instructions inside the bundle and check each one for hazards.
+  for (; MI != E && MI->isInsideBundle(); ++MI) {
+    CurrCycleInstr = &*MI;
+    unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
+    // Only modify the instruction stream (fixes and noops) in hazard
+    // recognizer mode; otherwise just account for the wait states below.
+    if (IsHazardRecognizerMode) {
+      fixHazards(CurrCycleInstr);
+      for (unsigned i = 0; i < WaitStates; ++i)
+        insertNoopInBundle(CurrCycleInstr, TII);
+    }
+    // It's unnecessary to track more than MaxLookAhead instructions. Since we
+    // include the bundled MI directly after, only add a maximum of
+    // (MaxLookAhead - 1) noops to EmittedInstrs.
+    for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
+      EmittedInstrs.push_front(nullptr);
+
+    EmittedInstrs.push_front(CurrCycleInstr);
+    EmittedInstrs.resize(MaxLookAhead);
+  }
+  CurrCycleInstr = nullptr;
+}
+
 unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
   IsHazardRecognizerMode = false;
   return PreEmitNoopsCommon(SU->getInstr());
@@ -188,17 +221,15 @@
   IsHazardRecognizerMode = true;
   CurrCycleInstr = MI;
   unsigned W = PreEmitNoopsCommon(MI);
-
-  fixVMEMtoScalarWriteHazards(MI);
-  fixSMEMtoVectorWriteHazards(MI);
-  fixVcmpxExecWARHazard(MI);
-  fixLdsBranchVmemWARHazard(MI);
-
+  fixHazards(MI);
   CurrCycleInstr = nullptr;
   return W;
 }
 
 unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
+  if (MI->isBundle())
+    return 0;
+
   int WaitStates = std::max(0, checkAnyInstHazards(MI));
 
   if (SIInstrInfo::isSMRD(*MI))
@@ -264,6 +295,11 @@
       CurrCycleInstr->isKill())
     return;
 
+  if (CurrCycleInstr->isBundle()) {
+    processBundle();
+    return;
+  }
+
   unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
 
   // Keep track of emitted instructions
@@ -304,8 +340,11 @@
                               int WaitStates,
                               IsExpiredFn IsExpired,
                               DenseSet<const MachineBasicBlock *> &Visited) {
+  for (auto E = MBB->instr_rend(); I != E; ++I) {
+    // Don't add WaitStates for parent BUNDLE instructions.
+    if (I->isBundle())
+      continue;
 
-  for (auto E = MBB->rend() ; I != E; ++I) {
     if (IsHazard(&*I))
       return WaitStates;
 
@@ -437,9 +476,9 @@
   // instructions in this group may return out of order and/or may be
   // replayed (i.e. the same instruction issued more than once).
   //
-  // In order to handle these situations correctly we need to make sure
-  // that when a clause has more than one instruction, no instruction in the
-  // clause writes to a register that is read another instruction in the clause
+  // In order to handle these situations correctly we need to make sure that
+  // when a clause has more than one instruction, no instruction in the clause
+  // writes to a register that is read by another instruction in the clause
   // (including itself). If we encounter this situaion, we need to break the
   // clause by inserting a non SMEM instruction.
 
@@ -525,7 +564,6 @@
   // SGPR was written by a VALU Instruction.
   const int VmemSgprWaitStates = 5;
   auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
-
   for (const MachineOperand &Use : VMEM->uses()) {
     if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
       continue;
@@ -795,6 +833,13 @@
                                                    SMovRelWaitStates);
 }
 
+void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
+  fixVMEMtoScalarWriteHazards(MI); // Run each fix* pass on MI, in order.
+  fixSMEMtoVectorWriteHazards(MI);
+  fixVcmpxExecWARHazard(MI);
+  fixLdsBranchVmemWARHazard(MI);
+}
+
 bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
   if (!ST.hasVMEMtoScalarWriteHazard())
     return false;