[AMDGPU] Fixed hazard recognizer to walk predecessors Fixes two problems with GCNHazardRecognizer: 1. It only scans up to 5 instructions emitted earlier. 2. It does not take control flow into account. An earlier instruction from the previous basic block is not necessarily a predecessor. At the same time a real predecessor block is not scanned. The patch provides a way to distinguish between scheduler and hazard recognizer mode. It is OK to work with emitted instructions in the scheduler because we do not really know what will be emitted later and its order. However, when pass works as a hazard recognizer the schedule is already finalized, and we have full access to the instructions for the whole function, so we can properly traverse predecessors and their instructions. Differential Revision: https://reviews.llvm.org/D56923 llvm-svn: 351759

commit: f92ed6966eb898b1ab1587c5fb1bf059783977b0 [log] [tgz]
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> Mon Jan 21 19:11:26 2019 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> Mon Jan 21 19:11:26 2019 +0000
tree: 58583c47d14adf49943473acc1124cd52763f744
parent: 8441c467bf9b02638afc9fb3affce8eb1207e65d [diff] [blame]
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index bb1c6db..4af8a45 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

@@ -37,6 +37,7 @@
 //===----------------------------------------------------------------------===//
 
 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
+  IsHazardRecognizerMode(false),
   CurrCycleInstr(nullptr),
   MF(MF),
   ST(MF.getSubtarget<GCNSubtarget>()),
@@ -172,10 +173,19 @@
 }
 
 unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
-  return PreEmitNoops(SU->getInstr());
+  IsHazardRecognizerMode = false;
+  return PreEmitNoopsCommon(SU->getInstr());
 }
 
 unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+  IsHazardRecognizerMode = true;
+  CurrCycleInstr = MI;
+  unsigned W = PreEmitNoopsCommon(MI);
+  CurrCycleInstr = nullptr;
+  return W;
+}
+
+unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
   int WaitStates = std::max(0, checkAnyInstHazards(MI));
 
   if (SIInstrInfo::isSMRD(*MI))
@@ -231,7 +241,7 @@
   // Do not track non-instructions which do not affect the wait states.
   // If included, these instructions can lead to buffer overflow such that
   // detectable hazards are missed.
-  if (CurrCycleInstr->getOpcode() == AMDGPU::IMPLICIT_DEF)
+  if (CurrCycleInstr->isImplicitDef())
     return;
   else if (CurrCycleInstr->isDebugInstr())
     return;
@@ -265,41 +275,109 @@
 // Helper Functions
 //===----------------------------------------------------------------------===//
 
-int GCNHazardRecognizer::getWaitStatesSince(
-    function_ref<bool(MachineInstr *)> IsHazard) {
+typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;
+
+// Returns a minimum wait states since \p I walking all predecessors.
+// Only scans until \p IsExpired does not return true.
+// Can only be run in a hazard recognizer mode.
+static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
+                              MachineBasicBlock *MBB,
+                              MachineBasicBlock::reverse_instr_iterator I,
+                              int WaitStates,
+                              IsExpiredFn IsExpired,
+                              DenseSet<const MachineBasicBlock *> &Visited) {
+
+  for (auto E = MBB->rend() ; I != E; ++I) {
+    if (IsHazard(&*I))
+      return WaitStates;
+
+    if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
+      continue;
+
+    WaitStates += SIInstrInfo::getNumWaitStates(*I);
+
+    if (IsExpired(&*I, WaitStates))
+      return std::numeric_limits<int>::max();
+  }
+
+  int MinWaitStates = WaitStates;
+  bool Found = false;
+  for (MachineBasicBlock *Pred : MBB->predecessors()) {
+    if (!Visited.insert(Pred).second)
+      continue;
+
+    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
+                               WaitStates, IsExpired, Visited);
+
+    if (W == std::numeric_limits<int>::max())
+      continue;
+
+    MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
+    if (IsExpired(nullptr, MinWaitStates))
+      return MinWaitStates;
+
+    Found = true;
+  }
+
+  if (Found)
+    return MinWaitStates;
+
+  return std::numeric_limits<int>::max();
+}
+
+static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
+                              MachineInstr *MI,
+                              IsExpiredFn IsExpired) {
+  DenseSet<const MachineBasicBlock *> Visited;
+  return getWaitStatesSince(IsHazard, MI->getParent(),
+                            std::next(MI->getReverseIterator()),
+                            0, IsExpired, Visited);
+}
+
+int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
+  if (IsHazardRecognizerMode) {
+    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
+      return WaitStates >= Limit;
+    };
+    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
+  }
+
   int WaitStates = 0;
   for (MachineInstr *MI : EmittedInstrs) {
     if (MI) {
       if (IsHazard(MI))
         return WaitStates;
 
-      unsigned Opcode = MI->getOpcode();
-      if (Opcode == AMDGPU::INLINEASM)
+      if (MI->isInlineAsm())
         continue;
     }
     ++WaitStates;
+
+    if (WaitStates >= Limit)
+      break;
   }
   return std::numeric_limits<int>::max();
 }
 
-int GCNHazardRecognizer::getWaitStatesSinceDef(
-    unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
+int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
+                                               IsHazardFn IsHazardDef,
+                                               int Limit) {
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
   auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
     return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
   };
 
-  return getWaitStatesSince(IsHazardFn);
+  return getWaitStatesSince(IsHazardFn, Limit);
 }
 
-int GCNHazardRecognizer::getWaitStatesSinceSetReg(
-    function_ref<bool(MachineInstr *)> IsHazard) {
+int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
+                                                  int Limit) {
   auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
     return isSSetReg(MI->getOpcode()) && IsHazard(MI);
   };
 
-  return getWaitStatesSince(IsHazardFn);
+  return getWaitStatesSince(IsHazardFn, Limit);
 }
 
 //===----------------------------------------------------------------------===//
@@ -397,7 +475,8 @@
     if (!Use.isReg())
       continue;
     int WaitStatesNeededForUse =
-        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
+                                                   SmrdSgprWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
 
     // This fixes what appears to be undocumented hardware behavior in SI where
@@ -410,7 +489,8 @@
     if (IsBufferSMRD) {
       int WaitStatesNeededForUse =
         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
-                                                   IsBufferHazardDefFn);
+                                                   IsBufferHazardDefFn,
+                                                   SmrdSgprWaitStates);
       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
     }
   }
@@ -434,7 +514,8 @@
       continue;
 
     int WaitStatesNeededForUse =
-        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
+                                                   VmemSgprWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
   }
   return WaitStatesNeeded;
@@ -454,13 +535,16 @@
     if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
       continue;
     int WaitStatesNeededForUse =
-        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg());
+        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
+                              [](MachineInstr *) { return true; },
+                              DppVgprWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
   }
 
   WaitStatesNeeded = std::max(
       WaitStatesNeeded,
-      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn));
+      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
+                                                DppExecWaitStates));
 
   return WaitStatesNeeded;
 }
@@ -472,7 +556,8 @@
   // instruction.
   const int DivFMasWaitStates = 4;
   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
-  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);
+  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
+                                               DivFMasWaitStates);
 
   return DivFMasWaitStates - WaitStatesNeeded;
 }
@@ -485,7 +570,7 @@
   auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
     return GetRegHWReg == getHWReg(TII, *MI);
   };
-  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);
 
   return GetRegWaitStates - WaitStatesNeeded;
 }
@@ -499,7 +584,7 @@
   auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
     return HWReg == getHWReg(TII, *MI);
   };
-  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
   return SetRegWaitStates - WaitStatesNeeded;
 }
 
@@ -570,7 +655,7 @@
     TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
   };
   int WaitStatesNeededForDef =
-    VALUWaitStates - getWaitStatesSince(IsHazardFn);
+    VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
   WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
 
   return WaitStatesNeeded;
@@ -635,7 +720,8 @@
   };
 
   const int RWLaneWaitStates = 4;
-  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
+  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
+                                              RWLaneWaitStates);
   return RWLaneWaitStates - WaitStatesSince;
 }
 
@@ -650,7 +736,7 @@
   auto IsHazardFn = [TII] (MachineInstr *MI) {
     return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
   };
-  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
   return RFEWaitStates - WaitStatesNeeded;
 }
 
@@ -674,7 +760,8 @@
       return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
     };
     int WaitStatesNeededForUse =
-        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn);
+        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
+                                                 MovFedWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
   }
 
@@ -687,5 +774,6 @@
   auto IsHazardFn = [TII] (MachineInstr *MI) {
     return TII->isSALU(*MI);
   };
-  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn);
+  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
+                                                   SMovRelWaitStates);
 }
commit	f92ed6966eb898b1ab1587c5fb1bf059783977b0	[log] [tgz]
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	Mon Jan 21 19:11:26 2019 +0000
committer	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	Mon Jan 21 19:11:26 2019 +0000
tree	58583c47d14adf49943473acc1124cd52763f744
parent	8441c467bf9b02638afc9fb3affce8eb1207e65d [diff] [blame]