[AMDGPU] gfx1010 hazard recognizer

Differential Revision: https://reviews.llvm.org/D61536

llvm-svn: 359961
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 9aea423..adae968 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/MC/MCInstrDesc.h"
@@ -133,6 +134,12 @@
       && checkVMEMHazards(MI) > 0)
     return NoopHazard;
 
+  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
+    return NoopHazard;
+
+  if (ST.hasNoDataDepHazard())
+    return NoHazard;
+
   if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
     return NoopHazard;
 
@@ -181,6 +188,12 @@
   IsHazardRecognizerMode = true;
   CurrCycleInstr = MI;
   unsigned W = PreEmitNoopsCommon(MI);
+
+  fixVMEMtoScalarWriteHazards(MI);
+  fixSMEMtoVectorWriteHazards(MI);
+  fixVcmpxExecWARHazard(MI);
+  fixLdsBranchVmemWARHazard(MI);
+
   CurrCycleInstr = nullptr;
   return W;
 }
@@ -191,12 +204,18 @@
   if (SIInstrInfo::isSMRD(*MI))
     return std::max(WaitStates, checkSMRDHazards(MI));
 
-  if (SIInstrInfo::isVALU(*MI))
-    WaitStates = std::max(WaitStates, checkVALUHazards(MI));
-
   if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
     WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
 
+  if (ST.hasNSAtoVMEMBug())
+    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
+
+  if (ST.hasNoDataDepHazard())
+    return WaitStates;
+
+  if (SIInstrInfo::isVALU(*MI))
+    WaitStates = std::max(WaitStates, checkVALUHazards(MI));
+
   if (SIInstrInfo::isDPP(*MI))
     WaitStates = std::max(WaitStates, checkDPPHazards(MI));
 
@@ -775,3 +794,243 @@
   return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
                                                    SMovRelWaitStates);
 }
+
+bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
+  if (!ST.hasVMEMtoScalarWriteHazard())
+    return false;
+
+  if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
+    return false;
+
+  if (MI->getNumDefs() == 0)
+    return false;
+
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+  auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
+    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
+        !SIInstrInfo::isFLAT(*I))
+      return false;
+
+    for (const MachineOperand &Def : MI->defs()) {
+      MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
+      if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
+        continue;
+      return true;
+    }
+    return false;
+  };
+
+  auto IsExpiredFn = [] (MachineInstr *MI, int) {
+    return MI && (SIInstrInfo::isVALU(*MI) ||
+                  (MI->getOpcode() == AMDGPU::S_WAITCNT &&
+                   !MI->getOperand(0).getImm()));
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
+  return true;
+}
+
+bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
+  if (!ST.hasSMEMtoVectorWriteHazard())
+    return false;
+
+  if (!SIInstrInfo::isVALU(*MI))
+    return false;
+
+  unsigned SDSTName;
+  switch (MI->getOpcode()) {
+  case AMDGPU::V_READLANE_B32:
+  case AMDGPU::V_READFIRSTLANE_B32:
+    SDSTName = AMDGPU::OpName::vdst;
+    break;
+  default:
+    SDSTName = AMDGPU::OpName::sdst;
+    break;
+  }
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
+  if (!SDST) {
+    for (auto MO : MI->implicit_operands()) {
+      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
+        SDST = &MO;
+        break;
+      }
+    }
+  }
+
+  if (!SDST)
+    return false;
+
+  const unsigned SDSTReg = SDST->getReg();
+  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
+    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
+  };
+
+  // This assumes that there will be s_waitcnt lgkmcnt(0) or equivalent
+  // between any at risk SMEM and any SALU dependent on the SMEM results.
+  auto IsExpiredFn = [TII] (MachineInstr *MI, int) {
+    if (MI) {
+      if (TII->isSALU(*MI)) {
+        if (TII->isSOPP(*MI))
+          return false;
+        switch (MI->getOpcode()) {
+        case AMDGPU::S_SETVSKIP:
+        case AMDGPU::S_VERSION:
+        case AMDGPU::S_WAITCNT_VSCNT:
+        case AMDGPU::S_WAITCNT_VMCNT:
+        case AMDGPU::S_WAITCNT_EXPCNT:
+        case AMDGPU::S_WAITCNT_LGKMCNT:
+          return false;
+        default:
+          return true;
+        }
+      }
+    }
+    return false;
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
+      .addImm(0);
+  return true;
+}
+
+bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
+  if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
+    return false;
+
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
+    return false;
+
+  auto IsHazardFn = [TRI] (MachineInstr *I) {
+    if (SIInstrInfo::isVALU(*I))
+      return false;
+    return I->readsRegister(AMDGPU::EXEC, TRI);
+  };
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
+    if (!MI)
+      return false;
+    if (SIInstrInfo::isVALU(*MI)) {
+      if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
+        return true;
+      for (auto MO : MI->implicit_operands())
+        if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
+          return true;
+    }
+    if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+        (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
+      return true;
+    return false;
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+    .addImm(0xfffe);
+  return true;
+}
+
+bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
+  if (!ST.hasLdsBranchVmemWARHazard())
+    return false;
+
+  auto IsHazardInst = [] (const MachineInstr *MI) {
+    if (SIInstrInfo::isDS(*MI))
+      return 1;
+    if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
+      return 2;
+    return 0;
+  };
+
+  auto InstType = IsHazardInst(MI);
+  if (!InstType)
+    return false;
+
+  auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
+    return I && (IsHazardInst(I) ||
+                 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+                  I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+                  !I->getOperand(1).getImm()));
+  };
+
+  auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
+    if (!I->isBranch())
+      return false;
+
+    auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
+      auto InstType2 = IsHazardInst(I);
+      return InstType2 && InstType != InstType2;
+    };
+
+    auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
+      if (!I)
+        return false;
+
+      auto InstType2 = IsHazardInst(I);
+      if (InstType == InstType2)
+        return true;
+
+      return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+             I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+             !I->getOperand(1).getImm();
+    };
+
+    return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
+           std::numeric_limits<int>::max();
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_WAITCNT_VSCNT))
+    .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+    .addImm(0);
+
+  return true;
+}
+
+int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
+  int NSAtoVMEMWaitStates = 1;
+
+  if (!ST.hasNSAtoVMEMBug())
+    return 0;
+
+  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
+    return 0;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
+  if (!Offset || (Offset->getImm() & 6) == 0)
+    return 0;
+
+  auto IsHazardFn = [TII] (MachineInstr *I) {
+    if (!SIInstrInfo::isMIMG(*I))
+      return false;
+    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
+    return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
+           TII->getInstSizeInBytes(*I) >= 16;
+  };
+
+  return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
+}