AMDGPU/SI: Enable the post-ra scheduler
Summary:
This includes a hazard recognizer implementation to replace some of
the hazard handling we had during frame index elimination.
Reviewers: arsenm
Subscribers: qcolombet, arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D18602
llvm-svn: 268143
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 13abe7f..342afff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -384,6 +384,17 @@
}
void GCNPassConfig::addPreEmitPass() {
+
+  // The hazard recognizer that runs as part of the post-ra scheduler does not
+  // guarantee that it can handle all hazards correctly. This is because if
+  // there are multiple scheduling regions in a basic block, the regions are
+  // scheduled bottom up, so when we begin to schedule a region we don't know
+  // what instructions were emitted directly before it.
+  //
+  // Here we add a stand-alone hazard recognizer pass which can handle all
+  // cases.
+ addPass(&PostRAHazardRecognizerID);
+
addPass(createSIInsertWaitsPass(), false);
addPass(createSIShrinkInstructionsPass());
addPass(createSILowerControlFlowPass(), false);
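For readers unfamiliar with the stand-alone pass referenced in the comment: it walks the whole function in program order, so it never loses the history that the region-at-a-time scheduler discards. Below is a minimal sketch of that loop using only the hooks added in this patch; the function itself is hypothetical, not the actual pass body.

```cpp
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"

using namespace llvm;

// Hypothetical sketch of a stand-alone hazard pass: walk every instruction
// top-down, pad each one with the no-ops it needs, and feed everything
// (including the no-ops) back into the recognizer so its wait-state window
// stays accurate across scheduling-region boundaries.
static void runHazardRecognizer(MachineFunction &MF, const SIInstrInfo *TII,
                                ScheduleHazardRecognizer &HazardRec) {
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // PreEmitNoops(MachineInstr*) is the overload added by this patch.
      unsigned NumNoops = HazardRec.PreEmitNoops(&MI);
      for (unsigned I = 0; I != NumNoops; ++I) {
        TII->insertNoop(MBB, MI.getIterator());
        HazardRec.EmitNoop();
      }
      HazardRec.EmitInstruction(&MI);
      HazardRec.AdvanceCycle();
    }
  }
}
```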
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index ef68e95..d09791f 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -47,6 +47,7 @@
AMDGPUInstrInfo.cpp
AMDGPUPromoteAlloca.cpp
AMDGPURegisterInfo.cpp
+ GCNHazardRecognizer.cpp
R600ClauseMergePass.cpp
R600ControlFlowFinalizer.cpp
R600EmitClauseMarkers.cpp
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
new file mode 100644
index 0000000..4c830bc
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -0,0 +1,184 @@
+//===-- GCNHazardRecognizer.cpp - GCN Hazard Recognizer Impls -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on GCN processors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "GCNHazardRecognizer.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Hazard Recognizer Implementation
+//===----------------------------------------------------------------------===//
+
+GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
+ CurrCycleInstr(nullptr),
+ MF(MF) {
+ MaxLookAhead = 5;
+}
+
+void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
+ EmitInstruction(SU->getInstr());
+}
+
+void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
+ CurrCycleInstr = MI;
+}
+
+ScheduleHazardRecognizer::HazardType
+GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ MachineInstr *MI = SU->getInstr();
+
+ if (TII->isSMRD(*MI) && checkSMRDHazards(MI) > 0)
+ return NoopHazard;
+
+ if (TII->isVMEM(*MI) && checkVMEMHazards(MI) > 0)
+ return NoopHazard;
+
+ return NoHazard;
+}
+
+unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
+ return PreEmitNoops(SU->getInstr());
+}
+
+unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
+
+ if (TII->isSMRD(*MI))
+ return std::max(0, checkSMRDHazards(MI));
+
+ if (TII->isVMEM(*MI))
+ return std::max(0, checkVMEMHazards(MI));
+
+ return 0;
+}
+
+void GCNHazardRecognizer::EmitNoop() {
+ EmittedInstrs.push_front(nullptr);
+}
+
+void GCNHazardRecognizer::AdvanceCycle() {
+
+ // When the scheduler detects a stall, it will call AdvanceCycle() without
+ // emitting any instructions.
+ if (!CurrCycleInstr)
+ return;
+
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr);
+
+ // Keep track of emitted instructions
+ EmittedInstrs.push_front(CurrCycleInstr);
+
+ // Add a nullptr for each additional wait state after the first. Make sure
+ // not to add more than getMaxLookAhead() items to the list, since we
+ // truncate the list to that size right after this loop.
+ for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
+ i < e; ++i) {
+ EmittedInstrs.push_front(nullptr);
+ }
+
+  // getMaxLookAhead() is the largest number of wait states we will ever need
+ // to insert, so there is no point in keeping track of more than that many
+ // wait states.
+ EmittedInstrs.resize(getMaxLookAhead());
+
+ CurrCycleInstr = nullptr;
+}
+
+void GCNHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+int GCNHazardRecognizer::getWaitStatesSinceDef(
+    unsigned Reg, std::function<bool(MachineInstr*)> IsHazardDef) {
+ const TargetRegisterInfo *TRI =
+ MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo();
+
+ int WaitStates = -1;
+ for (MachineInstr *MI : EmittedInstrs) {
+ ++WaitStates;
+ if (!MI || !IsHazardDef(MI))
+ continue;
+ if (MI->modifiesRegister(Reg, TRI))
+ return WaitStates;
+ }
+ return std::numeric_limits<int>::max();
+}
+
+//===----------------------------------------------------------------------===//
+// No-op Hazard Detection
+//===----------------------------------------------------------------------===//
+
+int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+
+ // This SMRD hazard only affects SI.
+ if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 0;
+
+  // A read of an SGPR by an SMRD instruction requires 4 wait states when the
+  // SGPR was written by a VALU instruction.
+ int SmrdSgprWaitStates = 4;
+ int WaitStatesNeeded = 0;
+ auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+
+ for (const MachineOperand &Use : SMRD->uses()) {
+ if (!Use.isReg())
+ continue;
+ int WaitStatesNeededForUse =
+ SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
+ return WaitStatesNeeded;
+}
+
+int GCNHazardRecognizer::checkVMEMHazards(MachineInstr *VMEM) {
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+
+ if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return 0;
+
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
+  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
+  // SGPR was written by a VALU instruction.
+ int VmemSgprWaitStates = 5;
+ int WaitStatesNeeded = 0;
+ auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+
+ for (const MachineOperand &Use : VMEM->uses()) {
+ if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+ continue;
+
+ int WaitStatesNeededForUse =
+ VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
+ return WaitStatesNeeded;
+}
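The bookkeeping above is the heart of the recognizer: EmittedInstrs is a fixed-depth window in which each entry stands for one wait state, and an instruction that consumes several wait states pads the window with nullptr entries, so the distance from the head of the list to a definition equals the wait states already elapsed since it. A self-contained toy model of that counting (all names hypothetical):

```cpp
#include <algorithm>
#include <limits>
#include <list>

struct Instr { unsigned WaitStates; bool DefsHazardReg; };

static std::list<const Instr *> Emitted; // most recent entry first
static const unsigned MaxLookAhead = 5;

static void advanceCycle(const Instr &I) {
  Emitted.push_front(&I);
  // One nullptr per wait state beyond the first, capped at the window size.
  for (unsigned K = 1, E = std::min(I.WaitStates, MaxLookAhead); K < E; ++K)
    Emitted.push_front(nullptr);
  // History older than the window can never influence a hazard check.
  Emitted.resize(MaxLookAhead);
}

static int waitStatesSinceHazardDef() {
  int WaitStates = -1;
  for (const Instr *I : Emitted) {
    ++WaitStates;
    if (I && I->DefsHazardReg)
      return WaitStates; // 0 == defined by the most recent entry
  }
  return std::numeric_limits<int>::max(); // no hazardous def in the window
}
```

For example, an entry with WaitStates == 3 pushes itself plus two nullptrs, so a hazardous def emitted immediately before it reports 3 elapsed wait states, exactly as getWaitStatesSinceDef() does.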
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
new file mode 100644
index 0000000..e75c350
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -0,0 +1,59 @@
+//===-- GCNHazardRecognizer.h - GCN Hazard Recognizers ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on GCN processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNHAZARDRECOGNIZER_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include <functional>
+#include <list>
+
+namespace llvm {
+
+class MachineFunction;
+class MachineInstr;
+class ScheduleDAG;
+class SIInstrInfo;
+
+class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
+
+ // This variable stores the instruction that has been emitted this cycle.
+ // It will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is
+ // called.
+ MachineInstr *CurrCycleInstr;
+ std::list<MachineInstr*> EmittedInstrs;
+ const MachineFunction &MF;
+
+ int getWaitStatesSinceDef(unsigned Reg,
+ std::function<bool(MachineInstr*)> IsHazardDef =
+ [](MachineInstr*) {return true;});
+
+ int checkSMRDHazards(MachineInstr *SMRD);
+  int checkVMEMHazards(MachineInstr *VMEM);
+public:
+ GCNHazardRecognizer(const MachineFunction &MF);
+ // We can only issue one instruction per cycle.
+ bool atIssueLimit() const override { return true; }
+ void EmitInstruction(SUnit *SU) override;
+ void EmitInstruction(MachineInstr *MI) override;
+ HazardType getHazardType(SUnit *SU, int Stalls) override;
+ void EmitNoop() override;
+ unsigned PreEmitNoops(SUnit *SU) override;
+ unsigned PreEmitNoops(MachineInstr *) override;
+ void AdvanceCycle() override;
+ void RecedeCycle() override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNHAZARDRECOGNIZER_H
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2ab4f78..eb17ffe 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -15,11 +15,13 @@
#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "GCNHazardRecognizer.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -816,6 +818,20 @@
}
}
+void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ insertWaitStates(MBB, MI, 1);
+}
+
+unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ default: return 1; // FIXME: Do wait states equal cycles?
+
+ case AMDGPU::S_NOP:
+ return MI.getOperand(0).getImm() + 1;
+ }
+}
+
bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MBB.findDebugLoc(MI);
@@ -1188,8 +1204,11 @@
if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
- assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
- "read2 / write2 not expected here yet");
+
+ if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) {
+ // FIXME: Handle ds_read2 / ds_write2.
+ return false;
+ }
unsigned Width0 = (*MIa->memoperands_begin())->getSize();
unsigned Width1 = (*MIb->memoperands_begin())->getSize();
if (BaseReg0 == BaseReg1 &&
@@ -2964,3 +2983,18 @@
{AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
return makeArrayRef(TargetIndices);
}
+
+/// This is used by the post-RA scheduler (PostRASchedulerList.cpp). The
+/// post-RA version of misched uses CreateTargetMIHazardRecognizer.
+ScheduleHazardRecognizer *
+SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return new GCNHazardRecognizer(DAG->MF);
+}
+
+/// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
+/// pass.
+ScheduleHazardRecognizer *
+SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
+ return new GCNHazardRecognizer(MF);
+}
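To make the wait-state numbers concrete: getNumWaitStates() reports an S_NOP as imm + 1 wait states, and the hazard checks only request the residual distance that has not already elapsed. A tiny worked example of that arithmetic (the helper is hypothetical, not part of the patch):

```cpp
#include <algorithm>

// On SI, an SMRD reading an SGPR needs 4 wait states after a VALU wrote it;
// on VI+, a VMEM reading an SGPR needs 5. If the window already shows some
// elapsed wait states, only the difference must be padded, and a single
// "s_nop 2" (imm + 1 == 3 wait states) can account for three of them.
static int noopsNeeded(int RequiredWaitStates, int WaitStatesSinceDef) {
  return std::max(0, RequiredWaitStates - WaitStatesSinceDef);
}
// noopsNeeded(4, 1) == 3  -> SI SMRD hazard, VALU def 1 wait state back
// noopsNeeded(5, 6) == 0  -> VI VMEM hazard already satisfied by distance
```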
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index a5cd2e1..2121ae1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -169,6 +169,14 @@
return get(Opcode).TSFlags & SIInstrFlags::VALU;
}
+ static bool isVMEM(const MachineInstr &MI) {
+ return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
+ }
+
+ bool isVMEM(uint16_t Opcode) const {
+ return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
+ }
+
static bool isSOP1(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
}
@@ -440,6 +448,12 @@
void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
int Count) const;
+ void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const;
+
+ /// \brief Return the number of wait states that result from executing this
+ /// instruction.
+ unsigned getNumWaitStates(const MachineInstr &MI) const;
+
/// \brief Returns the operand named \p Op. If \p MI does not have an
/// operand named \c Op, this function returns nullptr.
LLVM_READONLY
@@ -472,6 +486,13 @@
ArrayRef<std::pair<int, const char *>>
getSerializableTargetIndices() const override;
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const override;
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
+
};
namespace AMDGPU {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 4bc89ea..d0ba8e6 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -596,22 +596,6 @@
}
}
- // TODO: only do this when it is needed
- switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
- case AMDGPUSubtarget::SOUTHERN_ISLANDS:
- // "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states
- // ("S_NOP 3") on SI
- TII->insertWaitStates(*MBB, MI, 4);
- break;
- case AMDGPUSubtarget::SEA_ISLANDS:
- break;
- default: // VOLCANIC_ISLANDS and later
- // "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states
- // ("S_NOP 4") on VI and later. This also applies to VALUs which write
- // VCC, but we're unlikely to see VMEM use VCC.
- TII->insertWaitStates(*MBB, MI, 5);
- }
-
MI->eraseFromParent();
break;
}
@@ -991,3 +975,14 @@
}
}
}
+
+bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
+ unsigned Reg) const {
+ const TargetRegisterClass *RC;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ RC = MRI.getRegClass(Reg);
+ else
+ RC = getPhysRegClass(Reg);
+
+ return hasVGPRs(RC);
+}
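A short usage note on the new helper: checkVMEMHazards() calls it to skip vector-register operands, since the "VALU writes SGPR -> VMEM reads it" hazard only involves scalar reads. The sketch below (a hypothetical helper, not part of the patch) shows why the MachineRegisterInfo parameter matters: virtual registers carry their class in MRI, while physical registers, which the post-RA recognizer sees, are mapped back through getPhysRegClass().

```cpp
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Returns true if no register use of MI is a VGPR; works both before
// register allocation (virtual registers) and after (physical registers).
static bool readsOnlyScalars(const MachineInstr &MI,
                             const SIRegisterInfo &TRI,
                             const MachineRegisterInfo &MRI) {
  for (const MachineOperand &MO : MI.uses())
    if (MO.isReg() && TRI.isVGPR(MRI, MO.getReg()))
      return false;
  return true;
}
```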
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index f641031..e43b2c1 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -188,6 +188,8 @@
unsigned getSGPR32PressureSet() const { return SGPR32SetID; };
unsigned getVGPR32PressureSet() const { return VGPR32SetID; };
+ bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
+
private:
void buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp, unsigned Value,
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 4a46eb4..26f73c4 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -42,6 +42,7 @@
class SISchedMachineModel : SchedMachineModel {
let CompleteModel = 0;
let IssueWidth = 1;
+ let PostRAScheduler = 1;
}
def SIFullSpeedModel : SISchedMachineModel;
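Setting PostRAScheduler on the machine model is what actually switches the scheduler on: the generic subtarget hook consults this bit. The snippet below reflects my recollection of the generic implementation around this point in LLVM's history; treat it as an assumption and verify against the tree.

```cpp
// Generic behavior this change relies on (quoted from memory):
bool TargetSubtargetInfo::enablePostRAScheduler() const {
  return getSchedModel().PostRAScheduler;
}
```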