[llvm-mca] Move llvm-mca library to llvm/lib/MCA.
Summary: See PR38731.
Reviewers: andreadb
Subscribers: mgorny, javed.absar, tschuett, gbedwell, andreadb, RKSimon, llvm-commits
Differential Revision: https://reviews.llvm.org/D55557
llvm-svn: 349332
diff --git a/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp b/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp
new file mode 100644
index 0000000..edd32b9
--- /dev/null
+++ b/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp
@@ -0,0 +1,25 @@
+//===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the anchor for the base class that describes
+/// simulated hardware units.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+
+namespace llvm {
+namespace mca {
+
+// Pin the vtable with this method.
+HardwareUnit::~HardwareUnit() = default;
+
+} // namespace mca
+} // namespace llvm
diff --git a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
new file mode 100644
index 0000000..8895eb3
--- /dev/null
+++ b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
@@ -0,0 +1,190 @@
+//===----------------------- LSUnit.cpp -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A Load-Store Unit for the llvm-mca tool.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+LSUnit::LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
+ bool AssumeNoAlias)
+ : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {
+ if (SM.hasExtraProcessorInfo()) {
+ const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+ if (!LQ_Size && EPI.LoadQueueID) {
+ const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID);
+ LQ_Size = LdQDesc.BufferSize;
+ }
+
+ if (!SQ_Size && EPI.StoreQueueID) {
+ const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID);
+ SQ_Size = StQDesc.BufferSize;
+ }
+ }
+}
+
+#ifndef NDEBUG
+void LSUnit::dump() const {
+ dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n';
+ dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n';
+ dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n';
+ dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n';
+}
+#endif
+
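+// Reserve a load queue entry for the memory operation identified by Index.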
+void LSUnit::assignLQSlot(unsigned Index) {
+ assert(!isLQFull());
+ assert(LoadQueue.count(Index) == 0);
+
+ LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << Index
+ << ",slot=" << LoadQueue.size() << ">\n");
+ LoadQueue.insert(Index);
+}
+
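+// Reserve a store queue entry for the memory operation identified by Index.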
+void LSUnit::assignSQSlot(unsigned Index) {
+ assert(!isSQFull());
+ assert(StoreQueue.count(Index) == 0);
+
+ LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << Index
+ << ",slot=" << StoreQueue.size() << ">\n");
+ StoreQueue.insert(Index);
+}
+
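+// Allocate queue entries for a new memory operation. Instructions with side
+// effects are also tracked as load/store barriers.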
+void LSUnit::dispatch(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ unsigned IsMemBarrier = Desc.HasSideEffects;
+ assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
+
+ const unsigned Index = IR.getSourceIndex();
+ if (Desc.MayLoad) {
+ if (IsMemBarrier)
+ LoadBarriers.insert(Index);
+ assignLQSlot(Index);
+ }
+
+ if (Desc.MayStore) {
+ if (IsMemBarrier)
+ StoreBarriers.insert(Index);
+ assignSQSlot(Index);
+ }
+}
+
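+// Check for structural hazards: report whether the load and/or store queue
+// still has room for this instruction.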
+LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ if (Desc.MayLoad && isLQFull())
+ return LSUnit::LSU_LQUEUE_FULL;
+ if (Desc.MayStore && isSQFull())
+ return LSUnit::LSU_SQUEUE_FULL;
+ return LSUnit::LSU_AVAILABLE;
+}
+
+bool LSUnit::isReady(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ const unsigned Index = IR.getSourceIndex();
+ bool IsALoad = Desc.MayLoad;
+ bool IsAStore = Desc.MayStore;
+ assert((IsALoad || IsAStore) && "Not a memory operation!");
+ assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!");
+ assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!");
+
+ if (IsALoad && !LoadBarriers.empty()) {
+ unsigned LoadBarrierIndex = *LoadBarriers.begin();
+    // A younger load cannot pass an older load barrier.
+ if (Index > LoadBarrierIndex)
+ return false;
+    // A load barrier cannot pass an older load.
+ if (Index == LoadBarrierIndex && Index != *LoadQueue.begin())
+ return false;
+ }
+
+ if (IsAStore && !StoreBarriers.empty()) {
+ unsigned StoreBarrierIndex = *StoreBarriers.begin();
+    // A younger store cannot pass an older store barrier.
+ if (Index > StoreBarrierIndex)
+ return false;
+    // A store barrier cannot pass an older store.
+ if (Index == StoreBarrierIndex && Index != *StoreQueue.begin())
+ return false;
+ }
+
+ // A load may not pass a previous store unless flag 'NoAlias' is set.
+ // A load may pass a previous load.
+ if (NoAlias && IsALoad)
+ return true;
+
+ if (StoreQueue.size()) {
+ // A load may not pass a previous store.
+ // A store may not pass a previous store.
+ if (Index > *StoreQueue.begin())
+ return false;
+ }
+
+ // Okay, we are older than the oldest store in the queue.
+ // If there are no pending loads, then we can say for sure that this
+ // instruction is ready.
+ if (isLQEmpty())
+ return true;
+
+ // Check if there are no older loads.
+ if (Index <= *LoadQueue.begin())
+ return true;
+
+ // There is at least one younger load.
+ //
+ // A store may not pass a previous load.
+ // A load may pass a previous load.
+ return !IsAStore;
+}
+
+void LSUnit::onInstructionExecuted(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ const unsigned Index = IR.getSourceIndex();
+ bool IsALoad = Desc.MayLoad;
+ bool IsAStore = Desc.MayStore;
+
+ if (IsALoad) {
+ if (LoadQueue.erase(Index)) {
+ LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
+ << " has been removed from the load queue.\n");
+ }
+ if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) {
+ LLVM_DEBUG(
+ dbgs() << "[LSUnit]: Instruction idx=" << Index
+ << " has been removed from the set of load barriers.\n");
+ LoadBarriers.erase(Index);
+ }
+ }
+
+ if (IsAStore) {
+ if (StoreQueue.erase(Index)) {
+ LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
+ << " has been removed from the store queue.\n");
+ }
+
+ if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) {
+ LLVM_DEBUG(
+ dbgs() << "[LSUnit]: Instruction idx=" << Index
+ << " has been removed from the set of store barriers.\n");
+ StoreBarriers.erase(Index);
+ }
+ }
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
new file mode 100644
index 0000000..22977e5
--- /dev/null
+++ b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
@@ -0,0 +1,491 @@
+//===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a register mapping file class. This class is responsible
+/// for managing hardware register files and the tracking of data dependencies
+/// between registers.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+RegisterFile::RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
+ unsigned NumRegs)
+ : MRI(mri),
+ RegisterMappings(mri.getNumRegs(), {WriteRef(), RegisterRenamingInfo()}),
+ ZeroRegisters(mri.getNumRegs(), false) {
+ initialize(SM, NumRegs);
+}
+
+void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) {
+ // Create a default register file that "sees" all the machine registers
+ // declared by the target. The number of physical registers in the default
+ // register file is set equal to `NumRegs`. A value of zero for `NumRegs`
+ // means: this register file has an unbounded number of physical registers.
+ RegisterFiles.emplace_back(NumRegs);
+ if (!SM.hasExtraProcessorInfo())
+ return;
+
+ // For each user defined register file, allocate a RegisterMappingTracker
+ // object. The size of every register file, as well as the mapping between
+ // register files and register classes is specified via tablegen.
+ const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo();
+
+ // Skip invalid register file at index 0.
+ for (unsigned I = 1, E = Info.NumRegisterFiles; I < E; ++I) {
+ const MCRegisterFileDesc &RF = Info.RegisterFiles[I];
+ assert(RF.NumPhysRegs && "Invalid PRF with zero physical registers!");
+
+ // The cost of a register definition is equivalent to the number of
+ // physical registers that are allocated at register renaming stage.
+ unsigned Length = RF.NumRegisterCostEntries;
+ const MCRegisterCostEntry *FirstElt =
+ &Info.RegisterCostTable[RF.RegisterCostEntryIdx];
+ addRegisterFile(RF, ArrayRef<MCRegisterCostEntry>(FirstElt, Length));
+ }
+}
+
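+// A new cycle has started: reset the number of moves eliminated in the
+// current cycle for every register file.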
+void RegisterFile::cycleStart() {
+ for (RegisterMappingTracker &RMT : RegisterFiles)
+ RMT.NumMoveEliminated = 0;
+}
+
+void RegisterFile::addRegisterFile(const MCRegisterFileDesc &RF,
+ ArrayRef<MCRegisterCostEntry> Entries) {
+ // A default register file is always allocated at index #0. That register file
+ // is mainly used to count the total number of mappings created by all
+ // register files at runtime. Users can limit the number of available physical
+ // registers in register file #0 through the command line flag
+ // `-register-file-size`.
+ unsigned RegisterFileIndex = RegisterFiles.size();
+ RegisterFiles.emplace_back(RF.NumPhysRegs, RF.MaxMovesEliminatedPerCycle,
+ RF.AllowZeroMoveEliminationOnly);
+
+ // Special case where there is no register class identifier in the set.
+ // An empty set of register classes means: this register file contains all
+ // the physical registers specified by the target.
+ // We optimistically assume that a register can be renamed at the cost of a
+ // single physical register. The constructor of RegisterFile ensures that
+ // a RegisterMapping exists for each logical register defined by the Target.
+ if (Entries.empty())
+ return;
+
+ // Now update the cost of individual registers.
+ for (const MCRegisterCostEntry &RCE : Entries) {
+ const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID);
+ for (const MCPhysReg Reg : RC) {
+ RegisterRenamingInfo &Entry = RegisterMappings[Reg].second;
+ IndexPlusCostPairTy &IPC = Entry.IndexPlusCost;
+ if (IPC.first && IPC.first != RegisterFileIndex) {
+ // The only register file that is allowed to overlap is the default
+ // register file at index #0. The analysis is inaccurate if register
+ // files overlap.
+ errs() << "warning: register " << MRI.getName(Reg)
+ << " defined in multiple register files.";
+ }
+ IPC = std::make_pair(RegisterFileIndex, RCE.Cost);
+ Entry.RenameAs = Reg;
+ Entry.AllowMoveElimination = RCE.AllowMoveElimination;
+
+ // Assume the same cost for each sub-register.
+ for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) {
+ RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second;
+ if (!OtherEntry.IndexPlusCost.first &&
+ (!OtherEntry.RenameAs ||
+ MRI.isSuperRegister(*I, OtherEntry.RenameAs))) {
+ OtherEntry.IndexPlusCost = IPC;
+ OtherEntry.RenameAs = Reg;
+ }
+ }
+ }
+ }
+}
+
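+// Charge the rename cost of Entry to its register file, and mirror the update
+// in the default register file at index #0.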
+void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry,
+ MutableArrayRef<unsigned> UsedPhysRegs) {
+ unsigned RegisterFileIndex = Entry.IndexPlusCost.first;
+ unsigned Cost = Entry.IndexPlusCost.second;
+ if (RegisterFileIndex) {
+ RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
+ RMT.NumUsedPhysRegs += Cost;
+ UsedPhysRegs[RegisterFileIndex] += Cost;
+ }
+
+ // Now update the default register mapping tracker.
+ RegisterFiles[0].NumUsedPhysRegs += Cost;
+ UsedPhysRegs[0] += Cost;
+}
+
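+// Give back the physical registers consumed by Entry to its register file,
+// and mirror the update in the default register file at index #0.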
+void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry,
+ MutableArrayRef<unsigned> FreedPhysRegs) {
+ unsigned RegisterFileIndex = Entry.IndexPlusCost.first;
+ unsigned Cost = Entry.IndexPlusCost.second;
+ if (RegisterFileIndex) {
+ RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
+ RMT.NumUsedPhysRegs -= Cost;
+ FreedPhysRegs[RegisterFileIndex] += Cost;
+ }
+
+ // Now update the default register mapping tracker.
+ RegisterFiles[0].NumUsedPhysRegs -= Cost;
+ FreedPhysRegs[0] += Cost;
+}
+
+void RegisterFile::addRegisterWrite(WriteRef Write,
+ MutableArrayRef<unsigned> UsedPhysRegs) {
+ WriteState &WS = *Write.getWriteState();
+ unsigned RegID = WS.getRegisterID();
+ assert(RegID && "Adding an invalid register definition?");
+
+ LLVM_DEBUG({
+ dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex()
+ << ", " << MRI.getName(RegID) << "]\n";
+ });
+
+ // If RenameAs is equal to RegID, then RegID is subject to register renaming
+ // and false dependencies on RegID are all eliminated.
+
+ // If RenameAs references the invalid register, then we optimistically assume
+ // that it can be renamed. In the absence of tablegen descriptors for register
+ // files, RenameAs is always set to the invalid register ID. In all other
+ // cases, RenameAs must be either equal to RegID, or it must reference a
+ // super-register of RegID.
+
+  // If RenameAs is a super-register of RegID, then a write to RegID always has
+  // a false dependency on RenameAs. The only exception is when the write
+  // implicitly clears the upper portion of the underlying register.
+ // If a write clears its super-registers, then it is renamed as `RenameAs`.
+ bool IsWriteZero = WS.isWriteZero();
+ bool IsEliminated = WS.isEliminated();
+ bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated;
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ WS.setPRF(RRI.IndexPlusCost.first);
+
+ if (RRI.RenameAs && RRI.RenameAs != RegID) {
+ RegID = RRI.RenameAs;
+ WriteRef &OtherWrite = RegisterMappings[RegID].first;
+
+ if (!WS.clearsSuperRegisters()) {
+ // The processor keeps the definition of `RegID` together with register
+ // `RenameAs`. Since this partial write is not renamed, no physical
+ // register is allocated.
+ ShouldAllocatePhysRegs = false;
+
+ WriteState *OtherWS = OtherWrite.getWriteState();
+ if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {
+ // This partial write has a false dependency on RenameAs.
+ assert(!IsEliminated && "Unexpected partial update!");
+ OtherWS->addUser(&WS);
+ }
+ }
+ }
+
+ // Update zero registers.
+ unsigned ZeroRegisterID =
+ WS.clearsSuperRegisters() ? RegID : WS.getRegisterID();
+ if (IsWriteZero) {
+ ZeroRegisters.setBit(ZeroRegisterID);
+ for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
+ ZeroRegisters.setBit(*I);
+ } else {
+ ZeroRegisters.clearBit(ZeroRegisterID);
+ for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
+ ZeroRegisters.clearBit(*I);
+ }
+
+  // If this move has been eliminated, then the call to tryEliminateMove
+  // should have already updated all the register mappings.
+ if (!IsEliminated) {
+ // Update the mapping for register RegID including its sub-registers.
+ RegisterMappings[RegID].first = Write;
+ RegisterMappings[RegID].second.AliasRegID = 0U;
+ for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ RegisterMappings[*I].first = Write;
+ RegisterMappings[*I].second.AliasRegID = 0U;
+ }
+
+ // No physical registers are allocated for instructions that are optimized
+ // in hardware. For example, zero-latency data-dependency breaking
+ // instructions don't consume physical registers.
+ if (ShouldAllocatePhysRegs)
+ allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
+ }
+
+ if (!WS.clearsSuperRegisters())
+ return;
+
+ for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ if (!IsEliminated) {
+ RegisterMappings[*I].first = Write;
+ RegisterMappings[*I].second.AliasRegID = 0U;
+ }
+
+ if (IsWriteZero)
+ ZeroRegisters.setBit(*I);
+ else
+ ZeroRegisters.clearBit(*I);
+ }
+}
+
+void RegisterFile::removeRegisterWrite(
+ const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) {
+ // Early exit if this write was eliminated. A write eliminated at register
+ // renaming stage generates an alias, and it is not added to the PRF.
+ if (WS.isEliminated())
+ return;
+
+ unsigned RegID = WS.getRegisterID();
+
+ assert(RegID != 0 && "Invalidating an already invalid register?");
+ assert(WS.getCyclesLeft() != UNKNOWN_CYCLES &&
+ "Invalidating a write of unknown cycles!");
+ assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!");
+
+ bool ShouldFreePhysRegs = !WS.isWriteZero();
+ unsigned RenameAs = RegisterMappings[RegID].second.RenameAs;
+ if (RenameAs && RenameAs != RegID) {
+ RegID = RenameAs;
+
+ if (!WS.clearsSuperRegisters()) {
+ // Keep the definition of `RegID` together with register `RenameAs`.
+ ShouldFreePhysRegs = false;
+ }
+ }
+
+ if (ShouldFreePhysRegs)
+ freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs);
+
+ WriteRef &WR = RegisterMappings[RegID].first;
+ if (WR.getWriteState() == &WS)
+ WR.invalidate();
+
+ for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ WriteRef &OtherWR = RegisterMappings[*I].first;
+ if (OtherWR.getWriteState() == &WS)
+ OtherWR.invalidate();
+ }
+
+ if (!WS.clearsSuperRegisters())
+ return;
+
+ for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ WriteRef &OtherWR = RegisterMappings[*I].first;
+ if (OtherWR.getWriteState() == &WS)
+ OtherWR.invalidate();
+ }
+}
+
+bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) {
+ const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()];
+ const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()];
+
+ // From and To must be owned by the same PRF.
+ const RegisterRenamingInfo &RRIFrom = RMFrom.second;
+ const RegisterRenamingInfo &RRITo = RMTo.second;
+ unsigned RegisterFileIndex = RRIFrom.IndexPlusCost.first;
+ if (RegisterFileIndex != RRITo.IndexPlusCost.first)
+ return false;
+
+ // We only allow move elimination for writes that update a full physical
+ // register. On X86, move elimination is possible with 32-bit general purpose
+ // registers because writes to those registers are not partial writes. If a
+ // register move is a partial write, then we conservatively assume that move
+ // elimination fails, since it would either trigger a partial update, or the
+ // issue of a merge opcode.
+ //
+  // Note that this constraint may be lifted in the future. For example, we
+  // could make this model more flexible, and let users customize the set of
+  // registers (i.e. register classes) that allow move elimination.
+ //
+ // For now, we assume that there is a strong correlation between registers
+ // that allow move elimination, and how those same registers are renamed in
+ // hardware.
+ if (RRITo.RenameAs && RRITo.RenameAs != WS.getRegisterID()) {
+ // Early exit if the PRF doesn't support move elimination for this register.
+ if (!RegisterMappings[RRITo.RenameAs].second.AllowMoveElimination)
+ return false;
+ if (!WS.clearsSuperRegisters())
+ return false;
+ }
+
+ RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
+ if (RMT.MaxMoveEliminatedPerCycle &&
+ RMT.NumMoveEliminated == RMT.MaxMoveEliminatedPerCycle)
+ return false;
+
+ bool IsZeroMove = ZeroRegisters[RS.getRegisterID()];
+ if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove)
+ return false;
+
+ MCPhysReg FromReg = RS.getRegisterID();
+ MCPhysReg ToReg = WS.getRegisterID();
+
+ // Construct an alias.
+ MCPhysReg AliasReg = FromReg;
+ if (RRIFrom.RenameAs)
+ AliasReg = RRIFrom.RenameAs;
+
+ const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasReg].second;
+ if (RMAlias.AliasRegID)
+ AliasReg = RMAlias.AliasRegID;
+
+ if (AliasReg != ToReg) {
+ RegisterMappings[ToReg].second.AliasRegID = AliasReg;
+ for (MCSubRegIterator I(ToReg, &MRI); I.isValid(); ++I)
+ RegisterMappings[*I].second.AliasRegID = AliasReg;
+ }
+
+ RMT.NumMoveEliminated++;
+ if (IsZeroMove) {
+ WS.setWriteZero();
+ RS.setReadZero();
+ }
+ WS.setEliminated();
+
+ return true;
+}
+
+void RegisterFile::collectWrites(const ReadState &RS,
+ SmallVectorImpl<WriteRef> &Writes) const {
+ unsigned RegID = RS.getRegisterID();
+ assert(RegID && RegID < RegisterMappings.size());
+ LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register "
+ << MRI.getName(RegID) << '\n');
+
+ // Check if this is an alias.
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ if (RRI.AliasRegID)
+ RegID = RRI.AliasRegID;
+
+ const WriteRef &WR = RegisterMappings[RegID].first;
+ if (WR.isValid())
+ Writes.push_back(WR);
+
+ // Handle potential partial register updates.
+ for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ const WriteRef &WR = RegisterMappings[*I].first;
+ if (WR.isValid())
+ Writes.push_back(WR);
+ }
+
+ // Remove duplicate entries and resize the input vector.
+ if (Writes.size() > 1) {
+ sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) {
+ return Lhs.getWriteState() < Rhs.getWriteState();
+ });
+ auto It = std::unique(Writes.begin(), Writes.end());
+ Writes.resize(std::distance(Writes.begin(), It));
+ }
+
+ LLVM_DEBUG({
+ for (const WriteRef &WR : Writes) {
+ const WriteState &WS = *WR.getWriteState();
+ dbgs() << "[PRF] Found a dependent use of Register "
+ << MRI.getName(WS.getRegisterID()) << " (defined by instruction #"
+ << WR.getSourceIndex() << ")\n";
+ }
+ });
+}
+
+void RegisterFile::addRegisterRead(ReadState &RS,
+ SmallVectorImpl<WriteRef> &Defs) const {
+ unsigned RegID = RS.getRegisterID();
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ RS.setPRF(RRI.IndexPlusCost.first);
+ if (RS.isIndependentFromDef())
+ return;
+
+ if (ZeroRegisters[RS.getRegisterID()])
+ RS.setReadZero();
+ collectWrites(RS, Defs);
+ RS.setDependentWrites(Defs.size());
+}
+
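+// Returns a bitmask where bit I is set if register file I does not have enough
+// free physical registers to rename all the definitions in Regs.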
+unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
+ SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles());
+
+ // Find how many new mappings must be created for each register file.
+ for (const unsigned RegID : Regs) {
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost;
+ if (Entry.first)
+ NumPhysRegs[Entry.first] += Entry.second;
+ NumPhysRegs[0] += Entry.second;
+ }
+
+ unsigned Response = 0;
+ for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) {
+ unsigned NumRegs = NumPhysRegs[I];
+ if (!NumRegs)
+ continue;
+
+ const RegisterMappingTracker &RMT = RegisterFiles[I];
+ if (!RMT.NumPhysRegs) {
+ // The register file has an unbounded number of microarchitectural
+ // registers.
+ continue;
+ }
+
+ if (RMT.NumPhysRegs < NumRegs) {
+      // The current register file is too small. This may occur if the number
+      // of microarchitectural registers in register file #0 was changed by the
+      // user via the flag -reg-file-size. Alternatively, the scheduling model
+      // specified too few registers for this register file.
+ LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n");
+
+ // FIXME: Normalize the instruction register count to match the
+ // NumPhysRegs value. This is a highly unusual case, and is not expected
+ // to occur. This normalization is hiding an inconsistency in either the
+ // scheduling model or in the value that the user might have specified
+ // for NumPhysRegs.
+ NumRegs = RMT.NumPhysRegs;
+ }
+
+ if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs))
+ Response |= (1U << I);
+ }
+
+ return Response;
+}
+
+#ifndef NDEBUG
+void RegisterFile::dump() const {
+ for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) {
+ const RegisterMapping &RM = RegisterMappings[I];
+ const RegisterRenamingInfo &RRI = RM.second;
+ if (ZeroRegisters[I]) {
+ dbgs() << MRI.getName(I) << ", " << I
+ << ", PRF=" << RRI.IndexPlusCost.first
+ << ", Cost=" << RRI.IndexPlusCost.second
+ << ", RenameAs=" << RRI.RenameAs << ", IsZero=" << ZeroRegisters[I]
+ << ",";
+ RM.first.dump();
+ dbgs() << '\n';
+ }
+ }
+
+ for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) {
+ dbgs() << "Register File #" << I;
+ const RegisterMappingTracker &RMT = RegisterFiles[I];
+ dbgs() << "\n TotalMappings: " << RMT.NumPhysRegs
+ << "\n NumUsedMappings: " << RMT.NumUsedPhysRegs << '\n';
+ }
+}
+#endif
+
+} // namespace mca
+} // namespace llvm
diff --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
new file mode 100644
index 0000000..b62fccd
--- /dev/null
+++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -0,0 +1,326 @@
+//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// The classes here represent processor resource units and their management
+/// strategy. These classes are managed by the Scheduler.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/ResourceManager.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+ResourceStrategy::~ResourceStrategy() = default;
+
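+// Implements a round-robin selection policy: pick the most significant ready
+// unit still in the current sequence, and rebuild the sequence mask once it
+// has been exhausted.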
+uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) {
+ // This method assumes that ReadyMask cannot be zero.
+ uint64_t CandidateMask = ReadyMask & NextInSequenceMask;
+ if (CandidateMask) {
+ CandidateMask = PowerOf2Floor(CandidateMask);
+ NextInSequenceMask &= (CandidateMask | (CandidateMask - 1));
+ return CandidateMask;
+ }
+
+ NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence;
+ RemovedFromNextInSequence = 0;
+ CandidateMask = ReadyMask & NextInSequenceMask;
+
+ if (CandidateMask) {
+ CandidateMask = PowerOf2Floor(CandidateMask);
+ NextInSequenceMask &= (CandidateMask | (CandidateMask - 1));
+ return CandidateMask;
+ }
+
+ NextInSequenceMask = ResourceUnitMask;
+ CandidateMask = PowerOf2Floor(ReadyMask & NextInSequenceMask);
+ NextInSequenceMask &= (CandidateMask | (CandidateMask - 1));
+ return CandidateMask;
+}
+
+void DefaultResourceStrategy::used(uint64_t Mask) {
+ if (Mask > NextInSequenceMask) {
+ RemovedFromNextInSequence |= Mask;
+ return;
+ }
+
+ NextInSequenceMask &= (~Mask);
+ if (NextInSequenceMask)
+ return;
+
+ NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence;
+ RemovedFromNextInSequence = 0;
+}
+
+ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index,
+ uint64_t Mask)
+ : ProcResourceDescIndex(Index), ResourceMask(Mask),
+      BufferSize(Desc.BufferSize), IsAGroup(countPopulation(ResourceMask) > 1) {
+ if (IsAGroup)
+ ResourceSizeMask = ResourceMask ^ PowerOf2Floor(ResourceMask);
+ else
+ ResourceSizeMask = (1ULL << Desc.NumUnits) - 1;
+ ReadyMask = ResourceSizeMask;
+ AvailableSlots = BufferSize == -1 ? 0U : static_cast<unsigned>(BufferSize);
+ Unavailable = false;
+}
+
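+// A resource is ready if it is not reserved (dispatch hazards are the
+// exception) and at least NumUnits of its units are currently available.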
+bool ResourceState::isReady(unsigned NumUnits) const {
+ return (!isReserved() || isADispatchHazard()) &&
+ countPopulation(ReadyMask) >= NumUnits;
+}
+
+ResourceStateEvent ResourceState::isBufferAvailable() const {
+ if (isADispatchHazard() && isReserved())
+ return RS_RESERVED;
+ if (!isBuffered() || AvailableSlots)
+ return RS_BUFFER_AVAILABLE;
+ return RS_BUFFER_UNAVAILABLE;
+}
+
+#ifndef NDEBUG
+void ResourceState::dump() const {
+ dbgs() << "MASK: " << ResourceMask << ", SIZE_MASK: " << ResourceSizeMask
+ << ", RDYMASK: " << ReadyMask << ", BufferSize=" << BufferSize
+ << ", AvailableSlots=" << AvailableSlots
+ << ", Reserved=" << Unavailable << '\n';
+}
+#endif
+
+static unsigned getResourceStateIndex(uint64_t Mask) {
+ return std::numeric_limits<uint64_t>::digits - countLeadingZeros(Mask);
+}
+
+static std::unique_ptr<ResourceStrategy>
+getStrategyFor(const ResourceState &RS) {
+ if (RS.isAResourceGroup() || RS.getNumUnits() > 1)
+ return llvm::make_unique<DefaultResourceStrategy>(RS.getReadyMask());
+ return std::unique_ptr<ResourceStrategy>(nullptr);
+}
+
+ResourceManager::ResourceManager(const MCSchedModel &SM) {
+ computeProcResourceMasks(SM, ProcResID2Mask);
+ Resources.resize(SM.getNumProcResourceKinds());
+ Strategies.resize(SM.getNumProcResourceKinds());
+
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ uint64_t Mask = ProcResID2Mask[I];
+ unsigned Index = getResourceStateIndex(Mask);
+ Resources[Index] =
+ llvm::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask);
+ Strategies[Index] = getStrategyFor(*Resources[Index]);
+ }
+}
+
+void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
+ uint64_t ResourceMask) {
+ unsigned Index = getResourceStateIndex(ResourceMask);
+ assert(Index < Resources.size() && "Invalid processor resource index!");
+ assert(S && "Unexpected null strategy in input!");
+ Strategies[Index] = std::move(S);
+}
+
+unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const {
+ return Resources[getResourceStateIndex(Mask)]->getProcResourceID();
+}
+
+unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const {
+ return Resources[getResourceStateIndex(ResourceID)]->getNumUnits();
+}
+
+// Returns the actual resource consumed by this Use: the first element of the
+// pair is the primary resource ID, the second is the specific sub-resource ID.
+ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) {
+ unsigned Index = getResourceStateIndex(ResourceID);
+ ResourceState &RS = *Resources[Index];
+ assert(RS.isReady() && "No available units to select!");
+
+ // Special case where RS is not a group, and it only declares a single
+ // resource unit.
+ if (!RS.isAResourceGroup() && RS.getNumUnits() == 1)
+ return std::make_pair(ResourceID, RS.getReadyMask());
+
+ uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask());
+ if (RS.isAResourceGroup())
+ return selectPipe(SubResourceID);
+ return std::make_pair(ResourceID, SubResourceID);
+}
+
+void ResourceManager::use(const ResourceRef &RR) {
+ // Mark the sub-resource referenced by RR as used.
+ unsigned RSID = getResourceStateIndex(RR.first);
+ ResourceState &RS = *Resources[RSID];
+ RS.markSubResourceAsUsed(RR.second);
+ // Remember to update the resource strategy for non-group resources with
+ // multiple units.
+ if (RS.getNumUnits() > 1)
+ Strategies[RSID]->used(RR.second);
+
+ // If there are still available units in RR.first,
+ // then we are done.
+ if (RS.isReady())
+ return;
+
+  // Notify other resources that RR.first is no longer available.
+ for (std::unique_ptr<ResourceState> &Res : Resources) {
+ ResourceState &Current = *Res;
+ if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first)
+ continue;
+
+ if (Current.containsResource(RR.first)) {
+ unsigned Index = getResourceStateIndex(Current.getResourceMask());
+ Current.markSubResourceAsUsed(RR.first);
+ Strategies[Index]->used(RR.first);
+ }
+ }
+}
+
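+// Release the sub-resource used by RR. If the parent resource was fully used,
+// also mark it as available again in every group that contains it.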
+void ResourceManager::release(const ResourceRef &RR) {
+ ResourceState &RS = *Resources[getResourceStateIndex(RR.first)];
+ bool WasFullyUsed = !RS.isReady();
+ RS.releaseSubResource(RR.second);
+ if (!WasFullyUsed)
+ return;
+
+ for (std::unique_ptr<ResourceState> &Res : Resources) {
+ ResourceState &Current = *Res;
+ if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first)
+ continue;
+
+ if (Current.containsResource(RR.first))
+ Current.releaseSubResource(RR.first);
+ }
+}
+
+ResourceStateEvent
+ResourceManager::canBeDispatched(ArrayRef<uint64_t> Buffers) const {
+ ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE;
+ for (uint64_t Buffer : Buffers) {
+ ResourceState &RS = *Resources[getResourceStateIndex(Buffer)];
+ Result = RS.isBufferAvailable();
+ if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE)
+ break;
+ }
+ return Result;
+}
+
+void ResourceManager::reserveBuffers(ArrayRef<uint64_t> Buffers) {
+ for (const uint64_t Buffer : Buffers) {
+ ResourceState &RS = *Resources[getResourceStateIndex(Buffer)];
+ assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE);
+ RS.reserveBuffer();
+
+ if (RS.isADispatchHazard()) {
+ assert(!RS.isReserved());
+ RS.setReserved();
+ }
+ }
+}
+
+void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) {
+ for (const uint64_t R : Buffers)
+ Resources[getResourceStateIndex(R)]->releaseBuffer();
+}
+
+bool ResourceManager::canBeIssued(const InstrDesc &Desc) const {
+ return all_of(
+ Desc.Resources, [&](const std::pair<uint64_t, const ResourceUsage> &E) {
+ unsigned NumUnits = E.second.isReserved() ? 0U : E.second.NumUnits;
+ unsigned Index = getResourceStateIndex(E.first);
+ return Resources[Index]->isReady(NumUnits);
+ });
+}
+
+// Returns true if all resources are in-order, and there is at least one
+// resource which is a dispatch hazard (BufferSize = 0).
+bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) const {
+ if (!canBeIssued(Desc))
+ return false;
+ bool AllInOrderResources = all_of(Desc.Buffers, [&](uint64_t BufferMask) {
+ unsigned Index = getResourceStateIndex(BufferMask);
+ const ResourceState &Resource = *Resources[Index];
+ return Resource.isInOrder() || Resource.isADispatchHazard();
+ });
+ if (!AllInOrderResources)
+ return false;
+
+ return any_of(Desc.Buffers, [&](uint64_t BufferMask) {
+ return Resources[getResourceStateIndex(BufferMask)]->isADispatchHazard();
+ });
+}
+
+void ResourceManager::issueInstruction(
+ const InstrDesc &Desc,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes) {
+ for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) {
+ const CycleSegment &CS = R.second.CS;
+ if (!CS.size()) {
+ releaseResource(R.first);
+ continue;
+ }
+
+ assert(CS.begin() == 0 && "Invalid {Start, End} cycles!");
+ if (!R.second.isReserved()) {
+ ResourceRef Pipe = selectPipe(R.first);
+ use(Pipe);
+ BusyResources[Pipe] += CS.size();
+ // Replace the resource mask with a valid processor resource index.
+ const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)];
+ Pipe.first = RS.getProcResourceID();
+ Pipes.emplace_back(std::pair<ResourceRef, ResourceCycles>(
+ Pipe, ResourceCycles(CS.size())));
+ } else {
+ assert((countPopulation(R.first) > 1) && "Expected a group!");
+ // Mark this group as reserved.
+ assert(R.second.isReserved());
+ reserveResource(R.first);
+ BusyResources[ResourceRef(R.first, R.first)] += CS.size();
+ }
+ }
+}
+
+void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) {
+ for (std::pair<ResourceRef, unsigned> &BR : BusyResources) {
+ if (BR.second)
+ BR.second--;
+ if (!BR.second) {
+ // Release this resource.
+ const ResourceRef &RR = BR.first;
+
+ if (countPopulation(RR.first) == 1)
+ release(RR);
+
+ releaseResource(RR.first);
+ ResourcesFreed.push_back(RR);
+ }
+ }
+
+ for (const ResourceRef &RF : ResourcesFreed)
+ BusyResources.erase(RF);
+}
+
+void ResourceManager::reserveResource(uint64_t ResourceID) {
+ ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)];
+ assert(!Resource.isReserved());
+ Resource.setReserved();
+}
+
+void ResourceManager::releaseResource(uint64_t ResourceID) {
+ ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)];
+ Resource.clearReserved();
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
new file mode 100644
index 0000000..de9f245
--- /dev/null
+++ b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
@@ -0,0 +1,88 @@
+//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file simulates the hardware responsible for retiring instructions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+RetireControlUnit::RetireControlUnit(const MCSchedModel &SM)
+ : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
+ AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) {
+ // Check if the scheduling model provides extra information about the machine
+ // processor. If so, then use that information to set the reorder buffer size
+ // and the maximum number of instructions retired per cycle.
+ if (SM.hasExtraProcessorInfo()) {
+ const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+ if (EPI.ReorderBufferSize)
+ AvailableSlots = EPI.ReorderBufferSize;
+ MaxRetirePerCycle = EPI.MaxRetirePerCycle;
+ }
+
+ assert(AvailableSlots && "Invalid reorder buffer size!");
+ Queue.resize(AvailableSlots);
+}
+
+// Reserves a number of slots, and returns a new token.
+unsigned RetireControlUnit::reserveSlot(const InstRef &IR,
+ unsigned NumMicroOps) {
+ assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!");
+ unsigned NormalizedQuantity =
+ std::min(NumMicroOps, static_cast<unsigned>(Queue.size()));
+ // Zero latency instructions may have zero uOps. Artificially bump this
+ // value to 1. Although zero latency instructions don't consume scheduler
+ // resources, they still consume one slot in the retire queue.
+ NormalizedQuantity = std::max(NormalizedQuantity, 1U);
+ unsigned TokenID = NextAvailableSlotIdx;
+ Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false};
+ NextAvailableSlotIdx += NormalizedQuantity;
+ NextAvailableSlotIdx %= Queue.size();
+ AvailableSlots -= NormalizedQuantity;
+ return TokenID;
+}
+
+const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const {
+ return Queue[CurrentInstructionSlotIdx];
+}
+
+void RetireControlUnit::consumeCurrentToken() {
+ RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx];
+ assert(Current.NumSlots && "Reserved zero slots?");
+ assert(Current.IR && "Invalid RUToken in the RCU queue.");
+ Current.IR.getInstruction()->retire();
+
+ // Update the slot index to be the next item in the circular queue.
+ CurrentInstructionSlotIdx += Current.NumSlots;
+ CurrentInstructionSlotIdx %= Queue.size();
+ AvailableSlots += Current.NumSlots;
+}
+
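+// Mark the token associated with instruction TokenID as executed so that the
+// corresponding entry can later be retired in program order.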
+void RetireControlUnit::onInstructionExecuted(unsigned TokenID) {
+ assert(Queue.size() > TokenID);
+ assert(Queue[TokenID].Executed == false && Queue[TokenID].IR);
+ Queue[TokenID].Executed = true;
+}
+
+#ifndef NDEBUG
+void RetireControlUnit::dump() const {
+ dbgs() << "Retire Unit: { Total Slots=" << Queue.size()
+ << ", Available Slots=" << AvailableSlots << " }\n";
+}
+#endif
+
+} // namespace mca
+} // namespace llvm
diff --git a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
new file mode 100644
index 0000000..3924ac5
--- /dev/null
+++ b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -0,0 +1,245 @@
+//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A scheduler for processor resource units and processor resource groups.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+void Scheduler::initializeStrategy(std::unique_ptr<SchedulerStrategy> S) {
+ // Ensure we have a valid (non-null) strategy object.
+ Strategy = S ? std::move(S) : llvm::make_unique<DefaultSchedulerStrategy>();
+}
+
+// Anchor the vtables of SchedulerStrategy and DefaultSchedulerStrategy.
+SchedulerStrategy::~SchedulerStrategy() = default;
+DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default;
+
+#ifndef NDEBUG
+void Scheduler::dump() const {
+ dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n';
+ dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n';
+ dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n';
+ Resources->dump();
+}
+#endif
+
+Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+
+ switch (Resources->canBeDispatched(Desc.Buffers)) {
+ case ResourceStateEvent::RS_BUFFER_UNAVAILABLE:
+ return Scheduler::SC_BUFFERS_FULL;
+ case ResourceStateEvent::RS_RESERVED:
+ return Scheduler::SC_DISPATCH_GROUP_STALL;
+ case ResourceStateEvent::RS_BUFFER_AVAILABLE:
+ break;
+ }
+
+ // Give lower priority to LSUnit stall events.
+ switch (LSU.isAvailable(IR)) {
+ case LSUnit::LSU_LQUEUE_FULL:
+ return Scheduler::SC_LOAD_QUEUE_FULL;
+ case LSUnit::LSU_SQUEUE_FULL:
+ return Scheduler::SC_STORE_QUEUE_FULL;
+ case LSUnit::LSU_AVAILABLE:
+ return Scheduler::SC_AVAILABLE;
+ }
+
+ llvm_unreachable("Don't know how to process this LSU state result!");
+}
+
+void Scheduler::issueInstructionImpl(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources) {
+ Instruction *IS = IR.getInstruction();
+ const InstrDesc &D = IS->getDesc();
+
+ // Issue the instruction and collect all the consumed resources
+ // into a vector. That vector is then used to notify the listener.
+ Resources->issueInstruction(D, UsedResources);
+
+ // Notify the instruction that it started executing.
+ // This updates the internal state of each write.
+ IS->execute();
+
+ if (IS->isExecuting())
+ IssuedSet.emplace_back(IR);
+ else if (IS->isExecuted())
+ LSU.onInstructionExecuted(IR);
+}
+
+// Release the buffered resources and issue the instruction.
+void Scheduler::issueInstruction(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources,
+ SmallVectorImpl<InstRef> &ReadyInstructions) {
+ const Instruction &Inst = *IR.getInstruction();
+ bool HasDependentUsers = Inst.hasDependentUsers();
+
+ Resources->releaseBuffers(Inst.getDesc().Buffers);
+ issueInstructionImpl(IR, UsedResources);
+ // Instructions that have been issued during this cycle might have unblocked
+ // other dependent instructions. Dependent instructions may be issued during
+ // this same cycle if operands have ReadAdvance entries. Promote those
+ // instructions to the ReadySet and notify the caller that those are ready.
+ if (HasDependentUsers)
+ promoteToReadySet(ReadyInstructions);
+}
+
+void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
+ // Scan the set of waiting instructions and promote them to the
+ // ready queue if operands are all ready.
+ unsigned RemovedElements = 0;
+ for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) {
+ InstRef &IR = *I;
+ if (!IR)
+ break;
+
+    // Check if this instruction is now ready. If it is not, force a state
+    // transition by invoking method 'update()'.
+ Instruction &IS = *IR.getInstruction();
+ if (!IS.isReady())
+ IS.update();
+
+ // Check if there are still unsolved data dependencies.
+ if (!isReady(IR)) {
+ ++I;
+ continue;
+ }
+
+ Ready.emplace_back(IR);
+ ReadySet.emplace_back(IR);
+
+ IR.invalidate();
+ ++RemovedElements;
+ std::iter_swap(I, E - RemovedElements);
+ }
+
+ WaitSet.resize(WaitSet.size() - RemovedElements);
+}
+
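+// Select the next instruction to issue from the ReadySet: prefer the best
+// candidate according to the scheduler strategy, among instructions whose
+// resources can be issued this cycle, and remove it from the ReadySet.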
+InstRef Scheduler::select() {
+ unsigned QueueIndex = ReadySet.size();
+ for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) {
+ const InstRef &IR = ReadySet[I];
+ if (QueueIndex == ReadySet.size() ||
+ Strategy->compare(IR, ReadySet[QueueIndex])) {
+ const InstrDesc &D = IR.getInstruction()->getDesc();
+ if (Resources->canBeIssued(D))
+ QueueIndex = I;
+ }
+ }
+
+ if (QueueIndex == ReadySet.size())
+ return InstRef();
+
+ // We found an instruction to issue.
+ InstRef IR = ReadySet[QueueIndex];
+ std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]);
+ ReadySet.pop_back();
+ return IR;
+}
+
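+// Scan the IssuedSet and move instructions that have completed execution into
+// the Executed vector, notifying the load/store unit along the way.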
+void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) {
+ unsigned RemovedElements = 0;
+ for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) {
+ InstRef &IR = *I;
+ if (!IR)
+ break;
+ Instruction &IS = *IR.getInstruction();
+ if (!IS.isExecuted()) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
+ << " is still executing.\n");
+ ++I;
+ continue;
+ }
+
+ // Instruction IR has completed execution.
+ LSU.onInstructionExecuted(IR);
+ Executed.emplace_back(IR);
+ ++RemovedElements;
+ IR.invalidate();
+ std::iter_swap(I, E - RemovedElements);
+ }
+
+ IssuedSet.resize(IssuedSet.size() - RemovedElements);
+}
+
+void Scheduler::cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
+ SmallVectorImpl<InstRef> &Executed,
+ SmallVectorImpl<InstRef> &Ready) {
+ // Release consumed resources.
+ Resources->cycleEvent(Freed);
+
+ // Propagate the cycle event to the 'Issued' and 'Wait' sets.
+ for (InstRef &IR : IssuedSet)
+ IR.getInstruction()->cycleEvent();
+
+ updateIssuedSet(Executed);
+
+ for (InstRef &IR : WaitSet)
+ IR.getInstruction()->cycleEvent();
+
+ promoteToReadySet(Ready);
+}
+
+bool Scheduler::mustIssueImmediately(const InstRef &IR) const {
+  // Instructions that use an in-order dispatch/issue processor resource must be
+  // issued immediately to the pipeline(s). Any other in-order buffered resource
+  // (i.e. BufferSize=1) is consumed.
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ return Desc.isZeroLatency() || Resources->mustIssueImmediately(Desc);
+}
+
+void Scheduler::dispatch(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ Resources->reserveBuffers(Desc.Buffers);
+
+ // If necessary, reserve queue entries in the load-store unit (LSU).
+ bool IsMemOp = Desc.MayLoad || Desc.MayStore;
+ if (IsMemOp)
+ LSU.dispatch(IR);
+
+ if (!isReady(IR)) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
+ WaitSet.push_back(IR);
+ return;
+ }
+
+ // Don't add a zero-latency instruction to the Ready queue.
+ // A zero-latency instruction doesn't consume any scheduler resources. That is
+ // because it doesn't need to be executed, and it is often removed at register
+ // renaming stage. For example, register-register moves are often optimized at
+ // register renaming stage by simply updating register aliases. On some
+ // targets, zero-idiom instructions (for example: a xor that clears the value
+ // of a register) are treated specially, and are often eliminated at register
+ // renaming stage.
+ if (!mustIssueImmediately(IR)) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n");
+ ReadySet.push_back(IR);
+ }
+}
+
+bool Scheduler::isReady(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ bool IsMemOp = Desc.MayLoad || Desc.MayStore;
+ return IR.getInstruction()->isReady() && (!IsMemOp || LSU.isReady(IR));
+}
+
+} // namespace mca
+} // namespace llvm