[llvm-mca] Speed up the computation of the wait/ready/issued sets in the Scheduler.
This patch is a follow-up to r338702.
We don't need to use a map to model the wait/ready/issued sets. A vector is more
efficient: entries are stored contiguously, insertion is a simple append, and
removal is done by swapping dead entries towards the end of the vector and
resizing it once per scan.
This patch gives us an average 7.5% speedup (on top of the ~12% speedup obtained
after r338702).
llvm-svn: 338883
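
Note on the removal pattern: the new vector-based sets never erase in the middle.
An entry that leaves a set is invalidated, swapped towards the end of the vector,
and the vector is resized once after the scan. Below is a minimal standalone
sketch of that pattern, using simplified, hypothetical types (Entry, drain); the
real code operates on mca::InstRef objects inside promoteToReadySet() and
updateIssuedSet().

#include <algorithm>
#include <cstdio>
#include <vector>

struct Entry {
  unsigned Index;                  // ~0u marks an invalidated entry.
  bool Done;                       // Stands in for "ready to leave the set".
  bool isValid() const { return Index != ~0u; }
  void invalidate() { Index = ~0u; }
};

// Move every finished entry from Set into Out. Matches are invalidated and
// swapped towards the end of the vector; a single resize() then drops them.
static void drain(std::vector<Entry> &Set, std::vector<Entry> &Out) {
  unsigned RemovedElements = 0;
  for (auto I = Set.begin(), E = Set.end(); I != E;) {
    Entry &En = *I;
    if (!En.isValid())
      break;                       // Only invalidated entries follow.
    if (!En.Done) {
      ++I;
      continue;
    }
    Out.push_back(En);
    En.invalidate();
    ++RemovedElements;
    std::iter_swap(I, E - RemovedElements);
  }
  Set.resize(Set.size() - RemovedElements);
}

int main() {
  std::vector<Entry> Set = {{0, false}, {1, true}, {2, true}, {3, false}};
  std::vector<Entry> Done;
  drain(Set, Done);
  std::printf("live entries: %zu, drained entries: %zu\n", Set.size(), Done.size());
  return 0;
}
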
diff --git a/llvm/tools/llvm-mca/Scheduler.cpp b/llvm/tools/llvm-mca/Scheduler.cpp
index 553cd00..764cd23 100644
--- a/llvm/tools/llvm-mca/Scheduler.cpp
+++ b/llvm/tools/llvm-mca/Scheduler.cpp
@@ -250,9 +250,9 @@
#ifndef NDEBUG
void Scheduler::dump() const {
- dbgs() << "[SCHEDULER]: WaitQueue size is: " << WaitQueue.size() << '\n';
- dbgs() << "[SCHEDULER]: ReadyQueue size is: " << ReadyQueue.size() << '\n';
- dbgs() << "[SCHEDULER]: IssuedQueue size is: " << IssuedQueue.size() << '\n';
+ dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n';
+ dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n';
+ dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n';
Resources->dump();
}
#endif
@@ -296,7 +296,7 @@
IS->execute();
if (IS->isExecuting())
- IssuedQueue[IR.getSourceIndex()] = IS;
+ IssuedSet.emplace_back(IR);
}
// Release the buffered resources and issue the instruction.
@@ -308,92 +308,109 @@
issueInstructionImpl(IR, UsedResources);
}
-void Scheduler::promoteToReadyQueue(SmallVectorImpl<InstRef> &Ready) {
+void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
// Scan the set of waiting instructions and promote them to the
// ready queue if operands are all ready.
- for (auto I = WaitQueue.begin(), E = WaitQueue.end(); I != E;) {
- const unsigned IID = I->first;
- Instruction *IS = I->second;
+ unsigned RemovedElements = 0;
+ for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E; ) {
+ InstRef &IR = *I;
+ if (!IR.isValid())
+ break;
// Check if this instruction is now ready. In case, force
// a transition in state using method 'update()'.
- if (!IS->isReady())
- IS->update();
+ Instruction &IS = *IR.getInstruction();
+ if (!IS.isReady())
+ IS.update();
- const InstrDesc &Desc = IS->getDesc();
+ const InstrDesc &Desc = IS.getDesc();
bool IsMemOp = Desc.MayLoad || Desc.MayStore;
- if (!IS->isReady() || (IsMemOp && !LSU->isReady({IID, IS}))) {
+ if (!IS.isReady() || (IsMemOp && !LSU->isReady(IR))) {
++I;
continue;
}
- Ready.emplace_back(IID, IS);
- ReadyQueue[IID] = IS;
- auto ToRemove = I;
- ++I;
- WaitQueue.erase(ToRemove);
+ Ready.emplace_back(IR);
+ ReadySet.emplace_back(IR);
+
+ IR.invalidate();
+ ++RemovedElements;
+ std::iter_swap(I, E - RemovedElements);
}
+
+ WaitSet.resize(WaitSet.size() - RemovedElements);
}
InstRef Scheduler::select() {
- // Find the oldest ready-to-issue instruction in the ReadyQueue.
- auto It = std::find_if(ReadyQueue.begin(), ReadyQueue.end(),
- [&](const QueueEntryTy &Entry) {
- const InstrDesc &D = Entry.second->getDesc();
- return Resources->canBeIssued(D);
- });
+ unsigned QueueIndex = ReadySet.size();
+ int Rank = std::numeric_limits<int>::max();
- if (It == ReadyQueue.end())
- return {0, nullptr};
+ for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) {
+ const InstRef &IR = ReadySet[I];
+ const unsigned IID = IR.getSourceIndex();
+ const Instruction &IS = *IR.getInstruction();
- // We want to prioritize older instructions over younger instructions to
- // minimize the pressure on the reorder buffer. We also want to
- // rank higher the instructions with more users to better expose ILP.
+ // Compute a rank value based on the age of an instruction (i.e. its source
+ // index) and its number of users. The lower the rank value, the better.
+ int CurrentRank = IID - IS.getNumUsers();
- // Compute a rank value based on the age of an instruction (i.e. its source
- // index) and its number of users. The lower the rank value, the better.
- int Rank = It->first - It->second->getNumUsers();
- for (auto I = It, E = ReadyQueue.end(); I != E; ++I) {
- int CurrentRank = I->first - I->second->getNumUsers();
- if (CurrentRank < Rank) {
- const InstrDesc &D = I->second->getDesc();
+ // We want to prioritize older instructions over younger instructions to
+ // minimize the pressure on the reorder buffer. We also want to
+ // rank higher the instructions with more users to better expose ILP.
+ if (CurrentRank == Rank)
+ if (IID > ReadySet[QueueIndex].getSourceIndex())
+ continue;
+
+ if (CurrentRank <= Rank) {
+ const InstrDesc &D = IS.getDesc();
if (Resources->canBeIssued(D)) {
Rank = CurrentRank;
- It = I;
+ QueueIndex = I;
}
}
}
+ if (QueueIndex == ReadySet.size())
+ return InstRef();
+
// We found an instruction to issue.
- InstRef IR(It->first, It->second);
- ReadyQueue.erase(It);
+
+ InstRef IR = ReadySet[QueueIndex];
+ std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]);
+ ReadySet.pop_back();
return IR;
}
void Scheduler::updatePendingQueue(SmallVectorImpl<InstRef> &Ready) {
// Notify to instructions in the pending queue that a new cycle just
// started.
- for (QueueEntryTy Entry : WaitQueue)
- Entry.second->cycleEvent();
- promoteToReadyQueue(Ready);
+ for (InstRef &Entry : WaitSet)
+ Entry.getInstruction()->cycleEvent();
+ promoteToReadySet(Ready);
}
-void Scheduler::updateIssuedQueue(SmallVectorImpl<InstRef> &Executed) {
- for (auto I = IssuedQueue.begin(), E = IssuedQueue.end(); I != E;) {
- const QueueEntryTy Entry = *I;
- Instruction *IS = Entry.second;
- IS->cycleEvent();
- if (IS->isExecuted()) {
- Executed.push_back({Entry.first, Entry.second});
- auto ToRemove = I;
- ++I;
- IssuedQueue.erase(ToRemove);
- } else {
- LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << Entry.first
+void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) {
+ unsigned RemovedElements = 0;
+ for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E; ) {
+ InstRef &IR = *I;
+ if (!IR.isValid())
+ break;
+ Instruction &IS = *IR.getInstruction();
+ IS.cycleEvent();
+ if (!IS.isExecuted()) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
<< " is still executing.\n");
++I;
+ continue;
}
+
+ Executed.emplace_back(IR);
+ ++RemovedElements;
+ IR.invalidate();
+ std::iter_swap(I, E - RemovedElements);
}
+
+ IssuedSet.resize(IssuedSet.size() - RemovedElements);
}
void Scheduler::onInstructionExecuted(const InstRef &IR) {
@@ -408,9 +425,8 @@
// If necessary, reserve queue entries in the load-store unit (LSU).
const bool Reserved = LSU->reserve(IR);
if (!IR.getInstruction()->isReady() || (Reserved && !LSU->isReady(IR))) {
- LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR
- << " to the Wait Queue\n");
- WaitQueue[IR.getSourceIndex()] = IR.getInstruction();
+ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
+ WaitSet.push_back(IR);
return false;
}
return true;
@@ -419,9 +435,8 @@
bool Scheduler::issueImmediately(InstRef &IR) {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
if (!Desc.isZeroLatency() && !Resources->mustIssueImmediately(Desc)) {
- LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR
- << " to the Ready Queue\n");
- ReadyQueue[IR.getSourceIndex()] = IR.getInstruction();
+ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n");
+ ReadySet.push_back(IR);
return false;
}
return true;
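
For reference, here is a minimal standalone sketch of the selection heuristic
that the new Scheduler::select() loop implements, with simplified, hypothetical
types (Candidate, selectIndex) standing in for the real InstRef and
ResourceManager API: the rank of a candidate is its source index minus its
number of users, a lower rank wins, and rank ties are broken in favor of the
older instruction.

#include <limits>
#include <vector>

struct Candidate {
  unsigned SourceIndex;            // Program order: lower means older.
  unsigned NumUsers;               // Number of dependent instructions.
  bool CanBeIssued;                // Stands in for Resources->canBeIssued(Desc).
};

// Return the position of the candidate to issue next, or Ready.size() if no
// candidate can be issued in the current cycle.
static unsigned selectIndex(const std::vector<Candidate> &Ready) {
  unsigned QueueIndex = Ready.size();
  int Rank = std::numeric_limits<int>::max();
  for (unsigned I = 0, E = Ready.size(); I != E; ++I) {
    const Candidate &C = Ready[I];
    // Lower rank is better: older instructions and instructions with more
    // users are preferred.
    int CurrentRank = static_cast<int>(C.SourceIndex) - static_cast<int>(C.NumUsers);
    // On a rank tie, keep the older candidate that was already selected.
    if (QueueIndex != Ready.size() && CurrentRank == Rank &&
        C.SourceIndex > Ready[QueueIndex].SourceIndex)
      continue;
    if (CurrentRank <= Rank && C.CanBeIssued) {
      Rank = CurrentRank;
      QueueIndex = I;
    }
  }
  return QueueIndex;
}

As the comments in the patch note, prioritizing older instructions keeps
pressure off the reorder buffer, while favoring instructions with more users
exposes more ILP.
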