[MCA] Fix wrong definition of ResourceUnitMask in DefaultResourceStrategy.
Field ResourceUnitMask was incorrectly defined as a 'const unsigned' mask, but
it should have been a 64-bit quantity. As a result, ResourceUnitMask was always
implicitly truncated to a 32-bit quantity.
This issue was found by inspection. Surprisingly, the bug was latent: it never
negatively affected any existing upstream target.
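This patch fixes the definition of ResourceUnitMask and adds extra debug prints
to help debug potential issues related to invalid processor resource masks.
As part of this change, computeProcResourceMasks now takes a MutableArrayRef
and asserts that the caller has already sized it to the number of processor
resource kinds, and the translation of resource masks into processor resource
IDs moves from the ResourceManager into ExecuteStage::notifyInstructionIssued.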
llvm-svn: 350820
diff --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
index d05ec00..2039b58 100644
--- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -118,7 +118,8 @@
ResourceManager::ResourceManager(const MCSchedModel &SM)
: Resources(SM.getNumProcResourceKinds()),
Strategies(SM.getNumProcResourceKinds()),
- Resource2Groups(SM.getNumProcResourceKinds(), 0) {
+ Resource2Groups(SM.getNumProcResourceKinds(), 0),
+ ProcResID2Mask(SM.getNumProcResourceKinds()) {
computeProcResourceMasks(SM, ProcResID2Mask);
for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
@@ -283,9 +284,6 @@
ResourceRef Pipe = selectPipe(R.first);
use(Pipe);
BusyResources[Pipe] += CS.size();
- // Replace the resource mask with a valid processor resource index.
- const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)];
- Pipe.first = RS.getProcResourceID();
Pipes.emplace_back(std::pair<ResourceRef, ResourceCycles>(
Pipe, ResourceCycles(CS.size())));
} else {
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 2cfe154..d2d65e5 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -31,6 +31,8 @@
const llvm::MCInstrAnalysis *mcia)
: STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
FirstReturnInst(true) {
+ const MCSchedModel &SM = STI.getSchedModel();
+ ProcResourceMasks.resize(SM.getNumProcResourceKinds());
computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}
@@ -178,8 +180,8 @@
LLVM_DEBUG({
for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
- dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", " <<
- "cy=" << R.second.size() << '\n';
+ dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", "
+ << "cy=" << R.second.size() << '\n';
for (const uint64_t R : ID.Buffers)
dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n';
});
@@ -525,6 +527,9 @@
MCI);
}
+ LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
+
// Create a new empty descriptor.
std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
ID->NumMicroOps = SCDesc.NumMicroOps;
@@ -559,9 +564,6 @@
populateWrites(*ID, MCI, SchedClassID);
populateReads(*ID, MCI, SchedClassID);
-#ifndef NDEBUG
- ID->Name = MCII.getName(Opcode);
-#endif
LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
diff --git a/llvm/lib/MCA/Pipeline.cpp b/llvm/lib/MCA/Pipeline.cpp
index fd97ea6..4c0e37c 100644
--- a/llvm/lib/MCA/Pipeline.cpp
+++ b/llvm/lib/MCA/Pipeline.cpp
@@ -83,13 +83,13 @@
}
void Pipeline::notifyCycleBegin() {
- LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n');
+ LLVM_DEBUG(dbgs() << "\n[E] Cycle begin: " << Cycles << '\n');
for (HWEventListener *Listener : Listeners)
Listener->onCycleBegin();
}
void Pipeline::notifyCycleEnd() {
- LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n");
+ LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n");
for (HWEventListener *Listener : Listeners)
Listener->onCycleEnd();
}
diff --git a/llvm/lib/MCA/Stages/ExecuteStage.cpp b/llvm/lib/MCA/Stages/ExecuteStage.cpp
index 17f7ff7..e783277 100644
--- a/llvm/lib/MCA/Stages/ExecuteStage.cpp
+++ b/llvm/lib/MCA/Stages/ExecuteStage.cpp
@@ -57,6 +57,7 @@
HWS.issueInstruction(IR, Used, Ready);
notifyReservedOrReleasedBuffers(IR, /* Reserved */ false);
+
notifyInstructionIssued(IR, Used);
if (IR.getInstruction()->isExecuted()) {
notifyInstructionExecuted(IR);
@@ -184,7 +185,7 @@
void ExecuteStage::notifyInstructionIssued(
const InstRef &IR,
- ArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const {
+ MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const {
LLVM_DEBUG({
dbgs() << "[E] Instruction Issued: #" << IR << '\n';
for (const std::pair<ResourceRef, ResourceCycles> &Resource : Used) {
@@ -193,6 +194,11 @@
dbgs() << "cycles: " << Resource.second << '\n';
}
});
+
+ // Replace resource masks with valid resource processor IDs.
+ for (std::pair<ResourceRef, ResourceCycles> &Use : Used)
+ Use.first.first = HWS.getResourceID(Use.first.first);
+
notifyEvent<HWInstructionEvent>(HWInstructionIssuedEvent(IR, Used));
}
diff --git a/llvm/lib/MCA/Support.cpp b/llvm/lib/MCA/Support.cpp
index 3271bc6..335953e 100644
--- a/llvm/lib/MCA/Support.cpp
+++ b/llvm/lib/MCA/Support.cpp
@@ -19,13 +19,18 @@
namespace llvm {
namespace mca {
+#define DEBUG_TYPE "llvm-mca"
+
void computeProcResourceMasks(const MCSchedModel &SM,
- SmallVectorImpl<uint64_t> &Masks) {
+ MutableArrayRef<uint64_t> Masks) {
unsigned ProcResourceID = 0;
+ assert(Masks.size() == SM.getNumProcResourceKinds() &&
+ "Invalid number of elements");
+ // Resource at index 0 is the 'InvalidUnit'. Set an invalid mask for it.
+ Masks[0] = 0;
+
// Create a unique bitmask for every processor resource unit.
- // Skip resource at index 0, since it always references 'InvalidUnit'.
- Masks.resize(SM.getNumProcResourceKinds());
for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
const MCProcResourceDesc &Desc = *SM.getProcResource(I);
if (Desc.SubUnitsIdxBegin)
@@ -46,6 +51,16 @@
}
ProcResourceID++;
}
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "\nProcessor resource masks:"
+ << "\n");
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ LLVM_DEBUG(dbgs() << '[' << I << "] " << Desc.Name << " - " << Masks[I]
+ << '\n');
+ }
+#endif
}
double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,