[DFAPacketizer] Allow up to 64 functional units
Summary:
To drive the automaton we used a uint64_t as an action type. This
contained the transition's resource requirements as a conjunction:
(a OR b) AND (b OR c)
We encoded this conjunction as a sequence of four 16-bit bitmasks.
This limited the number of addressable functional units to 16, which
is quite low and has bitten many people in the past.
Instead, the DFAEmitter now generates a lookup table from InstrItinerary
class (index of the ItinData inside the ProcItineraries) to an internal
action index which is essentially a dense embedding of the conjunctive
form. Because we never materialize the conjunctive form, we no longer
have the 16 FU restriction.
In this patch we limit to 64 functional units due to using a uint64_t
bitmask in the DFAEmitter. Now that we've decoupled these representations
we can increase this in future.
Reviewers: ThomasRaoux, kparzysz, majnemer
Reviewed By: ThomasRaoux
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69110
diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
index ccb4ef1..018bda1 100644
--- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -16,15 +16,16 @@
#define DEBUG_TYPE "dfa-emitter"
+#include "CodeGenSchedule.h"
#include "CodeGenTarget.h"
#include "DFAEmitter.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
#include <cassert>
#include <cstdint>
#include <map>
@@ -35,216 +36,101 @@
using namespace llvm;
-// --------------------------------------------------------------------
-// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
-
-// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput.
-// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer.
-//
-// e.g. terms x resource bit combinations that fit in uint32_t:
-// 4 terms x 8 bits = 32 bits
-// 3 terms x 10 bits = 30 bits
-// 2 terms x 16 bits = 32 bits
-//
-// e.g. terms x resource bit combinations that fit in uint64_t:
-// 8 terms x 8 bits = 64 bits
-// 7 terms x 9 bits = 63 bits
-// 6 terms x 10 bits = 60 bits
-// 5 terms x 12 bits = 60 bits
-// 4 terms x 16 bits = 64 bits <--- current
-// 3 terms x 21 bits = 63 bits
-// 2 terms x 32 bits = 64 bits
-//
-#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms.
-#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term.
-
-typedef uint64_t DFAInput;
-typedef int64_t DFAStateInput;
-#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable.
+// We use a uint64_t to represent a resource bitmask.
+#define DFA_MAX_RESOURCES 64
namespace {
+using ResourceVector = SmallVector<uint64_t, 4>;
- DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
- return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
- }
+struct ScheduleClass {
+ /// The parent itinerary index (processor model ID).
+ unsigned ItineraryID;
- /// Return the DFAInput for an instruction class input vector.
- /// This function is used in both DFAPacketizer.cpp and in
- /// DFAPacketizerEmitter.cpp.
- DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
- DFAInput InsnInput = 0;
- assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
- "Exceeded maximum number of DFA terms");
- for (auto U : InsnClass)
- InsnInput = addDFAFuncUnits(InsnInput, U);
- return InsnInput;
- }
+ /// Index within this itinerary of the schedule class.
+ unsigned Idx;
-} // end anonymous namespace
+ /// The index within the uniqued set of required resources of Resources.
+ unsigned ResourcesIdx;
-// --------------------------------------------------------------------
+ /// Conjunctive list of resource requirements:
+ /// {a|b, b|c} => (a OR b) AND (b or c).
+ /// Resources are unique across all itineraries.
+ ResourceVector Resources;
+};
-#ifndef NDEBUG
-// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
-//
-// dbgsInsnClass - When debugging, print instruction class stages.
-//
-void dbgsInsnClass(const std::vector<unsigned> &InsnClass);
-//
-// dbgsStateInfo - When debugging, print the set of state info.
-//
-void dbgsStateInfo(const std::set<unsigned> &stateInfo);
-//
-// dbgsIndent - When debugging, indent by the specified amount.
-//
-void dbgsIndent(unsigned indent);
-#endif
-
-//
-// class DFAPacketizerEmitter: class that generates and prints out the DFA
-// for resource tracking.
-//
-namespace {
-
+// Generates and prints out the DFA for resource tracking.
class DFAPacketizerEmitter {
private:
std::string TargetName;
- //
- // allInsnClasses is the set of all possible resources consumed by an
- // InstrStage.
- //
- std::vector<std::vector<unsigned>> allInsnClasses;
RecordKeeper &Records;
+ UniqueVector<ResourceVector> UniqueResources;
+ std::vector<ScheduleClass> ScheduleClasses;
+ std::map<std::string, uint64_t> FUNameToBitsMap;
+ std::map<unsigned, uint64_t> ComboBitToBitsMap;
+
public:
DFAPacketizerEmitter(RecordKeeper &R);
- //
- // collectAllFuncUnits - Construct a map of function unit names to bits.
- //
- int collectAllFuncUnits(std::vector<Record*> &ProcItinList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- int &maxResources,
- raw_ostream &OS);
+ // Construct a map of function unit names to bits.
+ int collectAllFuncUnits(
+ ArrayRef<const CodeGenProcModel *> ProcModels);
- //
- // collectAllComboFuncs - Construct a map from a combo function unit bit to
- // the bits of all included functional units.
- //
- int collectAllComboFuncs(std::vector<Record*> &ComboFuncList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- std::map<unsigned, unsigned> &ComboBitToBitsMap,
- raw_ostream &OS);
+ // Construct a map from a combo function unit bit to the bits of all included
+ // functional units.
+ int collectAllComboFuncs(ArrayRef<Record *> ComboFuncList);
- //
- // collectOneInsnClass - Populate allInsnClasses with one instruction class.
- //
- int collectOneInsnClass(const std::string &ProcName,
- std::vector<Record*> &ProcItinList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- Record *ItinData,
- raw_ostream &OS);
-
- //
- // collectAllInsnClasses - Populate allInsnClasses which is a set of units
- // used in each stage.
- //
- int collectAllInsnClasses(const std::string &ProcName,
- std::vector<Record*> &ProcItinList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- std::vector<Record*> &ItinDataList,
- int &maxStages,
- raw_ostream &OS);
+ ResourceVector getResourcesForItinerary(Record *Itinerary);
+ void createScheduleClasses(unsigned ItineraryIdx, const RecVec &Itineraries);
// Emit code for a subset of itineraries.
void emitForItineraries(raw_ostream &OS,
- std::vector<Record *> &ProcItinList,
+ std::vector<const CodeGenProcModel *> &ProcItinList,
std::string DFAName);
void run(raw_ostream &OS);
};
} // end anonymous namespace
-#ifndef NDEBUG
-// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
-//
-// dbgsInsnClass - When debugging, print instruction class stages.
-//
-void dbgsInsnClass(const std::vector<unsigned> &InsnClass) {
- LLVM_DEBUG(dbgs() << "InsnClass: ");
- for (unsigned i = 0; i < InsnClass.size(); ++i) {
- if (i > 0) {
- LLVM_DEBUG(dbgs() << ", ");
- }
- LLVM_DEBUG(dbgs() << "0x" << Twine::utohexstr(InsnClass[i]));
- }
- DFAInput InsnInput = getDFAInsnInput(InsnClass);
- LLVM_DEBUG(dbgs() << " (input: 0x" << Twine::utohexstr(InsnInput) << ")");
-}
+DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R)
+ : TargetName(CodeGenTarget(R).getName()), Records(R) {}
-//
-// dbgsIndent - When debugging, indent by the specified amount.
-//
-void dbgsIndent(unsigned indent) {
- for (unsigned i = 0; i < indent; ++i) {
- LLVM_DEBUG(dbgs() << " ");
- }
-}
-#endif // NDEBUG
-
-DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
- TargetName(CodeGenTarget(R).getName()), Records(R) {}
-
-//
-// collectAllFuncUnits - Construct a map of function unit names to bits.
-//
int DFAPacketizerEmitter::collectAllFuncUnits(
- std::vector<Record*> &ProcItinList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- int &maxFUs,
- raw_ostream &OS) {
+ ArrayRef<const CodeGenProcModel *> ProcModels) {
LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
"----------------------\n");
LLVM_DEBUG(dbgs() << "collectAllFuncUnits");
- LLVM_DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n");
+ LLVM_DEBUG(dbgs() << " (" << ProcModels.size() << " itineraries)\n");
+
+ std::set<Record *> ProcItinList;
+ for (const CodeGenProcModel *Model : ProcModels)
+ ProcItinList.insert(Model->ItinsDef);
int totalFUs = 0;
// Parse functional units for all the itineraries.
- for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) {
- Record *Proc = ProcItinList[i];
- std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
+ for (Record *Proc : ProcItinList) {
+ std::vector<Record *> FUs = Proc->getValueAsListOfDefs("FU");
- LLVM_DEBUG(dbgs() << " FU:" << i << " (" << FUs.size() << " FUs) "
- << Proc->getName());
+ LLVM_DEBUG(dbgs() << " FU:"
+ << " (" << FUs.size() << " FUs) " << Proc->getName());
// Convert macros to bits for each stage.
unsigned numFUs = FUs.size();
for (unsigned j = 0; j < numFUs; ++j) {
- assert ((j < DFA_MAX_RESOURCES) &&
- "Exceeded maximum number of representable resources");
- unsigned FuncResources = (unsigned) (1U << j);
+ assert((j < DFA_MAX_RESOURCES) &&
+ "Exceeded maximum number of representable resources");
+ uint64_t FuncResources = 1ULL << j;
FUNameToBitsMap[FUs[j]->getName()] = FuncResources;
LLVM_DEBUG(dbgs() << " " << FUs[j]->getName() << ":0x"
<< Twine::utohexstr(FuncResources));
}
- if (((int) numFUs) > maxFUs) {
- maxFUs = numFUs;
- }
totalFUs += numFUs;
LLVM_DEBUG(dbgs() << "\n");
}
return totalFUs;
}
-//
-// collectAllComboFuncs - Construct a map from a combo function unit bit to
-// the bits of all included functional units.
-//
-int DFAPacketizerEmitter::collectAllComboFuncs(
- std::vector<Record*> &ComboFuncList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- std::map<unsigned, unsigned> &ComboBitToBitsMap,
- raw_ostream &OS) {
+int DFAPacketizerEmitter::collectAllComboFuncs(ArrayRef<Record *> ComboFuncList) {
LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
"----------------------\n");
LLVM_DEBUG(dbgs() << "collectAllComboFuncs");
@@ -253,27 +139,27 @@
int numCombos = 0;
for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) {
Record *Func = ComboFuncList[i];
- std::vector<Record*> FUs = Func->getValueAsListOfDefs("CFD");
+ std::vector<Record *> FUs = Func->getValueAsListOfDefs("CFD");
LLVM_DEBUG(dbgs() << " CFD:" << i << " (" << FUs.size() << " combo FUs) "
<< Func->getName() << "\n");
// Convert macros to bits for each stage.
for (unsigned j = 0, N = FUs.size(); j < N; ++j) {
- assert ((j < DFA_MAX_RESOURCES) &&
- "Exceeded maximum number of DFA resources");
+ assert((j < DFA_MAX_RESOURCES) &&
+ "Exceeded maximum number of DFA resources");
Record *FuncData = FUs[j];
Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc");
- const std::vector<Record*> &FuncList =
- FuncData->getValueAsListOfDefs("FuncList");
+ const std::vector<Record *> &FuncList =
+ FuncData->getValueAsListOfDefs("FuncList");
const std::string &ComboFuncName = ComboFunc->getName();
- unsigned ComboBit = FUNameToBitsMap[ComboFuncName];
- unsigned ComboResources = ComboBit;
+ uint64_t ComboBit = FUNameToBitsMap[ComboFuncName];
+ uint64_t ComboResources = ComboBit;
LLVM_DEBUG(dbgs() << " combo: " << ComboFuncName << ":0x"
<< Twine::utohexstr(ComboResources) << "\n");
for (unsigned k = 0, M = FuncList.size(); k < M; ++k) {
std::string FuncName = FuncList[k]->getName();
- unsigned FuncResources = FUNameToBitsMap[FuncName];
+ uint64_t FuncResources = FUNameToBitsMap[FuncName];
LLVM_DEBUG(dbgs() << " " << FuncName << ":0x"
<< Twine::utohexstr(FuncResources) << "\n");
ComboResources |= FuncResources;
@@ -288,101 +174,33 @@
return numCombos;
}
-//
-// collectOneInsnClass - Populate allInsnClasses with one instruction class
-//
-int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
- std::vector<Record*> &ProcItinList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- Record *ItinData,
- raw_ostream &OS) {
- const std::vector<Record*> &StageList =
- ItinData->getValueAsListOfDefs("Stages");
-
- // The number of stages.
- unsigned NStages = StageList.size();
-
- LLVM_DEBUG(dbgs() << " " << ItinData->getValueAsDef("TheClass")->getName()
- << "\n");
-
- std::vector<unsigned> UnitBits;
-
- // Compute the bitwise or of each unit used in this stage.
- for (unsigned i = 0; i < NStages; ++i) {
- const Record *Stage = StageList[i];
-
- // Get unit list.
- const std::vector<Record*> &UnitList =
- Stage->getValueAsListOfDefs("Units");
-
- LLVM_DEBUG(dbgs() << " stage:" << i << " [" << UnitList.size()
- << " units]:");
- unsigned dbglen = 26; // cursor after stage dbgs
-
- // Compute the bitwise or of each unit used in this stage.
- unsigned UnitBitValue = 0;
- for (unsigned j = 0, M = UnitList.size(); j < M; ++j) {
- // Conduct bitwise or.
- std::string UnitName = UnitList[j]->getName();
- LLVM_DEBUG(dbgs() << " " << j << ":" << UnitName);
- dbglen += 3 + UnitName.length();
- assert(FUNameToBitsMap.count(UnitName));
- UnitBitValue |= FUNameToBitsMap[UnitName];
+ResourceVector
+DFAPacketizerEmitter::getResourcesForItinerary(Record *Itinerary) {
+ ResourceVector Resources;
+ assert(Itinerary);
+ for (Record *StageDef : Itinerary->getValueAsListOfDefs("Stages")) {
+ uint64_t StageResources = 0;
+ for (Record *Unit : StageDef->getValueAsListOfDefs("Units")) {
+ StageResources |= FUNameToBitsMap[Unit->getName()];
}
-
- if (UnitBitValue != 0)
- UnitBits.push_back(UnitBitValue);
-
- while (dbglen <= 64) { // line up bits dbgs
- dbglen += 8;
- LLVM_DEBUG(dbgs() << "\t");
- }
- LLVM_DEBUG(dbgs() << " (bits: 0x" << Twine::utohexstr(UnitBitValue)
- << ")\n");
+ if (StageResources != 0)
+ Resources.push_back(StageResources);
}
-
- if (!UnitBits.empty())
- allInsnClasses.push_back(UnitBits);
-
- LLVM_DEBUG({
- dbgs() << " ";
- dbgsInsnClass(UnitBits);
- dbgs() << "\n";
- });
-
- return NStages;
+ return Resources;
}
-//
-// collectAllInsnClasses - Populate allInsnClasses which is a set of units
-// used in each stage.
-//
-int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName,
- std::vector<Record*> &ProcItinList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- std::vector<Record*> &ItinDataList,
- int &maxStages,
- raw_ostream &OS) {
- // Collect all instruction classes.
- unsigned M = ItinDataList.size();
-
- int numInsnClasses = 0;
- LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
- "----------------------\n"
- << "collectAllInsnClasses " << ProcName << " (" << M
- << " classes)\n");
-
- // Collect stages for each instruction class for all itinerary data
- for (unsigned j = 0; j < M; j++) {
- Record *ItinData = ItinDataList[j];
- int NStages = collectOneInsnClass(ProcName, ProcItinList,
- FUNameToBitsMap, ItinData, OS);
- if (NStages > maxStages) {
- maxStages = NStages;
+void DFAPacketizerEmitter::createScheduleClasses(unsigned ItineraryIdx,
+ const RecVec &Itineraries) {
+ unsigned Idx = 0;
+ for (Record *Itinerary : Itineraries) {
+ if (!Itinerary) {
+ ScheduleClasses.push_back({ItineraryIdx, Idx++, 0, ResourceVector{}});
+ continue;
}
- numInsnClasses++;
+ ResourceVector Resources = getResourcesForItinerary(Itinerary);
+ ScheduleClasses.push_back(
+ {ItineraryIdx, Idx++, UniqueResources.insert(Resources), Resources});
}
- return numInsnClasses;
}
//
@@ -393,19 +211,17 @@
<< "#include \"llvm/CodeGen/DFAPacketizer.h\"\n";
OS << "namespace llvm {\n";
- OS << "\n// Input format:\n";
- OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS
- << "\t// maximum AND'ed resource terms\n";
- OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES
- << "\t// maximum resource bits in one term\n";
+ CodeGenTarget CGT(Records);
+ CodeGenSchedModels CGS(Records, CGT);
- // Collect processor iteraries.
- std::vector<Record*> ProcItinList =
- Records.getAllDerivedDefinitions("ProcessorItineraries");
-
- std::unordered_map<std::string, std::vector<Record*>> ItinsByNamespace;
- for (Record *R : ProcItinList)
- ItinsByNamespace[R->getValueAsString("PacketizerNamespace")].push_back(R);
+ std::unordered_map<std::string, std::vector<const CodeGenProcModel *>>
+ ItinsByNamespace;
+ for (const CodeGenProcModel &ProcModel : CGS.procModels()) {
+ if (ProcModel.hasItineraries()) {
+ auto NS = ProcModel.ItinsDef->getValueAsString("PacketizerNamespace");
+ ItinsByNamespace[NS].push_back(&ProcModel);
+ }
+ }
for (auto &KV : ItinsByNamespace)
emitForItineraries(OS, KV.second, KV.first);
@@ -413,80 +229,68 @@
}
void DFAPacketizerEmitter::emitForItineraries(
- raw_ostream &OS, std::vector<Record *> &ProcItinList,
+ raw_ostream &OS, std::vector<const CodeGenProcModel *> &ProcModels,
std::string DFAName) {
- //
- // Collect the Functional units.
- //
- std::map<std::string, unsigned> FUNameToBitsMap;
- int maxResources = 0;
- collectAllFuncUnits(ProcItinList,
- FUNameToBitsMap, maxResources, OS);
+ OS << "} // end namespace llvm\n\n";
+ OS << "namespace {\n";
+ collectAllFuncUnits(ProcModels);
+ collectAllComboFuncs(Records.getAllDerivedDefinitions("ComboFuncUnits"));
- //
- // Collect the Combo Functional units.
- //
- std::map<unsigned, unsigned> ComboBitToBitsMap;
- std::vector<Record*> ComboFuncList =
- Records.getAllDerivedDefinitions("ComboFuncUnits");
- collectAllComboFuncs(ComboFuncList, FUNameToBitsMap, ComboBitToBitsMap, OS);
-
- //
// Collect the itineraries.
- //
- int maxStages = 0;
- int numInsnClasses = 0;
- for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
- Record *Proc = ProcItinList[i];
-
- // Get processor itinerary name.
- const std::string &ProcName = Proc->getName();
-
- // Skip default.
- if (ProcName == "NoItineraries")
- continue;
-
- // Sanity check for at least one instruction itinerary class.
- unsigned NItinClasses =
- Records.getAllDerivedDefinitions("InstrItinClass").size();
- if (NItinClasses == 0)
- return;
-
- // Get itinerary data list.
- std::vector<Record*> ItinDataList = Proc->getValueAsListOfDefs("IID");
-
- // Collect all instruction classes
- numInsnClasses += collectAllInsnClasses(ProcName, ProcItinList,
- FUNameToBitsMap, ItinDataList, maxStages, OS);
+ DenseMap<const CodeGenProcModel *, unsigned> ProcModelStartIdx;
+ for (const CodeGenProcModel *Model : ProcModels) {
+ assert(Model->hasItineraries());
+ ProcModelStartIdx[Model] = ScheduleClasses.size();
+ createScheduleClasses(Model->Index, Model->ItinDefList);
}
+ // Output the mapping from ScheduleClass to ResourcesIdx.
+ unsigned Idx = 0;
+ OS << "unsigned " << TargetName << DFAName << "ResourceIndices[] = {";
+ for (const ScheduleClass &SC : ScheduleClasses) {
+ if (Idx++ % 32 == 0)
+ OS << "\n ";
+ OS << SC.ResourcesIdx << ", ";
+ }
+ OS << "\n};\n\n";
+
+ // And the mapping from Itinerary index into the previous table.
+ OS << "unsigned " << TargetName << DFAName
+ << "ProcResourceIndexStart[] = {\n";
+ OS << " 0, // NoSchedModel\n";
+ for (const CodeGenProcModel *Model : ProcModels) {
+ OS << " " << ProcModelStartIdx[Model] << ", // " << Model->ModelName
+ << "\n";
+ }
+ OS << ScheduleClasses.size() << "\n};\n\n";
+
// The type of a state in the nondeterministic automaton we're defining.
- using NfaStateTy = unsigned;
+ using NfaStateTy = uint64_t;
// Given a resource state, return all resource states by applying
// InsnClass.
- auto applyInsnClass = [&](ArrayRef<unsigned> InsnClass,
- NfaStateTy State) -> std::deque<unsigned> {
- std::deque<unsigned> V(1, State);
+ auto applyInsnClass = [&](const ResourceVector &InsnClass,
+ NfaStateTy State) -> std::deque<NfaStateTy> {
+ std::deque<NfaStateTy> V(1, State);
// Apply every stage in the class individually.
- for (unsigned Stage : InsnClass) {
+ for (NfaStateTy Stage : InsnClass) {
// Apply this stage to every existing member of V in turn.
size_t Sz = V.size();
for (unsigned I = 0; I < Sz; ++I) {
- unsigned S = V.front();
+ NfaStateTy S = V.front();
V.pop_front();
// For this stage, state combination, try all possible resources.
for (unsigned J = 0; J < DFA_MAX_RESOURCES; ++J) {
- unsigned ResourceMask = 1U << J;
+ NfaStateTy ResourceMask = 1ULL << J;
if ((ResourceMask & Stage) == 0)
// This resource isn't required by this stage.
continue;
- unsigned Combo = ComboBitToBitsMap[ResourceMask];
+ NfaStateTy Combo = ComboBitToBitsMap[ResourceMask];
if (Combo && ((~S & Combo) != Combo))
// This combo units bits are not available.
continue;
- unsigned ResultingResourceState = S | ResourceMask | Combo;
+ NfaStateTy ResultingResourceState = S | ResourceMask | Combo;
if (ResultingResourceState == S)
continue;
V.push_back(ResultingResourceState);
@@ -499,9 +303,9 @@
// Given a resource state, return a quick (conservative) guess as to whether
// InsnClass can be applied. This is a filter for the more heavyweight
// applyInsnClass.
- auto canApplyInsnClass = [](ArrayRef<unsigned> InsnClass,
+ auto canApplyInsnClass = [](const ResourceVector &InsnClass,
NfaStateTy State) -> bool {
- for (unsigned Resources : InsnClass) {
+ for (NfaStateTy Resources : InsnClass) {
if ((State | Resources) == State)
return false;
}
@@ -515,20 +319,18 @@
while (!Worklist.empty()) {
NfaStateTy State = Worklist.front();
Worklist.pop_front();
- for (unsigned i = 0; i < allInsnClasses.size(); i++) {
- const std::vector<unsigned> &InsnClass = allInsnClasses[i];
- if (!canApplyInsnClass(InsnClass, State))
+ for (const ResourceVector &Resources : UniqueResources) {
+ if (!canApplyInsnClass(Resources, State))
continue;
- for (unsigned NewState : applyInsnClass(InsnClass, State)) {
+ unsigned ResourcesID = UniqueResources.idFor(Resources);
+ for (uint64_t NewState : applyInsnClass(Resources, State)) {
if (SeenStates.emplace(NewState).second)
Worklist.emplace_back(NewState);
- Emitter.addTransition(State, NewState, getDFAInsnInput(InsnClass));
+ Emitter.addTransition(State, NewState, ResourcesID);
}
}
}
- OS << "} // end namespace llvm\n\n";
- OS << "namespace {\n";
std::string TargetAndDFAName = TargetName + DFAName;
Emitter.emit(TargetAndDFAName, OS);
OS << "} // end anonymous namespace\n\n";
@@ -541,7 +343,13 @@
<< " static Automaton<uint64_t> A(ArrayRef<" << TargetAndDFAName
<< "Transition>(" << TargetAndDFAName << "Transitions), "
<< TargetAndDFAName << "TransitionInfo);\n"
- << " return new DFAPacketizer(IID, A);\n"
+ << " unsigned ProcResIdxStart = " << TargetAndDFAName
+ << "ProcResourceIndexStart[IID->SchedModel.ProcID];\n"
+ << " unsigned ProcResIdxNum = " << TargetAndDFAName
+ << "ProcResourceIndexStart[IID->SchedModel.ProcID + 1] - "
+ "ProcResIdxStart;\n"
+ << " return new DFAPacketizer(IID, A, {&" << TargetAndDFAName
+ << "ResourceIndices[ProcResIdxStart], ProcResIdxNum});\n"
<< "\n}\n\n";
}