Blame - llvm/tools/llvm-mca/Scheduler.h - toolchain/llvm-project

blob: 2b58437404cac6255f19f631c078f773ab8d1138 [file] [log] [blame]

Andrea Di Biagio	3a6b092	2018-03-08 13:05:02 +0000	[diff] [blame^]	1	//===--------------------- Scheduler.h ------------------------- C++ --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	/// \file
				10	///
				11	/// A scheduler for Processor Resource Units and Processor Resource Groups.
				12	///
				13	//===----------------------------------------------------------------------===//
				14
				15	#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
				16	#define LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
				17
				18	#include "Instruction.h"
				19	#include "LSUnit.h"
				20	#include "llvm/ADT/DenseMap.h"
				21	#include "llvm/MC/MCSubtargetInfo.h"
				22	#include <map>
				23
				24	namespace mca {
				25
				26	class Backend;
				27	/// Used to notify the internal state of a processor resource.
				28	///
				29	/// A processor resource is available if it is not reserved, and there are
				30	/// available slots in the buffer. A processor resource is unavailable if it
				31	/// is either reserved, or the associated buffer is full. A processor resource
				32	/// with a buffer size of -1 is always available if it is not reserved.
				33	///
				34	/// Values of type ResourceStateEvent are returned by method
				35	/// ResourceState::isBufferAvailable(), which is used to query the internal
				36	/// state of a resource.
				37	///
				38	/// The naming convention for resource state events is:
				39	/// * Event names start with prefix RS_
				40	/// * Prefix RS_ is followed by a string describing the actual resource state.
				41	enum ResourceStateEvent {
				42	RS_BUFFER_AVAILABLE,
				43	RS_BUFFER_UNAVAILABLE,
				44	RS_RESERVED
				45	};
				46
				47	/// \brief A descriptor for processor resources.
				48	///
				49	/// Each object of class ResourceState is associated to a specific processor
				50	/// resource. There is an instance of this class for every processor resource
				51	/// defined by the scheduling model.
				52	/// A ResourceState dynamically tracks the availability of units of a processor
				53	/// resource. For example, the ResourceState of a ProcResGroup tracks the
				54	/// availability of resource units which are part of the group.
				55	///
				56	/// Internally, ResourceState uses a round-robin selector to identify
				57	/// which unit of the group shall be used next.
				58	class ResourceState {
				59	// A resource unique identifier generated by the tool.
				60	// For processor resource groups, the number of number of bits set in this
				61	// mask is equivalent to the cardinality of the group plus one.
				62	// Excluding the most significant bit, the remaining bits in the resource mask
				63	// identify resources that are part of the group.
				64	//
				65	// Example, lets assume that this ResourceState describes a
				66	// group containing ResourceA and ResourceB:
				67	//
				68	// ResourceA -- Mask: 0b001
				69	// ResourceB -- Mask: 0b010
				70	// ResourceAB -- Mask: 0b100 U (ResourceA::Mask \| ResourceB::Mask) == 0b111
				71	//
				72	// There is only one bit set for non-group resources.
				73	// A ResourceMask can be used to solve set membership problems with simple bit
				74	// manipulation operations.
				75	uint64_t ResourceMask;
				76
				77	// A ProcResource can specify a number of units. For the purpose of dynamic
				78	// scheduling, a processor resource with more than one unit behaves like a
				79	// group. This field has one bit set for every unit/resource that is part of
				80	// the group.
				81	// For groups, this field defaults to 'ResourceMask'. For non-group
				82	// resources, the number of bits set in this mask is equivalent to the
				83	// number of units (i.e. field 'NumUnits' in 'ProcResourceUnits').
				84	uint64_t ResourceSizeMask;
				85
				86	// A simple round-robin selector for processor resources.
				87	// Each bit of the mask identifies a sub resource within this group.
				88	//
				89	// As an example, lets assume that this ResourceState describes a
				90	// processor resource group composed of the following three units:
				91	// ResourceA -- 0b001
				92	// ResourceB -- 0b010
				93	// ResourceC -- 0b100
				94	//
				95	// Each unit is identified by a ResourceMask which always contains a
				96	// single bit set. Field NextInSequenceMask is initially set to value
				97	// 0xb111. That value is obtained by OR'ing the resource masks of
				98	// processor resource that are part of the group.
				99	//
				100	// NextInSequenceMask -- 0b111
				101	//
				102	// Field NextInSequenceMask is used by the resource manager (i.e.
				103	// an object of class ResourceManager) to select the "next available resource"
				104	// from the set. The algorithm would prioritize resources with a bigger
				105	// ResourceMask value.
				106	//
				107	// In this example, there are three resources in the set, and 'ResourceC'
				108	// has the highest mask value. The round-robin selector would firstly select
				109	// 'ResourceC', then 'ResourceB', and eventually 'ResourceA'.
				110	//
				111	// When a resource R is used, its corresponding bit is cleared from the set.
				112	//
				113	// Back to the example:
				114	// If 'ResourceC' is selected, then the new value of NextInSequenceMask
				115	// becomes 0xb011.
				116	//
				117	// When NextInSequenceMask becomes zero, it is reset to its original value
				118	// (in this example, that value would be 0b111).
				119	uint64_t NextInSequenceMask;
				120
				121	// Some instructions can only be issued on very specific pipeline resources.
				122	// For those instructions, we know exactly which resource would be consumed
				123	// without having to dynamically select it using field 'NextInSequenceMask'.
				124	//
				125	// The resource mask bit associated to the (statically) selected
				126	// processor resource is still cleared from the 'NextInSequenceMask'.
				127	// If that bit was already zero in NextInSequenceMask, then we update
				128	// mask 'RemovedFromNextInSequence'.
				129	//
				130	// When NextInSequenceMask is reset back to its initial value, the algorithm
				131	// removes any bits which are set in RemoveFromNextInSequence.
				132	uint64_t RemovedFromNextInSequence;
				133
				134	// A mask of ready units.
				135	uint64_t ReadyMask;
				136
				137	// Buffered resources will have this field set to a positive number bigger
				138	// than 0. A buffered resource behaves like a separate reservation station
				139	// implementing its own buffer for out-of-order execution.
				140	// A buffer of 1 is for units that force in-order execution.
				141	// A value of 0 is treated specially. In particular, a resource with
				142	// A BufferSize = 0 is for an in-order issue/dispatch resource.
				143	// That means, this resource is reserved starting from the dispatch event,
				144	// until all the "resource cycles" are consumed after the issue event.
				145	// While this resource is reserved, no other instruction may be dispatched.
				146	int BufferSize;
				147
				148	// Available slots in the buffer (zero, if this is not a buffered resource).
				149	unsigned AvailableSlots;
				150
				151	// Maximum number of buffer slots seen used during one cycle.
				152	// This helps tracking dynamic dispatch stalls caused by the lack of
				153	// entries in the scheduler's queue.
				154	unsigned MaxUsedSlots;
				155
				156	// True if this is resource is currently unavailable.
				157	// An instruction may "reserve" a resource for a number of cycles.
				158	// During those cycles, the reserved resource cannot be used for other
				159	// instructions, even if the ReadyMask is set.
				160	bool Unavailable;
				161
				162	bool isSubResourceReady(uint64_t ID) const { return ReadyMask & ID; }
				163
				164	/// Returns the mask identifier of the next available resource in the set.
				165	uint64_t getNextInSequence() const {
				166	assert(NextInSequenceMask);
				167	return llvm::PowerOf2Floor(NextInSequenceMask);
				168	}
				169
				170	/// Returns the mask of the next available resource within the set,
				171	/// and updates the resource selector.
				172	void updateNextInSequence() {
				173	NextInSequenceMask ^= getNextInSequence();
				174	if (!NextInSequenceMask)
				175	NextInSequenceMask = ResourceSizeMask;
				176	}
				177
				178	uint64_t computeResourceSizeMaskForGroup(uint64_t ResourceMask) {
				179	assert(llvm::countPopulation(ResourceMask) > 1);
				180	return ResourceMask ^ llvm::PowerOf2Floor(ResourceMask);
				181	}
				182
				183	public:
				184	ResourceState(const llvm::MCProcResourceDesc &Desc, uint64_t Mask)
				185	: ResourceMask(Mask) {
				186	bool IsAGroup = llvm::countPopulation(ResourceMask) > 1;
				187	ResourceSizeMask = IsAGroup ? computeResourceSizeMaskForGroup(ResourceMask)
				188	: ((1ULL << Desc.NumUnits) - 1);
				189	NextInSequenceMask = ResourceSizeMask;
				190	RemovedFromNextInSequence = 0;
				191	ReadyMask = ResourceSizeMask;
				192	BufferSize = Desc.BufferSize;
				193	AvailableSlots = BufferSize == -1 ? 0U : static_cast<unsigned>(BufferSize);
				194	MaxUsedSlots = 0;
				195	Unavailable = false;
				196	}
				197
				198	uint64_t getResourceMask() const { return ResourceMask; }
				199	int getBufferSize() const { return BufferSize; }
				200	unsigned getMaxUsedSlots() const { return MaxUsedSlots; }
				201
				202	bool isBuffered() const { return BufferSize > 0; }
				203	bool isInOrder() const { return BufferSize == 1; }
				204	bool isADispatchHazard() const { return BufferSize == 0; }
				205	bool isReserved() const { return Unavailable; }
				206
				207	void setReserved() { Unavailable = true; }
				208	void clearReserved() { Unavailable = false; }
				209
				210	// A resource is ready if it is not reserved, and if there are enough
				211	// available units.
				212	// If a resource is also a dispatch hazard, then we don't check if
				213	// it is reserved because that check would always return true.
				214	// A resource marked as "dispatch hazard" is always reserved at
				215	// dispatch time. When this method is called, the assumption is that
				216	// the user of this resource has been already dispatched.
				217	bool isReady(unsigned NumUnits = 1) const {
				218	return (!isReserved() \|\| isADispatchHazard()) &&
				219	llvm::countPopulation(ReadyMask) >= NumUnits;
				220	}
				221	bool isAResourceGroup() const {
				222	return llvm::countPopulation(ResourceMask) > 1;
				223	}
				224
				225	bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
				226
				227	void markSubResourceAsUsed(uint64_t ID) {
				228	assert(isSubResourceReady(ID));
				229	ReadyMask ^= ID;
				230	}
				231
				232	void releaseSubResource(uint64_t ID) {
				233	assert(!isSubResourceReady(ID));
				234	ReadyMask ^= ID;
				235	}
				236
				237	unsigned getNumUnits() const {
				238	return isAResourceGroup() ? 1U : llvm::countPopulation(ResourceSizeMask);
				239	}
				240
				241	uint64_t selectNextInSequence();
				242	void removeFromNextInSequence(uint64_t ID);
				243
				244	ResourceStateEvent isBufferAvailable() const {
				245	if (isADispatchHazard() && isReserved())
				246	return RS_RESERVED;
				247	if (!isBuffered() \|\| AvailableSlots)
				248	return RS_BUFFER_AVAILABLE;
				249	return RS_BUFFER_UNAVAILABLE;
				250	}
				251
				252	void reserveBuffer() {
				253	if (AvailableSlots)
				254	AvailableSlots--;
				255	unsigned UsedSlots = static_cast<unsigned>(BufferSize) - AvailableSlots;
				256	MaxUsedSlots = std::max(MaxUsedSlots, UsedSlots);
				257	}
				258
				259	void releaseBuffer() {
				260	if (BufferSize > 0)
				261	AvailableSlots++;
				262	assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
				263	}
				264
				265	#ifndef NDEBUG
				266	void dump() const;
				267	#endif
				268	};
				269
				270	/// \brief A resource unit identifier.
				271	///
				272	/// This is used to identify a specific processor resource unit using a pair
				273	/// of indices where the 'first' index is a processor resource mask, and the
				274	/// 'second' index is an index for a "sub-resource" (i.e. unit).
				275	typedef std::pair<uint64_t, uint64_t> ResourceRef;
				276
				277	// First: a resource mask identifying a buffered resource.
				278	// Second: max number of buffer entries used in this resource.
				279	typedef std::pair<uint64_t, unsigned> BufferUsageEntry;
				280
				281	/// A resource manager for processor resource units and groups.
				282	///
				283	/// This class owns all the ResourceState objects, and it is responsible for
				284	/// acting on requests from a Scheduler by updating the internal state of
				285	/// ResourceState objects.
				286	/// This class doesn't know about instruction itineraries and functional units.
				287	/// In future, it can be extended to support itineraries too through the same
				288	/// public interface.
				289	class ResourceManager {
				290	// The resource manager owns all the ResourceState.
				291	using UniqueResourceState = std::unique_ptr<ResourceState>;
				292	llvm::SmallDenseMap<uint64_t, UniqueResourceState> Resources;
				293
				294	// Keeps track of which resources are busy, and how many cycles are left
				295	// before those become usable again.
				296	llvm::SmallDenseMap<ResourceRef, unsigned> BusyResources;
				297
				298	// A table to map processor resource IDs to processor resource masks.
				299	llvm::SmallVector<uint64_t, 8> ProcResID2Mask;
				300
				301	// Adds a new resource state in Resources, as well as a new descriptor in
				302	// ResourceDescriptor.
				303	void addResource(const llvm::MCProcResourceDesc &Desc, uint64_t Mask);
				304
				305	// Compute processor resource masks for each processor resource declared by
				306	// the scheduling model.
				307	void computeProcResourceMasks(const llvm::MCSchedModel &SM);
				308
				309	// Populate resource descriptors.
				310	void initialize(const llvm::MCSchedModel &SM) {
				311	computeProcResourceMasks(SM);
				312	for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I)
				313	addResource(*SM.getProcResource(I), ProcResID2Mask[I]);
				314	}
				315
				316	// Returns the actual resource unit that will be used.
				317	ResourceRef selectPipe(uint64_t ResourceID);
				318
				319	void use(ResourceRef RR);
				320	void release(ResourceRef RR);
				321
				322	unsigned getNumUnits(uint64_t ResourceID) const {
				323	assert(Resources.find(ResourceID) != Resources.end());
				324	return Resources.find(ResourceID)->getSecond()->getNumUnits();
				325	}
				326
				327	// Reserve a specific Resource kind.
				328	void reserveBuffer(uint64_t ResourceID) {
				329	assert(isBufferAvailable(ResourceID) ==
				330	ResourceStateEvent::RS_BUFFER_AVAILABLE);
				331	ResourceState &Resource = *Resources[ResourceID];
				332	Resource.reserveBuffer();
				333	}
				334
				335	void releaseBuffer(uint64_t ResourceID) {
				336	Resources[ResourceID]->releaseBuffer();
				337	}
				338
				339	ResourceStateEvent isBufferAvailable(uint64_t ResourceID) const {
				340	const ResourceState &Resource = *Resources.find(ResourceID)->second;
				341	return Resource.isBufferAvailable();
				342	}
				343
				344	bool isReady(uint64_t ResourceID, unsigned NumUnits) const {
				345	const ResourceState &Resource = *Resources.find(ResourceID)->second;
				346	return Resource.isReady(NumUnits);
				347	}
				348
				349	public:
				350	ResourceManager(const llvm::MCSchedModel &SM) { initialize(SM); }
				351
				352	ResourceStateEvent canBeDispatched(const InstrDesc &Desc) const {
				353	ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE;
				354	for (uint64_t Buffer : Desc.Buffers) {
				355	Result = isBufferAvailable(Buffer);
				356	if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE)
				357	break;
				358	}
				359
				360	return Result;
				361	}
				362
				363	void reserveBuffers(const InstrDesc &Desc) {
				364	for (const uint64_t R : Desc.Buffers)
				365	reserveBuffer(R);
				366	}
				367
				368	void releaseBuffers(const InstrDesc &Desc) {
				369	for (const uint64_t R : Desc.Buffers)
				370	releaseBuffer(R);
				371	}
				372
				373	void reserveResource(uint64_t ResourceID) {
				374	ResourceState &Resource = *Resources[ResourceID];
				375	assert(!Resource.isReserved());
				376	Resource.setReserved();
				377	}
				378
				379	void releaseResource(uint64_t ResourceID) {
				380	ResourceState &Resource = *Resources[ResourceID];
				381	Resource.clearReserved();
				382	}
				383
				384	void reserveDispatchHazardResources(const InstrDesc &Desc);
				385
				386	// Returns true if all resources are in-order, and there is at least one
				387	// resource which is a dispatch hazard (BufferSize = 0).
				388	bool mustIssueImmediately(const InstrDesc &Desc);
				389
				390	bool canBeIssued(const InstrDesc &Desc) const;
				391	double getRThroughput(const InstrDesc &Desc) const;
				392
				393	void issueInstruction(
				394	unsigned Index, const InstrDesc &Desc,
				395	llvm::SmallVectorImpl<std::pair<ResourceRef, unsigned>> &Pipes);
				396
				397	void cycleEvent(llvm::SmallVectorImpl<ResourceRef> &ResourcesFreed);
				398
				399	void getBuffersUsage(std::vector<BufferUsageEntry> &Usage) const {
				400	for (const std::pair<uint64_t, UniqueResourceState> &Resource : Resources) {
				401	const ResourceState &RS = *Resource.second;
				402	if (RS.isBuffered())
				403	Usage.emplace_back(std::pair<uint64_t, unsigned>(RS.getResourceMask(),
				404	RS.getMaxUsedSlots()));
				405	}
				406	}
				407
				408	const llvm::ArrayRef<uint64_t> getProcResMasks() const {
				409	return ProcResID2Mask;
				410	}
				411
				412	#ifndef NDEBUG
				413	void dump() const {
				414	for (const std::pair<uint64_t, UniqueResourceState> &Resource : Resources)
				415	Resource.second->dump();
				416	}
				417	#endif
				418	}; // namespace mca
				419
				420	/// Class Scheduler is responsible for issuing instructions to pipeline
				421	/// resources. Internally, it delegates to a ResourceManager the management of
				422	/// processor resources.
				423	/// This class is also responsible for tracking the progress of instructions
				424	/// from the dispatch stage, until the write-back stage.
				425	///
				426	/// An nstruction dispatched to the Scheduler is initially placed into either
				427	/// the 'WaitQueue' or the 'ReadyQueue' depending on the availability of the
				428	/// input operands. Instructions in the WaitQueue are ordered by instruction
				429	/// index. An instruction is moved from the WaitQueue to the ReadyQueue when
				430	/// register operands become available, and all memory dependencies are met.
				431	/// Instructions that are moved from the WaitQueue to the ReadyQueue transition
				432	/// from state 'IS_AVAILABLE' to state 'IS_READY'.
				433	///
				434	/// At the beginning of each cycle, the Scheduler checks if there are
				435	/// instructions in the WaitQueue that can be moved to the ReadyQueue. If the
				436	/// ReadyQueue is not empty, then older instructions from the queue are issued
				437	/// to the processor pipelines, and the underlying ResourceManager is updated
				438	/// accordingly. The ReadyQueue is ordered by instruction index to guarantee
				439	/// that the first instructions in the set are also the oldest.
				440	///
				441	/// An Instruction is moved from the ReadyQueue the `IssuedQueue` when it is
				442	/// issued to a (one or more) pipeline(s). This event also causes an instruction
				443	/// state transition (i.e. from state IS_READY, to state IS_EXECUTING).
				444	/// An Instruction leaves the IssuedQueue when it reaches the write-back stage.
				445	class Scheduler {
				446	const llvm::MCSchedModel &SM;
				447
				448	// Hardware resources that are managed by this scheduler.
				449	std::unique_ptr<ResourceManager> Resources;
				450	std::unique_ptr<LSUnit> LSU;
				451
				452	// The Backend gets notified when instructions are ready/issued/executed.
				453	Backend *Owner;
				454
				455	using QueueEntryTy = std::pair<unsigned, Instruction *>;
				456	std::map<unsigned, Instruction *> WaitQueue;
				457	std::map<unsigned, Instruction *> ReadyQueue;
				458	std::map<unsigned, Instruction *> IssuedQueue;
				459
				460	void notifyInstructionIssued(
				461	unsigned Index,
				462	const llvm::ArrayRef<std::pair<ResourceRef, unsigned>> &Used);
				463	void notifyInstructionExecuted(unsigned Index);
				464	void notifyInstructionReady(unsigned Index);
				465	void notifyResourceAvailable(const ResourceRef &RR);
				466
				467	/// Issue instructions from the ready queue by giving priority to older
				468	/// instructions.
				469	void issue();
				470
				471	/// Issue an instruction without updating the ready queue.
				472	void issueInstruction(Instruction *IS, unsigned InstrIndex);
				473	void updatePendingQueue();
				474	void updateIssuedQueue();
				475
				476	public:
				477	Scheduler(Backend *B, const llvm::MCSchedModel &Model, unsigned LoadQueueSize,
				478	unsigned StoreQueueSize, bool AssumeNoAlias)
				479	: SM(Model), Resources(llvm::make_unique<ResourceManager>(SM)),
				480	LSU(llvm::make_unique<LSUnit>(LoadQueueSize, StoreQueueSize,
				481	AssumeNoAlias)),
				482	Owner(B) {}
				483
				484	/// Scheduling events.
				485	///
				486	/// The DispatchUnit is responsible for querying the Scheduler before
				487	/// dispatching new instructions. Queries are performed through method
				488	/// `Scheduler::CanBeDispatched`, which returns an instance of this enum to
				489	/// tell if the dispatch would fail or not. If scheduling resources are
				490	/// available, and the instruction can be dispatched, then the query returns
				491	/// HWS_AVAILABLE. A values different than HWS_AVAILABLE means that the
				492	/// instruction cannot be dispatched during this cycle.
				493	///
				494	/// Each event name starts with prefix "HWS_", and it is followed by
				495	/// a substring which describes the reason why the Scheduler was unavailable
				496	/// (or "AVAILABLE" if the instruction is allowed to be dispatched).
				497	///
				498	/// HWS_QUEUE_UNAVAILABLE is returned if there are not enough available slots
				499	/// in the scheduler's queue. That means, one (or more) buffered resources
				500	/// consumed by the instruction were full.
				501	///
				502	/// HWS_LD_QUEUE_UNAVAILABLE is returned when an instruction 'mayLoad', and
				503	/// the load queue in the load/store unit (implemented by class LSUnit) is
				504	/// full. Similarly, HWS_ST_QUEUE_UNAVAILABLE is returned when the store
				505	/// queue is full, and the instruction to be dispatched 'mayStore'.
				506	///
				507	/// HWS_DISPATCH_GROUP_RESTRICTION is only returned in special cases where the
				508	/// instruction consumes an in-order issue/dispatch resource (i.e. a resource
				509	/// with `BufferSize=0`), and the pipeline resource is not immediately
				510	/// available.
				511	enum Event {
				512	HWS_AVAILABLE,
				513	HWS_QUEUE_UNAVAILABLE,
				514	HWS_DISPATCH_GROUP_RESTRICTION,
				515	HWS_LD_QUEUE_UNAVAILABLE,
				516	HWS_ST_QUEUE_UNAVAILABLE
				517	};
				518
				519	Event canBeDispatched(const InstrDesc &Desc) const;
				520	Instruction scheduleInstruction(unsigned Idx, Instruction MCIS);
				521
				522	double getRThroughput(const InstrDesc &Desc) const {
				523	return Resources->getRThroughput(Desc);
				524	}
				525
				526	void cycleEvent(unsigned Cycle);
				527
				528	void getBuffersUsage(std::vector<BufferUsageEntry> &Usage) const {
				529	Resources->getBuffersUsage(Usage);
				530	}
				531
				532	const llvm::ArrayRef<uint64_t> getProcResourceMasks() const {
				533	return Resources->getProcResMasks();
				534	}
				535
				536	#ifndef NDEBUG
				537	void dump() const;
				538	#endif
				539	};
				540
				541	} // Namespace mca
				542
				543	#endif