blob: 2b58437404cac6255f19f631c078f773ab8d1138 [file] [log] [blame]
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +00001//===--------------------- Scheduler.h ------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10///
11/// A scheduler for Processor Resource Units and Processor Resource Groups.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
16#define LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
17
18#include "Instruction.h"
19#include "LSUnit.h"
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/MC/MCSubtargetInfo.h"
22#include <map>
23
24namespace mca {
25
26class Backend;
27/// Used to notify the internal state of a processor resource.
28///
29/// A processor resource is available if it is not reserved, and there are
30/// available slots in the buffer. A processor resource is unavailable if it
31/// is either reserved, or the associated buffer is full. A processor resource
32/// with a buffer size of -1 is always available if it is not reserved.
33///
34/// Values of type ResourceStateEvent are returned by method
35/// ResourceState::isBufferAvailable(), which is used to query the internal
36/// state of a resource.
37///
38/// The naming convention for resource state events is:
39/// * Event names start with prefix RS_
40/// * Prefix RS_ is followed by a string describing the actual resource state.
41enum ResourceStateEvent {
42 RS_BUFFER_AVAILABLE,
43 RS_BUFFER_UNAVAILABLE,
44 RS_RESERVED
45};
46
47/// \brief A descriptor for processor resources.
48///
49/// Each object of class ResourceState is associated to a specific processor
50/// resource. There is an instance of this class for every processor resource
51/// defined by the scheduling model.
52/// A ResourceState dynamically tracks the availability of units of a processor
53/// resource. For example, the ResourceState of a ProcResGroup tracks the
54/// availability of resource units which are part of the group.
55///
56/// Internally, ResourceState uses a round-robin selector to identify
57/// which unit of the group shall be used next.
58class ResourceState {
59 // A resource unique identifier generated by the tool.
60 // For processor resource groups, the number of number of bits set in this
61 // mask is equivalent to the cardinality of the group plus one.
62 // Excluding the most significant bit, the remaining bits in the resource mask
63 // identify resources that are part of the group.
64 //
65 // Example, lets assume that this ResourceState describes a
66 // group containing ResourceA and ResourceB:
67 //
68 // ResourceA -- Mask: 0b001
69 // ResourceB -- Mask: 0b010
70 // ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111
71 //
72 // There is only one bit set for non-group resources.
73 // A ResourceMask can be used to solve set membership problems with simple bit
74 // manipulation operations.
75 uint64_t ResourceMask;
76
77 // A ProcResource can specify a number of units. For the purpose of dynamic
78 // scheduling, a processor resource with more than one unit behaves like a
79 // group. This field has one bit set for every unit/resource that is part of
80 // the group.
81 // For groups, this field defaults to 'ResourceMask'. For non-group
82 // resources, the number of bits set in this mask is equivalent to the
83 // number of units (i.e. field 'NumUnits' in 'ProcResourceUnits').
84 uint64_t ResourceSizeMask;
85
86 // A simple round-robin selector for processor resources.
87 // Each bit of the mask identifies a sub resource within this group.
88 //
89 // As an example, lets assume that this ResourceState describes a
90 // processor resource group composed of the following three units:
91 // ResourceA -- 0b001
92 // ResourceB -- 0b010
93 // ResourceC -- 0b100
94 //
95 // Each unit is identified by a ResourceMask which always contains a
96 // single bit set. Field NextInSequenceMask is initially set to value
97 // 0xb111. That value is obtained by OR'ing the resource masks of
98 // processor resource that are part of the group.
99 //
100 // NextInSequenceMask -- 0b111
101 //
102 // Field NextInSequenceMask is used by the resource manager (i.e.
103 // an object of class ResourceManager) to select the "next available resource"
104 // from the set. The algorithm would prioritize resources with a bigger
105 // ResourceMask value.
106 //
107 // In this example, there are three resources in the set, and 'ResourceC'
108 // has the highest mask value. The round-robin selector would firstly select
109 // 'ResourceC', then 'ResourceB', and eventually 'ResourceA'.
110 //
111 // When a resource R is used, its corresponding bit is cleared from the set.
112 //
113 // Back to the example:
114 // If 'ResourceC' is selected, then the new value of NextInSequenceMask
115 // becomes 0xb011.
116 //
117 // When NextInSequenceMask becomes zero, it is reset to its original value
118 // (in this example, that value would be 0b111).
119 uint64_t NextInSequenceMask;
120
121 // Some instructions can only be issued on very specific pipeline resources.
122 // For those instructions, we know exactly which resource would be consumed
123 // without having to dynamically select it using field 'NextInSequenceMask'.
124 //
125 // The resource mask bit associated to the (statically) selected
126 // processor resource is still cleared from the 'NextInSequenceMask'.
127 // If that bit was already zero in NextInSequenceMask, then we update
128 // mask 'RemovedFromNextInSequence'.
129 //
130 // When NextInSequenceMask is reset back to its initial value, the algorithm
131 // removes any bits which are set in RemoveFromNextInSequence.
132 uint64_t RemovedFromNextInSequence;
133
134 // A mask of ready units.
135 uint64_t ReadyMask;
136
137 // Buffered resources will have this field set to a positive number bigger
138 // than 0. A buffered resource behaves like a separate reservation station
139 // implementing its own buffer for out-of-order execution.
140 // A buffer of 1 is for units that force in-order execution.
141 // A value of 0 is treated specially. In particular, a resource with
142 // A BufferSize = 0 is for an in-order issue/dispatch resource.
143 // That means, this resource is reserved starting from the dispatch event,
144 // until all the "resource cycles" are consumed after the issue event.
145 // While this resource is reserved, no other instruction may be dispatched.
146 int BufferSize;
147
148 // Available slots in the buffer (zero, if this is not a buffered resource).
149 unsigned AvailableSlots;
150
151 // Maximum number of buffer slots seen used during one cycle.
152 // This helps tracking dynamic dispatch stalls caused by the lack of
153 // entries in the scheduler's queue.
154 unsigned MaxUsedSlots;
155
156 // True if this is resource is currently unavailable.
157 // An instruction may "reserve" a resource for a number of cycles.
158 // During those cycles, the reserved resource cannot be used for other
159 // instructions, even if the ReadyMask is set.
160 bool Unavailable;
161
162 bool isSubResourceReady(uint64_t ID) const { return ReadyMask & ID; }
163
164 /// Returns the mask identifier of the next available resource in the set.
165 uint64_t getNextInSequence() const {
166 assert(NextInSequenceMask);
167 return llvm::PowerOf2Floor(NextInSequenceMask);
168 }
169
170 /// Returns the mask of the next available resource within the set,
171 /// and updates the resource selector.
172 void updateNextInSequence() {
173 NextInSequenceMask ^= getNextInSequence();
174 if (!NextInSequenceMask)
175 NextInSequenceMask = ResourceSizeMask;
176 }
177
178 uint64_t computeResourceSizeMaskForGroup(uint64_t ResourceMask) {
179 assert(llvm::countPopulation(ResourceMask) > 1);
180 return ResourceMask ^ llvm::PowerOf2Floor(ResourceMask);
181 }
182
183public:
184 ResourceState(const llvm::MCProcResourceDesc &Desc, uint64_t Mask)
185 : ResourceMask(Mask) {
186 bool IsAGroup = llvm::countPopulation(ResourceMask) > 1;
187 ResourceSizeMask = IsAGroup ? computeResourceSizeMaskForGroup(ResourceMask)
188 : ((1ULL << Desc.NumUnits) - 1);
189 NextInSequenceMask = ResourceSizeMask;
190 RemovedFromNextInSequence = 0;
191 ReadyMask = ResourceSizeMask;
192 BufferSize = Desc.BufferSize;
193 AvailableSlots = BufferSize == -1 ? 0U : static_cast<unsigned>(BufferSize);
194 MaxUsedSlots = 0;
195 Unavailable = false;
196 }
197
198 uint64_t getResourceMask() const { return ResourceMask; }
199 int getBufferSize() const { return BufferSize; }
200 unsigned getMaxUsedSlots() const { return MaxUsedSlots; }
201
202 bool isBuffered() const { return BufferSize > 0; }
203 bool isInOrder() const { return BufferSize == 1; }
204 bool isADispatchHazard() const { return BufferSize == 0; }
205 bool isReserved() const { return Unavailable; }
206
207 void setReserved() { Unavailable = true; }
208 void clearReserved() { Unavailable = false; }
209
210 // A resource is ready if it is not reserved, and if there are enough
211 // available units.
212 // If a resource is also a dispatch hazard, then we don't check if
213 // it is reserved because that check would always return true.
214 // A resource marked as "dispatch hazard" is always reserved at
215 // dispatch time. When this method is called, the assumption is that
216 // the user of this resource has been already dispatched.
217 bool isReady(unsigned NumUnits = 1) const {
218 return (!isReserved() || isADispatchHazard()) &&
219 llvm::countPopulation(ReadyMask) >= NumUnits;
220 }
221 bool isAResourceGroup() const {
222 return llvm::countPopulation(ResourceMask) > 1;
223 }
224
225 bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
226
227 void markSubResourceAsUsed(uint64_t ID) {
228 assert(isSubResourceReady(ID));
229 ReadyMask ^= ID;
230 }
231
232 void releaseSubResource(uint64_t ID) {
233 assert(!isSubResourceReady(ID));
234 ReadyMask ^= ID;
235 }
236
237 unsigned getNumUnits() const {
238 return isAResourceGroup() ? 1U : llvm::countPopulation(ResourceSizeMask);
239 }
240
241 uint64_t selectNextInSequence();
242 void removeFromNextInSequence(uint64_t ID);
243
244 ResourceStateEvent isBufferAvailable() const {
245 if (isADispatchHazard() && isReserved())
246 return RS_RESERVED;
247 if (!isBuffered() || AvailableSlots)
248 return RS_BUFFER_AVAILABLE;
249 return RS_BUFFER_UNAVAILABLE;
250 }
251
252 void reserveBuffer() {
253 if (AvailableSlots)
254 AvailableSlots--;
255 unsigned UsedSlots = static_cast<unsigned>(BufferSize) - AvailableSlots;
256 MaxUsedSlots = std::max(MaxUsedSlots, UsedSlots);
257 }
258
259 void releaseBuffer() {
260 if (BufferSize > 0)
261 AvailableSlots++;
262 assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
263 }
264
265#ifndef NDEBUG
266 void dump() const;
267#endif
268};
269
270/// \brief A resource unit identifier.
271///
272/// This is used to identify a specific processor resource unit using a pair
273/// of indices where the 'first' index is a processor resource mask, and the
274/// 'second' index is an index for a "sub-resource" (i.e. unit).
275typedef std::pair<uint64_t, uint64_t> ResourceRef;
276
277// First: a resource mask identifying a buffered resource.
278// Second: max number of buffer entries used in this resource.
279typedef std::pair<uint64_t, unsigned> BufferUsageEntry;
280
281/// A resource manager for processor resource units and groups.
282///
283/// This class owns all the ResourceState objects, and it is responsible for
284/// acting on requests from a Scheduler by updating the internal state of
285/// ResourceState objects.
286/// This class doesn't know about instruction itineraries and functional units.
287/// In future, it can be extended to support itineraries too through the same
288/// public interface.
289class ResourceManager {
290 // The resource manager owns all the ResourceState.
291 using UniqueResourceState = std::unique_ptr<ResourceState>;
292 llvm::SmallDenseMap<uint64_t, UniqueResourceState> Resources;
293
294 // Keeps track of which resources are busy, and how many cycles are left
295 // before those become usable again.
296 llvm::SmallDenseMap<ResourceRef, unsigned> BusyResources;
297
298 // A table to map processor resource IDs to processor resource masks.
299 llvm::SmallVector<uint64_t, 8> ProcResID2Mask;
300
301 // Adds a new resource state in Resources, as well as a new descriptor in
302 // ResourceDescriptor.
303 void addResource(const llvm::MCProcResourceDesc &Desc, uint64_t Mask);
304
305 // Compute processor resource masks for each processor resource declared by
306 // the scheduling model.
307 void computeProcResourceMasks(const llvm::MCSchedModel &SM);
308
309 // Populate resource descriptors.
310 void initialize(const llvm::MCSchedModel &SM) {
311 computeProcResourceMasks(SM);
312 for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I)
313 addResource(*SM.getProcResource(I), ProcResID2Mask[I]);
314 }
315
316 // Returns the actual resource unit that will be used.
317 ResourceRef selectPipe(uint64_t ResourceID);
318
319 void use(ResourceRef RR);
320 void release(ResourceRef RR);
321
322 unsigned getNumUnits(uint64_t ResourceID) const {
323 assert(Resources.find(ResourceID) != Resources.end());
324 return Resources.find(ResourceID)->getSecond()->getNumUnits();
325 }
326
327 // Reserve a specific Resource kind.
328 void reserveBuffer(uint64_t ResourceID) {
329 assert(isBufferAvailable(ResourceID) ==
330 ResourceStateEvent::RS_BUFFER_AVAILABLE);
331 ResourceState &Resource = *Resources[ResourceID];
332 Resource.reserveBuffer();
333 }
334
335 void releaseBuffer(uint64_t ResourceID) {
336 Resources[ResourceID]->releaseBuffer();
337 }
338
339 ResourceStateEvent isBufferAvailable(uint64_t ResourceID) const {
340 const ResourceState &Resource = *Resources.find(ResourceID)->second;
341 return Resource.isBufferAvailable();
342 }
343
344 bool isReady(uint64_t ResourceID, unsigned NumUnits) const {
345 const ResourceState &Resource = *Resources.find(ResourceID)->second;
346 return Resource.isReady(NumUnits);
347 }
348
349public:
350 ResourceManager(const llvm::MCSchedModel &SM) { initialize(SM); }
351
352 ResourceStateEvent canBeDispatched(const InstrDesc &Desc) const {
353 ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE;
354 for (uint64_t Buffer : Desc.Buffers) {
355 Result = isBufferAvailable(Buffer);
356 if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE)
357 break;
358 }
359
360 return Result;
361 }
362
363 void reserveBuffers(const InstrDesc &Desc) {
364 for (const uint64_t R : Desc.Buffers)
365 reserveBuffer(R);
366 }
367
368 void releaseBuffers(const InstrDesc &Desc) {
369 for (const uint64_t R : Desc.Buffers)
370 releaseBuffer(R);
371 }
372
373 void reserveResource(uint64_t ResourceID) {
374 ResourceState &Resource = *Resources[ResourceID];
375 assert(!Resource.isReserved());
376 Resource.setReserved();
377 }
378
379 void releaseResource(uint64_t ResourceID) {
380 ResourceState &Resource = *Resources[ResourceID];
381 Resource.clearReserved();
382 }
383
384 void reserveDispatchHazardResources(const InstrDesc &Desc);
385
386 // Returns true if all resources are in-order, and there is at least one
387 // resource which is a dispatch hazard (BufferSize = 0).
388 bool mustIssueImmediately(const InstrDesc &Desc);
389
390 bool canBeIssued(const InstrDesc &Desc) const;
391 double getRThroughput(const InstrDesc &Desc) const;
392
393 void issueInstruction(
394 unsigned Index, const InstrDesc &Desc,
395 llvm::SmallVectorImpl<std::pair<ResourceRef, unsigned>> &Pipes);
396
397 void cycleEvent(llvm::SmallVectorImpl<ResourceRef> &ResourcesFreed);
398
399 void getBuffersUsage(std::vector<BufferUsageEntry> &Usage) const {
400 for (const std::pair<uint64_t, UniqueResourceState> &Resource : Resources) {
401 const ResourceState &RS = *Resource.second;
402 if (RS.isBuffered())
403 Usage.emplace_back(std::pair<uint64_t, unsigned>(RS.getResourceMask(),
404 RS.getMaxUsedSlots()));
405 }
406 }
407
408 const llvm::ArrayRef<uint64_t> getProcResMasks() const {
409 return ProcResID2Mask;
410 }
411
412#ifndef NDEBUG
413 void dump() const {
414 for (const std::pair<uint64_t, UniqueResourceState> &Resource : Resources)
415 Resource.second->dump();
416 }
417#endif
418}; // namespace mca
419
420/// Class Scheduler is responsible for issuing instructions to pipeline
421/// resources. Internally, it delegates to a ResourceManager the management of
422/// processor resources.
423/// This class is also responsible for tracking the progress of instructions
424/// from the dispatch stage, until the write-back stage.
425///
426/// An nstruction dispatched to the Scheduler is initially placed into either
427/// the 'WaitQueue' or the 'ReadyQueue' depending on the availability of the
428/// input operands. Instructions in the WaitQueue are ordered by instruction
429/// index. An instruction is moved from the WaitQueue to the ReadyQueue when
430/// register operands become available, and all memory dependencies are met.
431/// Instructions that are moved from the WaitQueue to the ReadyQueue transition
432/// from state 'IS_AVAILABLE' to state 'IS_READY'.
433///
434/// At the beginning of each cycle, the Scheduler checks if there are
435/// instructions in the WaitQueue that can be moved to the ReadyQueue. If the
436/// ReadyQueue is not empty, then older instructions from the queue are issued
437/// to the processor pipelines, and the underlying ResourceManager is updated
438/// accordingly. The ReadyQueue is ordered by instruction index to guarantee
439/// that the first instructions in the set are also the oldest.
440///
441/// An Instruction is moved from the ReadyQueue the `IssuedQueue` when it is
442/// issued to a (one or more) pipeline(s). This event also causes an instruction
443/// state transition (i.e. from state IS_READY, to state IS_EXECUTING).
444/// An Instruction leaves the IssuedQueue when it reaches the write-back stage.
445class Scheduler {
446 const llvm::MCSchedModel &SM;
447
448 // Hardware resources that are managed by this scheduler.
449 std::unique_ptr<ResourceManager> Resources;
450 std::unique_ptr<LSUnit> LSU;
451
452 // The Backend gets notified when instructions are ready/issued/executed.
453 Backend *Owner;
454
455 using QueueEntryTy = std::pair<unsigned, Instruction *>;
456 std::map<unsigned, Instruction *> WaitQueue;
457 std::map<unsigned, Instruction *> ReadyQueue;
458 std::map<unsigned, Instruction *> IssuedQueue;
459
460 void notifyInstructionIssued(
461 unsigned Index,
462 const llvm::ArrayRef<std::pair<ResourceRef, unsigned>> &Used);
463 void notifyInstructionExecuted(unsigned Index);
464 void notifyInstructionReady(unsigned Index);
465 void notifyResourceAvailable(const ResourceRef &RR);
466
467 /// Issue instructions from the ready queue by giving priority to older
468 /// instructions.
469 void issue();
470
471 /// Issue an instruction without updating the ready queue.
472 void issueInstruction(Instruction *IS, unsigned InstrIndex);
473 void updatePendingQueue();
474 void updateIssuedQueue();
475
476public:
477 Scheduler(Backend *B, const llvm::MCSchedModel &Model, unsigned LoadQueueSize,
478 unsigned StoreQueueSize, bool AssumeNoAlias)
479 : SM(Model), Resources(llvm::make_unique<ResourceManager>(SM)),
480 LSU(llvm::make_unique<LSUnit>(LoadQueueSize, StoreQueueSize,
481 AssumeNoAlias)),
482 Owner(B) {}
483
484 /// Scheduling events.
485 ///
486 /// The DispatchUnit is responsible for querying the Scheduler before
487 /// dispatching new instructions. Queries are performed through method
488 /// `Scheduler::CanBeDispatched`, which returns an instance of this enum to
489 /// tell if the dispatch would fail or not. If scheduling resources are
490 /// available, and the instruction can be dispatched, then the query returns
491 /// HWS_AVAILABLE. A values different than HWS_AVAILABLE means that the
492 /// instruction cannot be dispatched during this cycle.
493 ///
494 /// Each event name starts with prefix "HWS_", and it is followed by
495 /// a substring which describes the reason why the Scheduler was unavailable
496 /// (or "AVAILABLE" if the instruction is allowed to be dispatched).
497 ///
498 /// HWS_QUEUE_UNAVAILABLE is returned if there are not enough available slots
499 /// in the scheduler's queue. That means, one (or more) buffered resources
500 /// consumed by the instruction were full.
501 ///
502 /// HWS_LD_QUEUE_UNAVAILABLE is returned when an instruction 'mayLoad', and
503 /// the load queue in the load/store unit (implemented by class LSUnit) is
504 /// full. Similarly, HWS_ST_QUEUE_UNAVAILABLE is returned when the store
505 /// queue is full, and the instruction to be dispatched 'mayStore'.
506 ///
507 /// HWS_DISPATCH_GROUP_RESTRICTION is only returned in special cases where the
508 /// instruction consumes an in-order issue/dispatch resource (i.e. a resource
509 /// with `BufferSize=0`), and the pipeline resource is not immediately
510 /// available.
511 enum Event {
512 HWS_AVAILABLE,
513 HWS_QUEUE_UNAVAILABLE,
514 HWS_DISPATCH_GROUP_RESTRICTION,
515 HWS_LD_QUEUE_UNAVAILABLE,
516 HWS_ST_QUEUE_UNAVAILABLE
517 };
518
519 Event canBeDispatched(const InstrDesc &Desc) const;
520 Instruction *scheduleInstruction(unsigned Idx, Instruction *MCIS);
521
522 double getRThroughput(const InstrDesc &Desc) const {
523 return Resources->getRThroughput(Desc);
524 }
525
526 void cycleEvent(unsigned Cycle);
527
528 void getBuffersUsage(std::vector<BufferUsageEntry> &Usage) const {
529 Resources->getBuffersUsage(Usage);
530 }
531
532 const llvm::ArrayRef<uint64_t> getProcResourceMasks() const {
533 return Resources->getProcResMasks();
534 }
535
536#ifndef NDEBUG
537 void dump() const;
538#endif
539};
540
541} // Namespace mca
542
543#endif