blob: e810ddaf77b3fa12450b7ed8acc2e619c9ced73e [file] [log] [blame]
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +00001//===----------------------- Dispatch.h -------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10///
11/// This file implements classes that are used to model register files,
12/// reorder buffers and the hardware dispatch logic.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_H
17#define LLVM_TOOLS_LLVM_MCA_DISPATCH_H
18
19#include "Instruction.h"
20#include "llvm/MC/MCRegisterInfo.h"
21#include <map>
22
23namespace mca {
24
// Forward declarations of classes that are used by pointer or by reference
// before (or without) being defined in this header.
class Backend;
class DispatchUnit;
class Scheduler;
class WriteState;
29
30/// \brief Keeps track of register definitions.
31///
32/// This class tracks register definitions, and performs register renaming
33/// to break anti dependencies.
34/// By default, there is no limit in the number of register aliases which
35/// can be created for the purpose of register renaming. However, users can
36/// specify at object construction time a limit in the number of temporary
37/// registers which can be used by the register renaming logic.
38class RegisterFile {
39 const llvm::MCRegisterInfo &MRI;
40 // Currently used mappings and maximum used mappings.
41 // These are to generate statistics only.
42 unsigned NumUsedMappings;
43 unsigned MaxUsedMappings;
44 // Total number of mappings created over time.
45 unsigned TotalMappingsCreated;
46
47 // The maximum number of register aliases which can be used by the
48 // register renamer. Defaut value for this field is zero.
49 // A value of zero for this field means that there is no limit in the
50 // amount of register mappings which can be created. That is equivalent
51 // to having a theoretically infinite number of temporary registers.
52 unsigned TotalMappings;
53
54 // This map contains an entry for every physical register.
55 // A register index is used as a key value to access a WriteState.
56 // This is how we track RAW dependencies for dispatched
57 // instructions. For every register, we track the last seen write only.
58 // This assumes that all writes fully update both super and sub registers.
59 // We need a flag in MCInstrDesc to check if a write also updates super
60 // registers. We can then have a extra tablegen flag to set for instructions.
61 // This is a separate patch on its own.
62 std::vector<WriteState *> RegisterMappings;
63 // Assumptions are:
64 // a) a false dependencies is always removed by the register renamer.
65 // b) the register renamer can create an "infinite" number of mappings.
66 // Since we track the number of mappings created, in future we may
67 // introduce constraints on the number of mappings that can be created.
68 // For example, the maximum number of registers that are available for
69 // register renaming purposes may default to the size of the register file.
70
71 // In future, we can extend this design to allow multiple register files, and
72 // apply different restrictions on the register mappings and the number of
73 // temporary registers used by mappings.
74
75public:
76 RegisterFile(const llvm::MCRegisterInfo &mri, unsigned Mappings = 0)
77 : MRI(mri), NumUsedMappings(0), MaxUsedMappings(0),
78 TotalMappingsCreated(0), TotalMappings(Mappings),
79 RegisterMappings(MRI.getNumRegs(), nullptr) {}
80
81 // Creates a new register mapping for RegID.
82 // This reserves a temporary register in the register file.
83 void addRegisterMapping(WriteState &WS);
84
85 // Invalidates register mappings associated to the input WriteState object.
86 // This releases temporary registers in the register file.
87 void invalidateRegisterMapping(const WriteState &WS);
88
89 bool isAvailable(unsigned NumRegWrites);
90 void collectWrites(llvm::SmallVectorImpl<WriteState *> &Writes,
91 unsigned RegID) const;
92 void updateOnRead(ReadState &RS, unsigned RegID);
93 unsigned getMaxUsedRegisterMappings() const { return MaxUsedMappings; }
94 unsigned getTotalRegisterMappingsCreated() const {
95 return TotalMappingsCreated;
96 }
97
98#ifndef NDEBUG
99 void dump() const;
100#endif
101};
102
103/// \brief tracks which instructions are in-flight (i.e. dispatched but not
104/// retired) in the OoO backend.
105///
106/// This class checks on every cycle if/which instructions can be retired.
107/// Instructions are retired in program order.
108/// In the event of instruction retired, the DispatchUnit object that owns
109/// this RetireControlUnit gets notified.
110/// On instruction retired, register updates are all architecturally
111/// committed, and any temporary registers originally allocated for the
112/// retired instruction are freed.
113struct RetireControlUnit {
114 // A "token" (object of class RUToken) is created by the retire unit for every
115 // instruction dispatched to the schedulers. Flag 'Executed' is used to
116 // quickly check if an instruction has reached the write-back stage. A token
117 // also carries information related to the number of entries consumed by the
118 // instruction in the reorder buffer. The idea is that those entries will
119 // become available again once the instruction is retired. On every cycle,
120 // the RCU (Retire Control Unit) scans every token starting to search for
121 // instructions that are ready to retire. retired. Instructions are retired
122 // in program order. Only 'Executed' instructions are eligible for retire.
123 // Note that the size of the reorder buffer is defined by the scheduling model
124 // via field 'NumMicroOpBufferSize'.
125 struct RUToken {
126 unsigned Index; // Instruction index.
127 unsigned NumSlots; // Slots reserved to this instruction.
128 bool Executed; // True if the instruction is past the WB stage.
129 };
130
131private:
132 unsigned NextAvailableSlotIdx;
133 unsigned CurrentInstructionSlotIdx;
134 unsigned AvailableSlots;
135 unsigned MaxRetirePerCycle; // 0 means no limit.
136 std::vector<RUToken> Queue;
137 DispatchUnit *Owner;
138
139public:
140 RetireControlUnit(unsigned NumSlots, unsigned RPC, DispatchUnit *DU)
141 : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
142 AvailableSlots(NumSlots), MaxRetirePerCycle(RPC), Owner(DU) {
143 assert(NumSlots && "Expected at least one slot!");
144 Queue.resize(NumSlots);
145 }
146
147 bool isFull() const { return !AvailableSlots; }
148 bool isEmpty() const { return AvailableSlots == Queue.size(); }
149 bool isAvailable(unsigned Quantity = 1) const {
150 // Some instructions may declare a number of uOps which exceedes the size
151 // of the reorder buffer. To avoid problems, cap the amount of slots to
152 // the size of the reorder buffer.
153 Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
154 return AvailableSlots >= Quantity;
155 }
156
157 // Reserves a number of slots, and returns a new token.
158 unsigned reserveSlot(unsigned Index, unsigned NumMicroOps);
159
160 /// Retires instructions in program order.
161 void cycleEvent();
162
163 void onInstructionExecuted(unsigned TokenID);
164
165#ifndef NDEBUG
166 void dump() const;
167#endif
168};
169
170// \brief Implements the hardware dispatch logic.
171//
172// This class is responsible for the dispatch stage, in which instructions are
173// dispatched in groups to the Scheduler. An instruction can be dispatched if
174// functional units are available.
175// To be more specific, an instruction can be dispatched to the Scheduler if:
176// 1) There are enough entries in the reorder buffer (implemented by class
177// RetireControlUnit) to accomodate all opcodes.
178// 2) There are enough temporaries to rename output register operands.
179// 3) There are enough entries available in the used buffered resource(s).
180//
181// The number of micro opcodes that can be dispatched in one cycle is limited by
182// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when
183// processor resources are not available (i.e. at least one of the
184// abovementioned checks fails). Dispatch stall events are counted during the
185// entire execution of the code, and displayed by the performance report when
186// flag '-verbose' is specified.
187//
188// If the number of micro opcodes of an instruction is bigger than
189// DispatchWidth, then it can only be dispatched at the beginning of one cycle.
190// The DispatchUnit will still have to wait for a number of cycles (depending on
191// the DispatchWidth and the number of micro opcodes) before it can serve other
192// instructions.
193class DispatchUnit {
194 unsigned DispatchWidth;
195 unsigned AvailableEntries;
196 unsigned CarryOver;
197 Scheduler *SC;
198
199 std::unique_ptr<RegisterFile> RAT;
200 std::unique_ptr<RetireControlUnit> RCU;
201 Backend *Owner;
202
203 /// Dispatch stall event identifiers.
204 ///
205 /// The naming convention is:
206 /// * Event names starts with the "DS_" prefix
207 /// * For dynamic dispatch stalls, the "DS_" prefix is followed by the
208 /// the unavailable resource/functional unit acronym (example: RAT)
209 /// * The last substring is the event reason (example: REG_UNAVAILABLE means
210 /// that register renaming couldn't find enough spare registers in the
211 /// register file).
212 ///
213 /// List of acronyms used for processor resoures:
214 /// RAT - Register Alias Table (used by the register renaming logic)
215 /// RCU - Retire Control Unit
216 /// SQ - Scheduler's Queue
217 /// LDQ - Load Queue
218 /// STQ - Store Queue
219 enum {
220 DS_RAT_REG_UNAVAILABLE,
221 DS_RCU_TOKEN_UNAVAILABLE,
222 DS_SQ_TOKEN_UNAVAILABLE,
223 DS_LDQ_TOKEN_UNAVAILABLE,
224 DS_STQ_TOKEN_UNAVAILABLE,
225 DS_DISPATCH_GROUP_RESTRICTION,
226 DS_LAST
227 };
228
229 // The DispatchUnit track dispatch stall events caused by unavailable
230 // of hardware resources. Events are classified based on the stall kind;
231 // so we have a counter for every source of dispatch stall. Counters are
232 // stored into a vector `DispatchStall` which is always of size DS_LAST.
233 std::vector<unsigned> DispatchStalls;
234
235 bool checkRAT(const InstrDesc &Desc);
236 bool checkRCU(const InstrDesc &Desc);
237 bool checkScheduler(const InstrDesc &Desc);
238
239 void notifyInstructionDispatched(unsigned IID);
240
241public:
242 DispatchUnit(Backend *B, const llvm::MCRegisterInfo &MRI,
243 unsigned MicroOpBufferSize, unsigned RegisterFileSize,
244 unsigned MaxRetirePerCycle, unsigned MaxDispatchWidth,
245 Scheduler *Sched)
246 : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
247 CarryOver(0U), SC(Sched),
248 RAT(llvm::make_unique<RegisterFile>(MRI, RegisterFileSize)),
249 RCU(llvm::make_unique<RetireControlUnit>(MicroOpBufferSize,
250 MaxRetirePerCycle, this)),
251 Owner(B), DispatchStalls(DS_LAST, 0) {}
252
253 unsigned getDispatchWidth() const { return DispatchWidth; }
254
255 bool isAvailable(unsigned NumEntries) const {
256 return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth;
257 }
258
259 bool isRCUEmpty() const { return RCU->isEmpty(); }
260
261 bool canDispatch(const InstrDesc &Desc) {
262 assert(isAvailable(Desc.NumMicroOps));
263 return checkRCU(Desc) && checkRAT(Desc) && checkScheduler(Desc);
264 }
265
266 unsigned dispatch(unsigned IID, Instruction *NewInst);
267
268 void collectWrites(llvm::SmallVectorImpl<WriteState *> &Vec,
269 unsigned RegID) const {
270 return RAT->collectWrites(Vec, RegID);
271 }
272 unsigned getNumRATStalls() const {
273 return DispatchStalls[DS_RAT_REG_UNAVAILABLE];
274 }
275 unsigned getNumRCUStalls() const {
276 return DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE];
277 }
278 unsigned getNumSQStalls() const {
279 return DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE];
280 }
281 unsigned getNumLDQStalls() const {
282 return DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE];
283 }
284 unsigned getNumSTQStalls() const {
285 return DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE];
286 }
287 unsigned getNumDispatchGroupStalls() const {
288 return DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION];
289 }
290 unsigned getMaxUsedRegisterMappings() const {
291 return RAT->getMaxUsedRegisterMappings();
292 }
293 unsigned getTotalRegisterMappingsCreated() const {
294 return RAT->getTotalRegisterMappingsCreated();
295 }
296 void addNewRegisterMapping(WriteState &WS) { RAT->addRegisterMapping(WS); }
297
298 void cycleEvent(unsigned Cycle) {
299 RCU->cycleEvent();
300 AvailableEntries =
301 CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver;
302 CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U;
303 }
304
305 void notifyInstructionRetired(unsigned Index);
306
307 void onInstructionExecuted(unsigned TokenID) {
308 RCU->onInstructionExecuted(TokenID);
309 }
310
311 void invalidateRegisterMappings(const Instruction &Inst);
312#ifndef NDEBUG
313 void dump() const;
314#endif
315};
316
317} // namespace mca
318
319#endif