blob: b342976db27aa63eea90d3f7a4235ec722b43d89 [file] [log] [blame]
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +00001//===----------------------- Dispatch.h -------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10///
11/// This file implements classes that are used to model register files,
12/// reorder buffers and the hardware dispatch logic.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_H
17#define LLVM_TOOLS_LLVM_MCA_DISPATCH_H
18
19#include "Instruction.h"
20#include "llvm/MC/MCRegisterInfo.h"
Andrea Di Biagio4732d43ca2018-03-14 14:57:23 +000021#include "llvm/MC/MCSubtargetInfo.h"
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +000022#include <map>
23
24namespace mca {
25
26class WriteState;
27class DispatchUnit;
28class Scheduler;
29class Backend;
30
31/// \brief Keeps track of register definitions.
32///
33/// This class tracks register definitions, and performs register renaming
34/// to break anti dependencies.
35/// By default, there is no limit in the number of register aliases which
36/// can be created for the purpose of register renaming. However, users can
37/// specify at object construction time a limit in the number of temporary
38/// registers which can be used by the register renaming logic.
39class RegisterFile {
40 const llvm::MCRegisterInfo &MRI;
41 // Currently used mappings and maximum used mappings.
42 // These are to generate statistics only.
43 unsigned NumUsedMappings;
44 unsigned MaxUsedMappings;
45 // Total number of mappings created over time.
46 unsigned TotalMappingsCreated;
47
48 // The maximum number of register aliases which can be used by the
49 // register renamer. Defaut value for this field is zero.
50 // A value of zero for this field means that there is no limit in the
51 // amount of register mappings which can be created. That is equivalent
52 // to having a theoretically infinite number of temporary registers.
53 unsigned TotalMappings;
54
55 // This map contains an entry for every physical register.
56 // A register index is used as a key value to access a WriteState.
57 // This is how we track RAW dependencies for dispatched
58 // instructions. For every register, we track the last seen write only.
59 // This assumes that all writes fully update both super and sub registers.
60 // We need a flag in MCInstrDesc to check if a write also updates super
61 // registers. We can then have a extra tablegen flag to set for instructions.
62 // This is a separate patch on its own.
63 std::vector<WriteState *> RegisterMappings;
64 // Assumptions are:
65 // a) a false dependencies is always removed by the register renamer.
66 // b) the register renamer can create an "infinite" number of mappings.
67 // Since we track the number of mappings created, in future we may
68 // introduce constraints on the number of mappings that can be created.
69 // For example, the maximum number of registers that are available for
70 // register renaming purposes may default to the size of the register file.
71
72 // In future, we can extend this design to allow multiple register files, and
73 // apply different restrictions on the register mappings and the number of
74 // temporary registers used by mappings.
75
76public:
77 RegisterFile(const llvm::MCRegisterInfo &mri, unsigned Mappings = 0)
78 : MRI(mri), NumUsedMappings(0), MaxUsedMappings(0),
79 TotalMappingsCreated(0), TotalMappings(Mappings),
80 RegisterMappings(MRI.getNumRegs(), nullptr) {}
81
82 // Creates a new register mapping for RegID.
83 // This reserves a temporary register in the register file.
84 void addRegisterMapping(WriteState &WS);
85
86 // Invalidates register mappings associated to the input WriteState object.
87 // This releases temporary registers in the register file.
88 void invalidateRegisterMapping(const WriteState &WS);
89
90 bool isAvailable(unsigned NumRegWrites);
91 void collectWrites(llvm::SmallVectorImpl<WriteState *> &Writes,
92 unsigned RegID) const;
93 void updateOnRead(ReadState &RS, unsigned RegID);
94 unsigned getMaxUsedRegisterMappings() const { return MaxUsedMappings; }
95 unsigned getTotalRegisterMappingsCreated() const {
96 return TotalMappingsCreated;
97 }
98
99#ifndef NDEBUG
100 void dump() const;
101#endif
102};
103
104/// \brief tracks which instructions are in-flight (i.e. dispatched but not
105/// retired) in the OoO backend.
106///
107/// This class checks on every cycle if/which instructions can be retired.
108/// Instructions are retired in program order.
109/// In the event of instruction retired, the DispatchUnit object that owns
110/// this RetireControlUnit gets notified.
111/// On instruction retired, register updates are all architecturally
112/// committed, and any temporary registers originally allocated for the
113/// retired instruction are freed.
114struct RetireControlUnit {
115 // A "token" (object of class RUToken) is created by the retire unit for every
116 // instruction dispatched to the schedulers. Flag 'Executed' is used to
117 // quickly check if an instruction has reached the write-back stage. A token
118 // also carries information related to the number of entries consumed by the
119 // instruction in the reorder buffer. The idea is that those entries will
120 // become available again once the instruction is retired. On every cycle,
121 // the RCU (Retire Control Unit) scans every token starting to search for
122 // instructions that are ready to retire. retired. Instructions are retired
123 // in program order. Only 'Executed' instructions are eligible for retire.
124 // Note that the size of the reorder buffer is defined by the scheduling model
125 // via field 'NumMicroOpBufferSize'.
126 struct RUToken {
127 unsigned Index; // Instruction index.
128 unsigned NumSlots; // Slots reserved to this instruction.
129 bool Executed; // True if the instruction is past the WB stage.
130 };
131
132private:
133 unsigned NextAvailableSlotIdx;
134 unsigned CurrentInstructionSlotIdx;
135 unsigned AvailableSlots;
136 unsigned MaxRetirePerCycle; // 0 means no limit.
137 std::vector<RUToken> Queue;
138 DispatchUnit *Owner;
139
140public:
141 RetireControlUnit(unsigned NumSlots, unsigned RPC, DispatchUnit *DU)
142 : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
143 AvailableSlots(NumSlots), MaxRetirePerCycle(RPC), Owner(DU) {
144 assert(NumSlots && "Expected at least one slot!");
145 Queue.resize(NumSlots);
146 }
147
148 bool isFull() const { return !AvailableSlots; }
149 bool isEmpty() const { return AvailableSlots == Queue.size(); }
150 bool isAvailable(unsigned Quantity = 1) const {
151 // Some instructions may declare a number of uOps which exceedes the size
152 // of the reorder buffer. To avoid problems, cap the amount of slots to
153 // the size of the reorder buffer.
154 Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
155 return AvailableSlots >= Quantity;
156 }
157
158 // Reserves a number of slots, and returns a new token.
159 unsigned reserveSlot(unsigned Index, unsigned NumMicroOps);
160
161 /// Retires instructions in program order.
162 void cycleEvent();
163
164 void onInstructionExecuted(unsigned TokenID);
165
166#ifndef NDEBUG
167 void dump() const;
168#endif
169};
170
171// \brief Implements the hardware dispatch logic.
172//
173// This class is responsible for the dispatch stage, in which instructions are
174// dispatched in groups to the Scheduler. An instruction can be dispatched if
175// functional units are available.
176// To be more specific, an instruction can be dispatched to the Scheduler if:
177// 1) There are enough entries in the reorder buffer (implemented by class
178// RetireControlUnit) to accomodate all opcodes.
179// 2) There are enough temporaries to rename output register operands.
180// 3) There are enough entries available in the used buffered resource(s).
181//
182// The number of micro opcodes that can be dispatched in one cycle is limited by
183// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when
184// processor resources are not available (i.e. at least one of the
185// abovementioned checks fails). Dispatch stall events are counted during the
186// entire execution of the code, and displayed by the performance report when
187// flag '-verbose' is specified.
188//
189// If the number of micro opcodes of an instruction is bigger than
190// DispatchWidth, then it can only be dispatched at the beginning of one cycle.
191// The DispatchUnit will still have to wait for a number of cycles (depending on
192// the DispatchWidth and the number of micro opcodes) before it can serve other
193// instructions.
194class DispatchUnit {
195 unsigned DispatchWidth;
196 unsigned AvailableEntries;
197 unsigned CarryOver;
198 Scheduler *SC;
199
200 std::unique_ptr<RegisterFile> RAT;
201 std::unique_ptr<RetireControlUnit> RCU;
202 Backend *Owner;
203
204 /// Dispatch stall event identifiers.
205 ///
206 /// The naming convention is:
207 /// * Event names starts with the "DS_" prefix
208 /// * For dynamic dispatch stalls, the "DS_" prefix is followed by the
209 /// the unavailable resource/functional unit acronym (example: RAT)
210 /// * The last substring is the event reason (example: REG_UNAVAILABLE means
211 /// that register renaming couldn't find enough spare registers in the
212 /// register file).
213 ///
214 /// List of acronyms used for processor resoures:
215 /// RAT - Register Alias Table (used by the register renaming logic)
216 /// RCU - Retire Control Unit
217 /// SQ - Scheduler's Queue
218 /// LDQ - Load Queue
219 /// STQ - Store Queue
220 enum {
221 DS_RAT_REG_UNAVAILABLE,
222 DS_RCU_TOKEN_UNAVAILABLE,
223 DS_SQ_TOKEN_UNAVAILABLE,
224 DS_LDQ_TOKEN_UNAVAILABLE,
225 DS_STQ_TOKEN_UNAVAILABLE,
226 DS_DISPATCH_GROUP_RESTRICTION,
227 DS_LAST
228 };
229
230 // The DispatchUnit track dispatch stall events caused by unavailable
231 // of hardware resources. Events are classified based on the stall kind;
232 // so we have a counter for every source of dispatch stall. Counters are
233 // stored into a vector `DispatchStall` which is always of size DS_LAST.
234 std::vector<unsigned> DispatchStalls;
235
Andrea Di Biagioaf904b92018-03-15 16:13:12 +0000236 bool checkRAT(const Instruction &Desc);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000237 bool checkRCU(const InstrDesc &Desc);
238 bool checkScheduler(const InstrDesc &Desc);
239
Andrea Di Biagio4732d43ca2018-03-14 14:57:23 +0000240 void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000241 void notifyInstructionDispatched(unsigned IID);
242
243public:
244 DispatchUnit(Backend *B, const llvm::MCRegisterInfo &MRI,
245 unsigned MicroOpBufferSize, unsigned RegisterFileSize,
246 unsigned MaxRetirePerCycle, unsigned MaxDispatchWidth,
247 Scheduler *Sched)
248 : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
249 CarryOver(0U), SC(Sched),
250 RAT(llvm::make_unique<RegisterFile>(MRI, RegisterFileSize)),
251 RCU(llvm::make_unique<RetireControlUnit>(MicroOpBufferSize,
252 MaxRetirePerCycle, this)),
253 Owner(B), DispatchStalls(DS_LAST, 0) {}
254
255 unsigned getDispatchWidth() const { return DispatchWidth; }
256
257 bool isAvailable(unsigned NumEntries) const {
258 return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth;
259 }
260
261 bool isRCUEmpty() const { return RCU->isEmpty(); }
262
Andrea Di Biagioaf904b92018-03-15 16:13:12 +0000263 bool canDispatch(const Instruction &Inst) {
264 const InstrDesc &Desc = Inst.getDesc();
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000265 assert(isAvailable(Desc.NumMicroOps));
Andrea Di Biagioaf904b92018-03-15 16:13:12 +0000266 return checkRCU(Desc) && checkRAT(Inst) && checkScheduler(Desc);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000267 }
268
Andrea Di Biagio4732d43ca2018-03-14 14:57:23 +0000269 unsigned dispatch(unsigned IID, Instruction *NewInst,
270 const llvm::MCSubtargetInfo &STI);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000271
272 void collectWrites(llvm::SmallVectorImpl<WriteState *> &Vec,
273 unsigned RegID) const {
274 return RAT->collectWrites(Vec, RegID);
275 }
276 unsigned getNumRATStalls() const {
277 return DispatchStalls[DS_RAT_REG_UNAVAILABLE];
278 }
279 unsigned getNumRCUStalls() const {
280 return DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE];
281 }
282 unsigned getNumSQStalls() const {
283 return DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE];
284 }
285 unsigned getNumLDQStalls() const {
286 return DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE];
287 }
288 unsigned getNumSTQStalls() const {
289 return DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE];
290 }
291 unsigned getNumDispatchGroupStalls() const {
292 return DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION];
293 }
294 unsigned getMaxUsedRegisterMappings() const {
295 return RAT->getMaxUsedRegisterMappings();
296 }
297 unsigned getTotalRegisterMappingsCreated() const {
298 return RAT->getTotalRegisterMappingsCreated();
299 }
300 void addNewRegisterMapping(WriteState &WS) { RAT->addRegisterMapping(WS); }
301
302 void cycleEvent(unsigned Cycle) {
303 RCU->cycleEvent();
304 AvailableEntries =
305 CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver;
306 CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U;
307 }
308
309 void notifyInstructionRetired(unsigned Index);
310
311 void onInstructionExecuted(unsigned TokenID) {
312 RCU->onInstructionExecuted(TokenID);
313 }
314
315 void invalidateRegisterMappings(const Instruction &Inst);
316#ifndef NDEBUG
317 void dump() const;
318#endif
319};
320
321} // namespace mca
322
323#endif