//===----------------------- Dispatch.h -------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements classes that are used to model register files,
/// reorder buffers and the hardware dispatch logic.
///
//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_H
17#define LLVM_TOOLS_LLVM_MCA_DISPATCH_H
18
#include "Instruction.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <map>
#include <memory>
#include <vector>
23
24namespace mca {
25
// Forward declarations: within this header these types are only referred to
// through pointers or references before (or without) their full definition.
class WriteState;
class DispatchUnit;
class Scheduler;
class Backend;
30
Andrea Di Biagioe64f3b12018-03-18 15:33:27 +000031/// \brief Manages hardware register files, and tracks data dependencies
32/// between registers.
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +000033class RegisterFile {
34 const llvm::MCRegisterInfo &MRI;
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +000035
Andrea Di Biagioe64f3b12018-03-18 15:33:27 +000036 // Each register file is described by an instance of RegisterMappingTracker.
37 // RegisterMappingTracker tracks the number of register mappings dynamically
38 // allocated during the execution.
39 struct RegisterMappingTracker {
40 // Total number of register mappings that are available for register
41 // renaming. A value of zero for this field means: this register file has
42 // an unbounded number of registers.
43 const unsigned TotalMappings;
44 // Number of mappings that are currently in use.
45 unsigned NumUsedMappings;
46 // Maximum number of register mappings used.
47 unsigned MaxUsedMappings;
48 // Total number of mappings allocated during the entire execution.
49 unsigned TotalMappingsCreated;
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +000050
Andrea Di Biagioe64f3b12018-03-18 15:33:27 +000051 RegisterMappingTracker(unsigned NumMappings)
52 : TotalMappings(NumMappings), NumUsedMappings(0), MaxUsedMappings(0),
53 TotalMappingsCreated(0) {}
54 };
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +000055
Andrea Di Biagioe64f3b12018-03-18 15:33:27 +000056 // This is where information related to the various register files is kept.
57 // This set always contains at least one register file at index #0. That
58 // register file "sees" all the physical registers declared by the target, and
59 // (by default) it allows an unbound number of mappings.
60 // Users can limit the number of mappings that can be created by register file
61 // #0 through the command line flag `-register-file-size`.
62 llvm::SmallVector<RegisterMappingTracker, 4> RegisterFiles;
63
64 // RegisterMapping objects are mainly used to track physical register
65 // definitions. A WriteState object describes a register definition, and it is
66 // used to track RAW dependencies (see Instruction.h). A RegisterMapping
67 // object also specifies the set of register files. The mapping between
68 // physreg and register files is done using a "register file mask".
69 //
70 // A register file mask identifies a set of register files. Each bit of the
71 // mask representation references a specific register file.
72 // For example:
73 // 0b0001 --> Register file #0
74 // 0b0010 --> Register file #1
75 // 0b0100 --> Register file #2
76 //
77 // Note that this implementation allows register files to overlap.
78 // The maximum number of register files allowed by this implementation is 32.
79 using RegisterMapping = std::pair<WriteState *, unsigned>;
80
81 // This map contains one entry for each physical register defined by the
82 // processor scheduling model.
83 std::vector<RegisterMapping> RegisterMappings;
84
85 // This method creates a new RegisterMappingTracker for a register file that
86 // contains all the physical registers specified by the register classes in
87 // the 'RegisterClasses' set.
88 //
89 // The long term goal is to let scheduling models optionally describe register
90 // files via tablegen definitions. This is still a work in progress.
91 // For example, here is how a tablegen definition for a x86 FP register file
92 // that features AVX might look like:
93 //
94 // def FPRegisterFile : RegisterFile<[VR128RegClass, VR256RegClass], 60>
95 //
96 // Here FPRegisterFile contains all the registers defined by register class
97 // VR128RegClass and VR256RegClass. FPRegisterFile implements 60
98 // registers which can be used for register renaming purpose.
99 //
100 // The list of register classes is then converted by the tablegen backend into
101 // a list of register class indices. That list, along with the number of
102 // available mappings, is then used to create a new RegisterMappingTracker.
103 void addRegisterFile(llvm::ArrayRef<unsigned> RegisterClasses,
104 unsigned NumTemps);
105
106 // Allocates a new register mapping in every register file specified by the
107 // register file mask. This method is called from addRegisterMapping.
108 void createNewMappings(unsigned RegisterFileMask);
109
110 // Removes a previously allocated mapping from each register file in the
111 // RegisterFileMask set. This method is called from invalidateRegisterMapping.
112 void removeMappings(unsigned RegisterFileMask);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000113
114public:
Andrea Di Biagioe64f3b12018-03-18 15:33:27 +0000115 RegisterFile(const llvm::MCRegisterInfo &mri, unsigned TempRegs = 0)
116 : MRI(mri), RegisterMappings(MRI.getNumRegs(), {nullptr, 0U}) {
117 addRegisterFile({}, TempRegs);
118 // TODO: teach the scheduling models how to specify multiple register files.
119 }
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000120
121 // Creates a new register mapping for RegID.
Andrea Di Biagioe64f3b12018-03-18 15:33:27 +0000122 // This reserves a microarchitectural register in every register file that
123 // contains RegID.
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000124 void addRegisterMapping(WriteState &WS);
125
126 // Invalidates register mappings associated to the input WriteState object.
Andrea Di Biagioe64f3b12018-03-18 15:33:27 +0000127 // This releases previously allocated mappings for the physical register
128 // associated to the WriteState.
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000129 void invalidateRegisterMapping(const WriteState &WS);
130
Andrea Di Biagioe64f3b12018-03-18 15:33:27 +0000131 // Checks if there are enough microarchitectural registers in the register
132 // files. Returns a "response mask" where each bit is the response from a
133 // RegisterMappingTracker.
134 // For example: if all register files are available, then the response mask
135 // is a bitmask of all zeroes. If Instead register file #1 is not available,
136 // then the response mask is 0b10.
137 unsigned isAvailable(const llvm::ArrayRef<unsigned> Regs) const;
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000138 void collectWrites(llvm::SmallVectorImpl<WriteState *> &Writes,
139 unsigned RegID) const;
140 void updateOnRead(ReadState &RS, unsigned RegID);
Andrea Di Biagioe64f3b12018-03-18 15:33:27 +0000141 unsigned getMaxUsedRegisterMappings(unsigned RegisterFileIndex) const {
142 assert(RegisterFileIndex < getNumRegisterFiles() &&
143 "Invalid register file index!");
144 return RegisterFiles[RegisterFileIndex].MaxUsedMappings;
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000145 }
Andrea Di Biagioe64f3b12018-03-18 15:33:27 +0000146 unsigned getTotalRegisterMappingsCreated(unsigned RegisterFileIndex) const {
147 assert(RegisterFileIndex < getNumRegisterFiles() &&
148 "Invalid register file index!");
149 return RegisterFiles[RegisterFileIndex].TotalMappingsCreated;
150 }
151 unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000152
153#ifndef NDEBUG
154 void dump() const;
155#endif
156};
157
/// \brief tracks which instructions are in-flight (i.e. dispatched but not
/// retired) in the OoO backend.
///
/// This class checks on every cycle if/which instructions can be retired.
/// Instructions are retired in program order.
/// In the event of instruction retired, the DispatchUnit object that owns
/// this RetireControlUnit gets notified.
/// On instruction retired, register updates are all architecturally
/// committed, and any temporary registers originally allocated for the
/// retired instruction are freed.
struct RetireControlUnit {
  // A "token" (object of class RUToken) is created by the retire unit for
  // every instruction dispatched to the schedulers. Flag 'Executed' is used to
  // quickly check if an instruction has reached the write-back stage. A token
  // also carries information related to the number of entries consumed by the
  // instruction in the reorder buffer. The idea is that those entries will
  // become available again once the instruction is retired. On every cycle,
  // the RCU (Retire Control Unit) scans the tokens in search of instructions
  // that are ready to retire. Instructions are retired in program order.
  // Only 'Executed' instructions are eligible for retire.
  // Note that the size of the reorder buffer is defined by the scheduling
  // model via field 'NumMicroOpBufferSize'.
  struct RUToken {
    unsigned Index;    // Instruction index.
    unsigned NumSlots; // Slots reserved to this instruction.
    bool Executed;     // True if the instruction is past the WB stage.
  };

private:
  unsigned NextAvailableSlotIdx;
  unsigned CurrentInstructionSlotIdx;
  unsigned AvailableSlots;
  unsigned MaxRetirePerCycle; // 0 means no limit.
  std::vector<RUToken> Queue;
  // Non-owning pointer to the dispatch unit that is notified on retirement.
  // The elaborated type specifier keeps this declaration valid on its own:
  // only an incomplete DispatchUnit type is required here.
  class DispatchUnit *Owner;

public:
  // Creates a retire unit with a reorder buffer of NumSlots entries (must be
  // non-zero), all of them initially available. RPC is the maximum number of
  // instructions retired per cycle (0 means no limit), and DU is the owning
  // dispatch unit.
  RetireControlUnit(unsigned NumSlots, unsigned RPC, DispatchUnit *DU)
      : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
        AvailableSlots(NumSlots), MaxRetirePerCycle(RPC), Owner(DU) {
    assert(NumSlots && "Expected at least one slot!");
    Queue.resize(NumSlots);
  }

  // True if no reorder buffer slot is available.
  bool isFull() const { return !AvailableSlots; }

  // True if every reorder buffer slot is available.
  bool isEmpty() const { return AvailableSlots == Queue.size(); }

  // True if at least Quantity slots are available.
  bool isAvailable(unsigned Quantity = 1) const {
    // Some instructions may declare a number of uOps which exceeds the size
    // of the reorder buffer. To avoid problems, cap the amount of slots to
    // the size of the reorder buffer.
    Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
    return AvailableSlots >= Quantity;
  }

  // Reserves a number of slots, and returns a new token.
  unsigned reserveSlot(unsigned Index, unsigned NumMicroOps);

  /// Retires instructions in program order.
  void cycleEvent();

  // Notifies the retire unit that the instruction identified by TokenID has
  // been executed.
  void onInstructionExecuted(unsigned TokenID);

#ifndef NDEBUG
  // Debug-only dump of the state of this object.
  void dump() const;
#endif
};
224
225// \brief Implements the hardware dispatch logic.
226//
227// This class is responsible for the dispatch stage, in which instructions are
228// dispatched in groups to the Scheduler. An instruction can be dispatched if
229// functional units are available.
230// To be more specific, an instruction can be dispatched to the Scheduler if:
231// 1) There are enough entries in the reorder buffer (implemented by class
232// RetireControlUnit) to accomodate all opcodes.
233// 2) There are enough temporaries to rename output register operands.
234// 3) There are enough entries available in the used buffered resource(s).
235//
236// The number of micro opcodes that can be dispatched in one cycle is limited by
237// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when
238// processor resources are not available (i.e. at least one of the
239// abovementioned checks fails). Dispatch stall events are counted during the
240// entire execution of the code, and displayed by the performance report when
241// flag '-verbose' is specified.
242//
243// If the number of micro opcodes of an instruction is bigger than
244// DispatchWidth, then it can only be dispatched at the beginning of one cycle.
245// The DispatchUnit will still have to wait for a number of cycles (depending on
246// the DispatchWidth and the number of micro opcodes) before it can serve other
247// instructions.
248class DispatchUnit {
249 unsigned DispatchWidth;
250 unsigned AvailableEntries;
251 unsigned CarryOver;
252 Scheduler *SC;
253
254 std::unique_ptr<RegisterFile> RAT;
255 std::unique_ptr<RetireControlUnit> RCU;
256 Backend *Owner;
257
Andrea Di Biagio91ab2ee2018-03-19 13:23:07 +0000258 bool checkRAT(unsigned Index, const Instruction &Desc);
259 bool checkRCU(unsigned Index, const InstrDesc &Desc);
260 bool checkScheduler(unsigned Index, const InstrDesc &Desc);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000261
Andrea Di Biagio4732d43ca2018-03-14 14:57:23 +0000262 void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000263 void notifyInstructionDispatched(unsigned IID);
264
265public:
266 DispatchUnit(Backend *B, const llvm::MCRegisterInfo &MRI,
267 unsigned MicroOpBufferSize, unsigned RegisterFileSize,
268 unsigned MaxRetirePerCycle, unsigned MaxDispatchWidth,
269 Scheduler *Sched)
270 : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
271 CarryOver(0U), SC(Sched),
272 RAT(llvm::make_unique<RegisterFile>(MRI, RegisterFileSize)),
273 RCU(llvm::make_unique<RetireControlUnit>(MicroOpBufferSize,
274 MaxRetirePerCycle, this)),
Andrea Di Biagio91ab2ee2018-03-19 13:23:07 +0000275 Owner(B) {}
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000276
277 unsigned getDispatchWidth() const { return DispatchWidth; }
278
279 bool isAvailable(unsigned NumEntries) const {
280 return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth;
281 }
282
283 bool isRCUEmpty() const { return RCU->isEmpty(); }
284
Andrea Di Biagio91ab2ee2018-03-19 13:23:07 +0000285 bool canDispatch(unsigned Index, const Instruction &Inst) {
Andrea Di Biagioaf904b92018-03-15 16:13:12 +0000286 const InstrDesc &Desc = Inst.getDesc();
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000287 assert(isAvailable(Desc.NumMicroOps));
Andrea Di Biagio91ab2ee2018-03-19 13:23:07 +0000288 return checkRCU(Index, Desc) && checkRAT(Index, Inst) &&
289 checkScheduler(Index, Desc);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000290 }
291
Andrea Di Biagio4732d43ca2018-03-14 14:57:23 +0000292 unsigned dispatch(unsigned IID, Instruction *NewInst,
293 const llvm::MCSubtargetInfo &STI);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000294
295 void collectWrites(llvm::SmallVectorImpl<WriteState *> &Vec,
296 unsigned RegID) const {
297 return RAT->collectWrites(Vec, RegID);
298 }
Andrea Di Biagioe64f3b12018-03-18 15:33:27 +0000299 unsigned getMaxUsedRegisterMappings(unsigned RegFileIndex = 0) const {
300 return RAT->getMaxUsedRegisterMappings(RegFileIndex);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000301 }
Andrea Di Biagioe64f3b12018-03-18 15:33:27 +0000302 unsigned getTotalRegisterMappingsCreated(unsigned RegFileIndex = 0) const {
303 return RAT->getTotalRegisterMappingsCreated(RegFileIndex);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000304 }
305 void addNewRegisterMapping(WriteState &WS) { RAT->addRegisterMapping(WS); }
306
307 void cycleEvent(unsigned Cycle) {
308 RCU->cycleEvent();
309 AvailableEntries =
310 CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver;
311 CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U;
312 }
313
314 void notifyInstructionRetired(unsigned Index);
315
Andrea Di Biagio91ab2ee2018-03-19 13:23:07 +0000316 void notifyDispatchStall(unsigned Index, unsigned EventType);
317
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000318 void onInstructionExecuted(unsigned TokenID) {
319 RCU->onInstructionExecuted(TokenID);
320 }
321
322 void invalidateRegisterMappings(const Instruction &Inst);
323#ifndef NDEBUG
324 void dump() const;
325#endif
326};
327
328} // namespace mca
329
330#endif