blob: ba202e3cbf6300f3e609f702680e28333c77faaf [file] [log] [blame]
//===-- SIInsertWaits.cpp - Insert wait instructions ----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Insert wait instructions for memory reads and writes.
12///
13/// Memory reads and writes are issued asynchronously, so we need to insert
14/// S_WAITCNT instructions when we want to access any of their results or
15/// overwrite any register that's used asynchronously.
16//
17//===----------------------------------------------------------------------===//
18
19#include "AMDGPU.h"
20#include "SIInstrInfo.h"
21#include "SIMachineFunctionInfo.h"
22#include "llvm/CodeGen/MachineFunction.h"
23#include "llvm/CodeGen/MachineFunctionPass.h"
24#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26
27using namespace llvm;
28
29namespace {
30
/// \brief One value per hardware counter.
///
/// The same three values can be reached by name through \c Named or by
/// index through \c Array (index 0 = VM, 1 = EXP, 2 = LGKM), which lets the
/// pass loop over all counters uniformly.
union Counters {
  struct {
    unsigned VM;
    unsigned EXP;
    unsigned LGKM;
  } Named;
  unsigned Array[3];
};

/// \brief Counter snapshot for each of 512 register slots.
typedef Counters RegCounters[512];

/// \brief Half-open range [first, second) of register slots.
typedef std::pair<unsigned, unsigned> RegInterval;
44
45class SIInsertWaits : public MachineFunctionPass {
46
47private:
48 static char ID;
49 const SIInstrInfo *TII;
Bill Wendlingb5632b52013-06-07 20:28:55 +000050 const SIRegisterInfo *TRI;
Tom Stellard82d3d452013-01-18 21:15:53 +000051 const MachineRegisterInfo *MRI;
52
53 /// \brief Constant hardware limits
54 static const Counters WaitCounts;
55
56 /// \brief Constant zero value
57 static const Counters ZeroCounts;
58
59 /// \brief Counter values we have already waited on.
60 Counters WaitedOn;
61
62 /// \brief Counter values for last instruction issued.
63 Counters LastIssued;
64
65 /// \brief Registers used by async instructions.
66 RegCounters UsedRegs;
67
68 /// \brief Registers defined by async instructions.
69 RegCounters DefinedRegs;
70
71 /// \brief Different export instruction types seen since last wait.
72 unsigned ExpInstrTypesSeen;
73
74 /// \brief Get increment/decrement amount for this instruction.
75 Counters getHwCounts(MachineInstr &MI);
76
77 /// \brief Is operand relevant for async execution?
78 bool isOpRelevant(MachineOperand &Op);
79
80 /// \brief Get register interval an operand affects.
81 RegInterval getRegInterval(MachineOperand &Op);
82
83 /// \brief Handle instructions async components
84 void pushInstruction(MachineInstr &MI);
85
86 /// \brief Insert the actual wait instruction
87 bool insertWait(MachineBasicBlock &MBB,
88 MachineBasicBlock::iterator I,
89 const Counters &Counts);
90
Christian Konig9ff8dc82013-03-01 09:46:04 +000091 /// \brief Do we need def2def checks?
92 bool unorderedDefines(MachineInstr &MI);
93
Tom Stellard82d3d452013-01-18 21:15:53 +000094 /// \brief Resolve all operand dependencies to counter requirements
95 Counters handleOperands(MachineInstr &MI);
96
97public:
98 SIInsertWaits(TargetMachine &tm) :
99 MachineFunctionPass(ID),
Bill Wendlingb5632b52013-06-07 20:28:55 +0000100 TII(0),
Evgeniy Stepanov8f3562b2013-08-07 07:47:41 +0000101 TRI(0),
102 ExpInstrTypesSeen(0) { }
Tom Stellard82d3d452013-01-18 21:15:53 +0000103
104 virtual bool runOnMachineFunction(MachineFunction &MF);
105
106 const char *getPassName() const {
107 return "SI insert wait instructions";
108 }
109
110};
111
112} // End anonymous namespace
113
114char SIInsertWaits::ID = 0;
115
116const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
117const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
118
119FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
120 return new SIInsertWaits(tm);
121}
122
123Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
124
125 uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
126 Counters Result;
127
128 Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
129
130 // Only consider stores or EXP for EXP_CNT
131 Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
Christian Konig9ff8dc82013-03-01 09:46:04 +0000132 (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
Tom Stellard82d3d452013-01-18 21:15:53 +0000133
134 // LGKM may uses larger values
135 if (TSFlags & SIInstrFlags::LGKM_CNT) {
136
137 MachineOperand &Op = MI.getOperand(0);
Michel Danzer7740daa2013-07-10 16:36:43 +0000138 if (!Op.isReg())
139 Op = MI.getOperand(1);
Tom Stellard82d3d452013-01-18 21:15:53 +0000140 assert(Op.isReg() && "First LGKM operand must be a register!");
141
142 unsigned Reg = Op.getReg();
Bill Wendlingb5632b52013-06-07 20:28:55 +0000143 unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
Tom Stellard82d3d452013-01-18 21:15:53 +0000144 Result.Named.LGKM = Size > 4 ? 2 : 1;
145
146 } else {
147 Result.Named.LGKM = 0;
148 }
149
150 return Result;
151}
152
153bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
154
155 // Constants are always irrelevant
156 if (!Op.isReg())
157 return false;
158
159 // Defines are always relevant
160 if (Op.isDef())
161 return true;
162
163 // For exports all registers are relevant
164 MachineInstr &MI = *Op.getParent();
165 if (MI.getOpcode() == AMDGPU::EXP)
166 return true;
167
168 // For stores the stored value is also relevant
169 if (!MI.getDesc().mayStore())
170 return false;
171
172 for (MachineInstr::mop_iterator I = MI.operands_begin(),
173 E = MI.operands_end(); I != E; ++I) {
174
175 if (I->isReg() && I->isUse())
176 return Op.isIdenticalTo(*I);
177 }
178
179 return false;
180}
181
182RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
183
184 if (!Op.isReg())
185 return std::make_pair(0, 0);
186
187 unsigned Reg = Op.getReg();
Bill Wendlingb5632b52013-06-07 20:28:55 +0000188 unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
Tom Stellard82d3d452013-01-18 21:15:53 +0000189
190 assert(Size >= 4);
191
192 RegInterval Result;
Bill Wendlingb5632b52013-06-07 20:28:55 +0000193 Result.first = TRI->getEncodingValue(Reg);
Tom Stellard82d3d452013-01-18 21:15:53 +0000194 Result.second = Result.first + Size / 4;
195
196 return Result;
197}
198
199void SIInsertWaits::pushInstruction(MachineInstr &MI) {
200
201 // Get the hardware counter increments and sum them up
202 Counters Increment = getHwCounts(MI);
203 unsigned Sum = 0;
204
205 for (unsigned i = 0; i < 3; ++i) {
206 LastIssued.Array[i] += Increment.Array[i];
207 Sum += Increment.Array[i];
208 }
209
210 // If we don't increase anything then that's it
211 if (Sum == 0)
212 return;
213
214 // Remember which export instructions we have seen
215 if (Increment.Named.EXP) {
216 ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
217 }
218
219 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
220
221 MachineOperand &Op = MI.getOperand(i);
222 if (!isOpRelevant(Op))
223 continue;
224
225 RegInterval Interval = getRegInterval(Op);
226 for (unsigned j = Interval.first; j < Interval.second; ++j) {
227
228 // Remember which registers we define
229 if (Op.isDef())
230 DefinedRegs[j] = LastIssued;
231
232 // and which one we are using
233 if (Op.isUse())
234 UsedRegs[j] = LastIssued;
235 }
236 }
237}
238
239bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
240 MachineBasicBlock::iterator I,
241 const Counters &Required) {
242
243 // End of program? No need to wait on anything
244 if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
245 return false;
246
247 // Figure out if the async instructions execute in order
248 bool Ordered[3];
249
250 // VM_CNT is always ordered
251 Ordered[0] = true;
252
253 // EXP_CNT is unordered if we have both EXP & VM-writes
254 Ordered[1] = ExpInstrTypesSeen == 3;
255
256 // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
257 Ordered[2] = false;
258
259 // The values we are going to put into the S_WAITCNT instruction
260 Counters Counts = WaitCounts;
261
262 // Do we really need to wait?
263 bool NeedWait = false;
264
265 for (unsigned i = 0; i < 3; ++i) {
266
267 if (Required.Array[i] <= WaitedOn.Array[i])
268 continue;
269
270 NeedWait = true;
271
272 if (Ordered[i]) {
273 unsigned Value = LastIssued.Array[i] - Required.Array[i];
274
275 // adjust the value to the real hardware posibilities
276 Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
277
278 } else
279 Counts.Array[i] = 0;
280
281 // Remember on what we have waited on
282 WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
283 }
284
285 if (!NeedWait)
286 return false;
287
288 // Reset EXP_CNT instruction types
289 if (Counts.Named.EXP == 0)
290 ExpInstrTypesSeen = 0;
291
292 // Build the wait instruction
293 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
294 .addImm((Counts.Named.VM & 0xF) |
295 ((Counts.Named.EXP & 0x7) << 4) |
296 ((Counts.Named.LGKM & 0x7) << 8));
297
298 return true;
299}
300
301/// \brief helper function for handleOperands
302static void increaseCounters(Counters &Dst, const Counters &Src) {
303
304 for (unsigned i = 0; i < 3; ++i)
305 Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
306}
307
308Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
309
310 Counters Result = ZeroCounts;
311
312 // For each register affected by this
313 // instruction increase the result sequence
314 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
315
316 MachineOperand &Op = MI.getOperand(i);
317 RegInterval Interval = getRegInterval(Op);
318 for (unsigned j = Interval.first; j < Interval.second; ++j) {
319
Christian Konig9ff8dc82013-03-01 09:46:04 +0000320 if (Op.isDef()) {
Tom Stellard82d3d452013-01-18 21:15:53 +0000321 increaseCounters(Result, UsedRegs[j]);
Christian Konigae621a22013-03-18 11:33:45 +0000322 increaseCounters(Result, DefinedRegs[j]);
Christian Konig9ff8dc82013-03-01 09:46:04 +0000323 }
Tom Stellard82d3d452013-01-18 21:15:53 +0000324
325 if (Op.isUse())
326 increaseCounters(Result, DefinedRegs[j]);
327 }
328 }
329
330 return Result;
331}
332
333bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
Tom Stellard82d3d452013-01-18 21:15:53 +0000334 bool Changes = false;
335
Bill Wendlingb5632b52013-06-07 20:28:55 +0000336 TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
337 TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
338
Tom Stellard82d3d452013-01-18 21:15:53 +0000339 MRI = &MF.getRegInfo();
340
341 WaitedOn = ZeroCounts;
342 LastIssued = ZeroCounts;
343
344 memset(&UsedRegs, 0, sizeof(UsedRegs));
345 memset(&DefinedRegs, 0, sizeof(DefinedRegs));
346
347 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
348 BI != BE; ++BI) {
349
350 MachineBasicBlock &MBB = *BI;
351 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
352 I != E; ++I) {
353
354 Changes |= insertWait(MBB, I, handleOperands(*I));
355 pushInstruction(*I);
356 }
357
358 // Wait for everything at the end of the MBB
359 Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
360 }
361
362 return Changes;
363}