blob: 202a1e9ed8ac0361c6795d3aa5a32db8e2f3b803 [file] [log] [blame]
//===-- SIInsertWaits.cpp - Insert Wait Instructions ----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Insert wait instructions for memory reads and writes.
12///
13/// Memory reads and writes are issued asynchronously, so we need to insert
14/// S_WAITCNT instructions when we want to access any of their results or
15/// overwrite any register that's used asynchronously.
16//
17//===----------------------------------------------------------------------===//
18
19#include "AMDGPU.h"
Eric Christopherd9134482014-08-04 21:25:23 +000020#include "AMDGPUSubtarget.h"
Matt Arsenault9783e002014-09-29 15:50:26 +000021#include "SIDefines.h"
Matt Arsenault1fd0c622014-09-29 15:53:15 +000022#include "SIInstrInfo.h"
Tom Stellardc4cabef2013-01-18 21:15:53 +000023#include "SIMachineFunctionInfo.h"
Konstantin Zhuravlyov836cbff2016-09-30 17:01:40 +000024#include "Utils/AMDGPUBaseInfo.h"
Tom Stellardc4cabef2013-01-18 21:15:53 +000025#include "llvm/CodeGen/MachineFunction.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29
Tom Stellard6e1967e2016-02-05 17:42:38 +000030#define DEBUG_TYPE "si-insert-waits"
31
Tom Stellardc4cabef2013-01-18 21:15:53 +000032using namespace llvm;
Konstantin Zhuravlyov836cbff2016-09-30 17:01:40 +000033using namespace llvm::AMDGPU;
Tom Stellardc4cabef2013-01-18 21:15:53 +000034
35namespace {
36
/// \brief One slot per hardware wait counter.
///
/// The same three values can be addressed by name (Named.VM, Named.EXP,
/// Named.LGKM) or by index (Array[0..2]) when looping over all counters.
union Counters {
  struct {
    unsigned VM;
    unsigned EXP;
    unsigned LGKM;
  } Named;
  unsigned Array[3];
};
47
/// \brief Coarse classification of the most recently pushed instruction.
enum InstType {
  OTHER,
  SMEM,
  VMEM
};
53
Tom Stellardc4cabef2013-01-18 21:15:53 +000054typedef Counters RegCounters[512];
55typedef std::pair<unsigned, unsigned> RegInterval;
56
57class SIInsertWaits : public MachineFunctionPass {
58
59private:
Matt Arsenault43e92fe2016-06-24 06:30:11 +000060 const SISubtarget *ST;
Tom Stellardc4cabef2013-01-18 21:15:53 +000061 const SIInstrInfo *TII;
Bill Wendling37e9adb2013-06-07 20:28:55 +000062 const SIRegisterInfo *TRI;
Tom Stellardc4cabef2013-01-18 21:15:53 +000063 const MachineRegisterInfo *MRI;
Konstantin Zhuravlyov836cbff2016-09-30 17:01:40 +000064 IsaVersion IV;
Tom Stellardc4cabef2013-01-18 21:15:53 +000065
Tom Stellardc4cabef2013-01-18 21:15:53 +000066 /// \brief Constant zero value
67 static const Counters ZeroCounts;
68
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +000069 /// \brief Hardware limits
70 Counters HardwareLimits;
71
Tom Stellardc4cabef2013-01-18 21:15:53 +000072 /// \brief Counter values we have already waited on.
73 Counters WaitedOn;
74
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +000075 /// \brief Counter values that we must wait on before the next counter
76 /// increase.
77 Counters DelayedWaitOn;
78
Tom Stellardc4cabef2013-01-18 21:15:53 +000079 /// \brief Counter values for last instruction issued.
80 Counters LastIssued;
81
82 /// \brief Registers used by async instructions.
83 RegCounters UsedRegs;
84
85 /// \brief Registers defined by async instructions.
86 RegCounters DefinedRegs;
87
88 /// \brief Different export instruction types seen since last wait.
89 unsigned ExpInstrTypesSeen;
90
Marek Olsakfa58e5e2014-12-07 17:17:43 +000091 /// \brief Type of the last opcode.
92 InstType LastOpcodeType;
93
Marek Olsak1bd24632015-02-03 17:37:52 +000094 bool LastInstWritesM0;
95
Tom Stellard6695ba02016-10-28 23:53:48 +000096 /// Whether or not we have flat operations outstanding.
97 bool IsFlatOutstanding;
98
Marek Olsak8e9cc632016-01-13 17:23:09 +000099 /// \brief Whether the machine function returns void
100 bool ReturnsVoid;
101
Tom Stellard30961762016-02-08 19:49:20 +0000102 /// Whether the VCCZ bit is possibly corrupt
103 bool VCCZCorrupt;
104
Tom Stellardc4cabef2013-01-18 21:15:53 +0000105 /// \brief Get increment/decrement amount for this instruction.
106 Counters getHwCounts(MachineInstr &MI);
107
108 /// \brief Is operand relevant for async execution?
109 bool isOpRelevant(MachineOperand &Op);
110
111 /// \brief Get register interval an operand affects.
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000112 RegInterval getRegInterval(const TargetRegisterClass *RC,
113 const MachineOperand &Reg) const;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000114
115 /// \brief Handle instructions async components
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000116 void pushInstruction(MachineBasicBlock &MBB,
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000117 MachineBasicBlock::iterator I,
118 const Counters& Increment);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000119
120 /// \brief Insert the actual wait instruction
121 bool insertWait(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator I,
123 const Counters &Counts);
124
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000125 /// \brief Handle existing wait instructions (from intrinsics)
126 void handleExistingWait(MachineBasicBlock::iterator I);
127
Christian Konig862fd9f2013-03-01 09:46:04 +0000128 /// \brief Do we need def2def checks?
129 bool unorderedDefines(MachineInstr &MI);
130
Tom Stellardc4cabef2013-01-18 21:15:53 +0000131 /// \brief Resolve all operand dependencies to counter requirements
132 Counters handleOperands(MachineInstr &MI);
133
Marek Olsak1bd24632015-02-03 17:37:52 +0000134 /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
135 void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
136
Tom Stellard30961762016-02-08 19:49:20 +0000137 /// Return true if there are LGKM instrucitons that haven't been waited on
138 /// yet.
139 bool hasOutstandingLGKM() const;
140
Tom Stellardc4cabef2013-01-18 21:15:53 +0000141public:
Tom Stellard6e1967e2016-02-05 17:42:38 +0000142 static char ID;
143
144 SIInsertWaits() :
Tom Stellardc4cabef2013-01-18 21:15:53 +0000145 MachineFunctionPass(ID),
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000146 ST(nullptr),
Craig Topper062a2ba2014-04-25 05:30:21 +0000147 TII(nullptr),
148 TRI(nullptr),
Tom Stellard30961762016-02-08 19:49:20 +0000149 ExpInstrTypesSeen(0),
150 VCCZCorrupt(false) { }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000151
Craig Topper5656db42014-04-29 07:57:24 +0000152 bool runOnMachineFunction(MachineFunction &MF) override;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000153
Mehdi Amini117296c2016-10-01 02:56:57 +0000154 StringRef getPassName() const override {
Matt Arsenault0cb85172015-09-25 17:21:28 +0000155 return "SI insert wait instructions";
Tom Stellardc4cabef2013-01-18 21:15:53 +0000156 }
157
Matt Arsenault0cb85172015-09-25 17:21:28 +0000158 void getAnalysisUsage(AnalysisUsage &AU) const override {
159 AU.setPreservesCFG();
160 MachineFunctionPass::getAnalysisUsage(AU);
161 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000162};
163
164} // End anonymous namespace
165
Tom Stellard6e1967e2016-02-05 17:42:38 +0000166INITIALIZE_PASS_BEGIN(SIInsertWaits, DEBUG_TYPE,
167 "SI Insert Waits", false, false)
168INITIALIZE_PASS_END(SIInsertWaits, DEBUG_TYPE,
169 "SI Insert Waits", false, false)
170
Tom Stellardc4cabef2013-01-18 21:15:53 +0000171char SIInsertWaits::ID = 0;
172
Tom Stellard6e1967e2016-02-05 17:42:38 +0000173char &llvm::SIInsertWaitsID = SIInsertWaits::ID;
174
175FunctionPass *llvm::createSIInsertWaitsPass() {
176 return new SIInsertWaits();
177}
178
Tom Stellardc4cabef2013-01-18 21:15:53 +0000179const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
180
Matt Arsenault52f14ec2016-11-07 19:09:27 +0000181static bool readsVCCZ(const MachineInstr &MI) {
182 unsigned Opc = MI.getOpcode();
183 return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
184 !MI.getOperand(1).isUndef();
Tom Stellard30961762016-02-08 19:49:20 +0000185}
186
187bool SIInsertWaits::hasOutstandingLGKM() const {
188 return WaitedOn.Named.LGKM != LastIssued.Named.LGKM;
189}
Tom Stellardc4cabef2013-01-18 21:15:53 +0000190
191Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000192 uint64_t TSFlags = MI.getDesc().TSFlags;
Matt Arsenaulte66621b2015-09-24 19:52:27 +0000193 Counters Result = { { 0, 0, 0 } };
Tom Stellardc4cabef2013-01-18 21:15:53 +0000194
195 Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
196
197 // Only consider stores or EXP for EXP_CNT
Matt Arsenault7bee6ac2016-12-05 20:23:10 +0000198 Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT) && MI.mayStore();
Tom Stellardc4cabef2013-01-18 21:15:53 +0000199
200 // LGKM may uses larger values
201 if (TSFlags & SIInstrFlags::LGKM_CNT) {
202
Matt Arsenault3add6432015-10-20 04:35:43 +0000203 if (TII->isSMRD(MI)) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000204
Matt Arsenaulte66621b2015-09-24 19:52:27 +0000205 if (MI.getNumOperands() != 0) {
Matt Arsenaultb733f002015-10-01 22:40:35 +0000206 assert(MI.getOperand(0).isReg() &&
207 "First LGKM operand must be a register!");
Michel Danzer20680b12013-08-16 16:19:24 +0000208
Matt Arsenaulte66621b2015-09-24 19:52:27 +0000209 // XXX - What if this is a write into a super register?
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000210 const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0);
211 unsigned Size = RC->getSize();
Matt Arsenaulte66621b2015-09-24 19:52:27 +0000212 Result.Named.LGKM = Size > 4 ? 2 : 1;
213 } else {
214 // s_dcache_inv etc. do not have a a destination register. Assume we
215 // want a wait on these.
216 // XXX - What is the right value?
217 Result.Named.LGKM = 1;
218 }
Michel Danzer20680b12013-08-16 16:19:24 +0000219 } else {
220 // DS
221 Result.Named.LGKM = 1;
222 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000223
224 } else {
225 Result.Named.LGKM = 0;
226 }
227
228 return Result;
229}
230
231bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000232 // Constants are always irrelevant
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000233 if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
Tom Stellardc4cabef2013-01-18 21:15:53 +0000234 return false;
235
236 // Defines are always relevant
237 if (Op.isDef())
238 return true;
239
Matt Arsenault7bee6ac2016-12-05 20:23:10 +0000240 // For exports all registers are relevant.
241 // TODO: Skip undef/disabled registers.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000242 MachineInstr &MI = *Op.getParent();
Matt Arsenault7bee6ac2016-12-05 20:23:10 +0000243 if (TII->isEXP(MI))
Tom Stellardc4cabef2013-01-18 21:15:53 +0000244 return true;
245
246 // For stores the stored value is also relevant
247 if (!MI.getDesc().mayStore())
248 return false;
249
Tom Stellardb3931b82015-01-06 19:52:04 +0000250 // Check if this operand is the value being stored.
Tom Stellard2d26fe72016-02-19 15:33:13 +0000251 // Special case for DS/FLAT instructions, since the address
Tom Stellardb3931b82015-01-06 19:52:04 +0000252 // operand comes before the value operand and it may have
253 // multiple data operands.
254
Tom Stellard2d26fe72016-02-19 15:33:13 +0000255 if (TII->isDS(MI)) {
Tom Stellardb3931b82015-01-06 19:52:04 +0000256 MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
257 if (Data0 && Op.isIdenticalTo(*Data0))
258 return true;
259
260 MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
Matt Arsenault8226fc42016-03-02 23:00:21 +0000261 return Data1 && Op.isIdenticalTo(*Data1);
Tom Stellardb3931b82015-01-06 19:52:04 +0000262 }
263
Matt Arsenault97279a82016-11-29 19:30:44 +0000264 if (TII->isFLAT(MI)) {
265 MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
266 if (Data && Op.isIdenticalTo(*Data))
267 return true;
268 }
269
Tom Stellardb3931b82015-01-06 19:52:04 +0000270 // NOTE: This assumes that the value operand is before the
271 // address operand, and that there is only one value operand.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000272 for (MachineInstr::mop_iterator I = MI.operands_begin(),
273 E = MI.operands_end(); I != E; ++I) {
274
275 if (I->isReg() && I->isUse())
276 return Op.isIdenticalTo(*I);
277 }
278
279 return false;
280}
281
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000282RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
283 const MachineOperand &Reg) const {
284 unsigned Size = RC->getSize();
Tom Stellardc4cabef2013-01-18 21:15:53 +0000285 assert(Size >= 4);
286
287 RegInterval Result;
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000288 Result.first = TRI->getEncodingValue(Reg.getReg());
Tom Stellardc4cabef2013-01-18 21:15:53 +0000289 Result.second = Result.first + Size / 4;
290
291 return Result;
292}
293
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000294void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000295 MachineBasicBlock::iterator I,
296 const Counters &Increment) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000297
298 // Get the hardware counter increments and sum them up
Tom Stellardbd8a0852015-08-21 22:47:27 +0000299 Counters Limit = ZeroCounts;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000300 unsigned Sum = 0;
301
Tom Stellard6695ba02016-10-28 23:53:48 +0000302 if (TII->mayAccessFlatAddressSpace(*I))
303 IsFlatOutstanding = true;
304
Tom Stellardc4cabef2013-01-18 21:15:53 +0000305 for (unsigned i = 0; i < 3; ++i) {
306 LastIssued.Array[i] += Increment.Array[i];
Tom Stellardbd8a0852015-08-21 22:47:27 +0000307 if (Increment.Array[i])
308 Limit.Array[i] = LastIssued.Array[i];
Tom Stellardc4cabef2013-01-18 21:15:53 +0000309 Sum += Increment.Array[i];
310 }
311
312 // If we don't increase anything then that's it
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000313 if (Sum == 0) {
314 LastOpcodeType = OTHER;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000315 return;
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000316 }
317
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000318 if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Benjamin Kramerdf005cb2015-08-08 18:27:36 +0000319 // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000320 // or SMEM clause, respectively.
321 //
322 // The temporary workaround is to break the clauses with S_NOP.
323 //
324 // The proper solution would be to allocate registers such that all source
325 // and destination registers don't overlap, e.g. this is illegal:
326 // r0 = load r2
327 // r2 = load r0
Tom Stellard1f520e52016-05-02 17:39:06 +0000328 if (LastOpcodeType == VMEM && Increment.Named.VM) {
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000329 // Insert a NOP to break the clause.
330 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
331 .addImm(0);
Marek Olsak1bd24632015-02-03 17:37:52 +0000332 LastInstWritesM0 = false;
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000333 }
334
Matt Arsenault3add6432015-10-20 04:35:43 +0000335 if (TII->isSMRD(*I))
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000336 LastOpcodeType = SMEM;
337 else if (Increment.Named.VM)
338 LastOpcodeType = VMEM;
339 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000340
341 // Remember which export instructions we have seen
342 if (Increment.Named.EXP) {
Matt Arsenault7bee6ac2016-12-05 20:23:10 +0000343 ExpInstrTypesSeen |= TII->isEXP(*I) ? 1 : 2;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000344 }
345
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000346 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000347 MachineOperand &Op = I->getOperand(i);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000348 if (!isOpRelevant(Op))
349 continue;
350
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000351 const TargetRegisterClass *RC = TII->getOpRegClass(*I, i);
352 RegInterval Interval = getRegInterval(RC, Op);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000353 for (unsigned j = Interval.first; j < Interval.second; ++j) {
354
355 // Remember which registers we define
356 if (Op.isDef())
Tom Stellardbd8a0852015-08-21 22:47:27 +0000357 DefinedRegs[j] = Limit;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000358
359 // and which one we are using
360 if (Op.isUse())
Tom Stellardbd8a0852015-08-21 22:47:27 +0000361 UsedRegs[j] = Limit;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000362 }
363 }
364}
365
366bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
367 MachineBasicBlock::iterator I,
368 const Counters &Required) {
369
370 // End of program? No need to wait on anything
Marek Olsak8e9cc632016-01-13 17:23:09 +0000371 // A function not returning void needs to wait, because other bytecode will
372 // be appended after it and we don't know what it will be.
373 if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid)
Tom Stellardc4cabef2013-01-18 21:15:53 +0000374 return false;
375
376 // Figure out if the async instructions execute in order
377 bool Ordered[3];
378
Tom Stellard6695ba02016-10-28 23:53:48 +0000379 // VM_CNT is always ordered except when there are flat instructions, which
380 // can return out of order.
381 Ordered[0] = !IsFlatOutstanding;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000382
383 // EXP_CNT is unordered if we have both EXP & VM-writes
384 Ordered[1] = ExpInstrTypesSeen == 3;
385
386 // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
387 Ordered[2] = false;
388
389 // The values we are going to put into the S_WAITCNT instruction
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000390 Counters Counts = HardwareLimits;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000391
392 // Do we really need to wait?
393 bool NeedWait = false;
394
395 for (unsigned i = 0; i < 3; ++i) {
396
397 if (Required.Array[i] <= WaitedOn.Array[i])
398 continue;
399
400 NeedWait = true;
Matt Arsenault97483692014-07-17 17:50:22 +0000401
Tom Stellardc4cabef2013-01-18 21:15:53 +0000402 if (Ordered[i]) {
403 unsigned Value = LastIssued.Array[i] - Required.Array[i];
404
Matt Arsenault97483692014-07-17 17:50:22 +0000405 // Adjust the value to the real hardware possibilities.
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000406 Counts.Array[i] = std::min(Value, HardwareLimits.Array[i]);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000407
408 } else
409 Counts.Array[i] = 0;
410
Matt Arsenault97483692014-07-17 17:50:22 +0000411 // Remember on what we have waited on.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000412 WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
413 }
414
415 if (!NeedWait)
416 return false;
417
418 // Reset EXP_CNT instruction types
419 if (Counts.Named.EXP == 0)
420 ExpInstrTypesSeen = 0;
421
422 // Build the wait instruction
423 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000424 .addImm(encodeWaitcnt(IV,
425 Counts.Named.VM,
426 Counts.Named.EXP,
427 Counts.Named.LGKM));
Tom Stellardc4cabef2013-01-18 21:15:53 +0000428
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000429 LastOpcodeType = OTHER;
Marek Olsak1bd24632015-02-03 17:37:52 +0000430 LastInstWritesM0 = false;
Tom Stellard6695ba02016-10-28 23:53:48 +0000431 IsFlatOutstanding = false;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000432 return true;
433}
434
435/// \brief helper function for handleOperands
436static void increaseCounters(Counters &Dst, const Counters &Src) {
437
438 for (unsigned i = 0; i < 3; ++i)
439 Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
440}
441
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000442/// \brief check whether any of the counters is non-zero
443static bool countersNonZero(const Counters &Counter) {
444 for (unsigned i = 0; i < 3; ++i)
445 if (Counter.Array[i])
446 return true;
447 return false;
448}
449
450void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
451 assert(I->getOpcode() == AMDGPU::S_WAITCNT);
452
453 unsigned Imm = I->getOperand(0).getImm();
454 Counters Counts, WaitOn;
455
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000456 Counts.Named.VM = decodeVmcnt(IV, Imm);
457 Counts.Named.EXP = decodeExpcnt(IV, Imm);
458 Counts.Named.LGKM = decodeLgkmcnt(IV, Imm);
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000459
460 for (unsigned i = 0; i < 3; ++i) {
461 if (Counts.Array[i] <= LastIssued.Array[i])
462 WaitOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
463 else
464 WaitOn.Array[i] = 0;
465 }
466
467 increaseCounters(DelayedWaitOn, WaitOn);
468}
469
Tom Stellardc4cabef2013-01-18 21:15:53 +0000470Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
471
472 Counters Result = ZeroCounts;
473
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000474 // For each register affected by this instruction increase the result
475 // sequence.
476 //
477 // TODO: We could probably just look at explicit operands if we removed VCC /
478 // EXEC from SMRD dest reg classes.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000479 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000480 MachineOperand &Op = MI.getOperand(i);
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000481 if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
482 continue;
483
484 const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
485 RegInterval Interval = getRegInterval(RC, Op);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000486 for (unsigned j = Interval.first; j < Interval.second; ++j) {
487
Christian Konig862fd9f2013-03-01 09:46:04 +0000488 if (Op.isDef()) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000489 increaseCounters(Result, UsedRegs[j]);
Christian Konigf1fd5fa2013-03-18 11:33:45 +0000490 increaseCounters(Result, DefinedRegs[j]);
Christian Konig862fd9f2013-03-01 09:46:04 +0000491 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000492
493 if (Op.isUse())
494 increaseCounters(Result, DefinedRegs[j]);
495 }
496 }
497
498 return Result;
499}
500
Marek Olsak1bd24632015-02-03 17:37:52 +0000501void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
502 MachineBasicBlock::iterator I) {
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000503 if (ST->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
Marek Olsak1bd24632015-02-03 17:37:52 +0000504 return;
505
506 // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
507 if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
508 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
509 LastInstWritesM0 = false;
510 return;
511 }
512
513 // Set whether this instruction sets M0
514 LastInstWritesM0 = false;
515
516 unsigned NumOperands = I->getNumOperands();
517 for (unsigned i = 0; i < NumOperands; i++) {
518 const MachineOperand &Op = I->getOperand(i);
519
520 if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
521 LastInstWritesM0 = true;
522 }
523}
524
Matt Arsenaulta0050b02014-06-19 01:19:19 +0000525// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
526// around other non-memory instructions.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000527bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000528 bool Changes = false;
529
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000530 ST = &MF.getSubtarget<SISubtarget>();
531 TII = ST->getInstrInfo();
532 TRI = &TII->getRegisterInfo();
Tom Stellardc4cabef2013-01-18 21:15:53 +0000533 MRI = &MF.getRegInfo();
Konstantin Zhuravlyov836cbff2016-09-30 17:01:40 +0000534 IV = getIsaVersion(ST->getFeatureBits());
Marek Olsak79c05872016-11-25 17:37:09 +0000535 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Tom Stellardc4cabef2013-01-18 21:15:53 +0000536
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000537 HardwareLimits.Named.VM = getVmcntBitMask(IV);
538 HardwareLimits.Named.EXP = getExpcntBitMask(IV);
539 HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
540
Tom Stellardc4cabef2013-01-18 21:15:53 +0000541 WaitedOn = ZeroCounts;
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000542 DelayedWaitOn = ZeroCounts;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000543 LastIssued = ZeroCounts;
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000544 LastOpcodeType = OTHER;
Marek Olsak1bd24632015-02-03 17:37:52 +0000545 LastInstWritesM0 = false;
Tom Stellard6695ba02016-10-28 23:53:48 +0000546 IsFlatOutstanding = false;
Marek Olsak79c05872016-11-25 17:37:09 +0000547 ReturnsVoid = MFI->returnsVoid();
Tom Stellardc4cabef2013-01-18 21:15:53 +0000548
549 memset(&UsedRegs, 0, sizeof(UsedRegs));
550 memset(&DefinedRegs, 0, sizeof(DefinedRegs));
551
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000552 SmallVector<MachineInstr *, 4> RemoveMI;
Marek Olsak79c05872016-11-25 17:37:09 +0000553 SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
554
555 bool HaveScalarStores = false;
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000556
Tom Stellardc4cabef2013-01-18 21:15:53 +0000557 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
558 BI != BE; ++BI) {
559
560 MachineBasicBlock &MBB = *BI;
Marek Olsak79c05872016-11-25 17:37:09 +0000561
Tom Stellardc4cabef2013-01-18 21:15:53 +0000562 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
563 I != E; ++I) {
564
Marek Olsak79c05872016-11-25 17:37:09 +0000565 if (!HaveScalarStores && TII->isScalarStore(*I))
566 HaveScalarStores = true;
567
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000568 if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
Tom Stellard30961762016-02-08 19:49:20 +0000569 // There is a hardware bug on CI/SI where SMRD instruction may corrupt
570 // vccz bit, so when we detect that an instruction may read from a
571 // corrupt vccz bit, we need to:
572 // 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
573 // complete.
574 // 2. Restore the correct value of vccz by writing the current value
575 // of vcc back to vcc.
576
577 if (TII->isSMRD(I->getOpcode())) {
578 VCCZCorrupt = true;
579 } else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
580 // FIXME: We only care about SMRD instructions here, not LDS or GDS.
581 // Whenever we store a value in vcc, the correct value of vccz is
582 // restored.
583 VCCZCorrupt = false;
584 }
585
586 // Check if we need to apply the bug work-around
Matt Arsenault52f14ec2016-11-07 19:09:27 +0000587 if (VCCZCorrupt && readsVCCZ(*I)) {
Tom Stellard30961762016-02-08 19:49:20 +0000588 DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');
589
590 // Wait on everything, not just LGKM. vccz reads usually come from
591 // terminators, and we always wait on everything at the end of the
592 // block, so if we only wait on LGKM here, we might end up with
593 // another s_waitcnt inserted right after this if there are non-LGKM
594 // instructions still outstanding.
595 insertWait(MBB, I, LastIssued);
596
597 // Restore the vccz bit. Any time a value is written to vcc, the vcc
598 // bit is updated, so we can restore the bit by reading the value of
599 // vcc and then writing it back to the register.
600 BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
601 AMDGPU::VCC)
Matt Arsenault52f14ec2016-11-07 19:09:27 +0000602 .addReg(AMDGPU::VCC);
Tom Stellard30961762016-02-08 19:49:20 +0000603 }
604 }
605
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000606 // Record pre-existing, explicitly requested waits
607 if (I->getOpcode() == AMDGPU::S_WAITCNT) {
608 handleExistingWait(*I);
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000609 RemoveMI.push_back(&*I);
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000610 continue;
611 }
Marek Olsak1bd24632015-02-03 17:37:52 +0000612
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000613 Counters Required;
614
615 // Wait for everything before a barrier.
616 //
617 // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
618 // but we also want to wait for any other outstanding transfers before
619 // signalling other hardware blocks
Konstantin Zhuravlyovd7bdf242016-09-30 16:50:36 +0000620 if ((I->getOpcode() == AMDGPU::S_BARRIER &&
621 ST->needWaitcntBeforeBarrier()) ||
622 I->getOpcode() == AMDGPU::S_SENDMSG)
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000623 Required = LastIssued;
624 else
625 Required = handleOperands(*I);
626
627 Counters Increment = getHwCounts(*I);
628
629 if (countersNonZero(Required) || countersNonZero(Increment))
630 increaseCounters(Required, DelayedWaitOn);
631
632 Changes |= insertWait(MBB, I, Required);
633
634 pushInstruction(MBB, I, Increment);
Marek Olsak1bd24632015-02-03 17:37:52 +0000635 handleSendMsg(MBB, I);
Marek Olsak79c05872016-11-25 17:37:09 +0000636
637 if (I->getOpcode() == AMDGPU::S_ENDPGM ||
638 I->getOpcode() == AMDGPU::SI_RETURN)
639 EndPgmBlocks.push_back(&MBB);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000640 }
641
642 // Wait for everything at the end of the MBB
643 Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000644 }
645
Marek Olsak79c05872016-11-25 17:37:09 +0000646 if (HaveScalarStores) {
647 // If scalar writes are used, the cache must be flushed or else the next
648 // wave to reuse the same scratch memory can be clobbered.
649 //
650 // Insert s_dcache_wb at wave termination points if there were any scalar
651 // stores, and only if the cache hasn't already been flushed. This could be
652 // improved by looking across blocks for flushes in postdominating blocks
653 // from the stores but an explicitly requested flush is probably very rare.
654 for (MachineBasicBlock *MBB : EndPgmBlocks) {
655 bool SeenDCacheWB = false;
656
657 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
658 I != E; ++I) {
659
660 if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
661 SeenDCacheWB = true;
662 else if (TII->isScalarStore(*I))
663 SeenDCacheWB = false;
664
665 // FIXME: It would be better to insert this before a waitcnt if any.
666 if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
667 I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
668 Changes = true;
669 BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
670 }
671 }
672 }
673 }
674
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000675 for (MachineInstr *I : RemoveMI)
676 I->eraseFromParent();
677
Tom Stellardc4cabef2013-01-18 21:15:53 +0000678 return Changes;
679}