blob: 03bebae729e554b0a90cae177bb628f588d91c6d [file] [log] [blame]
//===-- SIInsertWaits.cpp - Insert waits for memory reads and writes ------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Insert wait instructions for memory reads and writes.
12///
13/// Memory reads and writes are issued asynchronously, so we need to insert
14/// S_WAITCNT instructions when we want to access any of their results or
15/// overwrite any register that's used asynchronously.
16//
17//===----------------------------------------------------------------------===//
18
19#include "AMDGPU.h"
Eric Christopherd9134482014-08-04 21:25:23 +000020#include "AMDGPUSubtarget.h"
Matt Arsenault9783e002014-09-29 15:50:26 +000021#include "SIDefines.h"
Matt Arsenault1fd0c622014-09-29 15:53:15 +000022#include "SIInstrInfo.h"
Tom Stellardc4cabef2013-01-18 21:15:53 +000023#include "SIMachineFunctionInfo.h"
24#include "llvm/CodeGen/MachineFunction.h"
25#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28
Tom Stellard6e1967e2016-02-05 17:42:38 +000029#define DEBUG_TYPE "si-insert-waits"
30
Tom Stellardc4cabef2013-01-18 21:15:53 +000031using namespace llvm;
32
33namespace {
34
/// \brief One slot per hardware wait counter.
///
/// The same three values can be addressed by name (Named.VM, Named.EXP,
/// Named.LGKM) or by position (Array[0..2]) when looping over all counters.
union Counters {
  struct {
    unsigned VM;
    unsigned EXP;
    unsigned LGKM;
  } Named;

  unsigned Array[3];
};
45
/// \brief Coarse classification of the previously issued instruction; used to
/// detect back-to-back SMEM or VMEM instructions.
enum InstType {
  OTHER,
  SMEM,
  VMEM
};
51
Tom Stellardc4cabef2013-01-18 21:15:53 +000052typedef Counters RegCounters[512];
53typedef std::pair<unsigned, unsigned> RegInterval;
54
55class SIInsertWaits : public MachineFunctionPass {
56
57private:
Tom Stellardc4cabef2013-01-18 21:15:53 +000058 const SIInstrInfo *TII;
Bill Wendling37e9adb2013-06-07 20:28:55 +000059 const SIRegisterInfo *TRI;
Tom Stellardc4cabef2013-01-18 21:15:53 +000060 const MachineRegisterInfo *MRI;
61
62 /// \brief Constant hardware limits
63 static const Counters WaitCounts;
64
65 /// \brief Constant zero value
66 static const Counters ZeroCounts;
67
68 /// \brief Counter values we have already waited on.
69 Counters WaitedOn;
70
71 /// \brief Counter values for last instruction issued.
72 Counters LastIssued;
73
74 /// \brief Registers used by async instructions.
75 RegCounters UsedRegs;
76
77 /// \brief Registers defined by async instructions.
78 RegCounters DefinedRegs;
79
80 /// \brief Different export instruction types seen since last wait.
81 unsigned ExpInstrTypesSeen;
82
Marek Olsakfa58e5e2014-12-07 17:17:43 +000083 /// \brief Type of the last opcode.
84 InstType LastOpcodeType;
85
Marek Olsak1bd24632015-02-03 17:37:52 +000086 bool LastInstWritesM0;
87
Marek Olsak8e9cc632016-01-13 17:23:09 +000088 /// \brief Whether the machine function returns void
89 bool ReturnsVoid;
90
Tom Stellard30961762016-02-08 19:49:20 +000091 /// Whether the VCCZ bit is possibly corrupt
92 bool VCCZCorrupt;
93
Tom Stellardc4cabef2013-01-18 21:15:53 +000094 /// \brief Get increment/decrement amount for this instruction.
95 Counters getHwCounts(MachineInstr &MI);
96
97 /// \brief Is operand relevant for async execution?
98 bool isOpRelevant(MachineOperand &Op);
99
100 /// \brief Get register interval an operand affects.
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000101 RegInterval getRegInterval(const TargetRegisterClass *RC,
102 const MachineOperand &Reg) const;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000103
104 /// \brief Handle instructions async components
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000105 void pushInstruction(MachineBasicBlock &MBB,
106 MachineBasicBlock::iterator I);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000107
108 /// \brief Insert the actual wait instruction
109 bool insertWait(MachineBasicBlock &MBB,
110 MachineBasicBlock::iterator I,
111 const Counters &Counts);
112
Christian Konig862fd9f2013-03-01 09:46:04 +0000113 /// \brief Do we need def2def checks?
114 bool unorderedDefines(MachineInstr &MI);
115
Tom Stellardc4cabef2013-01-18 21:15:53 +0000116 /// \brief Resolve all operand dependencies to counter requirements
117 Counters handleOperands(MachineInstr &MI);
118
Marek Olsak1bd24632015-02-03 17:37:52 +0000119 /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
120 void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
121
Tom Stellard30961762016-02-08 19:49:20 +0000122 /// Return true if there are LGKM instrucitons that haven't been waited on
123 /// yet.
124 bool hasOutstandingLGKM() const;
125
Tom Stellardc4cabef2013-01-18 21:15:53 +0000126public:
Tom Stellard6e1967e2016-02-05 17:42:38 +0000127 static char ID;
128
129 SIInsertWaits() :
Tom Stellardc4cabef2013-01-18 21:15:53 +0000130 MachineFunctionPass(ID),
Craig Topper062a2ba2014-04-25 05:30:21 +0000131 TII(nullptr),
132 TRI(nullptr),
Tom Stellard30961762016-02-08 19:49:20 +0000133 ExpInstrTypesSeen(0),
134 VCCZCorrupt(false) { }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000135
Craig Topper5656db42014-04-29 07:57:24 +0000136 bool runOnMachineFunction(MachineFunction &MF) override;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000137
Craig Topper5656db42014-04-29 07:57:24 +0000138 const char *getPassName() const override {
Matt Arsenault0cb85172015-09-25 17:21:28 +0000139 return "SI insert wait instructions";
Tom Stellardc4cabef2013-01-18 21:15:53 +0000140 }
141
Matt Arsenault0cb85172015-09-25 17:21:28 +0000142 void getAnalysisUsage(AnalysisUsage &AU) const override {
143 AU.setPreservesCFG();
144 MachineFunctionPass::getAnalysisUsage(AU);
145 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000146};
147
148} // End anonymous namespace
149
Tom Stellard6e1967e2016-02-05 17:42:38 +0000150INITIALIZE_PASS_BEGIN(SIInsertWaits, DEBUG_TYPE,
151 "SI Insert Waits", false, false)
152INITIALIZE_PASS_END(SIInsertWaits, DEBUG_TYPE,
153 "SI Insert Waits", false, false)
154
Tom Stellardc4cabef2013-01-18 21:15:53 +0000155char SIInsertWaits::ID = 0;
156
Tom Stellard6e1967e2016-02-05 17:42:38 +0000157char &llvm::SIInsertWaitsID = SIInsertWaits::ID;
158
159FunctionPass *llvm::createSIInsertWaitsPass() {
160 return new SIInsertWaits();
161}
162
Tom Stellard3d2c8522016-01-28 17:13:44 +0000163const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } };
Tom Stellardc4cabef2013-01-18 21:15:53 +0000164const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
165
Tom Stellard30961762016-02-08 19:49:20 +0000166static bool readsVCCZ(unsigned Opcode) {
167 return Opcode == AMDGPU::S_CBRANCH_VCCNZ || Opcode == AMDGPU::S_CBRANCH_VCCNZ;
168}
169
170bool SIInsertWaits::hasOutstandingLGKM() const {
171 return WaitedOn.Named.LGKM != LastIssued.Named.LGKM;
172}
Tom Stellardc4cabef2013-01-18 21:15:53 +0000173
174Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000175 uint64_t TSFlags = MI.getDesc().TSFlags;
Matt Arsenaulte66621b2015-09-24 19:52:27 +0000176 Counters Result = { { 0, 0, 0 } };
Tom Stellardc4cabef2013-01-18 21:15:53 +0000177
178 Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
179
180 // Only consider stores or EXP for EXP_CNT
181 Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
Christian Konig862fd9f2013-03-01 09:46:04 +0000182 (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
Tom Stellardc4cabef2013-01-18 21:15:53 +0000183
184 // LGKM may uses larger values
185 if (TSFlags & SIInstrFlags::LGKM_CNT) {
186
Matt Arsenault3add6432015-10-20 04:35:43 +0000187 if (TII->isSMRD(MI)) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000188
Matt Arsenaulte66621b2015-09-24 19:52:27 +0000189 if (MI.getNumOperands() != 0) {
Matt Arsenaultb733f002015-10-01 22:40:35 +0000190 assert(MI.getOperand(0).isReg() &&
191 "First LGKM operand must be a register!");
Michel Danzer20680b12013-08-16 16:19:24 +0000192
Matt Arsenaulte66621b2015-09-24 19:52:27 +0000193 // XXX - What if this is a write into a super register?
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000194 const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0);
195 unsigned Size = RC->getSize();
Matt Arsenaulte66621b2015-09-24 19:52:27 +0000196 Result.Named.LGKM = Size > 4 ? 2 : 1;
197 } else {
198 // s_dcache_inv etc. do not have a a destination register. Assume we
199 // want a wait on these.
200 // XXX - What is the right value?
201 Result.Named.LGKM = 1;
202 }
Michel Danzer20680b12013-08-16 16:19:24 +0000203 } else {
204 // DS
205 Result.Named.LGKM = 1;
206 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000207
208 } else {
209 Result.Named.LGKM = 0;
210 }
211
212 return Result;
213}
214
215bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000216 // Constants are always irrelevant
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000217 if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
Tom Stellardc4cabef2013-01-18 21:15:53 +0000218 return false;
219
220 // Defines are always relevant
221 if (Op.isDef())
222 return true;
223
224 // For exports all registers are relevant
225 MachineInstr &MI = *Op.getParent();
226 if (MI.getOpcode() == AMDGPU::EXP)
227 return true;
228
229 // For stores the stored value is also relevant
230 if (!MI.getDesc().mayStore())
231 return false;
232
Tom Stellardb3931b82015-01-06 19:52:04 +0000233 // Check if this operand is the value being stored.
Tom Stellard2d26fe72016-02-19 15:33:13 +0000234 // Special case for DS/FLAT instructions, since the address
Tom Stellardb3931b82015-01-06 19:52:04 +0000235 // operand comes before the value operand and it may have
236 // multiple data operands.
237
Tom Stellard2d26fe72016-02-19 15:33:13 +0000238 if (TII->isDS(MI) || TII->isFLAT(MI)) {
Tom Stellardb3931b82015-01-06 19:52:04 +0000239 MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
240 if (Data && Op.isIdenticalTo(*Data))
241 return true;
Tom Stellard2d26fe72016-02-19 15:33:13 +0000242 }
Tom Stellardb3931b82015-01-06 19:52:04 +0000243
Tom Stellard2d26fe72016-02-19 15:33:13 +0000244 if (TII->isDS(MI)) {
Tom Stellardb3931b82015-01-06 19:52:04 +0000245 MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
246 if (Data0 && Op.isIdenticalTo(*Data0))
247 return true;
248
249 MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
250 if (Data1 && Op.isIdenticalTo(*Data1))
251 return true;
252
253 return false;
254 }
255
256 // NOTE: This assumes that the value operand is before the
257 // address operand, and that there is only one value operand.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000258 for (MachineInstr::mop_iterator I = MI.operands_begin(),
259 E = MI.operands_end(); I != E; ++I) {
260
261 if (I->isReg() && I->isUse())
262 return Op.isIdenticalTo(*I);
263 }
264
265 return false;
266}
267
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000268RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
269 const MachineOperand &Reg) const {
270 unsigned Size = RC->getSize();
Tom Stellardc4cabef2013-01-18 21:15:53 +0000271 assert(Size >= 4);
272
273 RegInterval Result;
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000274 Result.first = TRI->getEncodingValue(Reg.getReg());
Tom Stellardc4cabef2013-01-18 21:15:53 +0000275 Result.second = Result.first + Size / 4;
276
277 return Result;
278}
279
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000280void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
281 MachineBasicBlock::iterator I) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000282
283 // Get the hardware counter increments and sum them up
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000284 Counters Increment = getHwCounts(*I);
Tom Stellardbd8a0852015-08-21 22:47:27 +0000285 Counters Limit = ZeroCounts;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000286 unsigned Sum = 0;
287
288 for (unsigned i = 0; i < 3; ++i) {
289 LastIssued.Array[i] += Increment.Array[i];
Tom Stellardbd8a0852015-08-21 22:47:27 +0000290 if (Increment.Array[i])
291 Limit.Array[i] = LastIssued.Array[i];
Tom Stellardc4cabef2013-01-18 21:15:53 +0000292 Sum += Increment.Array[i];
293 }
294
295 // If we don't increase anything then that's it
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000296 if (Sum == 0) {
297 LastOpcodeType = OTHER;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000298 return;
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000299 }
300
Eric Christopher6c5b5112015-03-11 18:43:21 +0000301 if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
302 AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Benjamin Kramerdf005cb2015-08-08 18:27:36 +0000303 // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000304 // or SMEM clause, respectively.
305 //
306 // The temporary workaround is to break the clauses with S_NOP.
307 //
308 // The proper solution would be to allocate registers such that all source
309 // and destination registers don't overlap, e.g. this is illegal:
310 // r0 = load r2
311 // r2 = load r0
Matt Arsenault3add6432015-10-20 04:35:43 +0000312 if ((LastOpcodeType == SMEM && TII->isSMRD(*I)) ||
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000313 (LastOpcodeType == VMEM && Increment.Named.VM)) {
314 // Insert a NOP to break the clause.
315 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
316 .addImm(0);
Marek Olsak1bd24632015-02-03 17:37:52 +0000317 LastInstWritesM0 = false;
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000318 }
319
Matt Arsenault3add6432015-10-20 04:35:43 +0000320 if (TII->isSMRD(*I))
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000321 LastOpcodeType = SMEM;
322 else if (Increment.Named.VM)
323 LastOpcodeType = VMEM;
324 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000325
326 // Remember which export instructions we have seen
327 if (Increment.Named.EXP) {
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000328 ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000329 }
330
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000331 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000332 MachineOperand &Op = I->getOperand(i);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000333 if (!isOpRelevant(Op))
334 continue;
335
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000336 const TargetRegisterClass *RC = TII->getOpRegClass(*I, i);
337 RegInterval Interval = getRegInterval(RC, Op);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000338 for (unsigned j = Interval.first; j < Interval.second; ++j) {
339
340 // Remember which registers we define
341 if (Op.isDef())
Tom Stellardbd8a0852015-08-21 22:47:27 +0000342 DefinedRegs[j] = Limit;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000343
344 // and which one we are using
345 if (Op.isUse())
Tom Stellardbd8a0852015-08-21 22:47:27 +0000346 UsedRegs[j] = Limit;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000347 }
348 }
349}
350
351bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
352 MachineBasicBlock::iterator I,
353 const Counters &Required) {
354
355 // End of program? No need to wait on anything
Marek Olsak8e9cc632016-01-13 17:23:09 +0000356 // A function not returning void needs to wait, because other bytecode will
357 // be appended after it and we don't know what it will be.
358 if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid)
Tom Stellardc4cabef2013-01-18 21:15:53 +0000359 return false;
360
361 // Figure out if the async instructions execute in order
362 bool Ordered[3];
363
364 // VM_CNT is always ordered
365 Ordered[0] = true;
366
367 // EXP_CNT is unordered if we have both EXP & VM-writes
368 Ordered[1] = ExpInstrTypesSeen == 3;
369
370 // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
371 Ordered[2] = false;
372
373 // The values we are going to put into the S_WAITCNT instruction
374 Counters Counts = WaitCounts;
375
376 // Do we really need to wait?
377 bool NeedWait = false;
378
379 for (unsigned i = 0; i < 3; ++i) {
380
381 if (Required.Array[i] <= WaitedOn.Array[i])
382 continue;
383
384 NeedWait = true;
Matt Arsenault97483692014-07-17 17:50:22 +0000385
Tom Stellardc4cabef2013-01-18 21:15:53 +0000386 if (Ordered[i]) {
387 unsigned Value = LastIssued.Array[i] - Required.Array[i];
388
Matt Arsenault97483692014-07-17 17:50:22 +0000389 // Adjust the value to the real hardware possibilities.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000390 Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
391
392 } else
393 Counts.Array[i] = 0;
394
Matt Arsenault97483692014-07-17 17:50:22 +0000395 // Remember on what we have waited on.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000396 WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
397 }
398
399 if (!NeedWait)
400 return false;
401
402 // Reset EXP_CNT instruction types
403 if (Counts.Named.EXP == 0)
404 ExpInstrTypesSeen = 0;
405
406 // Build the wait instruction
407 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
408 .addImm((Counts.Named.VM & 0xF) |
409 ((Counts.Named.EXP & 0x7) << 4) |
Tom Stellard3d2c8522016-01-28 17:13:44 +0000410 ((Counts.Named.LGKM & 0xF) << 8));
Tom Stellardc4cabef2013-01-18 21:15:53 +0000411
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000412 LastOpcodeType = OTHER;
Marek Olsak1bd24632015-02-03 17:37:52 +0000413 LastInstWritesM0 = false;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000414 return true;
415}
416
417/// \brief helper function for handleOperands
418static void increaseCounters(Counters &Dst, const Counters &Src) {
419
420 for (unsigned i = 0; i < 3; ++i)
421 Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
422}
423
424Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
425
426 Counters Result = ZeroCounts;
427
Michel Danzer6064f572014-01-27 07:20:44 +0000428 // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
429 // but we also want to wait for any other outstanding transfers before
430 // signalling other hardware blocks
431 if (MI.getOpcode() == AMDGPU::S_SENDMSG)
432 return LastIssued;
433
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000434 // For each register affected by this instruction increase the result
435 // sequence.
436 //
437 // TODO: We could probably just look at explicit operands if we removed VCC /
438 // EXEC from SMRD dest reg classes.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000439 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000440 MachineOperand &Op = MI.getOperand(i);
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000441 if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
442 continue;
443
444 const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
445 RegInterval Interval = getRegInterval(RC, Op);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000446 for (unsigned j = Interval.first; j < Interval.second; ++j) {
447
Christian Konig862fd9f2013-03-01 09:46:04 +0000448 if (Op.isDef()) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000449 increaseCounters(Result, UsedRegs[j]);
Christian Konigf1fd5fa2013-03-18 11:33:45 +0000450 increaseCounters(Result, DefinedRegs[j]);
Christian Konig862fd9f2013-03-01 09:46:04 +0000451 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000452
453 if (Op.isUse())
454 increaseCounters(Result, DefinedRegs[j]);
455 }
456 }
457
458 return Result;
459}
460
Marek Olsak1bd24632015-02-03 17:37:52 +0000461void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
462 MachineBasicBlock::iterator I) {
Eric Christopher6c5b5112015-03-11 18:43:21 +0000463 if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <
464 AMDGPUSubtarget::VOLCANIC_ISLANDS)
Marek Olsak1bd24632015-02-03 17:37:52 +0000465 return;
466
467 // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
468 if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
469 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
470 LastInstWritesM0 = false;
471 return;
472 }
473
474 // Set whether this instruction sets M0
475 LastInstWritesM0 = false;
476
477 unsigned NumOperands = I->getNumOperands();
478 for (unsigned i = 0; i < NumOperands; i++) {
479 const MachineOperand &Op = I->getOperand(i);
480
481 if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
482 LastInstWritesM0 = true;
483 }
484}
485
Matt Arsenaulta0050b02014-06-19 01:19:19 +0000486// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
487// around other non-memory instructions.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000488bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000489 bool Changes = false;
490
Eric Christopherfc6de422014-08-05 02:39:49 +0000491 TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
492 TRI =
493 static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
Bill Wendling37e9adb2013-06-07 20:28:55 +0000494
Tom Stellard30961762016-02-08 19:49:20 +0000495 const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
Tom Stellardc4cabef2013-01-18 21:15:53 +0000496 MRI = &MF.getRegInfo();
497
498 WaitedOn = ZeroCounts;
499 LastIssued = ZeroCounts;
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000500 LastOpcodeType = OTHER;
Marek Olsak1bd24632015-02-03 17:37:52 +0000501 LastInstWritesM0 = false;
Marek Olsak8e9cc632016-01-13 17:23:09 +0000502 ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
Tom Stellardc4cabef2013-01-18 21:15:53 +0000503
504 memset(&UsedRegs, 0, sizeof(UsedRegs));
505 memset(&DefinedRegs, 0, sizeof(DefinedRegs));
506
507 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
508 BI != BE; ++BI) {
509
510 MachineBasicBlock &MBB = *BI;
511 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
512 I != E; ++I) {
513
Tom Stellard30961762016-02-08 19:49:20 +0000514 if (ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
515 // There is a hardware bug on CI/SI where SMRD instruction may corrupt
516 // vccz bit, so when we detect that an instruction may read from a
517 // corrupt vccz bit, we need to:
518 // 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
519 // complete.
520 // 2. Restore the correct value of vccz by writing the current value
521 // of vcc back to vcc.
522
523 if (TII->isSMRD(I->getOpcode())) {
524 VCCZCorrupt = true;
525 } else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
526 // FIXME: We only care about SMRD instructions here, not LDS or GDS.
527 // Whenever we store a value in vcc, the correct value of vccz is
528 // restored.
529 VCCZCorrupt = false;
530 }
531
532 // Check if we need to apply the bug work-around
533 if (readsVCCZ(I->getOpcode()) && VCCZCorrupt) {
534 DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');
535
536 // Wait on everything, not just LGKM. vccz reads usually come from
537 // terminators, and we always wait on everything at the end of the
538 // block, so if we only wait on LGKM here, we might end up with
539 // another s_waitcnt inserted right after this if there are non-LGKM
540 // instructions still outstanding.
541 insertWait(MBB, I, LastIssued);
542
543 // Restore the vccz bit. Any time a value is written to vcc, the vcc
544 // bit is updated, so we can restore the bit by reading the value of
545 // vcc and then writing it back to the register.
546 BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
547 AMDGPU::VCC)
548 .addReg(AMDGPU::VCC);
549 }
550 }
551
Tom Stellard9d6797a2015-01-06 19:52:07 +0000552 // Wait for everything before a barrier.
553 if (I->getOpcode() == AMDGPU::S_BARRIER)
554 Changes |= insertWait(MBB, I, LastIssued);
555 else
556 Changes |= insertWait(MBB, I, handleOperands(*I));
Marek Olsak1bd24632015-02-03 17:37:52 +0000557
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000558 pushInstruction(MBB, I);
Marek Olsak1bd24632015-02-03 17:37:52 +0000559 handleSendMsg(MBB, I);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000560 }
561
562 // Wait for everything at the end of the MBB
563 Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
Marek Olsak3c0ebc72016-01-13 17:23:12 +0000564
565 // Functions returning something shouldn't contain S_ENDPGM, because other
566 // bytecode will be appended after it.
567 if (!ReturnsVoid) {
568 MachineBasicBlock::iterator I = MBB.getFirstTerminator();
569 if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
570 I->eraseFromParent();
571 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000572 }
573
574 return Changes;
575}