blob: b074b95c2d3c9e283075f063dc984a1d7b14d1cd [file] [log] [blame]
//===- SIInsertWaits.cpp - Insert wait instructions for memory accesses ---===//
Tom Stellardc4cabef2013-01-18 21:15:53 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Insert wait instructions for memory reads and writes.
12///
13/// Memory reads and writes are issued asynchronously, so we need to insert
14/// S_WAITCNT instructions when we want to access any of their results or
15/// overwrite any register that's used asynchronously.
16//
17//===----------------------------------------------------------------------===//
18
19#include "AMDGPU.h"
Eric Christopherd9134482014-08-04 21:25:23 +000020#include "AMDGPUSubtarget.h"
Matt Arsenault9783e002014-09-29 15:50:26 +000021#include "SIDefines.h"
Matt Arsenault1fd0c622014-09-29 15:53:15 +000022#include "SIInstrInfo.h"
Tom Stellardc4cabef2013-01-18 21:15:53 +000023#include "SIMachineFunctionInfo.h"
Eugene Zelenko66203762017-01-21 00:53:49 +000024#include "SIRegisterInfo.h"
Konstantin Zhuravlyov836cbff2016-09-30 17:01:40 +000025#include "Utils/AMDGPUBaseInfo.h"
Eugene Zelenko66203762017-01-21 00:53:49 +000026#include "llvm/ADT/SmallVector.h"
27#include "llvm/ADT/StringRef.h"
28#include "llvm/CodeGen/MachineBasicBlock.h"
Tom Stellardc4cabef2013-01-18 21:15:53 +000029#include "llvm/CodeGen/MachineFunction.h"
30#include "llvm/CodeGen/MachineFunctionPass.h"
Eugene Zelenko66203762017-01-21 00:53:49 +000031#include "llvm/CodeGen/MachineInstr.h"
Tom Stellardc4cabef2013-01-18 21:15:53 +000032#include "llvm/CodeGen/MachineInstrBuilder.h"
Eugene Zelenko66203762017-01-21 00:53:49 +000033#include "llvm/CodeGen/MachineOperand.h"
Tom Stellardc4cabef2013-01-18 21:15:53 +000034#include "llvm/CodeGen/MachineRegisterInfo.h"
Eugene Zelenko66203762017-01-21 00:53:49 +000035#include "llvm/IR/DebugLoc.h"
Eugene Zelenko59e12822017-08-08 00:47:13 +000036#include "llvm/MC/MCInstrDesc.h"
Eugene Zelenko66203762017-01-21 00:53:49 +000037#include "llvm/Pass.h"
38#include "llvm/Support/Debug.h"
39#include "llvm/Support/raw_ostream.h"
Eugene Zelenko66203762017-01-21 00:53:49 +000040#include <algorithm>
41#include <cassert>
42#include <cstdint>
43#include <cstring>
Eugene Zelenko66203762017-01-21 00:53:49 +000044#include <utility>
Tom Stellardc4cabef2013-01-18 21:15:53 +000045
Tom Stellard6e1967e2016-02-05 17:42:38 +000046#define DEBUG_TYPE "si-insert-waits"
47
Tom Stellardc4cabef2013-01-18 21:15:53 +000048using namespace llvm;
49
50namespace {
51
52/// \brief One variable for each of the hardware counters
Eugene Zelenko59e12822017-08-08 00:47:13 +000053using Counters = union {
Tom Stellardc4cabef2013-01-18 21:15:53 +000054 struct {
55 unsigned VM;
56 unsigned EXP;
57 unsigned LGKM;
58 } Named;
59 unsigned Array[3];
Eugene Zelenko59e12822017-08-08 00:47:13 +000060};
Tom Stellardc4cabef2013-01-18 21:15:53 +000061
Eugene Zelenko59e12822017-08-08 00:47:13 +000062using InstType = enum {
Marek Olsakfa58e5e2014-12-07 17:17:43 +000063 OTHER,
64 SMEM,
65 VMEM
Eugene Zelenko59e12822017-08-08 00:47:13 +000066};
Marek Olsakfa58e5e2014-12-07 17:17:43 +000067
Eugene Zelenko59e12822017-08-08 00:47:13 +000068using RegCounters = Counters[512];
69using RegInterval = std::pair<unsigned, unsigned>;
Tom Stellardc4cabef2013-01-18 21:15:53 +000070
71class SIInsertWaits : public MachineFunctionPass {
Tom Stellardc4cabef2013-01-18 21:15:53 +000072private:
Eugene Zelenko66203762017-01-21 00:53:49 +000073 const SISubtarget *ST = nullptr;
74 const SIInstrInfo *TII = nullptr;
75 const SIRegisterInfo *TRI = nullptr;
Tom Stellardc4cabef2013-01-18 21:15:53 +000076 const MachineRegisterInfo *MRI;
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +000077 AMDGPU::IsaInfo::IsaVersion ISA;
Tom Stellardc4cabef2013-01-18 21:15:53 +000078
Tom Stellardc4cabef2013-01-18 21:15:53 +000079 /// \brief Constant zero value
80 static const Counters ZeroCounts;
81
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +000082 /// \brief Hardware limits
83 Counters HardwareLimits;
84
Tom Stellardc4cabef2013-01-18 21:15:53 +000085 /// \brief Counter values we have already waited on.
86 Counters WaitedOn;
87
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +000088 /// \brief Counter values that we must wait on before the next counter
89 /// increase.
90 Counters DelayedWaitOn;
91
Tom Stellardc4cabef2013-01-18 21:15:53 +000092 /// \brief Counter values for last instruction issued.
93 Counters LastIssued;
94
95 /// \brief Registers used by async instructions.
96 RegCounters UsedRegs;
97
98 /// \brief Registers defined by async instructions.
99 RegCounters DefinedRegs;
100
101 /// \brief Different export instruction types seen since last wait.
Eugene Zelenko66203762017-01-21 00:53:49 +0000102 unsigned ExpInstrTypesSeen = 0;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000103
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000104 /// \brief Type of the last opcode.
105 InstType LastOpcodeType;
106
Marek Olsak1bd24632015-02-03 17:37:52 +0000107 bool LastInstWritesM0;
108
Tom Stellard6695ba02016-10-28 23:53:48 +0000109 /// Whether or not we have flat operations outstanding.
110 bool IsFlatOutstanding;
111
Marek Olsak8e9cc632016-01-13 17:23:09 +0000112 /// \brief Whether the machine function returns void
113 bool ReturnsVoid;
114
Tom Stellard30961762016-02-08 19:49:20 +0000115 /// Whether the VCCZ bit is possibly corrupt
Eugene Zelenko66203762017-01-21 00:53:49 +0000116 bool VCCZCorrupt = false;
Tom Stellard30961762016-02-08 19:49:20 +0000117
Tom Stellardc4cabef2013-01-18 21:15:53 +0000118 /// \brief Get increment/decrement amount for this instruction.
119 Counters getHwCounts(MachineInstr &MI);
120
121 /// \brief Is operand relevant for async execution?
122 bool isOpRelevant(MachineOperand &Op);
123
124 /// \brief Get register interval an operand affects.
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000125 RegInterval getRegInterval(const TargetRegisterClass *RC,
126 const MachineOperand &Reg) const;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000127
128 /// \brief Handle instructions async components
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000129 void pushInstruction(MachineBasicBlock &MBB,
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000130 MachineBasicBlock::iterator I,
131 const Counters& Increment);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000132
133 /// \brief Insert the actual wait instruction
134 bool insertWait(MachineBasicBlock &MBB,
135 MachineBasicBlock::iterator I,
136 const Counters &Counts);
137
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000138 /// \brief Handle existing wait instructions (from intrinsics)
139 void handleExistingWait(MachineBasicBlock::iterator I);
140
Christian Konig862fd9f2013-03-01 09:46:04 +0000141 /// \brief Do we need def2def checks?
142 bool unorderedDefines(MachineInstr &MI);
143
Tom Stellardc4cabef2013-01-18 21:15:53 +0000144 /// \brief Resolve all operand dependencies to counter requirements
145 Counters handleOperands(MachineInstr &MI);
146
Marek Olsak1bd24632015-02-03 17:37:52 +0000147 /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
148 void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
149
Tom Stellard30961762016-02-08 19:49:20 +0000150 /// Return true if there are LGKM instrucitons that haven't been waited on
151 /// yet.
152 bool hasOutstandingLGKM() const;
153
Tom Stellardc4cabef2013-01-18 21:15:53 +0000154public:
Tom Stellard6e1967e2016-02-05 17:42:38 +0000155 static char ID;
156
Eugene Zelenko66203762017-01-21 00:53:49 +0000157 SIInsertWaits() : MachineFunctionPass(ID) {}
Tom Stellardc4cabef2013-01-18 21:15:53 +0000158
Craig Topper5656db42014-04-29 07:57:24 +0000159 bool runOnMachineFunction(MachineFunction &MF) override;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000160
Mehdi Amini117296c2016-10-01 02:56:57 +0000161 StringRef getPassName() const override {
Matt Arsenault0cb85172015-09-25 17:21:28 +0000162 return "SI insert wait instructions";
Tom Stellardc4cabef2013-01-18 21:15:53 +0000163 }
164
Matt Arsenault0cb85172015-09-25 17:21:28 +0000165 void getAnalysisUsage(AnalysisUsage &AU) const override {
166 AU.setPreservesCFG();
167 MachineFunctionPass::getAnalysisUsage(AU);
168 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000169};
170
Eugene Zelenko66203762017-01-21 00:53:49 +0000171} // end anonymous namespace
Tom Stellardc4cabef2013-01-18 21:15:53 +0000172
Tom Stellard6e1967e2016-02-05 17:42:38 +0000173INITIALIZE_PASS_BEGIN(SIInsertWaits, DEBUG_TYPE,
174 "SI Insert Waits", false, false)
175INITIALIZE_PASS_END(SIInsertWaits, DEBUG_TYPE,
176 "SI Insert Waits", false, false)
177
Tom Stellardc4cabef2013-01-18 21:15:53 +0000178char SIInsertWaits::ID = 0;
179
Tom Stellard6e1967e2016-02-05 17:42:38 +0000180char &llvm::SIInsertWaitsID = SIInsertWaits::ID;
181
182FunctionPass *llvm::createSIInsertWaitsPass() {
183 return new SIInsertWaits();
184}
185
Tom Stellardc4cabef2013-01-18 21:15:53 +0000186const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
187
Matt Arsenault52f14ec2016-11-07 19:09:27 +0000188static bool readsVCCZ(const MachineInstr &MI) {
189 unsigned Opc = MI.getOpcode();
190 return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
191 !MI.getOperand(1).isUndef();
Tom Stellard30961762016-02-08 19:49:20 +0000192}
193
194bool SIInsertWaits::hasOutstandingLGKM() const {
195 return WaitedOn.Named.LGKM != LastIssued.Named.LGKM;
196}
Tom Stellardc4cabef2013-01-18 21:15:53 +0000197
198Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000199 uint64_t TSFlags = MI.getDesc().TSFlags;
Matt Arsenaulte66621b2015-09-24 19:52:27 +0000200 Counters Result = { { 0, 0, 0 } };
Tom Stellardc4cabef2013-01-18 21:15:53 +0000201
202 Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
203
204 // Only consider stores or EXP for EXP_CNT
Matt Arsenault7bee6ac2016-12-05 20:23:10 +0000205 Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT) && MI.mayStore();
Tom Stellardc4cabef2013-01-18 21:15:53 +0000206
207 // LGKM may uses larger values
208 if (TSFlags & SIInstrFlags::LGKM_CNT) {
209
Matt Arsenault3add6432015-10-20 04:35:43 +0000210 if (TII->isSMRD(MI)) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000211
Matt Arsenaulte66621b2015-09-24 19:52:27 +0000212 if (MI.getNumOperands() != 0) {
Matt Arsenaultb733f002015-10-01 22:40:35 +0000213 assert(MI.getOperand(0).isReg() &&
214 "First LGKM operand must be a register!");
Michel Danzer20680b12013-08-16 16:19:24 +0000215
Matt Arsenaulte66621b2015-09-24 19:52:27 +0000216 // XXX - What if this is a write into a super register?
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000217 const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0);
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000218 unsigned Size = TRI->getRegSizeInBits(*RC);
219 Result.Named.LGKM = Size > 32 ? 2 : 1;
Matt Arsenaulte66621b2015-09-24 19:52:27 +0000220 } else {
221 // s_dcache_inv etc. do not have a a destination register. Assume we
222 // want a wait on these.
223 // XXX - What is the right value?
224 Result.Named.LGKM = 1;
225 }
Michel Danzer20680b12013-08-16 16:19:24 +0000226 } else {
227 // DS
228 Result.Named.LGKM = 1;
229 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000230
231 } else {
232 Result.Named.LGKM = 0;
233 }
234
235 return Result;
236}
237
238bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000239 // Constants are always irrelevant
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000240 if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
Tom Stellardc4cabef2013-01-18 21:15:53 +0000241 return false;
242
243 // Defines are always relevant
244 if (Op.isDef())
245 return true;
246
Matt Arsenault7bee6ac2016-12-05 20:23:10 +0000247 // For exports all registers are relevant.
248 // TODO: Skip undef/disabled registers.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000249 MachineInstr &MI = *Op.getParent();
Matt Arsenault7bee6ac2016-12-05 20:23:10 +0000250 if (TII->isEXP(MI))
Tom Stellardc4cabef2013-01-18 21:15:53 +0000251 return true;
252
253 // For stores the stored value is also relevant
254 if (!MI.getDesc().mayStore())
255 return false;
256
Tom Stellardb3931b82015-01-06 19:52:04 +0000257 // Check if this operand is the value being stored.
Tom Stellard2d26fe72016-02-19 15:33:13 +0000258 // Special case for DS/FLAT instructions, since the address
Tom Stellardb3931b82015-01-06 19:52:04 +0000259 // operand comes before the value operand and it may have
260 // multiple data operands.
261
Tom Stellard2d26fe72016-02-19 15:33:13 +0000262 if (TII->isDS(MI)) {
Tom Stellardb3931b82015-01-06 19:52:04 +0000263 MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
264 if (Data0 && Op.isIdenticalTo(*Data0))
265 return true;
266
267 MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
Matt Arsenault8226fc42016-03-02 23:00:21 +0000268 return Data1 && Op.isIdenticalTo(*Data1);
Tom Stellardb3931b82015-01-06 19:52:04 +0000269 }
270
Matt Arsenault97279a82016-11-29 19:30:44 +0000271 if (TII->isFLAT(MI)) {
272 MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
273 if (Data && Op.isIdenticalTo(*Data))
274 return true;
275 }
276
Tom Stellardb3931b82015-01-06 19:52:04 +0000277 // NOTE: This assumes that the value operand is before the
278 // address operand, and that there is only one value operand.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000279 for (MachineInstr::mop_iterator I = MI.operands_begin(),
280 E = MI.operands_end(); I != E; ++I) {
281
282 if (I->isReg() && I->isUse())
283 return Op.isIdenticalTo(*I);
284 }
285
286 return false;
287}
288
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000289RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
290 const MachineOperand &Reg) const {
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000291 unsigned Size = TRI->getRegSizeInBits(*RC);
292 assert(Size >= 32);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000293
294 RegInterval Result;
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000295 Result.first = TRI->getEncodingValue(Reg.getReg());
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000296 Result.second = Result.first + Size / 32;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000297
298 return Result;
299}
300
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000301void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000302 MachineBasicBlock::iterator I,
303 const Counters &Increment) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000304 // Get the hardware counter increments and sum them up
Tom Stellardbd8a0852015-08-21 22:47:27 +0000305 Counters Limit = ZeroCounts;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000306 unsigned Sum = 0;
307
Tom Stellard6695ba02016-10-28 23:53:48 +0000308 if (TII->mayAccessFlatAddressSpace(*I))
309 IsFlatOutstanding = true;
310
Tom Stellardc4cabef2013-01-18 21:15:53 +0000311 for (unsigned i = 0; i < 3; ++i) {
312 LastIssued.Array[i] += Increment.Array[i];
Tom Stellardbd8a0852015-08-21 22:47:27 +0000313 if (Increment.Array[i])
314 Limit.Array[i] = LastIssued.Array[i];
Tom Stellardc4cabef2013-01-18 21:15:53 +0000315 Sum += Increment.Array[i];
316 }
317
318 // If we don't increase anything then that's it
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000319 if (Sum == 0) {
320 LastOpcodeType = OTHER;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000321 return;
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000322 }
323
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000324 if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Benjamin Kramerdf005cb2015-08-08 18:27:36 +0000325 // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000326 // or SMEM clause, respectively.
327 //
328 // The temporary workaround is to break the clauses with S_NOP.
329 //
330 // The proper solution would be to allocate registers such that all source
331 // and destination registers don't overlap, e.g. this is illegal:
332 // r0 = load r2
333 // r2 = load r0
Tom Stellard1f520e52016-05-02 17:39:06 +0000334 if (LastOpcodeType == VMEM && Increment.Named.VM) {
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000335 // Insert a NOP to break the clause.
336 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
337 .addImm(0);
Marek Olsak1bd24632015-02-03 17:37:52 +0000338 LastInstWritesM0 = false;
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000339 }
340
Matt Arsenault3add6432015-10-20 04:35:43 +0000341 if (TII->isSMRD(*I))
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000342 LastOpcodeType = SMEM;
343 else if (Increment.Named.VM)
344 LastOpcodeType = VMEM;
345 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000346
347 // Remember which export instructions we have seen
348 if (Increment.Named.EXP) {
Matt Arsenault7bee6ac2016-12-05 20:23:10 +0000349 ExpInstrTypesSeen |= TII->isEXP(*I) ? 1 : 2;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000350 }
351
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000352 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000353 MachineOperand &Op = I->getOperand(i);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000354 if (!isOpRelevant(Op))
355 continue;
356
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000357 const TargetRegisterClass *RC = TII->getOpRegClass(*I, i);
358 RegInterval Interval = getRegInterval(RC, Op);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000359 for (unsigned j = Interval.first; j < Interval.second; ++j) {
360
361 // Remember which registers we define
362 if (Op.isDef())
Tom Stellardbd8a0852015-08-21 22:47:27 +0000363 DefinedRegs[j] = Limit;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000364
365 // and which one we are using
366 if (Op.isUse())
Tom Stellardbd8a0852015-08-21 22:47:27 +0000367 UsedRegs[j] = Limit;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000368 }
369 }
370}
371
372bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
373 MachineBasicBlock::iterator I,
374 const Counters &Required) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000375 // End of program? No need to wait on anything
Marek Olsak8e9cc632016-01-13 17:23:09 +0000376 // A function not returning void needs to wait, because other bytecode will
377 // be appended after it and we don't know what it will be.
378 if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid)
Tom Stellardc4cabef2013-01-18 21:15:53 +0000379 return false;
380
381 // Figure out if the async instructions execute in order
382 bool Ordered[3];
383
Tom Stellard6695ba02016-10-28 23:53:48 +0000384 // VM_CNT is always ordered except when there are flat instructions, which
385 // can return out of order.
386 Ordered[0] = !IsFlatOutstanding;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000387
388 // EXP_CNT is unordered if we have both EXP & VM-writes
389 Ordered[1] = ExpInstrTypesSeen == 3;
390
391 // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
392 Ordered[2] = false;
393
394 // The values we are going to put into the S_WAITCNT instruction
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000395 Counters Counts = HardwareLimits;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000396
397 // Do we really need to wait?
398 bool NeedWait = false;
399
400 for (unsigned i = 0; i < 3; ++i) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000401 if (Required.Array[i] <= WaitedOn.Array[i])
402 continue;
403
404 NeedWait = true;
Matt Arsenault97483692014-07-17 17:50:22 +0000405
Tom Stellardc4cabef2013-01-18 21:15:53 +0000406 if (Ordered[i]) {
407 unsigned Value = LastIssued.Array[i] - Required.Array[i];
408
Matt Arsenault97483692014-07-17 17:50:22 +0000409 // Adjust the value to the real hardware possibilities.
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000410 Counts.Array[i] = std::min(Value, HardwareLimits.Array[i]);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000411 } else
412 Counts.Array[i] = 0;
413
Matt Arsenault97483692014-07-17 17:50:22 +0000414 // Remember on what we have waited on.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000415 WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
416 }
417
418 if (!NeedWait)
419 return false;
420
421 // Reset EXP_CNT instruction types
422 if (Counts.Named.EXP == 0)
423 ExpInstrTypesSeen = 0;
424
425 // Build the wait instruction
426 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000427 .addImm(AMDGPU::encodeWaitcnt(ISA,
428 Counts.Named.VM,
429 Counts.Named.EXP,
430 Counts.Named.LGKM));
Tom Stellardc4cabef2013-01-18 21:15:53 +0000431
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000432 LastOpcodeType = OTHER;
Marek Olsak1bd24632015-02-03 17:37:52 +0000433 LastInstWritesM0 = false;
Tom Stellard6695ba02016-10-28 23:53:48 +0000434 IsFlatOutstanding = false;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000435 return true;
436}
437
438/// \brief helper function for handleOperands
439static void increaseCounters(Counters &Dst, const Counters &Src) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000440 for (unsigned i = 0; i < 3; ++i)
441 Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
442}
443
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000444/// \brief check whether any of the counters is non-zero
445static bool countersNonZero(const Counters &Counter) {
446 for (unsigned i = 0; i < 3; ++i)
447 if (Counter.Array[i])
448 return true;
449 return false;
450}
451
452void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
453 assert(I->getOpcode() == AMDGPU::S_WAITCNT);
454
455 unsigned Imm = I->getOperand(0).getImm();
456 Counters Counts, WaitOn;
457
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000458 Counts.Named.VM = AMDGPU::decodeVmcnt(ISA, Imm);
459 Counts.Named.EXP = AMDGPU::decodeExpcnt(ISA, Imm);
460 Counts.Named.LGKM = AMDGPU::decodeLgkmcnt(ISA, Imm);
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000461
462 for (unsigned i = 0; i < 3; ++i) {
463 if (Counts.Array[i] <= LastIssued.Array[i])
464 WaitOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
465 else
466 WaitOn.Array[i] = 0;
467 }
468
469 increaseCounters(DelayedWaitOn, WaitOn);
470}
471
Tom Stellardc4cabef2013-01-18 21:15:53 +0000472Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000473 Counters Result = ZeroCounts;
474
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000475 // For each register affected by this instruction increase the result
476 // sequence.
477 //
478 // TODO: We could probably just look at explicit operands if we removed VCC /
479 // EXEC from SMRD dest reg classes.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000480 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000481 MachineOperand &Op = MI.getOperand(i);
Matt Arsenaultd1d499a2015-10-01 21:43:15 +0000482 if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
483 continue;
484
485 const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
486 RegInterval Interval = getRegInterval(RC, Op);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000487 for (unsigned j = Interval.first; j < Interval.second; ++j) {
Christian Konig862fd9f2013-03-01 09:46:04 +0000488 if (Op.isDef()) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000489 increaseCounters(Result, UsedRegs[j]);
Christian Konigf1fd5fa2013-03-18 11:33:45 +0000490 increaseCounters(Result, DefinedRegs[j]);
Christian Konig862fd9f2013-03-01 09:46:04 +0000491 }
Tom Stellardc4cabef2013-01-18 21:15:53 +0000492
493 if (Op.isUse())
494 increaseCounters(Result, DefinedRegs[j]);
495 }
496 }
497
498 return Result;
499}
500
Marek Olsak1bd24632015-02-03 17:37:52 +0000501void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
502 MachineBasicBlock::iterator I) {
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000503 if (ST->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
Marek Olsak1bd24632015-02-03 17:37:52 +0000504 return;
505
506 // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
Jan Veselyd48445d2017-01-04 18:06:55 +0000507 if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) {
Marek Olsak1bd24632015-02-03 17:37:52 +0000508 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
509 LastInstWritesM0 = false;
510 return;
511 }
512
513 // Set whether this instruction sets M0
514 LastInstWritesM0 = false;
515
516 unsigned NumOperands = I->getNumOperands();
517 for (unsigned i = 0; i < NumOperands; i++) {
518 const MachineOperand &Op = I->getOperand(i);
519
520 if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
521 LastInstWritesM0 = true;
522 }
523}
524
Matt Arsenault52d1b622017-03-08 01:06:58 +0000525/// Return true if \p MBB has one successor immediately following, and is its
526/// only predecessor
527static bool hasTrivialSuccessor(const MachineBasicBlock &MBB) {
528 if (MBB.succ_size() != 1)
529 return false;
530
531 const MachineBasicBlock *Succ = *MBB.succ_begin();
532 return (Succ->pred_size() == 1) && MBB.isLayoutSuccessor(Succ);
533}
534
Matt Arsenaulta0050b02014-06-19 01:19:19 +0000535// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
536// around other non-memory instructions.
Tom Stellardc4cabef2013-01-18 21:15:53 +0000537bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000538 bool Changes = false;
539
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000540 ST = &MF.getSubtarget<SISubtarget>();
541 TII = ST->getInstrInfo();
542 TRI = &TII->getRegisterInfo();
Tom Stellardc4cabef2013-01-18 21:15:53 +0000543 MRI = &MF.getRegInfo();
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000544 ISA = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
Marek Olsak79c05872016-11-25 17:37:09 +0000545 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Tom Stellardc4cabef2013-01-18 21:15:53 +0000546
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000547 HardwareLimits.Named.VM = AMDGPU::getVmcntBitMask(ISA);
548 HardwareLimits.Named.EXP = AMDGPU::getExpcntBitMask(ISA);
549 HardwareLimits.Named.LGKM = AMDGPU::getLgkmcntBitMask(ISA);
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000550
Tom Stellardc4cabef2013-01-18 21:15:53 +0000551 WaitedOn = ZeroCounts;
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000552 DelayedWaitOn = ZeroCounts;
Tom Stellardc4cabef2013-01-18 21:15:53 +0000553 LastIssued = ZeroCounts;
Marek Olsakfa58e5e2014-12-07 17:17:43 +0000554 LastOpcodeType = OTHER;
Marek Olsak1bd24632015-02-03 17:37:52 +0000555 LastInstWritesM0 = false;
Tom Stellard6695ba02016-10-28 23:53:48 +0000556 IsFlatOutstanding = false;
Marek Olsak79c05872016-11-25 17:37:09 +0000557 ReturnsVoid = MFI->returnsVoid();
Tom Stellardc4cabef2013-01-18 21:15:53 +0000558
559 memset(&UsedRegs, 0, sizeof(UsedRegs));
560 memset(&DefinedRegs, 0, sizeof(DefinedRegs));
561
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000562 SmallVector<MachineInstr *, 4> RemoveMI;
Marek Olsak79c05872016-11-25 17:37:09 +0000563 SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
564
565 bool HaveScalarStores = false;
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000566
Tom Stellardc4cabef2013-01-18 21:15:53 +0000567 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
568 BI != BE; ++BI) {
Tom Stellardc4cabef2013-01-18 21:15:53 +0000569 MachineBasicBlock &MBB = *BI;
Marek Olsak79c05872016-11-25 17:37:09 +0000570
Tom Stellardc4cabef2013-01-18 21:15:53 +0000571 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
572 I != E; ++I) {
Marek Olsak79c05872016-11-25 17:37:09 +0000573 if (!HaveScalarStores && TII->isScalarStore(*I))
574 HaveScalarStores = true;
575
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000576 if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
Tom Stellard30961762016-02-08 19:49:20 +0000577 // There is a hardware bug on CI/SI where SMRD instruction may corrupt
578 // vccz bit, so when we detect that an instruction may read from a
579 // corrupt vccz bit, we need to:
580 // 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
581 // complete.
582 // 2. Restore the correct value of vccz by writing the current value
583 // of vcc back to vcc.
584
585 if (TII->isSMRD(I->getOpcode())) {
586 VCCZCorrupt = true;
587 } else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
588 // FIXME: We only care about SMRD instructions here, not LDS or GDS.
589 // Whenever we store a value in vcc, the correct value of vccz is
590 // restored.
591 VCCZCorrupt = false;
592 }
593
594 // Check if we need to apply the bug work-around
Matt Arsenault52f14ec2016-11-07 19:09:27 +0000595 if (VCCZCorrupt && readsVCCZ(*I)) {
Tom Stellard30961762016-02-08 19:49:20 +0000596 DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');
597
598 // Wait on everything, not just LGKM. vccz reads usually come from
599 // terminators, and we always wait on everything at the end of the
600 // block, so if we only wait on LGKM here, we might end up with
601 // another s_waitcnt inserted right after this if there are non-LGKM
602 // instructions still outstanding.
603 insertWait(MBB, I, LastIssued);
604
605 // Restore the vccz bit. Any time a value is written to vcc, the vcc
606 // bit is updated, so we can restore the bit by reading the value of
607 // vcc and then writing it back to the register.
608 BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
609 AMDGPU::VCC)
Matt Arsenault52f14ec2016-11-07 19:09:27 +0000610 .addReg(AMDGPU::VCC);
Tom Stellard30961762016-02-08 19:49:20 +0000611 }
612 }
613
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000614 // Record pre-existing, explicitly requested waits
615 if (I->getOpcode() == AMDGPU::S_WAITCNT) {
616 handleExistingWait(*I);
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000617 RemoveMI.push_back(&*I);
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000618 continue;
619 }
Marek Olsak1bd24632015-02-03 17:37:52 +0000620
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000621 Counters Required;
622
623 // Wait for everything before a barrier.
624 //
625 // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
626 // but we also want to wait for any other outstanding transfers before
627 // signalling other hardware blocks
Konstantin Zhuravlyovd7bdf242016-09-30 16:50:36 +0000628 if ((I->getOpcode() == AMDGPU::S_BARRIER &&
Konstantin Zhuravlyovbe6c0ca2017-06-02 17:40:26 +0000629 !ST->hasAutoWaitcntBeforeBarrier()) ||
Jan Veselyd48445d2017-01-04 18:06:55 +0000630 I->getOpcode() == AMDGPU::S_SENDMSG ||
631 I->getOpcode() == AMDGPU::S_SENDMSGHALT)
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000632 Required = LastIssued;
633 else
634 Required = handleOperands(*I);
635
636 Counters Increment = getHwCounts(*I);
637
638 if (countersNonZero(Required) || countersNonZero(Increment))
639 increaseCounters(Required, DelayedWaitOn);
640
641 Changes |= insertWait(MBB, I, Required);
642
643 pushInstruction(MBB, I, Increment);
Marek Olsak1bd24632015-02-03 17:37:52 +0000644 handleSendMsg(MBB, I);
Marek Olsak79c05872016-11-25 17:37:09 +0000645
646 if (I->getOpcode() == AMDGPU::S_ENDPGM ||
Matt Arsenault5b20fbb2017-03-21 22:18:10 +0000647 I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
Marek Olsak79c05872016-11-25 17:37:09 +0000648 EndPgmBlocks.push_back(&MBB);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000649 }
650
Matt Arsenault52d1b622017-03-08 01:06:58 +0000651 // Wait for everything at the end of the MBB. If there is only one
652 // successor, we can defer this until the uses there.
653 if (!hasTrivialSuccessor(MBB))
654 Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
Tom Stellardc4cabef2013-01-18 21:15:53 +0000655 }
656
Marek Olsak79c05872016-11-25 17:37:09 +0000657 if (HaveScalarStores) {
658 // If scalar writes are used, the cache must be flushed or else the next
659 // wave to reuse the same scratch memory can be clobbered.
660 //
661 // Insert s_dcache_wb at wave termination points if there were any scalar
662 // stores, and only if the cache hasn't already been flushed. This could be
663 // improved by looking across blocks for flushes in postdominating blocks
664 // from the stores but an explicitly requested flush is probably very rare.
665 for (MachineBasicBlock *MBB : EndPgmBlocks) {
666 bool SeenDCacheWB = false;
667
668 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
669 I != E; ++I) {
Marek Olsak79c05872016-11-25 17:37:09 +0000670 if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
671 SeenDCacheWB = true;
672 else if (TII->isScalarStore(*I))
673 SeenDCacheWB = false;
674
675 // FIXME: It would be better to insert this before a waitcnt if any.
676 if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
Matt Arsenault5b20fbb2017-03-21 22:18:10 +0000677 I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) && !SeenDCacheWB) {
Marek Olsak79c05872016-11-25 17:37:09 +0000678 Changes = true;
679 BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
680 }
681 }
682 }
683 }
684
Nicolai Haehnlef66bdb52016-04-27 15:46:01 +0000685 for (MachineInstr *I : RemoveMI)
686 I->eraseFromParent();
687
Matt Arsenault9ac40022017-04-11 22:29:31 +0000688 if (!MFI->isEntryFunction()) {
689 // Wait for any outstanding memory operations that the input registers may
690 // depend on. We can't track them and it's better to to the wait after the
691 // costly call sequence.
692
693 // TODO: Could insert earlier and schedule more liberally with operations
694 // that only use caller preserved registers.
695 MachineBasicBlock &EntryBB = MF.front();
696 BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
697 .addImm(0);
698
699 Changes = true;
700 }
701
Tom Stellardc4cabef2013-01-18 21:15:53 +0000702 return Changes;
703}