blob: 87a8c06e6975200ac8d92260b9ef8ad0c413bd22 [file] [log] [blame]
Tom Stellardcb6ba622016-04-30 00:23:06 +00001//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellardcb6ba622016-04-30 00:23:06 +00006//
7//===----------------------------------------------------------------------===//
8//
9// This file implements hazard recognizers for scheduling on GCN processors.
10//
11//===----------------------------------------------------------------------===//
12
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000013#include "GCNHazardRecognizer.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000014#include "AMDGPUSubtarget.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000015#include "SIDefines.h"
Tom Stellardcb6ba622016-04-30 00:23:06 +000016#include "SIInstrInfo.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000017#include "SIRegisterInfo.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000018#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000019#include "Utils/AMDGPUBaseInfo.h"
20#include "llvm/ADT/iterator_range.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstr.h"
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +000023#include "llvm/CodeGen/MachineInstrBuilder.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000024#include "llvm/CodeGen/MachineOperand.h"
Tom Stellardcb6ba622016-04-30 00:23:06 +000025#include "llvm/CodeGen/ScheduleDAG.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000026#include "llvm/MC/MCInstrDesc.h"
27#include "llvm/Support/ErrorHandling.h"
28#include <algorithm>
29#include <cassert>
30#include <limits>
31#include <set>
32#include <vector>
Tom Stellardcb6ba622016-04-30 00:23:06 +000033
34using namespace llvm;
35
36//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
38//===----------------------------------------------------------------------===//
39
// Construct a hazard recognizer for \p MF. Caches the subtarget, instruction
// info and register info, and sizes the clause def/use register-unit sets.
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  IsHazardRecognizerMode(false),
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
  // 5 is the largest number of wait states any checker below ever needs
  // (e.g. the VMEM SGPR-read and DPP EXEC hazards).
  MaxLookAhead = 5;
}
51
// Scheduler callback: record the instruction wrapped by \p SU as the one
// issued this cycle.
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
  EmitInstruction(SU->getInstr());
}
55
// Record \p MI as the instruction issued this cycle; it is folded into the
// wait-state history by the next AdvanceCycle().
void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}
59
Tom Stellard5ab61542016-10-07 23:42:48 +000060static bool isDivFMas(unsigned Opcode) {
61 return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
62}
63
Tom Stellard961811c2016-10-15 00:58:14 +000064static bool isSGetReg(unsigned Opcode) {
65 return Opcode == AMDGPU::S_GETREG_B32;
66}
67
68static bool isSSetReg(unsigned Opcode) {
69 return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
70}
71
Tom Stellard04051b52016-10-27 23:42:29 +000072static bool isRWLane(unsigned Opcode) {
73 return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
74}
75
Tom Stellardaea899e2016-10-27 23:50:21 +000076static bool isRFE(unsigned Opcode) {
77 return Opcode == AMDGPU::S_RFE_B64;
78}
79
Matt Arsenaulte823d922017-02-18 18:29:53 +000080static bool isSMovRel(unsigned Opcode) {
Matt Arsenault59ece952017-03-17 21:36:28 +000081 switch (Opcode) {
82 case AMDGPU::S_MOVRELS_B32:
83 case AMDGPU::S_MOVRELS_B64:
84 case AMDGPU::S_MOVRELD_B32:
85 case AMDGPU::S_MOVRELD_B64:
86 return true;
87 default:
88 return false;
89 }
Matt Arsenaulte823d922017-02-18 18:29:53 +000090}
91
Marek Olsakc5cec5e2019-01-16 15:43:53 +000092static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
93 const MachineInstr &MI) {
94 if (TII.isAlwaysGDS(MI.getOpcode()))
95 return true;
96
Matt Arsenaulta41351e2017-11-17 21:35:32 +000097 switch (MI.getOpcode()) {
98 case AMDGPU::S_SENDMSG:
99 case AMDGPU::S_SENDMSGHALT:
100 case AMDGPU::S_TTRACEDATA:
101 return true;
Marek Olsakc5cec5e2019-01-16 15:43:53 +0000102 // These DS opcodes don't support GDS.
103 case AMDGPU::DS_NOP:
104 case AMDGPU::DS_PERMUTE_B32:
105 case AMDGPU::DS_BPERMUTE_B32:
106 return false;
Matt Arsenaulta41351e2017-11-17 21:35:32 +0000107 default:
Marek Olsakc5cec5e2019-01-16 15:43:53 +0000108 if (TII.isDS(MI.getOpcode())) {
109 int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
110 AMDGPU::OpName::gds);
111 if (MI.getOperand(GDS).getImm())
112 return true;
113 }
Matt Arsenaulta41351e2017-11-17 21:35:32 +0000114 return false;
115 }
116}
117
Tom Stellardaea899e2016-10-27 23:50:21 +0000118static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
Tom Stellard961811c2016-10-15 00:58:14 +0000119 const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
120 AMDGPU::OpName::simm16);
121 return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
122}
123
// Scheduler query: classify \p SU as NoHazard (safe to issue), or NoopHazard
// (wait states must be inserted first). Each check* helper returns the number
// of wait states still required; any positive value is a hazard.
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();
  // Bundle headers carry no encoding of their own; members are checked
  // individually via processBundle().
  if (MI->isBundle())
    return NoHazard;

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return NoopHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI))
      && checkVMEMHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
    return NoopHazard;

  // Subtargets without data-dependence hazards can skip all remaining
  // (data-dependent) checks.
  if (ST.hasNoDataDepHazard())
    return NoHazard;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return NoopHazard;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return NoopHazard;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return NoopHazard;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return NoopHazard;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return NoopHazard;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return NoopHazard;

  // M0 readers (interp / movrel) after an M0 write.
  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return NoopHazard;

  if (checkAnyInstHazards(MI) > 0)
    return NoopHazard;

  return NoHazard;
}
183
// Insert an s_nop immediately before \p MI inside its bundle, consuming one
// wait state.
static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
      .addImm(0);
}
188
// Walk the members of the bundle headed by CurrCycleInstr, inserting s_nops
// for any hazards found and feeding each member into the wait-state history.
void GCNHazardRecognizer::processBundle() {
  MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
  MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
  // Check bundled MachineInstr's for hazards.
  for (; MI != E && MI->isInsideBundle(); ++MI) {
    CurrCycleInstr = &*MI;
    unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);

    if (IsHazardRecognizerMode)
      fixHazards(CurrCycleInstr);

    for (unsigned i = 0; i < WaitStates; ++i)
      insertNoopInBundle(CurrCycleInstr, TII);

    // It's unnecessary to track more than MaxLookAhead instructions. Since we
    // include the bundled MI directly after, only add a maximum of
    // (MaxLookAhead - 1) noops to EmittedInstrs.
    for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
      EmittedInstrs.push_front(nullptr);

    EmittedInstrs.push_front(CurrCycleInstr);
    EmittedInstrs.resize(MaxLookAhead);
  }
  CurrCycleInstr = nullptr;
}
214
// Scheduler entry point: number of noops needed before issuing \p SU.
// Runs with hazard-recognizer mode off (no cross-block scanning or fixups).
unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
  IsHazardRecognizerMode = false;
  return PreEmitNoopsCommon(SU->getInstr());
}
219
// Post-RA hazard-recognizer entry point: number of noops needed before \p MI.
// Enables hazard-recognizer mode, which allows scanning across block
// boundaries and applying in-place fixups via fixHazards().
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  IsHazardRecognizerMode = true;
  CurrCycleInstr = MI;
  unsigned W = PreEmitNoopsCommon(MI);
  fixHazards(MI);
  CurrCycleInstr = nullptr;
  return W;
}
228
// Compute the number of wait states required before \p MI may issue, taking
// the maximum over all applicable hazard checks. Mutually-exclusive
// instruction classes return early once their check has been folded in.
unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
  // Bundle headers are handled member-by-member in processBundle().
  if (MI->isBundle())
    return 0;

  int WaitStates = std::max(0, checkAnyInstHazards(MI));

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (ST.hasNSAtoVMEMBug())
    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));

  // Subtargets without data-dependence hazards skip the remaining checks.
  if (ST.hasNoDataDepHazard())
    return WaitStates;

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  // M0 readers (interp / movrel) after an M0 write.
  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  return WaitStates;
}
280
// Record one emitted wait state (a noop) in the lookahead history.
void GCNHazardRecognizer::EmitNoop() {
  EmittedInstrs.push_front(nullptr);
}
284
285void GCNHazardRecognizer::AdvanceCycle() {
Tom Stellardcb6ba622016-04-30 00:23:06 +0000286 // When the scheduler detects a stall, it will call AdvanceCycle() without
287 // emitting any instructions.
288 if (!CurrCycleInstr)
289 return;
290
Carl Ritsonf898edd2018-09-10 10:14:48 +0000291 // Do not track non-instructions which do not affect the wait states.
292 // If included, these instructions can lead to buffer overflow such that
293 // detectable hazards are missed.
David Stuttard81eec582019-03-05 10:25:16 +0000294 if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
295 CurrCycleInstr->isKill())
Carl Ritsonf898edd2018-09-10 10:14:48 +0000296 return;
297
Austin Kerbow8a3d3a92019-05-07 22:12:15 +0000298 if (CurrCycleInstr->isBundle()) {
299 processBundle();
300 return;
301 }
302
Matt Arsenault59ece952017-03-17 21:36:28 +0000303 unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
Tom Stellardcb6ba622016-04-30 00:23:06 +0000304
305 // Keep track of emitted instructions
306 EmittedInstrs.push_front(CurrCycleInstr);
307
308 // Add a nullptr for each additional wait state after the first. Make sure
309 // not to add more than getMaxLookAhead() items to the list, since we
310 // truncate the list to that size right after this loop.
311 for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
312 i < e; ++i) {
313 EmittedInstrs.push_front(nullptr);
314 }
315
316 // getMaxLookahead() is the largest number of wait states we will ever need
317 // to insert, so there is no point in keeping track of more than that many
318 // wait states.
319 EmittedInstrs.resize(getMaxLookAhead());
320
321 CurrCycleInstr = nullptr;
322}
323
// Bottom-up scheduling is not supported by this recognizer.
void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}
327
328//===----------------------------------------------------------------------===//
329// Helper Functions
330//===----------------------------------------------------------------------===//
331
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000332typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;
333
// Returns a minimum wait states since \p I walking all predecessors.
// Only scans until \p IsExpired does not return true.
// Can only be run in a hazard recognizer mode.
//
// Walks backwards from \p I through \p MBB and, on reaching the block start,
// recurses into every unvisited predecessor, returning the minimum
// accumulated wait-state count at which \p IsHazard matched, or INT_MAX if
// no hazard was found before expiry on every path.
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineBasicBlock *MBB,
                              MachineBasicBlock::reverse_instr_iterator I,
                              int WaitStates,
                              IsExpiredFn IsExpired,
                              DenseSet<const MachineBasicBlock *> &Visited) {
  for (auto E = MBB->instr_rend(); I != E; ++I) {
    // Don't add WaitStates for parent BUNDLE instructions.
    if (I->isBundle())
      continue;

    if (IsHazard(&*I))
      return WaitStates;

    // These carry no encoding and consume no wait states.
    if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
      continue;

    WaitStates += SIInstrInfo::getNumWaitStates(*I);

    if (IsExpired(&*I, WaitStates))
      return std::numeric_limits<int>::max();
  }

  // Reached the top of the block: continue the scan in each predecessor and
  // keep the minimum (worst-case) distance over all paths.
  int MinWaitStates = WaitStates;
  bool Found = false;
  for (MachineBasicBlock *Pred : MBB->predecessors()) {
    // Visited guards against cycles in the CFG.
    if (!Visited.insert(Pred).second)
      continue;

    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
                               WaitStates, IsExpired, Visited);

    if (W == std::numeric_limits<int>::max())
      continue;

    MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
    if (IsExpired(nullptr, MinWaitStates))
      return MinWaitStates;

    Found = true;
  }

  if (Found)
    return MinWaitStates;

  return std::numeric_limits<int>::max();
}
384
// Convenience wrapper: start the backwards scan at the instruction preceding
// \p MI within its parent block, with a fresh visited-block set.
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineInstr *MI,
                              IsExpiredFn IsExpired) {
  DenseSet<const MachineBasicBlock *> Visited;
  return getWaitStatesSince(IsHazard, MI->getParent(),
                            std::next(MI->getReverseIterator()),
                            0, IsExpired, Visited);
}
393
// Wait states elapsed since the most recent instruction matching \p IsHazard,
// scanning at most \p Limit wait states back; INT_MAX if none found.
// In hazard-recognizer mode this walks the CFG from CurrCycleInstr; in
// scheduler mode it consults the EmittedInstrs history instead.
int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
      return WaitStates >= Limit;
    };
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
  }

  int WaitStates = 0;
  for (MachineInstr *MI : EmittedInstrs) {
    // nullptr entries represent pure wait states; they cannot be hazards.
    if (MI) {
      if (IsHazard(MI))
        return WaitStates;

      // Inline asm contributes no wait states of its own.
      if (MI->isInlineAsm())
        continue;
    }
    ++WaitStates;

    if (WaitStates >= Limit)
      break;
  }
  return std::numeric_limits<int>::max();
}
418
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000419int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
420 IsHazardFn IsHazardDef,
421 int Limit) {
Tom Stellardb133fbb2016-10-27 23:05:31 +0000422 const SIRegisterInfo *TRI = ST.getRegisterInfo();
423
424 auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
425 return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
426 };
427
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000428 return getWaitStatesSince(IsHazardFn, Limit);
Tom Stellardb133fbb2016-10-27 23:05:31 +0000429}
430
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000431int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
432 int Limit) {
Tom Stellardb133fbb2016-10-27 23:05:31 +0000433 auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
434 return isSSetReg(MI->getOpcode()) && IsHazard(MI);
435 };
436
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000437 return getWaitStatesSince(IsHazardFn, Limit);
Tom Stellardb133fbb2016-10-27 23:05:31 +0000438}
439
Tom Stellardcb6ba622016-04-30 00:23:06 +0000440//===----------------------------------------------------------------------===//
441// No-op Hazard Detection
442//===----------------------------------------------------------------------===//
443
Matt Arsenault03c67d12017-11-17 04:18:24 +0000444static void addRegUnits(const SIRegisterInfo &TRI,
445 BitVector &BV, unsigned Reg) {
446 for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
447 BV.set(*RUI);
448}
449
450static void addRegsToSet(const SIRegisterInfo &TRI,
451 iterator_range<MachineInstr::const_mop_iterator> Ops,
452 BitVector &Set) {
Tom Stellard1f520e52016-05-02 17:39:06 +0000453 for (const MachineOperand &Op : Ops) {
454 if (Op.isReg())
Matt Arsenault03c67d12017-11-17 04:18:24 +0000455 addRegUnits(TRI, Set, Op.getReg());
Tom Stellard1f520e52016-05-02 17:39:06 +0000456 }
457}
458
// Fold \p MI's defs and uses into the current clause's register-unit sets.
void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
  addRegsToSet(TRI, MI.uses(), ClauseUses);
}
464
// Returns 1 if adding \p MEM to the current soft clause would be unsafe
// (a clause-internal def/use overlap), otherwise 0.
int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clause are only present on VI+, and only matter if xnack is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft-clause is any group of consecutive SMEM instructions. The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure that
  // when a clause has more than one instruction, no instruction in the clause
  // writes to a register that is read by another instruction in the clause
  // (including itself). If we encounter this situation, we need to break the
  // clause by inserting a non SMEM instruction.

  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    // A clause is a run of same-kind memory ops (all SMRD or all VMEM).
    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
      break;

    addClauseInst(*MI);
  }

  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if they
  // use the same address. For now, just start a new clause whenever we see a
  // store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}
512
Tom Stellardcb6ba622016-04-30 00:23:06 +0000513int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
Tom Stellard1f520e52016-05-02 17:39:06 +0000514 int WaitStatesNeeded = 0;
515
Matt Arsenaulta41351e2017-11-17 21:35:32 +0000516 WaitStatesNeeded = checkSoftClauseHazards(SMRD);
Tom Stellardcb6ba622016-04-30 00:23:06 +0000517
518 // This SMRD hazard only affects SI.
Tom Stellard5bfbae52018-07-11 20:59:01 +0000519 if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
Tom Stellard1f520e52016-05-02 17:39:06 +0000520 return WaitStatesNeeded;
Tom Stellardcb6ba622016-04-30 00:23:06 +0000521
522 // A read of an SGPR by SMRD instruction requires 4 wait states when the
523 // SGPR was written by a VALU instruction.
524 int SmrdSgprWaitStates = 4;
Matt Arsenault59ece952017-03-17 21:36:28 +0000525 auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
Marek Olsak22322432017-10-26 14:43:02 +0000526 auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };
527
Matt Arsenault4512d0a2017-11-17 04:18:26 +0000528 bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);
Tom Stellardcb6ba622016-04-30 00:23:06 +0000529
530 for (const MachineOperand &Use : SMRD->uses()) {
531 if (!Use.isReg())
532 continue;
533 int WaitStatesNeededForUse =
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000534 SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
535 SmrdSgprWaitStates);
Tom Stellardcb6ba622016-04-30 00:23:06 +0000536 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
Marek Olsak22322432017-10-26 14:43:02 +0000537
538 // This fixes what appears to be undocumented hardware behavior in SI where
539 // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
540 // needs some number of nops in between. We don't know how many we need, but
541 // let's use 4. This wasn't discovered before probably because the only
542 // case when this happens is when we expand a 64-bit pointer into a full
543 // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
544 // probably never encountered in the closed-source land.
545 if (IsBufferSMRD) {
546 int WaitStatesNeededForUse =
547 SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000548 IsBufferHazardDefFn,
549 SmrdSgprWaitStates);
Marek Olsak22322432017-10-26 14:43:02 +0000550 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
551 }
Tom Stellardcb6ba622016-04-30 00:23:06 +0000552 }
Marek Olsak22322432017-10-26 14:43:02 +0000553
Tom Stellardcb6ba622016-04-30 00:23:06 +0000554 return WaitStatesNeeded;
555}
556
// Wait states required before issuing the VMEM/FLAT instruction \p VMEM:
// soft-clause breaking plus the VI+ hazard of reading an SGPR recently
// written by a VALU.
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return 0;

  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU Instruction.
  const int VmemSgprWaitStates = 5;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  for (const MachineOperand &Use : VMEM->uses()) {
    // Only SGPR reads are affected; VGPR operands are skipped.
    if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;

    int WaitStatesNeededForUse =
        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   VmemSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }
  return WaitStatesNeeded;
}
Tom Stellarda27007e2016-05-02 16:23:09 +0000578
// Wait states required before issuing the DPP instruction \p DPP: 2 after any
// write of a VGPR it reads, and 5 after a VALU write of EXEC.
int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
  int DppVgprWaitStates = 2;
  int DppExecWaitStates = 5;
  int WaitStatesNeeded = 0;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };

  for (const MachineOperand &Use : DPP->uses()) {
    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    // Any defining instruction counts for the VGPR hazard, hence the
    // always-true predicate.
    int WaitStatesNeededForUse =
        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                              [](MachineInstr *) { return true; },
                              DppVgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  WaitStatesNeeded = std::max(
      WaitStatesNeeded,
      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
                                                DppExecWaitStates));

  return WaitStatesNeeded;
}
Tom Stellard5ab61542016-10-07 23:42:48 +0000606
607int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
608 const SIInstrInfo *TII = ST.getInstrInfo();
609
610 // v_div_fmas requires 4 wait states after a write to vcc from a VALU
611 // instruction.
612 const int DivFMasWaitStates = 4;
613 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000614 int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
615 DivFMasWaitStates);
Tom Stellard5ab61542016-10-07 23:42:48 +0000616
617 return DivFMasWaitStates - WaitStatesNeeded;
618}
Tom Stellard961811c2016-10-15 00:58:14 +0000619
620int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
621 const SIInstrInfo *TII = ST.getInstrInfo();
622 unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
623
624 const int GetRegWaitStates = 2;
625 auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
626 return GetRegHWReg == getHWReg(TII, *MI);
627 };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000628 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);
Tom Stellard961811c2016-10-15 00:58:14 +0000629
630 return GetRegWaitStates - WaitStatesNeeded;
631}
Tom Stellard30d30822016-10-27 20:39:09 +0000632
633int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
634 const SIInstrInfo *TII = ST.getInstrInfo();
635 unsigned HWReg = getHWReg(TII, *SetRegInstr);
636
637 const int SetRegWaitStates =
638 ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
639 auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
640 return HWReg == getHWReg(TII, *MI);
641 };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000642 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
Tom Stellard30d30822016-10-27 20:39:09 +0000643 return SetRegWaitStates - WaitStatesNeeded;
644}
Tom Stellardb133fbb2016-10-27 23:05:31 +0000645
// If \p MI is a store whose data could be clobbered by a following VALU write
// (the >64-bit store-data hazard), return the operand index of its data
// operand; otherwise return -1.
int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  if (!MI.mayStore())
    return -1;

  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
  int VDataRCID = -1;
  if (VDataIdx != -1)
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;

  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
    // There is no hazard if the instruction does not use vector regs
    // (like wbinvl1)
    if (VDataIdx == -1)
      return -1;
    // For MUBUF/MTBUF instructions this hazard only exists if the
    // instruction is not using a register in the soffset field.
    const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
    // If we have no soffset operand, then assume this field has been
    // hardcoded to zero.
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
        (!SOffset || !SOffset->isReg()))
      return VDataIdx;
  }

  // MIMG instructions create a hazard if they don't use a 256-bit T# and
  // the store size is greater than 8 bytes and they have more than two bits
  // of their dmask set.
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
  if (TII->isMIMG(MI)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
    assert(SRsrcIdx != -1 &&
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
    (void)SRsrcIdx;
  }

  if (TII->isFLAT(MI)) {
    // NOTE(review): DataIdx is used unchecked; presumably every FLAT store
    // has a vdata operand, so the index is never -1 — confirm against the
    // FLAT instruction definitions.
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
      return DataIdx;
  }

  return -1;
}
694
// Returns the wait states required before \p Def may be written, for the
// hazard where VMEM instructions that store more than 8 bytes can have
// their store data overwritten by the next instruction.
int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
                                                const MachineRegisterInfo &MRI) {
  // Helper to check for the hazard where VMEM instructions that store more
  // than 8 bytes can have their store data overwritten by the next
  // instruction.
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  const int VALUWaitStates = 1;
  int WaitStatesNeeded = 0;

  // Only VGPR defs can clobber in-flight store data.
  if (!TRI->isVGPR(MRI, Def.getReg()))
    return WaitStatesNeeded;
  unsigned Reg = Def.getReg();
  // Hazard: a recent instruction whose store-data operand overlaps Reg.
  auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
    int DataIdx = createsVALUHazard(*MI);
    return DataIdx >= 0 &&
           TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
  };
  int WaitStatesNeededForDef =
      VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);

  return WaitStatesNeeded;
}
718
Tom Stellardb133fbb2016-10-27 23:05:31 +0000719int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
720 // This checks for the hazard where VMEM instructions that store more than
721 // 8 bytes can have there store data over written by the next instruction.
722 if (!ST.has12DWordStoreHazard())
723 return 0;
724
Mark Searlesd29f24a2017-12-07 20:34:25 +0000725 const MachineRegisterInfo &MRI = MF.getRegInfo();
Tom Stellardb133fbb2016-10-27 23:05:31 +0000726 int WaitStatesNeeded = 0;
727
728 for (const MachineOperand &Def : VALU->defs()) {
Mark Searlesd29f24a2017-12-07 20:34:25 +0000729 WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
Tom Stellardb133fbb2016-10-27 23:05:31 +0000730 }
Mark Searlesd29f24a2017-12-07 20:34:25 +0000731
732 return WaitStatesNeeded;
733}
734
735int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
736 // This checks for hazards associated with inline asm statements.
737 // Since inline asms can contain just about anything, we use this
738 // to call/leverage other check*Hazard routines. Note that
739 // this function doesn't attempt to address all possible inline asm
740 // hazards (good luck), but is a collection of what has been
741 // problematic thus far.
742
743 // see checkVALUHazards()
744 if (!ST.has12DWordStoreHazard())
745 return 0;
746
747 const MachineRegisterInfo &MRI = MF.getRegInfo();
748 int WaitStatesNeeded = 0;
749
750 for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
751 I != E; ++I) {
752 const MachineOperand &Op = IA->getOperand(I);
753 if (Op.isReg() && Op.isDef()) {
754 WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
755 }
756 }
757
Tom Stellardb133fbb2016-10-27 23:05:31 +0000758 return WaitStatesNeeded;
759}
Tom Stellard04051b52016-10-27 23:42:29 +0000760
761int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
762 const SIInstrInfo *TII = ST.getInstrInfo();
763 const SIRegisterInfo *TRI = ST.getRegisterInfo();
Mark Searlesd29f24a2017-12-07 20:34:25 +0000764 const MachineRegisterInfo &MRI = MF.getRegInfo();
Tom Stellard04051b52016-10-27 23:42:29 +0000765
766 const MachineOperand *LaneSelectOp =
767 TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
768
769 if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
770 return 0;
771
772 unsigned LaneSelectReg = LaneSelectOp->getReg();
773 auto IsHazardFn = [TII] (MachineInstr *MI) {
774 return TII->isVALU(*MI);
775 };
776
777 const int RWLaneWaitStates = 4;
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000778 int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
779 RWLaneWaitStates);
Tom Stellard04051b52016-10-27 23:42:29 +0000780 return RWLaneWaitStates - WaitStatesSince;
781}
Tom Stellardaea899e2016-10-27 23:50:21 +0000782
783int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
Tom Stellardaea899e2016-10-27 23:50:21 +0000784 if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
785 return 0;
786
787 const SIInstrInfo *TII = ST.getInstrInfo();
788
789 const int RFEWaitStates = 1;
790
791 auto IsHazardFn = [TII] (MachineInstr *MI) {
792 return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
793 };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000794 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
Tom Stellardaea899e2016-10-27 23:50:21 +0000795 return RFEWaitStates - WaitStatesNeeded;
796}
Matt Arsenaulte823d922017-02-18 18:29:53 +0000797
798int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
Shiva Chen801bf7e2018-05-09 02:42:00 +0000799 if (MI->isDebugInstr())
Matt Arsenaulte823d922017-02-18 18:29:53 +0000800 return 0;
801
802 const SIRegisterInfo *TRI = ST.getRegisterInfo();
803 if (!ST.hasSMovFedHazard())
804 return 0;
805
806 // Check for any instruction reading an SGPR after a write from
807 // s_mov_fed_b32.
808 int MovFedWaitStates = 1;
809 int WaitStatesNeeded = 0;
810
811 for (const MachineOperand &Use : MI->uses()) {
812 if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
813 continue;
814 auto IsHazardFn = [] (MachineInstr *MI) {
815 return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
816 };
817 int WaitStatesNeededForUse =
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000818 MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
819 MovFedWaitStates);
Matt Arsenaulte823d922017-02-18 18:29:53 +0000820 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
821 }
822
823 return WaitStatesNeeded;
824}
825
826int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
Matt Arsenaulte823d922017-02-18 18:29:53 +0000827 const SIInstrInfo *TII = ST.getInstrInfo();
Matt Arsenaulta41351e2017-11-17 21:35:32 +0000828 const int SMovRelWaitStates = 1;
Matt Arsenaulte823d922017-02-18 18:29:53 +0000829 auto IsHazardFn = [TII] (MachineInstr *MI) {
830 return TII->isSALU(*MI);
831 };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000832 return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
833 SMovRelWaitStates);
Matt Arsenaulte823d922017-02-18 18:29:53 +0000834}
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +0000835
// Runs every applicable pre-emit hazard fixup on \p MI. Each fix* routine
// checks its own subtarget predicate and inserts a mitigating instruction
// when needed; their bool results are intentionally ignored here.
void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
  fixVMEMtoScalarWriteHazards(MI);
  fixSMEMtoVectorWriteHazards(MI);
  fixVcmpxExecWARHazard(MI);
  fixLdsBranchVmemWARHazard(MI);
}
842
// Mitigates the VMEM-to-scalar-write hazard: a scalar instruction that
// redefines a register still being read by an in-flight VMEM/DS/FLAT
// access. Inserts a v_nop before \p MI when the hazard is present.
// Returns true if a fix was inserted.
bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
  if (!ST.hasVMEMtoScalarWriteHazard())
    return false;

  // Only scalar (SALU or SMRD) writers can be the consumer side.
  if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
    return false;

  if (MI->getNumDefs() == 0)
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Hazard: an earlier vector-memory instruction reads a register MI is
  // about to redefine.
  auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
        !SIInstrInfo::isFLAT(*I))
      return false;

    for (const MachineOperand &Def : MI->defs()) {
      MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
      // An implicit EXEC read does not count as a hazardous use.
      if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
        continue;
      return true;
    }
    return false;
  };

  // The hazard expires at any VALU instruction or an "s_waitcnt 0".
  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    return MI && (SIInstrInfo::isVALU(*MI) ||
                  (MI->getOpcode() == AMDGPU::S_WAITCNT &&
                   !MI->getOperand(0).getImm()));
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  // Insert a v_nop to break the hazard chain.
  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
  return true;
}
883
// Mitigates the SMEM-to-vector-write hazard: a VALU writing a scalar
// register still being read by an in-flight SMEM load. Inserts
// "s_mov_b32 null, 0" before \p MI when the hazard is present.
// Returns true if a fix was inserted.
bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
  if (!ST.hasSMEMtoVectorWriteHazard())
    return false;

  if (!SIInstrInfo::isVALU(*MI))
    return false;

  // Pick the scalar destination operand name: readlane/readfirstlane write
  // their scalar result through vdst; other VALU use sdst.
  unsigned SDSTName;
  switch (MI->getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_READFIRSTLANE_B32:
    SDSTName = AMDGPU::OpName::vdst;
    break;
  default:
    SDSTName = AMDGPU::OpName::sdst;
    break;
  }

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
  if (!SDST) {
    // No named scalar dst; fall back to the first implicit SGPR def.
    for (const auto &MO : MI->implicit_operands()) {
      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
        SDST = &MO;
        break;
      }
    }
  }

  // No scalar destination at all means no hazard.
  if (!SDST)
    return false;

  const unsigned SDSTReg = SDST->getReg();
  // Hazard: an earlier SMEM instruction reads the same scalar register.
  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
  };

  auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
    if (MI) {
      if (TII->isSALU(*MI)) {
        switch (MI->getOpcode()) {
        case AMDGPU::S_SETVSKIP:
        case AMDGPU::S_VERSION:
        case AMDGPU::S_WAITCNT_VSCNT:
        case AMDGPU::S_WAITCNT_VMCNT:
        case AMDGPU::S_WAITCNT_EXPCNT:
          // These instructions cannot mitigate the hazard.
          return false;
        case AMDGPU::S_WAITCNT_LGKMCNT:
          // Reducing lgkmcnt count to 0 always mitigates the hazard.
          return (MI->getOperand(1).getImm() == 0) &&
                 (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
        case AMDGPU::S_WAITCNT: {
          const int64_t Imm = MI->getOperand(0).getImm();
          AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
          return (Decoded.LgkmCnt == 0);
        }
        default:
          // SOPP instructions cannot mitigate the hazard.
          if (TII->isSOPP(*MI))
            return false;
          // At this point the SALU can be assumed to mitigate the hazard
          // because either:
          // (a) it is independent of the at risk SMEM (breaking chain),
          // or
          // (b) it is dependent on the SMEM, in which case an appropriate
          //     s_waitcnt lgkmcnt _must_ exist between it and the at risk
          //     SMEM instruction.
          return true;
        }
      }
    }
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  // Insert "s_mov_b32 null, 0" to break the hazard.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
      .addImm(0);
  return true;
}
970
971bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
972 if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
973 return false;
974
975 const SIRegisterInfo *TRI = ST.getRegisterInfo();
976 if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
977 return false;
978
979 auto IsHazardFn = [TRI] (MachineInstr *I) {
980 if (SIInstrInfo::isVALU(*I))
981 return false;
982 return I->readsRegister(AMDGPU::EXEC, TRI);
983 };
984
985 const SIInstrInfo *TII = ST.getInstrInfo();
986 auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
987 if (!MI)
988 return false;
989 if (SIInstrInfo::isVALU(*MI)) {
990 if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
991 return true;
992 for (auto MO : MI->implicit_operands())
993 if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
994 return true;
995 }
996 if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
997 (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
998 return true;
999 return false;
1000 };
1001
1002 if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1003 std::numeric_limits<int>::max())
1004 return false;
1005
1006 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1007 TII->get(AMDGPU::S_WAITCNT_DEPCTR))
1008 .addImm(0xfffe);
1009 return true;
1010}
1011
// Mitigates the LDS/VMEM WAR hazard across branches: a DS access on one
// side of a branch paired with a VMEM access on the other side (or vice
// versa). Inserts "s_waitcnt_vscnt null, 0" before \p MI when the hazard
// is present. Returns true if a fix was inserted.
bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
  if (!ST.hasLdsBranchVmemWARHazard())
    return false;

  // Classify an instruction: 1 = DS (LDS) access, 2 = VMEM or
  // segment-specific FLAT access, 0 = neither. The hazard needs one access
  // of each kind, so only a nonzero, differing pair matters.
  auto IsHazardInst = [] (const MachineInstr *MI) {
    if (SIInstrInfo::isDS(*MI))
      return 1;
    if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
      return 2;
    return 0;
  };

  auto InstType = IsHazardInst(MI);
  if (!InstType)
    return false;

  // Outer search expires at any classified memory instruction or at an
  // "s_waitcnt_vscnt null, 0".
  auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
    return I && (IsHazardInst(I) ||
                 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
                  I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
                  !I->getOperand(1).getImm()));
  };

  // Outer hazard: a branch behind which (inner search) an access of the
  // opposite kind occurs with no expiring waitcnt in between. The inner
  // lambdas intentionally shadow the outer IsHazardFn/IsExpiredFn names.
  auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
    if (!I->isBranch())
      return false;

    // Inner hazard: a memory access of the opposite kind.
    auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
      auto InstType2 = IsHazardInst(I);
      return InstType2 && InstType != InstType2;
    };

    // Inner search expires at a same-kind access or the waitcnt.
    auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
      if (!I)
        return false;

      auto InstType2 = IsHazardInst(I);
      if (InstType == InstType2)
        return true;

      return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
             I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
             !I->getOperand(1).getImm();
    };

    return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
           std::numeric_limits<int>::max();
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  // Insert "s_waitcnt_vscnt null, 0" to break the hazard.
  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_VSCNT))
      .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
      .addImm(0);

  return true;
}
1073
1074int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
1075 int NSAtoVMEMWaitStates = 1;
1076
1077 if (!ST.hasNSAtoVMEMBug())
1078 return 0;
1079
1080 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
1081 return 0;
1082
1083 const SIInstrInfo *TII = ST.getInstrInfo();
1084 const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
1085 if (!Offset || (Offset->getImm() & 6) == 0)
1086 return 0;
1087
1088 auto IsHazardFn = [TII] (MachineInstr *I) {
1089 if (!SIInstrInfo::isMIMG(*I))
1090 return false;
1091 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
1092 return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
1093 TII->getInstSizeInBytes(*I) >= 16;
1094 };
1095
1096 return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
1097}