blob: 16436be984f3b3fae991885a686180dc642ae47d [file] [log] [blame]
Tom Stellardcb6ba622016-04-30 00:23:06 +00001//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellardcb6ba622016-04-30 00:23:06 +00006//
7//===----------------------------------------------------------------------===//
8//
9// This file implements hazard recognizers for scheduling on GCN processors.
10//
11//===----------------------------------------------------------------------===//
12
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000013#include "GCNHazardRecognizer.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000014#include "AMDGPUSubtarget.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000015#include "SIDefines.h"
Tom Stellardcb6ba622016-04-30 00:23:06 +000016#include "SIInstrInfo.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000017#include "SIRegisterInfo.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000018#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000019#include "Utils/AMDGPUBaseInfo.h"
20#include "llvm/ADT/iterator_range.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstr.h"
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +000023#include "llvm/CodeGen/MachineInstrBuilder.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000024#include "llvm/CodeGen/MachineOperand.h"
Tom Stellardcb6ba622016-04-30 00:23:06 +000025#include "llvm/CodeGen/ScheduleDAG.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000026#include "llvm/MC/MCInstrDesc.h"
27#include "llvm/Support/ErrorHandling.h"
28#include <algorithm>
29#include <cassert>
30#include <limits>
31#include <set>
32#include <vector>
Tom Stellardcb6ba622016-04-30 00:23:06 +000033
34using namespace llvm;
35
//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//
39
// Construct a hazard recognizer for \p MF. Caches the subtarget, instruction
// info and register info, and sizes the clause def/use register-unit sets.
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  IsHazardRecognizerMode(false),
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
  // Largest number of wait states any single hazard check below can request.
  MaxLookAhead = 5;
}
51
52void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
53 EmitInstruction(SU->getInstr());
54}
55
// Record the instruction issued in the current cycle; AdvanceCycle() folds it
// into the emitted-instruction history.
void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}
59
Tom Stellard5ab61542016-10-07 23:42:48 +000060static bool isDivFMas(unsigned Opcode) {
61 return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
62}
63
Tom Stellard961811c2016-10-15 00:58:14 +000064static bool isSGetReg(unsigned Opcode) {
65 return Opcode == AMDGPU::S_GETREG_B32;
66}
67
68static bool isSSetReg(unsigned Opcode) {
69 return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
70}
71
Tom Stellard04051b52016-10-27 23:42:29 +000072static bool isRWLane(unsigned Opcode) {
73 return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
74}
75
Tom Stellardaea899e2016-10-27 23:50:21 +000076static bool isRFE(unsigned Opcode) {
77 return Opcode == AMDGPU::S_RFE_B64;
78}
79
Matt Arsenaulte823d922017-02-18 18:29:53 +000080static bool isSMovRel(unsigned Opcode) {
Matt Arsenault59ece952017-03-17 21:36:28 +000081 switch (Opcode) {
82 case AMDGPU::S_MOVRELS_B32:
83 case AMDGPU::S_MOVRELS_B64:
84 case AMDGPU::S_MOVRELD_B32:
85 case AMDGPU::S_MOVRELD_B64:
86 return true;
87 default:
88 return false;
89 }
Matt Arsenaulte823d922017-02-18 18:29:53 +000090}
91
Marek Olsakc5cec5e2019-01-16 15:43:53 +000092static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
93 const MachineInstr &MI) {
94 if (TII.isAlwaysGDS(MI.getOpcode()))
95 return true;
96
Matt Arsenaulta41351e2017-11-17 21:35:32 +000097 switch (MI.getOpcode()) {
98 case AMDGPU::S_SENDMSG:
99 case AMDGPU::S_SENDMSGHALT:
100 case AMDGPU::S_TTRACEDATA:
101 return true;
Marek Olsakc5cec5e2019-01-16 15:43:53 +0000102 // These DS opcodes don't support GDS.
103 case AMDGPU::DS_NOP:
104 case AMDGPU::DS_PERMUTE_B32:
105 case AMDGPU::DS_BPERMUTE_B32:
106 return false;
Matt Arsenaulta41351e2017-11-17 21:35:32 +0000107 default:
Marek Olsakc5cec5e2019-01-16 15:43:53 +0000108 if (TII.isDS(MI.getOpcode())) {
109 int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
110 AMDGPU::OpName::gds);
111 if (MI.getOperand(GDS).getImm())
112 return true;
113 }
Matt Arsenaulta41351e2017-11-17 21:35:32 +0000114 return false;
115 }
116}
117
Stanislav Mekhanoshin5f581c92019-06-12 17:52:51 +0000118static bool isPermlane(const MachineInstr &MI) {
119 unsigned Opcode = MI.getOpcode();
120 return Opcode == AMDGPU::V_PERMLANE16_B32 ||
121 Opcode == AMDGPU::V_PERMLANEX16_B32;
122}
123
Tom Stellardaea899e2016-10-27 23:50:21 +0000124static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
Tom Stellard961811c2016-10-15 00:58:14 +0000125 const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
126 AMDGPU::OpName::simm16);
127 return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
128}
129
// Scheduler query: classify \p SU as NoHazard or NoopHazard by running every
// applicable hazard check. Any check reporting > 0 required wait states makes
// the instruction a NoopHazard (the scheduler must stall or insert noops).
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();
  // Bundles are checked per bundled instruction elsewhere (processBundle).
  if (MI->isBundle())
    return NoHazard;

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return NoopHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI))
      && checkVMEMHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
    return NoopHazard;

  if (checkFPAtomicToDenormModeHazard(MI) > 0)
    return NoopHazard;

  // Subtargets without data-dependency hazards skip all remaining checks.
  if (ST.hasNoDataDepHazard())
    return NoHazard;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return NoopHazard;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return NoopHazard;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return NoopHazard;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return NoopHazard;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return NoopHazard;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return NoopHazard;

  // M0 read hazards: v_interp / s_movrel* after an M0 write.
  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  // M0 read hazards: sendmsg / ttracedata / GDS after an M0 write.
  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return NoopHazard;

  if (checkAnyInstHazards(MI) > 0)
    return NoopHazard;

  return NoHazard;
}
192
Austin Kerbow8a3d3a92019-05-07 22:12:15 +0000193static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
194 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
195 .addImm(0);
196}
197
// Walk the instructions inside the bundle headed by CurrCycleInstr, run the
// hazard checks on each, insert the required s_nops into the bundle, and keep
// EmittedInstrs (the wait-state history) in sync.
void GCNHazardRecognizer::processBundle() {
  MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
  MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
  // Check bundled MachineInstr's for hazards.
  for (; MI != E && MI->isInsideBundle(); ++MI) {
    CurrCycleInstr = &*MI;
    unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);

    if (IsHazardRecognizerMode)
      fixHazards(CurrCycleInstr);

    for (unsigned i = 0; i < WaitStates; ++i)
      insertNoopInBundle(CurrCycleInstr, TII);

    // It's unnecessary to track more than MaxLookAhead instructions. Since we
    // include the bundled MI directly after, only add a maximum of
    // (MaxLookAhead - 1) noops to EmittedInstrs.
    for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
      EmittedInstrs.push_front(nullptr);

    EmittedInstrs.push_front(CurrCycleInstr);
    EmittedInstrs.resize(MaxLookAhead);
  }
  CurrCycleInstr = nullptr;
}
223
Tom Stellardcb6ba622016-04-30 00:23:06 +0000224unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000225 IsHazardRecognizerMode = false;
226 return PreEmitNoopsCommon(SU->getInstr());
Tom Stellardcb6ba622016-04-30 00:23:06 +0000227}
228
229unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000230 IsHazardRecognizerMode = true;
231 CurrCycleInstr = MI;
232 unsigned W = PreEmitNoopsCommon(MI);
Austin Kerbow8a3d3a92019-05-07 22:12:15 +0000233 fixHazards(MI);
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000234 CurrCycleInstr = nullptr;
235 return W;
236}
237
// Shared worker for both PreEmitNoops overloads: returns the number of wait
// states (noops) required before \p MI, taking the maximum over all hazard
// checks that apply to this instruction class. Some branches return early
// because their instruction classes are mutually exclusive with the rest.
unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
  // Bundle headers carry no hazards themselves; members are handled in
  // processBundle().
  if (MI->isBundle())
    return 0;

  int WaitStates = std::max(0, checkAnyInstHazards(MI));

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (ST.hasNSAtoVMEMBug())
    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));

  WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));

  // Subtargets without data-dependency hazards skip all remaining checks.
  if (ST.hasNoDataDepHazard())
    return WaitStates;

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  // M0 read hazards: v_interp / s_movrel* after an M0 write.
  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  // M0 read hazards: sendmsg / ttracedata / GDS after an M0 write.
  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  return WaitStates;
}
291
// A noop consumes one wait state; model it as a null entry in the history.
void GCNHazardRecognizer::EmitNoop() {
  EmittedInstrs.push_front(nullptr);
}
295
// Advance the recognizer by one cycle: push the instruction issued this cycle
// (plus nullptr padding for its extra wait states) onto the bounded
// EmittedInstrs history.
void GCNHazardRecognizer::AdvanceCycle() {
  // When the scheduler detects a stall, it will call AdvanceCycle() without
  // emitting any instructions.
  if (!CurrCycleInstr)
    return;

  // Do not track non-instructions which do not affect the wait states.
  // If included, these instructions can lead to buffer overflow such that
  // detectable hazards are missed.
  if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
      CurrCycleInstr->isKill())
    return;

  // Bundles are expanded and tracked member-by-member.
  if (CurrCycleInstr->isBundle()) {
    processBundle();
    return;
  }

  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);

  // Keep track of emitted instructions
  EmittedInstrs.push_front(CurrCycleInstr);

  // Add a nullptr for each additional wait state after the first. Make sure
  // not to add more than getMaxLookAhead() items to the list, since we
  // truncate the list to that size right after this loop.
  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
       i < e; ++i) {
    EmittedInstrs.push_front(nullptr);
  }

  // getMaxLookahead() is the largest number of wait states we will ever need
  // to insert, so there is no point in keeping track of more than that many
  // wait states.
  EmittedInstrs.resize(getMaxLookAhead());

  CurrCycleInstr = nullptr;
}
334
// This recognizer is top-down only; bottom-up scheduling must not call it.
void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}
338
339//===----------------------------------------------------------------------===//
340// Helper Functions
341//===----------------------------------------------------------------------===//
342
// Predicate telling the backward walk when it has scanned far enough; receives
// the instruction just visited (nullptr at block-merge points) and the wait
// states accumulated so far.
typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;

// Returns a minimum wait states since \p I walking all predecessors.
// Only scans until \p IsExpired does not return true.
// Can only be run in a hazard recognizer mode.
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineBasicBlock *MBB,
                              MachineBasicBlock::reverse_instr_iterator I,
                              int WaitStates,
                              IsExpiredFn IsExpired,
                              DenseSet<const MachineBasicBlock *> &Visited) {
  // Scan backwards through this block first.
  for (auto E = MBB->instr_rend(); I != E; ++I) {
    // Don't add WaitStates for parent BUNDLE instructions.
    if (I->isBundle())
      continue;

    if (IsHazard(&*I))
      return WaitStates;

    // These contribute no wait states of their own.
    if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
      continue;

    WaitStates += SIInstrInfo::getNumWaitStates(*I);

    // INT_MAX is the "no hazard found within the window" sentinel.
    if (IsExpired(&*I, WaitStates))
      return std::numeric_limits<int>::max();
  }

  // Continue into predecessors and take the minimum over all paths;
  // Visited guards against revisiting blocks (loops, diamonds).
  int MinWaitStates = WaitStates;
  bool Found = false;
  for (MachineBasicBlock *Pred : MBB->predecessors()) {
    if (!Visited.insert(Pred).second)
      continue;

    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
                               WaitStates, IsExpired, Visited);

    if (W == std::numeric_limits<int>::max())
      continue;

    MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
    // Once the minimum already exceeds the caller's window, stop early.
    if (IsExpired(nullptr, MinWaitStates))
      return MinWaitStates;

    Found = true;
  }

  if (Found)
    return MinWaitStates;

  return std::numeric_limits<int>::max();
}
395
396static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
397 MachineInstr *MI,
398 IsExpiredFn IsExpired) {
399 DenseSet<const MachineBasicBlock *> Visited;
400 return getWaitStatesSince(IsHazard, MI->getParent(),
401 std::next(MI->getReverseIterator()),
402 0, IsExpired, Visited);
403}
404
// Returns the number of wait states elapsed since the most recent instruction
// matching \p IsHazard, or INT_MAX if none is found within \p Limit states.
// Uses a CFG walk in hazard-recognizer mode, otherwise the EmittedInstrs
// history filled in by the scheduler.
int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
      return WaitStates >= Limit;
    };
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
  }

  int WaitStates = 0;
  for (MachineInstr *MI : EmittedInstrs) {
    // nullptr entries are pure wait states; only real instructions can match.
    if (MI) {
      if (IsHazard(MI))
        return WaitStates;

      // Inline asm contributes no wait states of its own.
      if (MI->isInlineAsm())
        continue;
    }
    ++WaitStates;

    if (WaitStates >= Limit)
      break;
  }
  return std::numeric_limits<int>::max();
}
429
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000430int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
431 IsHazardFn IsHazardDef,
432 int Limit) {
Tom Stellardb133fbb2016-10-27 23:05:31 +0000433 const SIRegisterInfo *TRI = ST.getRegisterInfo();
434
435 auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
436 return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
437 };
438
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000439 return getWaitStatesSince(IsHazardFn, Limit);
Tom Stellardb133fbb2016-10-27 23:05:31 +0000440}
441
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000442int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
443 int Limit) {
Tom Stellardb133fbb2016-10-27 23:05:31 +0000444 auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
445 return isSSetReg(MI->getOpcode()) && IsHazard(MI);
446 };
447
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000448 return getWaitStatesSince(IsHazardFn, Limit);
Tom Stellardb133fbb2016-10-27 23:05:31 +0000449}
450
Tom Stellardcb6ba622016-04-30 00:23:06 +0000451//===----------------------------------------------------------------------===//
452// No-op Hazard Detection
453//===----------------------------------------------------------------------===//
454
Matt Arsenault03c67d12017-11-17 04:18:24 +0000455static void addRegUnits(const SIRegisterInfo &TRI,
456 BitVector &BV, unsigned Reg) {
457 for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
458 BV.set(*RUI);
459}
460
461static void addRegsToSet(const SIRegisterInfo &TRI,
462 iterator_range<MachineInstr::const_mop_iterator> Ops,
463 BitVector &Set) {
Tom Stellard1f520e52016-05-02 17:39:06 +0000464 for (const MachineOperand &Op : Ops) {
465 if (Op.isReg())
Matt Arsenault03c67d12017-11-17 04:18:24 +0000466 addRegUnits(TRI, Set, Op.getReg());
Tom Stellard1f520e52016-05-02 17:39:06 +0000467 }
468}
469
// Fold \p MI's register defs and uses into the current soft-clause tracking
// sets (ClauseDefs / ClauseUses).
void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
  addRegsToSet(TRI, MI.uses(), ClauseUses);
}
475
// Returns 1 if adding \p MEM to the current soft clause would create a
// def/use overlap (so the clause must be broken with a non-memory
// instruction), 0 otherwise.
int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clause are only present on VI+, and only matter if xnack is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft-clause is any group of consecutive SMEM instructions. The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure that
  // when a clause has more than one instruction, no instruction in the clause
  // writes to a register that is read by another instruction in the clause
  // (including itself). If we encounter this situation, we need to break the
  // clause by inserting a non SMEM instruction.

  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    // A clause mixes only one kind of memory instruction; a kind switch ends
    // the clause.
    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
      break;

    addClauseInst(*MI);
  }

  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if they
  // use the same address. For now, just start a new clause whenever we see a
  // store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}
523
// Returns the wait states required before \p SMRD: soft-clause breaking plus
// (on SI only) the SGPR-written-by-VALU-then-read-by-SMRD hazard, and an
// extra SALU-def check for buffer-form SMRDs.
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  int WaitStatesNeeded = 0;

  WaitStatesNeeded = checkSoftClauseHazards(SMRD);

  // This SMRD hazard only affects SI.
  if (!ST.hasSMRDReadVALUDefHazard())
    return WaitStatesNeeded;

  // A read of an SGPR by SMRD instruction requires 4 wait states when the
  // SGPR was written by a VALU instruction.
  int SmrdSgprWaitStates = 4;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };

  bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);

  for (const MachineOperand &Use : SMRD->uses()) {
    if (!Use.isReg())
      continue;
    int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   SmrdSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    // This fixes what appears to be undocumented hardware behavior in SI where
    // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
    // needs some number of nops in between. We don't know how many we need, but
    // let's use 4. This wasn't discovered before probably because the only
    // case when this happens is when we expand a 64-bit pointer into a full
    // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
    // probably never encountered in the closed-source land.
    if (IsBufferSMRD) {
      int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                                                   IsBufferHazardDefFn,
                                                   SmrdSgprWaitStates);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    }
  }

  return WaitStatesNeeded;
}
567
568int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
Matt Arsenaulte4c2e9b2019-06-19 23:54:58 +0000569 if (!ST.hasVMEMReadSGPRVALUDefHazard())
Tom Stellardcb6ba622016-04-30 00:23:06 +0000570 return 0;
571
Matt Arsenaulta41351e2017-11-17 21:35:32 +0000572 int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
Tom Stellardcb6ba622016-04-30 00:23:06 +0000573
574 // A read of an SGPR by a VMEM instruction requires 5 wait states when the
575 // SGPR was written by a VALU Instruction.
Matt Arsenaulta41351e2017-11-17 21:35:32 +0000576 const int VmemSgprWaitStates = 5;
577 auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
Tom Stellardcb6ba622016-04-30 00:23:06 +0000578 for (const MachineOperand &Use : VMEM->uses()) {
579 if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
580 continue;
581
582 int WaitStatesNeededForUse =
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000583 VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
584 VmemSgprWaitStates);
Tom Stellardcb6ba622016-04-30 00:23:06 +0000585 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
586 }
587 return WaitStatesNeeded;
588}
Tom Stellarda27007e2016-05-02 16:23:09 +0000589
590int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000591 const SIRegisterInfo *TRI = ST.getRegisterInfo();
Connor Abbott00755362017-08-04 01:09:43 +0000592 const SIInstrInfo *TII = ST.getInstrInfo();
Tom Stellarda27007e2016-05-02 16:23:09 +0000593
Connor Abbott00755362017-08-04 01:09:43 +0000594 // Check for DPP VGPR read after VALU VGPR write and EXEC write.
Tom Stellarda27007e2016-05-02 16:23:09 +0000595 int DppVgprWaitStates = 2;
Connor Abbott00755362017-08-04 01:09:43 +0000596 int DppExecWaitStates = 5;
Tom Stellarda27007e2016-05-02 16:23:09 +0000597 int WaitStatesNeeded = 0;
Connor Abbott00755362017-08-04 01:09:43 +0000598 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
Tom Stellarda27007e2016-05-02 16:23:09 +0000599
600 for (const MachineOperand &Use : DPP->uses()) {
601 if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
602 continue;
603 int WaitStatesNeededForUse =
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000604 DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
605 [](MachineInstr *) { return true; },
606 DppVgprWaitStates);
Tom Stellarda27007e2016-05-02 16:23:09 +0000607 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
608 }
609
Connor Abbott00755362017-08-04 01:09:43 +0000610 WaitStatesNeeded = std::max(
611 WaitStatesNeeded,
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000612 DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
613 DppExecWaitStates));
Connor Abbott00755362017-08-04 01:09:43 +0000614
Tom Stellarda27007e2016-05-02 16:23:09 +0000615 return WaitStatesNeeded;
616}
Tom Stellard5ab61542016-10-07 23:42:48 +0000617
618int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
619 const SIInstrInfo *TII = ST.getInstrInfo();
620
621 // v_div_fmas requires 4 wait states after a write to vcc from a VALU
622 // instruction.
623 const int DivFMasWaitStates = 4;
624 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000625 int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
626 DivFMasWaitStates);
Tom Stellard5ab61542016-10-07 23:42:48 +0000627
628 return DivFMasWaitStates - WaitStatesNeeded;
629}
Tom Stellard961811c2016-10-15 00:58:14 +0000630
631int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
632 const SIInstrInfo *TII = ST.getInstrInfo();
633 unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
634
635 const int GetRegWaitStates = 2;
636 auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
637 return GetRegHWReg == getHWReg(TII, *MI);
638 };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000639 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);
Tom Stellard961811c2016-10-15 00:58:14 +0000640
641 return GetRegWaitStates - WaitStatesNeeded;
642}
Tom Stellard30d30822016-10-27 20:39:09 +0000643
644int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
645 const SIInstrInfo *TII = ST.getInstrInfo();
646 unsigned HWReg = getHWReg(TII, *SetRegInstr);
647
Matt Arsenaulte4c2e9b2019-06-19 23:54:58 +0000648 const int SetRegWaitStates = ST.getSetRegWaitStates();
Tom Stellard30d30822016-10-27 20:39:09 +0000649 auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
650 return HWReg == getHWReg(TII, *MI);
651 };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000652 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
Tom Stellard30d30822016-10-27 20:39:09 +0000653 return SetRegWaitStates - WaitStatesNeeded;
654}
Tom Stellardb133fbb2016-10-27 23:05:31 +0000655
// If \p MI is a store whose written data register could be clobbered by a
// following VALU write (the VALU-after-wide-store hazard), return the index
// of the data operand; otherwise return -1.
int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  if (!MI.mayStore())
    return -1;

  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
  int VDataRCID = -1;
  if (VDataIdx != -1)
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;

  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
    // There is no hazard if the instruction does not use vector regs
    // (like wbinvl1)
    if (VDataIdx == -1)
      return -1;
    // For MUBUF/MTBUF instructions this hazard only exists if the
    // instruction is not using a register in the soffset field.
    const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
    // If we have no soffset operand, then assume this field has been
    // hardcoded to zero.
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
        (!SOffset || !SOffset->isReg()))
      return VDataIdx;
  }

  // MIMG instructions create a hazard if they don't use a 256-bit T# and
  // the store size is greater than 8 bytes and they have more than two bits
  // of their dmask set.
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
  if (TII->isMIMG(MI)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
    assert(SRsrcIdx != -1 &&
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
    (void)SRsrcIdx;
  }

  // FLAT stores wider than 64 bits are always subject to the hazard.
  if (TII->isFLAT(MI)) {
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
      return DataIdx;
  }

  return -1;
}
704
Mark Searlesd29f24a2017-12-07 20:34:25 +0000705int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
706 const MachineRegisterInfo &MRI) {
707 // Helper to check for the hazard where VMEM instructions that store more than
708 // 8 bytes can have there store data over written by the next instruction.
709 const SIRegisterInfo *TRI = ST.getRegisterInfo();
710
711 const int VALUWaitStates = 1;
712 int WaitStatesNeeded = 0;
713
714 if (!TRI->isVGPR(MRI, Def.getReg()))
715 return WaitStatesNeeded;
716 unsigned Reg = Def.getReg();
717 auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
718 int DataIdx = createsVALUHazard(*MI);
719 return DataIdx >= 0 &&
720 TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
721 };
722 int WaitStatesNeededForDef =
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000723 VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
Mark Searlesd29f24a2017-12-07 20:34:25 +0000724 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
725
726 return WaitStatesNeeded;
727}
728
Tom Stellardb133fbb2016-10-27 23:05:31 +0000729int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
730 // This checks for the hazard where VMEM instructions that store more than
731 // 8 bytes can have there store data over written by the next instruction.
732 if (!ST.has12DWordStoreHazard())
733 return 0;
734
Mark Searlesd29f24a2017-12-07 20:34:25 +0000735 const MachineRegisterInfo &MRI = MF.getRegInfo();
Tom Stellardb133fbb2016-10-27 23:05:31 +0000736 int WaitStatesNeeded = 0;
737
738 for (const MachineOperand &Def : VALU->defs()) {
Mark Searlesd29f24a2017-12-07 20:34:25 +0000739 WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
Tom Stellardb133fbb2016-10-27 23:05:31 +0000740 }
Mark Searlesd29f24a2017-12-07 20:34:25 +0000741
742 return WaitStatesNeeded;
743}
744
745int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
746 // This checks for hazards associated with inline asm statements.
747 // Since inline asms can contain just about anything, we use this
748 // to call/leverage other check*Hazard routines. Note that
749 // this function doesn't attempt to address all possible inline asm
750 // hazards (good luck), but is a collection of what has been
751 // problematic thus far.
752
753 // see checkVALUHazards()
754 if (!ST.has12DWordStoreHazard())
755 return 0;
756
757 const MachineRegisterInfo &MRI = MF.getRegInfo();
758 int WaitStatesNeeded = 0;
759
760 for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
761 I != E; ++I) {
762 const MachineOperand &Op = IA->getOperand(I);
763 if (Op.isReg() && Op.isDef()) {
764 WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
765 }
766 }
767
Tom Stellardb133fbb2016-10-27 23:05:31 +0000768 return WaitStatesNeeded;
769}
Tom Stellard04051b52016-10-27 23:42:29 +0000770
771int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
772 const SIInstrInfo *TII = ST.getInstrInfo();
773 const SIRegisterInfo *TRI = ST.getRegisterInfo();
Mark Searlesd29f24a2017-12-07 20:34:25 +0000774 const MachineRegisterInfo &MRI = MF.getRegInfo();
Tom Stellard04051b52016-10-27 23:42:29 +0000775
776 const MachineOperand *LaneSelectOp =
777 TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
778
779 if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
780 return 0;
781
782 unsigned LaneSelectReg = LaneSelectOp->getReg();
783 auto IsHazardFn = [TII] (MachineInstr *MI) {
784 return TII->isVALU(*MI);
785 };
786
787 const int RWLaneWaitStates = 4;
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000788 int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
789 RWLaneWaitStates);
Tom Stellard04051b52016-10-27 23:42:29 +0000790 return RWLaneWaitStates - WaitStatesSince;
791}
Tom Stellardaea899e2016-10-27 23:50:21 +0000792
793int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
Matt Arsenaulte4c2e9b2019-06-19 23:54:58 +0000794 if (!ST.hasRFEHazards())
Tom Stellardaea899e2016-10-27 23:50:21 +0000795 return 0;
796
797 const SIInstrInfo *TII = ST.getInstrInfo();
798
799 const int RFEWaitStates = 1;
800
801 auto IsHazardFn = [TII] (MachineInstr *MI) {
802 return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
803 };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000804 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
Tom Stellardaea899e2016-10-27 23:50:21 +0000805 return RFEWaitStates - WaitStatesNeeded;
806}
Matt Arsenaulte823d922017-02-18 18:29:53 +0000807
808int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
Shiva Chen801bf7e2018-05-09 02:42:00 +0000809 if (MI->isDebugInstr())
Matt Arsenaulte823d922017-02-18 18:29:53 +0000810 return 0;
811
812 const SIRegisterInfo *TRI = ST.getRegisterInfo();
813 if (!ST.hasSMovFedHazard())
814 return 0;
815
816 // Check for any instruction reading an SGPR after a write from
817 // s_mov_fed_b32.
818 int MovFedWaitStates = 1;
819 int WaitStatesNeeded = 0;
820
821 for (const MachineOperand &Use : MI->uses()) {
822 if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
823 continue;
824 auto IsHazardFn = [] (MachineInstr *MI) {
825 return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
826 };
827 int WaitStatesNeededForUse =
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000828 MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
829 MovFedWaitStates);
Matt Arsenaulte823d922017-02-18 18:29:53 +0000830 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
831 }
832
833 return WaitStatesNeeded;
834}
835
836int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
Matt Arsenaulte823d922017-02-18 18:29:53 +0000837 const SIInstrInfo *TII = ST.getInstrInfo();
Matt Arsenaulta41351e2017-11-17 21:35:32 +0000838 const int SMovRelWaitStates = 1;
Matt Arsenaulte823d922017-02-18 18:29:53 +0000839 auto IsHazardFn = [TII] (MachineInstr *MI) {
840 return TII->isSALU(*MI);
841 };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000842 return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
843 SMovRelWaitStates);
Matt Arsenaulte823d922017-02-18 18:29:53 +0000844}
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +0000845
Austin Kerbow8a3d3a92019-05-07 22:12:15 +0000846void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
847 fixVMEMtoScalarWriteHazards(MI);
Stanislav Mekhanoshin5f581c92019-06-12 17:52:51 +0000848 fixVcmpxPermlaneHazards(MI);
Austin Kerbow8a3d3a92019-05-07 22:12:15 +0000849 fixSMEMtoVectorWriteHazards(MI);
850 fixVcmpxExecWARHazard(MI);
851 fixLdsBranchVmemWARHazard(MI);
852}
853
Stanislav Mekhanoshin5f581c92019-06-12 17:52:51 +0000854bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
855 if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
856 return false;
857
858 const SIInstrInfo *TII = ST.getInstrInfo();
859 auto IsHazardFn = [TII] (MachineInstr *MI) {
860 return TII->isVOPC(*MI);
861 };
862
863 auto IsExpiredFn = [] (MachineInstr *MI, int) {
864 if (!MI)
865 return false;
866 unsigned Opc = MI->getOpcode();
867 return SIInstrInfo::isVALU(*MI) &&
868 Opc != AMDGPU::V_NOP_e32 &&
869 Opc != AMDGPU::V_NOP_e64 &&
870 Opc != AMDGPU::V_NOP_sdwa;
871 };
872
873 if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
874 std::numeric_limits<int>::max())
875 return false;
876
877 // V_NOP will be discarded by SQ.
878 // Use V_MOB_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
879 // which is always a VGPR and available.
880 auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
881 unsigned Reg = Src0->getReg();
882 bool IsUndef = Src0->isUndef();
883 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
884 TII->get(AMDGPU::V_MOV_B32_e32))
885 .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
886 .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);
887
888 return true;
889}
890
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +0000891bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
892 if (!ST.hasVMEMtoScalarWriteHazard())
893 return false;
894
895 if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
896 return false;
897
898 if (MI->getNumDefs() == 0)
899 return false;
900
901 const SIRegisterInfo *TRI = ST.getRegisterInfo();
902
903 auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
904 if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
905 !SIInstrInfo::isFLAT(*I))
906 return false;
907
908 for (const MachineOperand &Def : MI->defs()) {
909 MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
910 if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
911 continue;
912 return true;
913 }
914 return false;
915 };
916
917 auto IsExpiredFn = [] (MachineInstr *MI, int) {
918 return MI && (SIInstrInfo::isVALU(*MI) ||
919 (MI->getOpcode() == AMDGPU::S_WAITCNT &&
920 !MI->getOperand(0).getImm()));
921 };
922
923 if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
924 std::numeric_limits<int>::max())
925 return false;
926
927 const SIInstrInfo *TII = ST.getInstrInfo();
928 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
929 return true;
930}
931
932bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
933 if (!ST.hasSMEMtoVectorWriteHazard())
934 return false;
935
936 if (!SIInstrInfo::isVALU(*MI))
937 return false;
938
939 unsigned SDSTName;
940 switch (MI->getOpcode()) {
941 case AMDGPU::V_READLANE_B32:
942 case AMDGPU::V_READFIRSTLANE_B32:
943 SDSTName = AMDGPU::OpName::vdst;
944 break;
945 default:
946 SDSTName = AMDGPU::OpName::sdst;
947 break;
948 }
949
950 const SIInstrInfo *TII = ST.getInstrInfo();
951 const SIRegisterInfo *TRI = ST.getRegisterInfo();
Carl Ritson34e95ce2019-05-20 07:20:12 +0000952 const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +0000953 const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
954 if (!SDST) {
Stanislav Mekhanoshin5ddd5642019-05-04 06:40:20 +0000955 for (const auto &MO : MI->implicit_operands()) {
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +0000956 if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
957 SDST = &MO;
958 break;
959 }
960 }
961 }
962
963 if (!SDST)
964 return false;
965
966 const unsigned SDSTReg = SDST->getReg();
967 auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
968 return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
969 };
970
Carl Ritson34e95ce2019-05-20 07:20:12 +0000971 auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +0000972 if (MI) {
973 if (TII->isSALU(*MI)) {
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +0000974 switch (MI->getOpcode()) {
975 case AMDGPU::S_SETVSKIP:
976 case AMDGPU::S_VERSION:
977 case AMDGPU::S_WAITCNT_VSCNT:
978 case AMDGPU::S_WAITCNT_VMCNT:
979 case AMDGPU::S_WAITCNT_EXPCNT:
Carl Ritson34e95ce2019-05-20 07:20:12 +0000980 // These instructions cannot not mitigate the hazard.
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +0000981 return false;
Carl Ritson34e95ce2019-05-20 07:20:12 +0000982 case AMDGPU::S_WAITCNT_LGKMCNT:
983 // Reducing lgkmcnt count to 0 always mitigates the hazard.
984 return (MI->getOperand(1).getImm() == 0) &&
985 (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
986 case AMDGPU::S_WAITCNT: {
987 const int64_t Imm = MI->getOperand(0).getImm();
988 AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
989 return (Decoded.LgkmCnt == 0);
990 }
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +0000991 default:
Carl Ritson34e95ce2019-05-20 07:20:12 +0000992 // SOPP instructions cannot mitigate the hazard.
993 if (TII->isSOPP(*MI))
994 return false;
995 // At this point the SALU can be assumed to mitigate the hazard
996 // because either:
997 // (a) it is independent of the at risk SMEM (breaking chain),
998 // or
999 // (b) it is dependent on the SMEM, in which case an appropriate
1000 // s_waitcnt lgkmcnt _must_ exist between it and the at risk
1001 // SMEM instruction.
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +00001002 return true;
1003 }
1004 }
1005 }
1006 return false;
1007 };
1008
1009 if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1010 std::numeric_limits<int>::max())
1011 return false;
1012
1013 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1014 TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
1015 .addImm(0);
1016 return true;
1017}
1018
1019bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
1020 if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
1021 return false;
1022
1023 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1024 if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
1025 return false;
1026
1027 auto IsHazardFn = [TRI] (MachineInstr *I) {
1028 if (SIInstrInfo::isVALU(*I))
1029 return false;
1030 return I->readsRegister(AMDGPU::EXEC, TRI);
1031 };
1032
1033 const SIInstrInfo *TII = ST.getInstrInfo();
1034 auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
1035 if (!MI)
1036 return false;
1037 if (SIInstrInfo::isVALU(*MI)) {
1038 if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
1039 return true;
1040 for (auto MO : MI->implicit_operands())
1041 if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
1042 return true;
1043 }
1044 if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
1045 (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
1046 return true;
1047 return false;
1048 };
1049
1050 if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1051 std::numeric_limits<int>::max())
1052 return false;
1053
1054 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1055 TII->get(AMDGPU::S_WAITCNT_DEPCTR))
1056 .addImm(0xfffe);
1057 return true;
1058}
1059
1060bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
1061 if (!ST.hasLdsBranchVmemWARHazard())
1062 return false;
1063
1064 auto IsHazardInst = [] (const MachineInstr *MI) {
1065 if (SIInstrInfo::isDS(*MI))
1066 return 1;
1067 if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
1068 return 2;
1069 return 0;
1070 };
1071
1072 auto InstType = IsHazardInst(MI);
1073 if (!InstType)
1074 return false;
1075
1076 auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
1077 return I && (IsHazardInst(I) ||
1078 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
1079 I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
1080 !I->getOperand(1).getImm()));
1081 };
1082
1083 auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
1084 if (!I->isBranch())
1085 return false;
1086
1087 auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
1088 auto InstType2 = IsHazardInst(I);
1089 return InstType2 && InstType != InstType2;
1090 };
1091
1092 auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
1093 if (!I)
1094 return false;
1095
1096 auto InstType2 = IsHazardInst(I);
1097 if (InstType == InstType2)
1098 return true;
1099
1100 return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
1101 I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
1102 !I->getOperand(1).getImm();
1103 };
1104
1105 return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
1106 std::numeric_limits<int>::max();
1107 };
1108
1109 if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1110 std::numeric_limits<int>::max())
1111 return false;
1112
1113 const SIInstrInfo *TII = ST.getInstrInfo();
1114 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1115 TII->get(AMDGPU::S_WAITCNT_VSCNT))
1116 .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
1117 .addImm(0);
1118
1119 return true;
1120}
1121
1122int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
1123 int NSAtoVMEMWaitStates = 1;
1124
1125 if (!ST.hasNSAtoVMEMBug())
1126 return 0;
1127
1128 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
1129 return 0;
1130
1131 const SIInstrInfo *TII = ST.getInstrInfo();
1132 const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
1133 if (!Offset || (Offset->getImm() & 6) == 0)
1134 return 0;
1135
1136 auto IsHazardFn = [TII] (MachineInstr *I) {
1137 if (!SIInstrInfo::isMIMG(*I))
1138 return false;
1139 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
1140 return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
1141 TII->getInstSizeInBytes(*I) >= 16;
1142 };
1143
1144 return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
1145}
Stanislav Mekhanoshinbdf7f812019-06-21 16:30:14 +00001146
1147int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
1148 int FPAtomicToDenormModeWaitStates = 3;
1149
1150 if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
1151 return 0;
1152
1153 auto IsHazardFn = [] (MachineInstr *I) {
1154 if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
1155 return false;
1156 return SIInstrInfo::isFPAtomic(*I);
1157 };
1158
1159 auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) {
1160 if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI))
1161 return true;
1162
1163 switch (MI->getOpcode()) {
1164 case AMDGPU::S_WAITCNT:
1165 case AMDGPU::S_WAITCNT_VSCNT:
1166 case AMDGPU::S_WAITCNT_VMCNT:
1167 case AMDGPU::S_WAITCNT_EXPCNT:
1168 case AMDGPU::S_WAITCNT_LGKMCNT:
1169 case AMDGPU::S_WAITCNT_IDLE:
1170 return true;
1171 default:
1172 break;
1173 }
1174
1175 return false;
1176 };
1177
1178
1179 return FPAtomicToDenormModeWaitStates -
1180 ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
1181}