blob: a23348e18f92da43bc65e10c2ba0baec0f1ba3fe [file] [log] [blame]
Tom Stellardcb6ba622016-04-30 00:23:06 +00001//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellardcb6ba622016-04-30 00:23:06 +00006//
7//===----------------------------------------------------------------------===//
8//
9// This file implements hazard recognizers for scheduling on GCN processors.
10//
11//===----------------------------------------------------------------------===//
12
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000013#include "GCNHazardRecognizer.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000014#include "AMDGPUSubtarget.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000015#include "SIDefines.h"
Tom Stellardcb6ba622016-04-30 00:23:06 +000016#include "SIInstrInfo.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000017#include "SIRegisterInfo.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000018#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000019#include "Utils/AMDGPUBaseInfo.h"
20#include "llvm/ADT/iterator_range.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstr.h"
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +000023#include "llvm/CodeGen/MachineInstrBuilder.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000024#include "llvm/CodeGen/MachineOperand.h"
Tom Stellardcb6ba622016-04-30 00:23:06 +000025#include "llvm/CodeGen/ScheduleDAG.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000026#include "llvm/MC/MCInstrDesc.h"
27#include "llvm/Support/ErrorHandling.h"
28#include <algorithm>
29#include <cassert>
30#include <limits>
31#include <set>
32#include <vector>
Tom Stellardcb6ba622016-04-30 00:23:06 +000033
34using namespace llvm;
35
//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//
39
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  IsHazardRecognizerMode(false),
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
  // Functions that touch AGPRs (AGPR0 physically used) get a deeper lookahead
  // window (18 vs. 5) — presumably sized for the longer MAI wait-state
  // requirements; confirm against the hardware docs if changing.
  MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5;
  TSchedModel.init(&ST);
}
52
53void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
54 EmitInstruction(SU->getInstr());
55}
56
// Record the instruction issued this cycle. The wait-state bookkeeping for it
// is performed later, in AdvanceCycle()/processBundle().
void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}
60
Tom Stellard5ab61542016-10-07 23:42:48 +000061static bool isDivFMas(unsigned Opcode) {
62 return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
63}
64
Tom Stellard961811c2016-10-15 00:58:14 +000065static bool isSGetReg(unsigned Opcode) {
66 return Opcode == AMDGPU::S_GETREG_B32;
67}
68
69static bool isSSetReg(unsigned Opcode) {
70 return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
71}
72
Tom Stellard04051b52016-10-27 23:42:29 +000073static bool isRWLane(unsigned Opcode) {
74 return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
75}
76
Tom Stellardaea899e2016-10-27 23:50:21 +000077static bool isRFE(unsigned Opcode) {
78 return Opcode == AMDGPU::S_RFE_B64;
79}
80
Matt Arsenaulte823d922017-02-18 18:29:53 +000081static bool isSMovRel(unsigned Opcode) {
Matt Arsenault59ece952017-03-17 21:36:28 +000082 switch (Opcode) {
83 case AMDGPU::S_MOVRELS_B32:
84 case AMDGPU::S_MOVRELS_B64:
85 case AMDGPU::S_MOVRELD_B32:
86 case AMDGPU::S_MOVRELD_B64:
87 return true;
88 default:
89 return false;
90 }
Matt Arsenaulte823d922017-02-18 18:29:53 +000091}
92
// Returns true for instructions that interact with GDS or the message/trace
// hardware: s_sendmsg/s_sendmsghalt, s_ttracedata, always-GDS opcodes, and DS
// instructions whose gds operand is set.
static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
                                    const MachineInstr &MI) {
  if (TII.isAlwaysGDS(MI.getOpcode()))
    return true;

  switch (MI.getOpcode()) {
  case AMDGPU::S_SENDMSG:
  case AMDGPU::S_SENDMSGHALT:
  case AMDGPU::S_TTRACEDATA:
    return true;
  // These DS opcodes don't support GDS.
  case AMDGPU::DS_NOP:
  case AMDGPU::DS_PERMUTE_B32:
  case AMDGPU::DS_BPERMUTE_B32:
    return false;
  default:
    // Any other DS instruction may target GDS via its gds operand.
    if (TII.isDS(MI.getOpcode())) {
      int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::gds);
      if (MI.getOperand(GDS).getImm())
        return true;
    }
    return false;
  }
}
118
Stanislav Mekhanoshin5f581c92019-06-12 17:52:51 +0000119static bool isPermlane(const MachineInstr &MI) {
120 unsigned Opcode = MI.getOpcode();
121 return Opcode == AMDGPU::V_PERMLANE16_B32 ||
122 Opcode == AMDGPU::V_PERMLANEX16_B32;
123}
124
Tom Stellardaea899e2016-10-27 23:50:21 +0000125static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
Tom Stellard961811c2016-10-15 00:58:14 +0000126 const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
127 AMDGPU::OpName::simm16);
128 return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
129}
130
// Scheduler query: classify the hazard created by issuing \p SU now.
// Returns NoopHazard as soon as any category-specific check reports a
// positive number of required wait states, NoHazard otherwise.
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();
  // Bundles are handled per bundled instruction in processBundle().
  if (MI->isBundle())
    return NoHazard;

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return NoopHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI))
      && checkVMEMHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
    return NoopHazard;

  if (checkFPAtomicToDenormModeHazard(MI) > 0)
    return NoopHazard;

  // Subtargets with no data-dependency hazards skip all remaining checks.
  if (ST.hasNoDataDepHazard())
    return NoHazard;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return NoopHazard;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return NoopHazard;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return NoopHazard;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return NoopHazard;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return NoopHazard;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
    return NoopHazard;

  if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
    return NoopHazard;

  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return NoopHazard;

  if (checkAnyInstHazards(MI) > 0)
    return NoopHazard;

  return NoHazard;
}
199
Austin Kerbow8a3d3a92019-05-07 22:12:15 +0000200static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
201 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
202 .addImm(0);
203}
204
// Run hazard detection over each instruction inside the bundle headed by
// CurrCycleInstr, inserting s_nops into the bundle where wait states are
// required, and keep EmittedInstrs in sync.
void GCNHazardRecognizer::processBundle() {
  MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
  MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
  // Check bundled MachineInstr's for hazards.
  for (; MI != E && MI->isInsideBundle(); ++MI) {
    CurrCycleInstr = &*MI;
    unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);

    if (IsHazardRecognizerMode)
      fixHazards(CurrCycleInstr);

    for (unsigned i = 0; i < WaitStates; ++i)
      insertNoopInBundle(CurrCycleInstr, TII);

    // It's unnecessary to track more than MaxLookAhead instructions. Since we
    // include the bundled MI directly after, only add a maximum of
    // (MaxLookAhead - 1) noops to EmittedInstrs.
    for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
      EmittedInstrs.push_front(nullptr);

    EmittedInstrs.push_front(CurrCycleInstr);
    EmittedInstrs.resize(MaxLookAhead);
  }
  CurrCycleInstr = nullptr;
}
230
Tom Stellardcb6ba622016-04-30 00:23:06 +0000231unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000232 IsHazardRecognizerMode = false;
233 return PreEmitNoopsCommon(SU->getInstr());
Tom Stellardcb6ba622016-04-30 00:23:06 +0000234}
235
// Standalone entry point for a single MachineInstr. Enables hazard-recognizer
// mode, which lets getWaitStatesSince() walk across basic-block boundaries,
// and applies in-place hazard fixes via fixHazards().
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  IsHazardRecognizerMode = true;
  CurrCycleInstr = MI;
  unsigned W = PreEmitNoopsCommon(MI);
  fixHazards(MI);
  CurrCycleInstr = nullptr;
  return W;
}
244
// Compute the number of wait states that must precede \p MI. Mirrors the
// check ordering of getHazardType(); some categories accumulate via
// std::max while mutually-exclusive ones return immediately.
unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
  // Bundle headers carry no hazards themselves; members are handled in
  // processBundle().
  if (MI->isBundle())
    return 0;

  int WaitStates = std::max(0, checkAnyInstHazards(MI));

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (ST.hasNSAtoVMEMBug())
    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));

  WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));

  // Subtargets with no data-dependency hazards skip the remaining checks.
  if (ST.hasNoDataDepHazard())
    return WaitStates;

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (SIInstrInfo::isMAI(*MI))
    return std::max(WaitStates, checkMAIHazards(MI));

  if (MI->mayLoad() || MI->mayStore())
    return std::max(WaitStates, checkMAILdStHazards(MI));

  return WaitStates;
}
304
// The scheduler emitted a noop this cycle; record an empty slot so wait-state
// distances in EmittedInstrs stay accurate.
void GCNHazardRecognizer::EmitNoop() {
  EmittedInstrs.push_front(nullptr);
}
308
309void GCNHazardRecognizer::AdvanceCycle() {
Tom Stellardcb6ba622016-04-30 00:23:06 +0000310 // When the scheduler detects a stall, it will call AdvanceCycle() without
311 // emitting any instructions.
312 if (!CurrCycleInstr)
313 return;
314
Carl Ritsonf898edd2018-09-10 10:14:48 +0000315 // Do not track non-instructions which do not affect the wait states.
316 // If included, these instructions can lead to buffer overflow such that
317 // detectable hazards are missed.
David Stuttard81eec582019-03-05 10:25:16 +0000318 if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
319 CurrCycleInstr->isKill())
Carl Ritsonf898edd2018-09-10 10:14:48 +0000320 return;
321
Austin Kerbow8a3d3a92019-05-07 22:12:15 +0000322 if (CurrCycleInstr->isBundle()) {
323 processBundle();
324 return;
325 }
326
Matt Arsenault59ece952017-03-17 21:36:28 +0000327 unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
Tom Stellardcb6ba622016-04-30 00:23:06 +0000328
329 // Keep track of emitted instructions
330 EmittedInstrs.push_front(CurrCycleInstr);
331
332 // Add a nullptr for each additional wait state after the first. Make sure
333 // not to add more than getMaxLookAhead() items to the list, since we
334 // truncate the list to that size right after this loop.
335 for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
336 i < e; ++i) {
337 EmittedInstrs.push_front(nullptr);
338 }
339
340 // getMaxLookahead() is the largest number of wait states we will ever need
341 // to insert, so there is no point in keeping track of more than that many
342 // wait states.
343 EmittedInstrs.resize(getMaxLookAhead());
344
345 CurrCycleInstr = nullptr;
346}
347
// Bottom-up scheduling is not supported by this recognizer.
void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}
351
352//===----------------------------------------------------------------------===//
353// Helper Functions
354//===----------------------------------------------------------------------===//
355
// Predicate deciding when a backwards scan has gone far enough; receives the
// instruction just processed (or nullptr at a block boundary) and the wait
// states accumulated so far.
typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;

// Returns a minimum wait states since \p I walking all predecessors.
// Only scans until \p IsExpired returns true.
// Can only be run in a hazard recognizer mode.
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineBasicBlock *MBB,
                              MachineBasicBlock::reverse_instr_iterator I,
                              int WaitStates,
                              IsExpiredFn IsExpired,
                              DenseSet<const MachineBasicBlock *> &Visited) {
  for (auto E = MBB->instr_rend(); I != E; ++I) {
    // Don't add WaitStates for parent BUNDLE instructions.
    if (I->isBundle())
      continue;

    if (IsHazard(&*I))
      return WaitStates;

    // These pseudo-instructions consume no wait states.
    if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
      continue;

    WaitStates += SIInstrInfo::getNumWaitStates(*I);

    // Scan limit reached without finding the hazard.
    if (IsExpired(&*I, WaitStates))
      return std::numeric_limits<int>::max();
  }

  // Hazard not found in this block: recurse into each predecessor (visiting
  // every block at most once) and keep the minimum distance found.
  int MinWaitStates = WaitStates;
  bool Found = false;
  for (MachineBasicBlock *Pred : MBB->predecessors()) {
    if (!Visited.insert(Pred).second)
      continue;

    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
                               WaitStates, IsExpired, Visited);

    // max() means this predecessor path expired without a hazard.
    if (W == std::numeric_limits<int>::max())
      continue;

    MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
    if (IsExpired(nullptr, MinWaitStates))
      return MinWaitStates;

    Found = true;
  }

  if (Found)
    return MinWaitStates;

  return std::numeric_limits<int>::max();
}
408
409static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
410 MachineInstr *MI,
411 IsExpiredFn IsExpired) {
412 DenseSet<const MachineBasicBlock *> Visited;
413 return getWaitStatesSince(IsHazard, MI->getParent(),
414 std::next(MI->getReverseIterator()),
415 0, IsExpired, Visited);
416}
417
// Distance, in wait states, back to the nearest instruction satisfying
// \p IsHazard, or INT_MAX if none is found within \p Limit wait states.
int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
    // In hazard-recognizer mode we may walk the CFG across block boundaries.
    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
      return WaitStates >= Limit;
    };
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
  }

  // Scheduler mode: scan only the queue of already-emitted instructions.
  int WaitStates = 0;
  for (MachineInstr *MI : EmittedInstrs) {
    if (MI) {
      if (IsHazard(MI))
        return WaitStates;

      // Inline asm does not occupy a wait-state slot.
      if (MI->isInlineAsm())
        continue;
    }
    ++WaitStates;

    if (WaitStates >= Limit)
      break;
  }
  return std::numeric_limits<int>::max();
}
442
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000443int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
444 IsHazardFn IsHazardDef,
445 int Limit) {
Tom Stellardb133fbb2016-10-27 23:05:31 +0000446 const SIRegisterInfo *TRI = ST.getRegisterInfo();
447
448 auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
449 return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
450 };
451
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000452 return getWaitStatesSince(IsHazardFn, Limit);
Tom Stellardb133fbb2016-10-27 23:05:31 +0000453}
454
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000455int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
456 int Limit) {
Tom Stellardb133fbb2016-10-27 23:05:31 +0000457 auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
458 return isSSetReg(MI->getOpcode()) && IsHazard(MI);
459 };
460
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000461 return getWaitStatesSince(IsHazardFn, Limit);
Tom Stellardb133fbb2016-10-27 23:05:31 +0000462}
463
Tom Stellardcb6ba622016-04-30 00:23:06 +0000464//===----------------------------------------------------------------------===//
465// No-op Hazard Detection
466//===----------------------------------------------------------------------===//
467
Matt Arsenault03c67d12017-11-17 04:18:24 +0000468static void addRegUnits(const SIRegisterInfo &TRI,
469 BitVector &BV, unsigned Reg) {
470 for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
471 BV.set(*RUI);
472}
473
474static void addRegsToSet(const SIRegisterInfo &TRI,
475 iterator_range<MachineInstr::const_mop_iterator> Ops,
476 BitVector &Set) {
Tom Stellard1f520e52016-05-02 17:39:06 +0000477 for (const MachineOperand &Op : Ops) {
478 if (Op.isReg())
Matt Arsenault03c67d12017-11-17 04:18:24 +0000479 addRegUnits(TRI, Set, Op.getReg());
Tom Stellard1f520e52016-05-02 17:39:06 +0000480 }
481}
482
// Fold the register units defined and used by \p MI into the current clause
// tracking sets (ClauseDefs/ClauseUses).
void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
  addRegsToSet(TRI, MI.uses(), ClauseUses);
}
488
// Returns 1 if \p MEM would break the current soft clause (and therefore
// needs a separating instruction), 0 otherwise.
int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clause are only present on VI+, and only matter if xnack is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft-clause is any group of consecutive SMEM instructions. The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure that
  // when a clause has more than one instruction, no instruction in the clause
  // writes to a register that is read by another instruction in the clause
  // (including itself). If we encounter this situation, we need to break the
  // clause by inserting a non SMEM instruction.

  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    // A mismatch in memory kind (SMRD vs. VMEM) also ends the clause.
    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
      break;

    addClauseInst(*MI);
  }

  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if they
  // use the same address. For now, just start a new clause whenever we see a
  // store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}
536
// Wait states required before issuing the SMRD instruction \p SMRD:
// soft-clause breaks plus (on SI) VALU/SALU writes to its SGPR sources.
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  int WaitStatesNeeded = 0;

  WaitStatesNeeded = checkSoftClauseHazards(SMRD);

  // This SMRD hazard only affects SI.
  if (!ST.hasSMRDReadVALUDefHazard())
    return WaitStatesNeeded;

  // A read of an SGPR by SMRD instruction requires 4 wait states when the
  // SGPR was written by a VALU instruction.
  int SmrdSgprWaitStates = 4;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };

  bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);

  for (const MachineOperand &Use : SMRD->uses()) {
    if (!Use.isReg())
      continue;
    int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   SmrdSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    // This fixes what appears to be undocumented hardware behavior in SI where
    // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
    // needs some number of nops in between. We don't know how many we need, but
    // let's use 4. This wasn't discovered before probably because the only
    // case when this happens is when we expand a 64-bit pointer into a full
    // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
    // probably never encountered in the closed-source land.
    if (IsBufferSMRD) {
      int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                                                   IsBufferHazardDefFn,
                                                   SmrdSgprWaitStates);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    }
  }

  return WaitStatesNeeded;
}
580
// Wait states required before issuing the VMEM/FLAT instruction \p VMEM:
// soft-clause breaks plus VALU writes to any of its SGPR operands.
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
  if (!ST.hasVMEMReadSGPRVALUDefHazard())
    return 0;

  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU Instruction.
  const int VmemSgprWaitStates = 5;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  for (const MachineOperand &Use : VMEM->uses()) {
    // VGPR operands are not subject to this hazard.
    if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;

    int WaitStatesNeededForUse =
        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   VmemSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }
  return WaitStatesNeeded;
}
Tom Stellarda27007e2016-05-02 16:23:09 +0000602
// Wait states required before issuing the DPP instruction \p DPP.
int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
  int DppVgprWaitStates = 2;
  int DppExecWaitStates = 5;
  int WaitStatesNeeded = 0;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };

  for (const MachineOperand &Use : DPP->uses()) {
    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    // Any prior def of the VGPR counts, regardless of the defining
    // instruction's kind (hence the always-true predicate).
    int WaitStatesNeededForUse =
        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                              [](MachineInstr *) { return true; },
                              DppVgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  WaitStatesNeeded = std::max(
      WaitStatesNeeded,
      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
                                                DppExecWaitStates));

  return WaitStatesNeeded;
}
Tom Stellard5ab61542016-10-07 23:42:48 +0000630
631int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
632 const SIInstrInfo *TII = ST.getInstrInfo();
633
634 // v_div_fmas requires 4 wait states after a write to vcc from a VALU
635 // instruction.
636 const int DivFMasWaitStates = 4;
637 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000638 int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
639 DivFMasWaitStates);
Tom Stellard5ab61542016-10-07 23:42:48 +0000640
641 return DivFMasWaitStates - WaitStatesNeeded;
642}
Tom Stellard961811c2016-10-15 00:58:14 +0000643
644int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
645 const SIInstrInfo *TII = ST.getInstrInfo();
646 unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
647
648 const int GetRegWaitStates = 2;
649 auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
650 return GetRegHWReg == getHWReg(TII, *MI);
651 };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000652 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);
Tom Stellard961811c2016-10-15 00:58:14 +0000653
654 return GetRegWaitStates - WaitStatesNeeded;
655}
Tom Stellard30d30822016-10-27 20:39:09 +0000656
657int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
658 const SIInstrInfo *TII = ST.getInstrInfo();
659 unsigned HWReg = getHWReg(TII, *SetRegInstr);
660
Matt Arsenaulte4c2e9b2019-06-19 23:54:58 +0000661 const int SetRegWaitStates = ST.getSetRegWaitStates();
Tom Stellard30d30822016-10-27 20:39:09 +0000662 auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
663 return HWReg == getHWReg(TII, *MI);
664 };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000665 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
Tom Stellard30d30822016-10-27 20:39:09 +0000666 return SetRegWaitStates - WaitStatesNeeded;
667}
Tom Stellardb133fbb2016-10-27 23:05:31 +0000668
// If the store \p MI can create a hazard for a following VALU instruction,
// return the operand index of the vector data operand involved; otherwise
// return -1.
int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  if (!MI.mayStore())
    return -1;

  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
  int VDataRCID = -1;
  if (VDataIdx != -1)
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;

  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
    // There is no hazard if the instruction does not use vector regs
    // (like wbinvl1)
    if (VDataIdx == -1)
      return -1;
    // For MUBUF/MTBUF instructions this hazard only exists if the
    // instruction is not using a register in the soffset field.
    const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
    // If we have no soffset operand, then assume this field has been
    // hardcoded to zero.
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
        (!SOffset || !SOffset->isReg()))
      return VDataIdx;
  }

  // MIMG instructions create a hazard if they don't use a 256-bit T# and
  // the store size is greater than 8 bytes and they have more than two bits
  // of their dmask set.
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
  if (TII->isMIMG(MI)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
    assert(SRsrcIdx != -1 &&
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
    (void)SRsrcIdx;
  }

  if (TII->isFLAT(MI)) {
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
      return DataIdx;
  }

  return -1;
}
717
// Helper to check for the hazard where VMEM instructions that store more
// than 8 bytes can have their store data overwritten by the next
// instruction. Returns the wait states required before an instruction that
// defines \p Def may issue.
int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
                                                const MachineRegisterInfo &MRI) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  const int VALUWaitStates = 1;
  int WaitStatesNeeded = 0;

  // Only VGPR defs can clobber in-flight VMEM store data.
  if (!TRI->isVGPR(MRI, Def.getReg()))
    return WaitStatesNeeded;
  unsigned Reg = Def.getReg();
  // Hazard: a recent store whose vulnerable data operand overlaps Reg.
  auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
    int DataIdx = createsVALUHazard(*MI);
    return DataIdx >= 0 &&
           TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
  };
  int WaitStatesNeededForDef =
    VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);

  return WaitStatesNeeded;
}
741
Tom Stellardb133fbb2016-10-27 23:05:31 +0000742int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
743 // This checks for the hazard where VMEM instructions that store more than
744 // 8 bytes can have there store data over written by the next instruction.
745 if (!ST.has12DWordStoreHazard())
746 return 0;
747
Mark Searlesd29f24a2017-12-07 20:34:25 +0000748 const MachineRegisterInfo &MRI = MF.getRegInfo();
Tom Stellardb133fbb2016-10-27 23:05:31 +0000749 int WaitStatesNeeded = 0;
750
751 for (const MachineOperand &Def : VALU->defs()) {
Mark Searlesd29f24a2017-12-07 20:34:25 +0000752 WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
Tom Stellardb133fbb2016-10-27 23:05:31 +0000753 }
Mark Searlesd29f24a2017-12-07 20:34:25 +0000754
755 return WaitStatesNeeded;
756}
757
758int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
759 // This checks for hazards associated with inline asm statements.
760 // Since inline asms can contain just about anything, we use this
761 // to call/leverage other check*Hazard routines. Note that
762 // this function doesn't attempt to address all possible inline asm
763 // hazards (good luck), but is a collection of what has been
764 // problematic thus far.
765
766 // see checkVALUHazards()
767 if (!ST.has12DWordStoreHazard())
768 return 0;
769
770 const MachineRegisterInfo &MRI = MF.getRegInfo();
771 int WaitStatesNeeded = 0;
772
773 for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
774 I != E; ++I) {
775 const MachineOperand &Op = IA->getOperand(I);
776 if (Op.isReg() && Op.isDef()) {
777 WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
778 }
779 }
780
Tom Stellardb133fbb2016-10-27 23:05:31 +0000781 return WaitStatesNeeded;
782}
Tom Stellard04051b52016-10-27 23:42:29 +0000783
784int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
785 const SIInstrInfo *TII = ST.getInstrInfo();
786 const SIRegisterInfo *TRI = ST.getRegisterInfo();
Mark Searlesd29f24a2017-12-07 20:34:25 +0000787 const MachineRegisterInfo &MRI = MF.getRegInfo();
Tom Stellard04051b52016-10-27 23:42:29 +0000788
789 const MachineOperand *LaneSelectOp =
790 TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
791
792 if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
793 return 0;
794
795 unsigned LaneSelectReg = LaneSelectOp->getReg();
796 auto IsHazardFn = [TII] (MachineInstr *MI) {
797 return TII->isVALU(*MI);
798 };
799
800 const int RWLaneWaitStates = 4;
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000801 int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
802 RWLaneWaitStates);
Tom Stellard04051b52016-10-27 23:42:29 +0000803 return RWLaneWaitStates - WaitStatesSince;
804}
Tom Stellardaea899e2016-10-27 23:50:21 +0000805
806int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
Matt Arsenaulte4c2e9b2019-06-19 23:54:58 +0000807 if (!ST.hasRFEHazards())
Tom Stellardaea899e2016-10-27 23:50:21 +0000808 return 0;
809
810 const SIInstrInfo *TII = ST.getInstrInfo();
811
812 const int RFEWaitStates = 1;
813
814 auto IsHazardFn = [TII] (MachineInstr *MI) {
815 return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
816 };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000817 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
Tom Stellardaea899e2016-10-27 23:50:21 +0000818 return RFEWaitStates - WaitStatesNeeded;
819}
Matt Arsenaulte823d922017-02-18 18:29:53 +0000820
821int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
Shiva Chen801bf7e2018-05-09 02:42:00 +0000822 if (MI->isDebugInstr())
Matt Arsenaulte823d922017-02-18 18:29:53 +0000823 return 0;
824
825 const SIRegisterInfo *TRI = ST.getRegisterInfo();
826 if (!ST.hasSMovFedHazard())
827 return 0;
828
829 // Check for any instruction reading an SGPR after a write from
830 // s_mov_fed_b32.
831 int MovFedWaitStates = 1;
832 int WaitStatesNeeded = 0;
833
834 for (const MachineOperand &Use : MI->uses()) {
835 if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
836 continue;
837 auto IsHazardFn = [] (MachineInstr *MI) {
838 return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
839 };
840 int WaitStatesNeededForUse =
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000841 MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
842 MovFedWaitStates);
Matt Arsenaulte823d922017-02-18 18:29:53 +0000843 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
844 }
845
846 return WaitStatesNeeded;
847}
848
849int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
Matt Arsenaulte823d922017-02-18 18:29:53 +0000850 const SIInstrInfo *TII = ST.getInstrInfo();
Matt Arsenaulta41351e2017-11-17 21:35:32 +0000851 const int SMovRelWaitStates = 1;
Matt Arsenaulte823d922017-02-18 18:29:53 +0000852 auto IsHazardFn = [TII] (MachineInstr *MI) {
853 return TII->isSALU(*MI);
854 };
Stanislav Mekhanoshinf92ed692019-01-21 19:11:26 +0000855 return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
856 SMovRelWaitStates);
Matt Arsenaulte823d922017-02-18 18:29:53 +0000857}
Stanislav Mekhanoshin51d14152019-05-04 04:30:57 +0000858
// Runs each hazard-fix routine against \p MI. Each fix* function checks for
// its specific hazard and, when found, inserts a mitigating instruction
// before MI. The bool results (whether a fix was applied) are ignored here;
// the calls are kept in this order.
void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
  fixVMEMtoScalarWriteHazards(MI);
  fixVcmpxPermlaneHazards(MI);
  fixSMEMtoVectorWriteHazards(MI);
  fixVcmpxExecWARHazard(MI);
  fixLdsBranchVmemWARHazard(MI);
}
866
// Mitigates the VOPC -> v_permlane* hazard by inserting a V_MOV_B32 (a
// non-NOP VALU) before the permlane. Returns true if a fix was inserted.
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
  if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  // Hazard: any earlier VOPC instruction.
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVOPC(*MI);
  };

  // The hazard expires at any intervening VALU other than a V_NOP (which,
  // per the comment below, is discarded by SQ and does not help).
  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    unsigned Opc = MI->getOpcode();
    return SIInstrInfo::isVALU(*MI) &&
           Opc != AMDGPU::V_NOP_e32 &&
           Opc != AMDGPU::V_NOP_e64 &&
           Opc != AMDGPU::V_NOP_sdwa;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  // V_NOP will be discarded by SQ.
  // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
  // which is always a VGPR and available.
  auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
  unsigned Reg = Src0->getReg();
  bool IsUndef = Src0->isUndef();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::V_MOV_B32_e32))
    .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
    .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);

  return true;
}
903
// Mitigates the hazard where a SALU/SMEM instruction redefines a register
// still being read by an earlier VMEM/DS/FLAT access, by inserting a V_NOP.
// Returns true if a fix was inserted.
bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
  if (!ST.hasVMEMtoScalarWriteHazard())
    return false;

  // Only scalar instructions (SALU or SMRD) with defs can trigger this.
  if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
    return false;

  if (MI->getNumDefs() == 0)
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Hazard: an earlier VMEM/DS/FLAT instruction using one of MI's defs.
  // An implicit use of EXEC is not counted.
  auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
        !SIInstrInfo::isFLAT(*I))
      return false;

    for (const MachineOperand &Def : MI->defs()) {
      MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
      if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
        continue;
      return true;
    }
    return false;
  };

  // The hazard expires at any VALU or at an s_waitcnt with immediate 0.
  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    return MI && (SIInstrInfo::isVALU(*MI) ||
                  (MI->getOpcode() == AMDGPU::S_WAITCNT &&
                   !MI->getOperand(0).getImm()));
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
  return true;
}
944
// Mitigates the hazard where a VALU writes an SGPR (sdst, or vdst for
// v_readlane/v_readfirstlane) that an earlier SMEM instruction still reads,
// by inserting "s_mov_b32 null, 0". Returns true if a fix was inserted.
bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
  if (!ST.hasSMEMtoVectorWriteHazard())
    return false;

  if (!SIInstrInfo::isVALU(*MI))
    return false;

  // v_readlane/v_readfirstlane name their scalar destination "vdst";
  // every other VALU uses "sdst".
  unsigned SDSTName;
  switch (MI->getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_READFIRSTLANE_B32:
    SDSTName = AMDGPU::OpName::vdst;
    break;
  default:
    SDSTName = AMDGPU::OpName::sdst;
    break;
  }

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
  if (!SDST) {
    // No named scalar destination: fall back to the first implicit SGPR def.
    for (const auto &MO : MI->implicit_operands()) {
      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
        SDST = &MO;
        break;
      }
    }
  }

  if (!SDST)
    return false;

  const unsigned SDSTReg = SDST->getReg();
  // Hazard: an earlier SMRD that reads the scalar destination register.
  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
  };

  auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
    if (MI) {
      if (TII->isSALU(*MI)) {
        switch (MI->getOpcode()) {
        case AMDGPU::S_SETVSKIP:
        case AMDGPU::S_VERSION:
        case AMDGPU::S_WAITCNT_VSCNT:
        case AMDGPU::S_WAITCNT_VMCNT:
        case AMDGPU::S_WAITCNT_EXPCNT:
          // These instructions cannot mitigate the hazard.
          return false;
        case AMDGPU::S_WAITCNT_LGKMCNT:
          // Reducing lgkmcnt count to 0 always mitigates the hazard.
          return (MI->getOperand(1).getImm() == 0) &&
                 (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
        case AMDGPU::S_WAITCNT: {
          const int64_t Imm = MI->getOperand(0).getImm();
          AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
          return (Decoded.LgkmCnt == 0);
        }
        default:
          // SOPP instructions cannot mitigate the hazard.
          if (TII->isSOPP(*MI))
            return false;
          // At this point the SALU can be assumed to mitigate the hazard
          // because either:
          // (a) it is independent of the at risk SMEM (breaking chain),
          // or
          // (b) it is dependent on the SMEM, in which case an appropriate
          //     s_waitcnt lgkmcnt _must_ exist between it and the at risk
          //     SMEM instruction.
          return true;
        }
      }
    }
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
    .addImm(0);
  return true;
}
1031
1032bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
1033 if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
1034 return false;
1035
1036 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1037 if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
1038 return false;
1039
1040 auto IsHazardFn = [TRI] (MachineInstr *I) {
1041 if (SIInstrInfo::isVALU(*I))
1042 return false;
1043 return I->readsRegister(AMDGPU::EXEC, TRI);
1044 };
1045
1046 const SIInstrInfo *TII = ST.getInstrInfo();
1047 auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
1048 if (!MI)
1049 return false;
1050 if (SIInstrInfo::isVALU(*MI)) {
1051 if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
1052 return true;
1053 for (auto MO : MI->implicit_operands())
1054 if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
1055 return true;
1056 }
1057 if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
1058 (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
1059 return true;
1060 return false;
1061 };
1062
1063 if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1064 std::numeric_limits<int>::max())
1065 return false;
1066
1067 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1068 TII->get(AMDGPU::S_WAITCNT_DEPCTR))
1069 .addImm(0xfffe);
1070 return true;
1071}
1072
// Mitigates the LDS/VMEM WAR hazard across branches: when a DS access and a
// VMEM (or segment-specific FLAT) access of the other kind are separated
// only by a branch, insert "s_waitcnt_vscnt null, 0" before \p MI.
// Returns true if a fix was inserted.
bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
  if (!ST.hasLdsBranchVmemWARHazard())
    return false;

  // Classify an instruction: 1 = DS, 2 = VMEM/segment-specific FLAT,
  // 0 = neither.
  auto IsHazardInst = [] (const MachineInstr *MI) {
    if (SIInstrInfo::isDS(*MI))
      return 1;
    if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
      return 2;
    return 0;
  };

  auto InstType = IsHazardInst(MI);
  if (!InstType)
    return false;

  // Outer search stops at another DS/VMEM access or at an
  // "s_waitcnt_vscnt null, 0".
  auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
    return I && (IsHazardInst(I) ||
                 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
                  I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
                  !I->getOperand(1).getImm()));
  };

  // The hazard is a branch behind which an access of the *other* type is
  // found (checked by the nested inner search).
  auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
    if (!I->isBranch())
      return false;

    // Inner hazard: an access whose type differs from MI's.
    auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
      auto InstType2 = IsHazardInst(I);
      return InstType2 && InstType != InstType2;
    };

    // Inner search expires at a same-type access or at an
    // "s_waitcnt_vscnt null, 0".
    auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
      if (!I)
        return false;

      auto InstType2 = IsHazardInst(I);
      if (InstType == InstType2)
        return true;

      return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
             I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
             !I->getOperand(1).getImm();
    };

    return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
           std::numeric_limits<int>::max();
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_VSCNT))
    .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
    .addImm(0);

  return true;
}
1134
1135int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
1136 int NSAtoVMEMWaitStates = 1;
1137
1138 if (!ST.hasNSAtoVMEMBug())
1139 return 0;
1140
1141 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
1142 return 0;
1143
1144 const SIInstrInfo *TII = ST.getInstrInfo();
1145 const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
1146 if (!Offset || (Offset->getImm() & 6) == 0)
1147 return 0;
1148
1149 auto IsHazardFn = [TII] (MachineInstr *I) {
1150 if (!SIInstrInfo::isMIMG(*I))
1151 return false;
1152 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
1153 return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
1154 TII->getInstSizeInBytes(*I) >= 16;
1155 };
1156
1157 return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
1158}
Stanislav Mekhanoshinbdf7f812019-06-21 16:30:14 +00001159
// Checks the hazard between an FP-atomic VMEM/FLAT access and a following
// s_denorm_mode: up to 3 wait states are needed unless an intervening VALU
// or s_waitcnt* instruction is present.
int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
  int FPAtomicToDenormModeWaitStates = 3;

  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
    return 0;

  // Hazard: an earlier VMEM or FLAT instruction that is an FP atomic.
  auto IsHazardFn = [] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
      return false;
    return SIInstrInfo::isFPAtomic(*I);
  };

  // The search expires after 3 wait states, or at any VALU or any of the
  // s_waitcnt* variants below.
  auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) {
    if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI))
      return true;

    switch (MI->getOpcode()) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAITCNT_IDLE:
      return true;
    default:
      break;
    }

    return false;
  };


  return FPAtomicToDenormModeWaitStates -
         ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
}
Stanislav Mekhanoshin7d2019b2019-07-11 21:30:34 +00001195
// Checks wait-state requirements for MAI (MFMA / v_accvgpr_*) instructions.
// The per-case constants below encode required distances between MFMA/AGPR
// producers and consumers; latencies 2/8/16 select between the 4x4, 16x16
// and 32x32 MFMA variants respectively.
int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
  assert(SIInstrInfo::isMAI(*MI));

  int WaitStatesNeeded = 0;
  unsigned Opc = MI->getOpcode();

  auto IsVALUFn = [] (MachineInstr *MI) {
    return SIInstrInfo::isVALU(*MI);
  };

  if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
    const int LegacyVALUWritesVGPRWaitStates = 2;
    const int VALUWritesExecWaitStates = 4;
    const int MaxWaitStates = 4;

    // A VALU write of EXEC needs distance before this instruction.
    int WaitStatesNeededForUse = VALUWritesExecWaitStates -
      getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded < MaxWaitStates) {
      // A VALU write of any VGPR use needs distance as well.
      for (const MachineOperand &Use : MI->explicit_uses()) {
        const int MaxWaitStates = 2;

        if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
          continue;

        int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
          getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
        WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

        if (WaitStatesNeeded == MaxWaitStates)
          break;
      }
    }
  }

  // True MFMA instructions only (not the accvgpr read/write moves).
  auto IsMFMAFn = [] (MachineInstr *MI) {
    return SIInstrInfo::isMAI(*MI) &&
           MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
           MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
  };

  for (const MachineOperand &Op : MI->explicit_operands()) {
    if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
      continue;

    const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
    const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
    const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
    const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
    const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
    const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
    const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
    const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
    const int MaxWaitStates = 18;
    unsigned Reg = Op.getReg();
    unsigned HazardDefLatency = 0;

    // Matches an MFMA whose dst overlaps (but is not equal to) Reg, and
    // records the largest producer latency seen as a side effect.
    auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
                              (MachineInstr *MI) {
      if (!IsMFMAFn(MI))
        return false;
      unsigned DstReg = MI->getOperand(0).getReg();
      if (DstReg == Reg)
        return false;
      HazardDefLatency = std::max(HazardDefLatency,
                                  TSchedModel.computeInstrLatency(MI));
      return TRI.regsOverlap(DstReg, Reg);
    };

    int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
                                                   MaxWaitStates);
    int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
    int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    int OpNo = MI->getOperandNo(&Op);
    if (OpNo == SrcCIdx) {
      NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
    } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
               break;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
               break;
      case 16: LLVM_FALLTHROUGH;
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
               break;
      }
    } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
               break;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
               break;
      case 16: LLVM_FALLTHROUGH;
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
               break;
      }
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    // Also check for a recent v_accvgpr_write of an overlapping register.
    auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
        return false;
      unsigned DstReg = MI->getOperand(0).getReg();
      return TRI.regsOverlap(Reg, DstReg);
    };

    const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
    const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
    const int AccVGPRWriteAccVgprReadWaitStates = 3;
    NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
    if (OpNo == SrcCIdx)
      NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
    else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
      NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;

    WaitStatesNeededForUse = NeedWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.
  }

  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
    const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
    const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
    const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
    const int MaxWaitStates = 13;
    unsigned DstReg = MI->getOperand(0).getReg();
    unsigned HazardDefLatency = 0;

    // Matches an MFMA whose src2 overlaps this accvgpr_write's dst, and
    // records the largest producer latency seen as a side effect.
    auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
                        (MachineInstr *MI) {
      if (!IsMFMAFn(MI))
        return false;
      unsigned Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
      HazardDefLatency = std::max(HazardDefLatency,
                                  TSchedModel.computeInstrLatency(MI));
      return TRI.regsOverlap(Reg, DstReg);
    };

    int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
    int NeedWaitStates;
    switch (HazardDefLatency) {
    case 2:  NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
             break;
    case 8:  NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
             break;
    case 16: LLVM_FALLTHROUGH;
    default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
             break;
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}
1364
// Checks wait states needed between a v_accvgpr_read (or a VALU-dependent
// chain through one) and a subsequent load/store that uses the same VGPR.
int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
  if (!ST.hasMAIInsts())
    return 0;

  int WaitStatesNeeded = 0;

  auto IsAccVgprReadFn = [] (MachineInstr *MI) {
    return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
  };

  for (const MachineOperand &Op : MI->explicit_uses()) {
    if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    unsigned Reg = Op.getReg();

    const int AccVgprReadLdStWaitStates = 2;
    const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
    const int MaxWaitStates = 2;

    // Direct case: Reg was recently defined by a v_accvgpr_read.
    int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    // Indirect case: a recent v_accvgpr_read whose own input Reg was itself
    // written by a non-MAI VALU shortly before it.
    auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
        return false;
      auto IsVALUFn = [] (MachineInstr *MI) {
        return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
      };
      return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
             std::numeric_limits<int>::max();
    };

    WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
      getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}