//===-- SIFormMemoryClauses.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass creates bundles of SMEM and VMEM instructions forming memory
/// clauses if XNACK is enabled. Def operands of clauses are marked as early
/// clobber to make sure we will not overwrite any source register within a
/// clause.
///
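/// For illustration only (schematic, not exact MIR syntax), two independent
/// scalar loads such as
///
///   %0 = S_LOAD_DWORD_IMM %base, 0
///   %1 = S_LOAD_DWORD_IMM %base, 4
///
/// would be folded into a single BUNDLE whose defs are early-clobber, so the
/// register allocator cannot assign %0 or %1 to a register that still holds
/// %base:
///
///   BUNDLE early-clobber %0, early-clobber %1, %base {
///     %0 = S_LOAD_DWORD_IMM %base, 0
///     %1 = S_LOAD_DWORD_IMM %base, 4
///   }
///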
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "GCNRegPressure.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;

#define DEBUG_TYPE "si-form-memory-clauses"

// Clauses longer than 15 instructions would overflow one of the counters
// and stall. They can stall even earlier if there are outstanding counters.
static cl::opt<unsigned>
MaxClause("amdgpu-max-memory-clause", cl::Hidden, cl::init(15),
          cl::desc("Maximum length of a memory clause, instructions"));

namespace {

class SIFormMemoryClauses : public MachineFunctionPass {
  typedef DenseMap<unsigned, std::pair<unsigned, LaneBitmask>> RegUse;

public:
  static char ID;

public:
  SIFormMemoryClauses() : MachineFunctionPass(ID) {
    initializeSIFormMemoryClausesPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "SI Form memory clauses";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  template <typename Callable>
  void forAllLanes(unsigned Reg, LaneBitmask LaneMask, Callable Func) const;

  bool canBundle(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;
  bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT);
  void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;
  bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses,
                      GCNDownwardRPTracker &RPT);

  const GCNSubtarget *ST;
  const SIRegisterInfo *TRI;
  const MachineRegisterInfo *MRI;
  SIMachineFunctionInfo *MFI;

  unsigned LastRecordedOccupancy;
  unsigned MaxVGPRs;
  unsigned MaxSGPRs;
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIFormMemoryClauses, DEBUG_TYPE,
                      "SI Form memory clauses", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(SIFormMemoryClauses, DEBUG_TYPE,
                    "SI Form memory clauses", false, false)

char SIFormMemoryClauses::ID = 0;

char &llvm::SIFormMemoryClausesID = SIFormMemoryClauses::ID;

FunctionPass *llvm::createSIFormMemoryClausesPass() {
  return new SIFormMemoryClauses();
}

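// Clause classification: a VMEM clause is built from FLAT and VMEM
// instructions, an SMEM clause from scalar memory reads (SMRD).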
static bool isVMEMClauseInst(const MachineInstr &MI) {
  return SIInstrInfo::isFLAT(MI) || SIInstrInfo::isVMEM(MI);
}

static bool isSMEMClauseInst(const MachineInstr &MI) {
  return SIInstrInfo::isSMRD(MI);
}

// There is no point in forming store clauses: stores do not define anything,
// so there is nothing to mark as early-clobber.
static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
  if (MI.isDebugValue() || MI.isBundled())
    return false;
  if (!MI.mayLoad() || MI.mayStore())
    return false;
  if (AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1 ||
      AMDGPU::getAtomicRetOp(MI.getOpcode()) != -1)
    return false;
  if (IsVMEMClause && !isVMEMClauseInst(MI))
    return false;
  if (!IsVMEMClause && !isSMEMClauseInst(MI))
    return false;
  return true;
}

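// Gather the RegState flags corresponding to the flags set on operand MO so
// they can be reapplied to the matching operand of the BUNDLE header.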
static unsigned getMopState(const MachineOperand &MO) {
  unsigned S = 0;
  if (MO.isImplicit())
    S |= RegState::Implicit;
  if (MO.isDead())
    S |= RegState::Dead;
  if (MO.isUndef())
    S |= RegState::Undef;
  if (MO.isKill())
    S |= RegState::Kill;
  if (MO.isEarlyClobber())
    S |= RegState::EarlyClobber;
  if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && MO.isRenamable())
    S |= RegState::Renamable;
  return S;
}

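// Invoke Func for a set of subregister indices of Reg that together cover
// LaneMask. If LaneMask covers the whole register, or Reg is a physical
// register, Func is invoked once with subregister index 0 (the full register).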
template <typename Callable>
void SIFormMemoryClauses::forAllLanes(unsigned Reg, LaneBitmask LaneMask,
                                      Callable Func) const {
  if (LaneMask.all() || TargetRegisterInfo::isPhysicalRegister(Reg) ||
      LaneMask == MRI->getMaxLaneMaskForVReg(Reg)) {
    Func(0);
    return;
  }

  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
  unsigned E = TRI->getNumSubRegIndices();
  SmallVector<unsigned, AMDGPU::NUM_TARGET_SUBREGS> CoveringSubregs;
  for (unsigned Idx = 1; Idx < E; ++Idx) {
    // Is this index even compatible with the given class?
    if (TRI->getSubClassWithSubReg(RC, Idx) != RC)
      continue;
    LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(Idx);
    // Early exit if we found a perfect match.
    if (SubRegMask == LaneMask) {
      Func(Idx);
      return;
    }

    if ((SubRegMask & ~LaneMask).any() || (SubRegMask & LaneMask).none())
      continue;

    CoveringSubregs.push_back(Idx);
  }

  llvm::sort(CoveringSubregs, [this](unsigned A, unsigned B) {
    LaneBitmask MaskA = TRI->getSubRegIndexLaneMask(A);
    LaneBitmask MaskB = TRI->getSubRegIndexLaneMask(B);
    unsigned NA = MaskA.getNumLanes();
    unsigned NB = MaskB.getNumLanes();
    if (NA != NB)
      return NA > NB;
    return MaskA.getHighestLane() > MaskB.getHighestLane();
  });

  for (unsigned Idx : CoveringSubregs) {
    LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(Idx);
    if ((SubRegMask & ~LaneMask).any() || (SubRegMask & LaneMask).none())
      continue;

    Func(Idx);
    LaneMask &= ~SubRegMask;
    if (LaneMask.none())
      return;
  }

  llvm_unreachable("Failed to find all subregs to cover lane mask");
}

// Returns false if MI uses a register already defined in the clause, or
// defines a register already used in it. In this case we must break the
// clause.
bool SIFormMemoryClauses::canBundle(const MachineInstr &MI,
                                    RegUse &Defs, RegUse &Uses) const {
  // Check interference with defs.
  for (const MachineOperand &MO : MI.operands()) {
    // TODO: Prologue/Epilogue Insertion pass does not process bundled
    //       instructions.
    if (MO.isFI())
      return false;

    if (!MO.isReg())
      continue;

    unsigned Reg = MO.getReg();

    // If it is tied we will need to write the same register as we read.
    if (MO.isTied())
      return false;

    RegUse &Map = MO.isDef() ? Uses : Defs;
    auto Conflict = Map.find(Reg);
    if (Conflict == Map.end())
      continue;

    if (TargetRegisterInfo::isPhysicalRegister(Reg))
      return false;

    LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
    if ((Conflict->second.second & Mask).any())
      return false;
  }

  return true;
}

// Since all defs in the clause are early-clobber we can run out of registers.
// Returns false if the register pressure would hit the limit when the
// instruction is bundled into a memory clause.
bool SIFormMemoryClauses::checkPressure(const MachineInstr &MI,
                                        GCNDownwardRPTracker &RPT) {
  // NB: skip advanceBeforeNext() call. Since all defs will be marked
  // early-clobber they will all stay alive at least to the end of the
  // clause. Therefore we should not decrease pressure even if the load
  // pointer becomes dead and could otherwise be reused for the destination.
  RPT.advanceToNext();
  GCNRegPressure MaxPressure = RPT.moveMaxPressure();
  unsigned Occupancy = MaxPressure.getOccupancy(*ST);
  if (Occupancy >= MFI->getMinAllowedOccupancy() &&
      MaxPressure.getVGPRNum() <= MaxVGPRs &&
      MaxPressure.getSGPRNum() <= MaxSGPRs) {
    LastRecordedOccupancy = Occupancy;
    return true;
  }
  return false;
}

// Collect register defs and uses along with their lane masks and states.
void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI,
                                         RegUse &Defs, RegUse &Uses) const {
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;

    LaneBitmask Mask = TargetRegisterInfo::isVirtualRegister(Reg) ?
                         TRI->getSubRegIndexLaneMask(MO.getSubReg()) :
                         LaneBitmask::getAll();
    RegUse &Map = MO.isDef() ? Defs : Uses;

    auto Loc = Map.find(Reg);
    unsigned State = getMopState(MO);
    if (Loc == Map.end()) {
      Map[Reg] = std::make_pair(State, Mask);
    } else {
      Loc->second.first |= State;
      Loc->second.second |= Mask;
    }
  }
}

// Check register def/use conflicts and occupancy limits, and collect the
// def/use maps. Returns true if the instruction can be bundled with the
// previous ones; if it cannot, the def/use maps are not updated.
bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI,
                                         RegUse &Defs, RegUse &Uses,
                                         GCNDownwardRPTracker &RPT) {
  if (!canBundle(MI, Defs, Uses))
    return false;

  if (!checkPressure(MI, RPT))
    return false;

  collectRegUses(MI, Defs, Uses);
  return true;
}

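// Walk every block and try to grow a memory clause starting at each valid
// SMEM or VMEM load. Clauses of at least two instructions are bundled, their
// defs are marked early-clobber, and the affected live intervals are updated.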
bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  ST = &MF.getSubtarget<GCNSubtarget>();
  if (!ST->isXNACKEnabled())
    return false;

  const SIInstrInfo *TII = ST->getInstrInfo();
  TRI = ST->getRegisterInfo();
  MRI = &MF.getRegInfo();
  MFI = MF.getInfo<SIMachineFunctionInfo>();
  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
  SlotIndexes *Ind = LIS->getSlotIndexes();
  bool Changed = false;

  MaxVGPRs = TRI->getAllocatableSet(MF, &AMDGPU::VGPR_32RegClass).count();
  MaxSGPRs = TRI->getAllocatableSet(MF, &AMDGPU::SGPR_32RegClass).count();

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::instr_iterator Next;
    for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) {
      MachineInstr &MI = *I;
      Next = std::next(I);

      bool IsVMEM = isVMEMClauseInst(MI);

      if (!isValidClauseInst(MI, IsVMEM))
        continue;

      RegUse Defs, Uses;
      GCNDownwardRPTracker RPT(*LIS);
      RPT.reset(MI);

      if (!processRegUses(MI, Defs, Uses, RPT))
        continue;

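      // Greedily grow the clause with the following valid memory instructions,
      // up to MaxClause, as long as each one can be bundled without def/use
      // conflicts or breaking the register pressure limits.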
      unsigned Length = 1;
      for ( ; Next != E && Length < MaxClause; ++Next) {
        if (!isValidClauseInst(*Next, IsVMEM))
          break;

        // A load from a pointer that was itself loaded inside the same bundle
        // is an impossible clause because we would need to write and read the
        // same register inside it. In this case processRegUses() returns false.
        if (!processRegUses(*Next, Defs, Uses, RPT))
          break;

        ++Length;
      }
      if (Length < 2)
        continue;

      Changed = true;
      MFI->limitOccupancy(LastRecordedOccupancy);

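      // Create a BUNDLE instruction in front of the clause and fold the clause
      // instructions into it, keeping the SlotIndexes in sync.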
      auto B = BuildMI(MBB, I, DebugLoc(), TII->get(TargetOpcode::BUNDLE));
      Ind->insertMachineInstrInMaps(*B);

      for (auto BI = I; BI != Next; ++BI) {
        BI->bundleWithPred();
        Ind->removeSingleMachineInstrFromMaps(*BI);

        for (MachineOperand &MO : BI->defs())
          if (MO.readsReg())
            MO.setIsInternalRead(true);
      }

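      // Add every register defined by the clause to the bundle header as an
      // early-clobber def, so it cannot be assigned to a register that still
      // holds a source of another load inside the clause.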
      for (auto &&R : Defs) {
        forAllLanes(R.first, R.second.second, [&R, &B](unsigned SubReg) {
          unsigned S = R.second.first | RegState::EarlyClobber;
          if (!SubReg)
            S &= ~(RegState::Undef | RegState::Dead);
          B.addDef(R.first, S, SubReg);
        });
      }

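      // Add all registers used inside the clause to the bundle header; kill
      // flags are not copied since the per-instruction kill flags collected
      // above need not hold for the bundle as a whole.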
      for (auto &&R : Uses) {
        forAllLanes(R.first, R.second.second, [&R, &B](unsigned SubReg) {
          B.addUse(R.first, R.second.first & ~RegState::Kill, SubReg);
        });
      }

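      // The live ranges of the virtual registers touched by the new bundle
      // have changed, so recompute their intervals.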
      for (auto &&R : Defs) {
        unsigned Reg = R.first;
        Uses.erase(Reg);
        if (TargetRegisterInfo::isPhysicalRegister(Reg))
          continue;
        LIS->removeInterval(Reg);
        LIS->createAndComputeVirtRegInterval(Reg);
      }

      for (auto &&R : Uses) {
        unsigned Reg = R.first;
        if (TargetRegisterInfo::isPhysicalRegister(Reg))
          continue;
        LIS->removeInterval(Reg);
        LIS->createAndComputeVirtRegInterval(Reg);
      }
    }
  }

  return Changed;
}