blob: 01c76327aab102e9c44c5d3111bcc62066dd6d00 [file] [log] [blame]
Connor Abbott92638ab2017-08-04 18:36:52 +00001//===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Connor Abbott92638ab2017-08-04 18:36:52 +00006//
7//===----------------------------------------------------------------------===//
8//
9/// \file
Adrian Prantl5f8f34e42018-05-01 15:54:18 +000010/// Computations in WWM can overwrite values in inactive channels for
Connor Abbott92638ab2017-08-04 18:36:52 +000011/// variables that the register allocator thinks are dead. This pass adds fake
Tim Renoufabd85fb2018-08-02 23:31:32 +000012/// uses of those variables to their def(s) to make sure that they aren't
Connor Abbott92638ab2017-08-04 18:36:52 +000013/// overwritten.
14///
15/// As an example, consider this snippet:
16/// %vgpr0 = V_MOV_B32_e32 0.0
17/// if (...) {
18/// %vgpr1 = ...
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +000019/// %vgpr2 = WWM killed %vgpr1
20/// ... = killed %vgpr2
Connor Abbott92638ab2017-08-04 18:36:52 +000021/// %vgpr0 = V_MOV_B32_e32 1.0
22/// }
23/// ... = %vgpr0
24///
25/// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
26/// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
27/// writing %vgpr1 would only write to channels that would be clobbered by the
28/// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
29/// it would clobber even the inactive channels for which the if-condition is
30/// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
Tim Renoufabd85fb2018-08-02 23:31:32 +000031/// of %vgpr0 to its def to make sure they aren't allocated to the
Connor Abbott92638ab2017-08-04 18:36:52 +000032/// same register.
33///
34/// In general, we need to figure out what registers might have their inactive
35/// channels which are eventually used accidentally clobbered by a WWM
Tim Renoufabd85fb2018-08-02 23:31:32 +000036/// instruction. We do that by spotting three separate cases of registers:
Connor Abbott92638ab2017-08-04 18:36:52 +000037///
Tim Renoufabd85fb2018-08-02 23:31:32 +000038/// 1. A "then phi": the value resulting from phi elimination of a phi node at
39/// the end of an if..endif. If there is WWM code in the "then", then we
40/// make the def at the end of the "then" branch a partial def by adding an
41/// implicit use of the register.
Connor Abbott92638ab2017-08-04 18:36:52 +000042///
Tim Renoufabd85fb2018-08-02 23:31:32 +000043/// 2. A "loop exit register": a value written inside a loop but used outside the
44/// loop, where there is WWM code inside the loop (the case in the example
45/// above). We add an implicit_def of the register in the loop pre-header,
46/// and make the original def a partial def by adding an implicit use of the
47/// register.
48///
49/// 3. A "loop exit phi": the value resulting from phi elimination of a phi node
50/// in a loop header. If there is WWM code inside the loop, then we make all
51/// defs inside the loop partial defs by adding an implicit use of the
52/// register on each one.
53///
54/// Note that we do not need to consider an if..else..endif phi. We only need to
55/// consider non-uniform control flow, and control flow structurization would
56/// have transformed a non-uniform if..else..endif into two if..endifs.
57///
58/// The analysis to detect these cases relies on a property of the MIR
59/// arising from this pass running straight after PHIElimination and before any
60/// coalescing: that any virtual register with more than one definition must be
61/// the new register added to lower a phi node by PHIElimination.
62///
63/// FIXME: We should detect whether a register in one of the above categories is
64/// already live at the WWM code before deciding to add the implicit uses to
65/// synthesize its liveness.
66///
67/// FIXME: I believe this whole scheme may be flawed due to the possibility of
68/// the register allocator doing live interval splitting.
Connor Abbott92638ab2017-08-04 18:36:52 +000069///
70//===----------------------------------------------------------------------===//
71
72#include "AMDGPU.h"
73#include "AMDGPUSubtarget.h"
74#include "SIInstrInfo.h"
75#include "SIRegisterInfo.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000076#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Connor Abbott92638ab2017-08-04 18:36:52 +000077#include "llvm/ADT/DepthFirstIterator.h"
78#include "llvm/ADT/SparseBitVector.h"
Matthias Braunf8422972017-12-13 02:51:04 +000079#include "llvm/CodeGen/LiveIntervals.h"
Tim Renoufabd85fb2018-08-02 23:31:32 +000080#include "llvm/CodeGen/MachineDominators.h"
Connor Abbott92638ab2017-08-04 18:36:52 +000081#include "llvm/CodeGen/MachineFunctionPass.h"
Tim Renoufabd85fb2018-08-02 23:31:32 +000082#include "llvm/CodeGen/MachineLoopInfo.h"
Connor Abbott92638ab2017-08-04 18:36:52 +000083#include "llvm/CodeGen/Passes.h"
David Blaikieb3bde2e2017-11-17 01:07:10 +000084#include "llvm/CodeGen/TargetRegisterInfo.h"
Connor Abbott92638ab2017-08-04 18:36:52 +000085
86using namespace llvm;
87
88#define DEBUG_TYPE "si-fix-wwm-liveness"
89
90namespace {
91
92class SIFixWWMLiveness : public MachineFunctionPass {
93private:
Tim Renoufabd85fb2018-08-02 23:31:32 +000094 MachineDominatorTree *DomTree;
95 MachineLoopInfo *LoopInfo;
Connor Abbott92638ab2017-08-04 18:36:52 +000096 LiveIntervals *LIS = nullptr;
Tim Renoufabd85fb2018-08-02 23:31:32 +000097 const SIInstrInfo *TII;
Connor Abbott92638ab2017-08-04 18:36:52 +000098 const SIRegisterInfo *TRI;
99 MachineRegisterInfo *MRI;
100
Tim Renoufabd85fb2018-08-02 23:31:32 +0000101 std::vector<MachineInstr *> WWMs;
102 std::vector<MachineOperand *> ThenDefs;
103 std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs;
104 std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs;
105
Connor Abbott92638ab2017-08-04 18:36:52 +0000106public:
107 static char ID;
108
109 SIFixWWMLiveness() : MachineFunctionPass(ID) {
110 initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
111 }
112
113 bool runOnMachineFunction(MachineFunction &MF) override;
114
Connor Abbott92638ab2017-08-04 18:36:52 +0000115 StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
116
117 void getAnalysisUsage(AnalysisUsage &AU) const override {
Tim Renoufabd85fb2018-08-02 23:31:32 +0000118 AU.addRequiredID(MachineDominatorsID);
119 AU.addRequiredID(MachineLoopInfoID);
Connor Abbott92638ab2017-08-04 18:36:52 +0000120 // Should preserve the same set that TwoAddressInstructions does.
121 AU.addPreserved<SlotIndexes>();
122 AU.addPreserved<LiveIntervals>();
123 AU.addPreservedID(LiveVariablesID);
124 AU.addPreservedID(MachineLoopInfoID);
125 AU.addPreservedID(MachineDominatorsID);
126 AU.setPreservesCFG();
127 MachineFunctionPass::getAnalysisUsage(AU);
128 }
Tim Renoufabd85fb2018-08-02 23:31:32 +0000129
130private:
131 void processDef(MachineOperand &DefOpnd);
132 bool processThenDef(MachineOperand *DefOpnd);
133 bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop);
134 bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop);
Connor Abbott92638ab2017-08-04 18:36:52 +0000135};
136
137} // End anonymous namespace.
138
Tim Renoufabd85fb2018-08-02 23:31:32 +0000139INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE,
140 "SI fix WWM liveness", false, false)
141INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
142INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
143INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE,
Connor Abbott92638ab2017-08-04 18:36:52 +0000144 "SI fix WWM liveness", false, false)
145
146char SIFixWWMLiveness::ID = 0;
147
148char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID;
149
150FunctionPass *llvm::createSIFixWWMLivenessPass() {
151 return new SIFixWWMLiveness();
152}
153
Connor Abbott92638ab2017-08-04 18:36:52 +0000154bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
Tim Renoufabd85fb2018-08-02 23:31:32 +0000155 LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n");
Connor Abbott92638ab2017-08-04 18:36:52 +0000156 bool Modified = false;
157
158 // This doesn't actually need LiveIntervals, but we can preserve them.
159 LIS = getAnalysisIfAvailable<LiveIntervals>();
160
Tom Stellard5bfbae52018-07-11 20:59:01 +0000161 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
Connor Abbott92638ab2017-08-04 18:36:52 +0000162
Tim Renoufabd85fb2018-08-02 23:31:32 +0000163 TII = ST.getInstrInfo();
Connor Abbott92638ab2017-08-04 18:36:52 +0000164 TRI = &TII->getRegisterInfo();
165 MRI = &MF.getRegInfo();
166
Tim Renoufabd85fb2018-08-02 23:31:32 +0000167 DomTree = &getAnalysis<MachineDominatorTree>();
168 LoopInfo = &getAnalysis<MachineLoopInfo>();
169
170 // Scan the function to find the WWM sections and the candidate registers for
171 // having liveness modified.
Connor Abbott92638ab2017-08-04 18:36:52 +0000172 for (MachineBasicBlock &MBB : MF) {
173 for (MachineInstr &MI : MBB) {
Tim Renoufabd85fb2018-08-02 23:31:32 +0000174 if (MI.getOpcode() == AMDGPU::EXIT_WWM)
175 WWMs.push_back(&MI);
176 else {
177 for (MachineOperand &DefOpnd : MI.defs()) {
178 if (DefOpnd.isReg()) {
179 unsigned Reg = DefOpnd.getReg();
180 if (TRI->isVGPR(*MRI, Reg))
181 processDef(DefOpnd);
182 }
183 }
Connor Abbott92638ab2017-08-04 18:36:52 +0000184 }
185 }
186 }
Tim Renoufabd85fb2018-08-02 23:31:32 +0000187 if (!WWMs.empty()) {
188 // Synthesize liveness over WWM sections as required.
189 for (auto ThenDef : ThenDefs)
190 Modified |= processThenDef(ThenDef);
191 for (auto LoopExitDef : LoopExitDefs)
192 Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second);
193 for (auto LoopPhiDef : LoopPhiDefs)
194 Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second);
195 }
196
197 WWMs.clear();
198 ThenDefs.clear();
199 LoopExitDefs.clear();
200 LoopPhiDefs.clear();
Connor Abbott92638ab2017-08-04 18:36:52 +0000201
202 return Modified;
203}
Tim Renoufabd85fb2018-08-02 23:31:32 +0000204
205// During the function scan, process an operand that defines a VGPR.
206// This categorizes the register and puts it in the appropriate list for later
207// use when processing a WWM section.
208void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) {
209 unsigned Reg = DefOpnd.getReg();
210 // Get all the defining instructions. For convenience, make Defs[0] the def
211 // we are on now.
212 SmallVector<const MachineInstr *, 4> Defs;
213 Defs.push_back(DefOpnd.getParent());
214 for (auto &MI : MRI->def_instructions(Reg)) {
215 if (&MI != DefOpnd.getParent())
216 Defs.push_back(&MI);
217 }
218 // Check whether this def dominates all the others. If not, ignore this def.
219 // Either it is going to be processed when the scan encounters its other def
220 // that dominates all defs, or there is no def that dominates all others.
221 // The latter case is an eliminated phi from an if..else..endif or similar,
222 // which must be for uniform control flow so can be ignored.
223 // Because this pass runs shortly after PHIElimination, we assume that any
224 // multi-def register is a lowered phi, and thus has each def in a separate
225 // basic block.
226 for (unsigned I = 1; I != Defs.size(); ++I) {
227 if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent()))
228 return;
229 }
230 // Check for the case of an if..endif lowered phi: It has two defs, one
231 // dominates the other, and there is a single use in a successor of the
232 // dominant def.
233 // Later we will spot any WWM code inside
234 // the "then" clause and turn the second def into a partial def so its
235 // liveness goes through the WWM code in the "then" clause.
236 if (Defs.size() == 2) {
237 auto DomDefBlock = Defs[0]->getParent();
238 if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) {
239 auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
240 for (auto Succ : DomDefBlock->successors()) {
241 if (Succ == UseBlock) {
242 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n");
243 ThenDefs.push_back(&DefOpnd);
244 return;
245 }
246 }
247 }
248 }
249 // Check for the case of a non-lowered-phi register (single def) that exits
250 // a loop, that is, it has a use that is outside a loop that the def is
251 // inside. We find the outermost loop that the def is inside but a use is
252 // outside. Later we will spot any WWM code inside that loop and then make
253 // the def a partial def so its liveness goes round the loop and through the
254 // WWM code.
255 if (Defs.size() == 1) {
256 auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent());
257 if (!Loop)
258 return;
259 bool IsLoopExit = false;
260 for (auto &Use : MRI->use_instructions(Reg)) {
261 auto UseBlock = Use.getParent();
262 if (Loop->contains(UseBlock))
263 continue;
264 IsLoopExit = true;
265 while (auto Parent = Loop->getParentLoop()) {
266 if (Parent->contains(UseBlock))
267 break;
268 Loop = Parent;
269 }
270 }
271 if (!IsLoopExit)
272 return;
273 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
274 << " is a loop exit reg with loop header at "
275 << "bb." << Loop->getHeader()->getNumber() << "\n");
276 LoopExitDefs.push_back(std::pair<MachineOperand *, MachineLoop *>(
277 &DefOpnd, Loop));
278 return;
279 }
280 // Check for the case of a lowered single-preheader-loop phi, that is, a
281 // multi-def register where the dominating def is in the loop pre-header and
282 // all other defs are in backedges. Later we will spot any WWM code inside
283 // that loop and then make the backedge defs partial defs so the liveness
284 // goes through the WWM code.
285 // Note that we are ignoring multi-preheader loops on the basis that the
286 // structurizer does not allow that for non-uniform loops.
287 // There must be a single use in the loop header.
288 if (!MRI->hasOneUse(Reg))
289 return;
290 auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
291 auto Loop = LoopInfo->getLoopFor(UseBlock);
292 if (!Loop || Loop->getHeader() != UseBlock
293 || Loop->contains(Defs[0]->getParent())) {
294 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
295 << " is multi-def but single use not in loop header\n");
296 return;
297 }
298 for (unsigned I = 1; I != Defs.size(); ++I) {
299 if (!Loop->contains(Defs[I]->getParent()))
300 return;
301 }
302 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
303 << " is a loop phi reg with loop header at "
304 << "bb." << Loop->getHeader()->getNumber() << "\n");
305 LoopPhiDefs.push_back(
306 std::pair<MachineOperand *, MachineLoop *>(&DefOpnd, Loop));
307}
308
309// Process a then phi def: It has two defs, one dominates the other, and there
310// is a single use in a successor of the dominant def. Here we spot any WWM
311// code inside the "then" clause and turn the second def into a partial def so
312// its liveness goes through the WWM code in the "then" clause.
313bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) {
314 LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent());
315 if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
316 // Ignore if dominating def is undef.
317 LLVM_DEBUG(dbgs() << " ignoring as dominating def is undef\n");
318 return false;
319 }
320 unsigned Reg = DefOpnd->getReg();
321 // Get the use block, which is the endif block.
322 auto UseBlock = MRI->use_instr_begin(Reg)->getParent();
323 // Check whether there is WWM code inside the then branch. The WWM code must
324 // be dominated by the if but not dominated by the endif.
325 bool ContainsWWM = false;
326 for (auto WWM : WWMs) {
327 if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent())
328 && !DomTree->dominates(UseBlock, WWM->getParent())) {
329 LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
330 ContainsWWM = true;
331 break;
332 }
333 }
334 if (!ContainsWWM)
335 return false;
336 // Get the other def.
337 MachineInstr *OtherDef = nullptr;
338 for (auto &MI : MRI->def_instructions(Reg)) {
339 if (&MI != DefOpnd->getParent())
340 OtherDef = &MI;
341 }
342 // Make it a partial def.
343 OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
344 LLVM_DEBUG(dbgs() << *OtherDef);
345 return true;
346}
347
348// Process a loop exit def, that is, a register with a single use in a loop
349// that has a use outside the loop. Here we spot any WWM code inside that loop
350// and then make the def a partial def so its liveness goes round the loop and
351// through the WWM code.
352bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd,
353 MachineLoop *Loop) {
354 LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent());
355 // Check whether there is WWM code inside the loop.
356 bool ContainsWWM = false;
357 for (auto WWM : WWMs) {
358 if (Loop->contains(WWM->getParent())) {
359 LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
360 ContainsWWM = true;
361 break;
362 }
363 }
364 if (!ContainsWWM)
365 return false;
366 unsigned Reg = DefOpnd->getReg();
367 // Add a new implicit_def in loop preheader(s).
368 for (auto Pred : Loop->getHeader()->predecessors()) {
369 if (!Loop->contains(Pred)) {
370 auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(),
371 TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
372 LLVM_DEBUG(dbgs() << *ImplicitDef);
373 (void)ImplicitDef;
374 }
375 }
376 // Make the original def partial.
377 DefOpnd->getParent()->addOperand(MachineOperand::CreateReg(
378 Reg, false, /*isImp=*/true));
379 LLVM_DEBUG(dbgs() << *DefOpnd->getParent());
380 return true;
381}
382
383// Process a loop phi def, that is, a multi-def register where the dominating
384// def is in the loop pre-header and all other defs are in backedges. Here we
385// spot any WWM code inside that loop and then make the backedge defs partial
386// defs so the liveness goes through the WWM code.
387bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd,
388 MachineLoop *Loop) {
389 LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent());
390 // Check whether there is WWM code inside the loop.
391 bool ContainsWWM = false;
392 for (auto WWM : WWMs) {
393 if (Loop->contains(WWM->getParent())) {
394 LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
395 ContainsWWM = true;
396 break;
397 }
398 }
399 if (!ContainsWWM)
400 return false;
401 unsigned Reg = DefOpnd->getReg();
402 // Remove kill mark from uses.
403 for (auto &Use : MRI->use_operands(Reg))
404 Use.setIsKill(false);
405 // Make all defs except the dominating one partial defs.
406 SmallVector<MachineInstr *, 4> Defs;
407 for (auto &Def : MRI->def_instructions(Reg))
408 Defs.push_back(&Def);
409 for (auto Def : Defs) {
410 if (DefOpnd->getParent() == Def)
411 continue;
412 Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
413 LLVM_DEBUG(dbgs() << *Def);
414 }
415 return true;
416}
417