blob: c671fed34bdf14bfcc27dc34183f4889f238ca56 [file] [log] [blame]
//===-- SIOptimizeExecMaskingPreRA.cpp ------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass removes redundant S_OR_B64 instructions enabling lanes in
/// the exec. If two SI_END_CF (lowered as S_OR_B64) come together without any
/// vector instructions between them we can only keep outer SI_END_CF, given
/// that CFG is structured and exec bits of the outer end statement are always
/// not less than exec bit of the inner one.
///
/// This needs to be done before the RA to eliminate saved exec bits registers
/// but after register coalescer to have no vector registers copies in between
/// of different end cf statements.
///
//===----------------------------------------------------------------------===//
22
23#include "AMDGPU.h"
24#include "AMDGPUSubtarget.h"
25#include "SIInstrInfo.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000026#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Matthias Braunf8422972017-12-13 02:51:04 +000027#include "llvm/CodeGen/LiveIntervals.h"
Stanislav Mekhanoshin37e7f952017-08-01 23:14:32 +000028#include "llvm/CodeGen/MachineFunctionPass.h"
29
30using namespace llvm;
31
32#define DEBUG_TYPE "si-optimize-exec-masking-pre-ra"
33
namespace {

/// Pre-RA machine-function pass that removes redundant exec-mask operations:
/// it collapses adjacent SI_END_CF (exec = S_OR_B64 exec, saved) sequences
/// and folds the V_CNDMASK/V_CMP/S_AND_B64 branch-negation pattern into a
/// single S_ANDN2_B64 (see optimizeVcndVcmpPair below).
class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
public:
  static char ID;

public:
  SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
    initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "SI optimize exec mask operations pre-RA";
  }

  // Requires LiveIntervals (used both to find reaching definitions and to
  // keep the intervals of every touched register up to date) and preserves
  // all analyses.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.
59
// Register the pass (and its LiveIntervals dependency) with the global pass
// registry so it can be constructed by name and scheduled by the target.
INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
                      "SI optimize exec mask operations pre-RA", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
                    "SI optimize exec mask operations pre-RA", false, false)

char SIOptimizeExecMaskingPreRA::ID = 0;

// The address of ID serves as the unique identity of the pass.
char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID;
69
70FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
71 return new SIOptimizeExecMaskingPreRA();
72}
73
74static bool isEndCF(const MachineInstr& MI, const SIRegisterInfo* TRI) {
75 return MI.getOpcode() == AMDGPU::S_OR_B64 &&
76 MI.modifiesRegister(AMDGPU::EXEC, TRI);
77}
78
79static bool isFullExecCopy(const MachineInstr& MI) {
80 return MI.isFullCopy() && MI.getOperand(1).getReg() == AMDGPU::EXEC;
81}
82
83static unsigned getOrNonExecReg(const MachineInstr &MI,
84 const SIInstrInfo &TII) {
85 auto Op = TII.getNamedOperand(MI, AMDGPU::OpName::src1);
86 if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
87 return Op->getReg();
88 Op = TII.getNamedOperand(MI, AMDGPU::OpName::src0);
89 if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
90 return Op->getReg();
91 return AMDGPU::NoRegister;
92}
93
94static MachineInstr* getOrExecSource(const MachineInstr &MI,
95 const SIInstrInfo &TII,
96 const MachineRegisterInfo &MRI) {
97 auto SavedExec = getOrNonExecReg(MI, TII);
98 if (SavedExec == AMDGPU::NoRegister)
99 return nullptr;
100 auto SaveExecInst = MRI.getUniqueVRegDef(SavedExec);
101 if (!SaveExecInst || !isFullExecCopy(*SaveExecInst))
102 return nullptr;
103 return SaveExecInst;
104}
105
Stanislav Mekhanoshin6071e1a2018-12-13 03:17:40 +0000106// Optimize sequence
107// %sel = V_CNDMASK_B32_e64 0, 1, %cc
108// %cmp = V_CMP_NE_U32 1, %1
109// $vcc = S_AND_B64 $exec, %cmp
110// S_CBRANCH_VCC[N]Z
111// =>
112// $vcc = S_ANDN2_B64 $exec, %cc
113// S_CBRANCH_VCC[N]Z
114//
115// It is the negation pattern inserted by DAGCombiner::visitBRCOND() in the
116// rebuildSetCC(). We start with S_CBRANCH to avoid exhaustive search, but
117// only 3 first instructions are really needed. S_AND_B64 with exec is a
118// required part of the pattern since V_CNDMASK_B32 writes zeroes for inactive
119// lanes.
120//
121// Returns %cc register on success.
122static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
123 const GCNSubtarget &ST,
124 MachineRegisterInfo &MRI,
125 LiveIntervals *LIS) {
126 const SIRegisterInfo *TRI = ST.getRegisterInfo();
127 const SIInstrInfo *TII = ST.getInstrInfo();
128 const unsigned AndOpc = AMDGPU::S_AND_B64;
129 const unsigned Andn2Opc = AMDGPU::S_ANDN2_B64;
130 const unsigned CondReg = AMDGPU::VCC;
131 const unsigned ExecReg = AMDGPU::EXEC;
132
133 auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
134 unsigned Opc = MI.getOpcode();
135 return Opc == AMDGPU::S_CBRANCH_VCCZ ||
136 Opc == AMDGPU::S_CBRANCH_VCCNZ; });
137 if (I == MBB.terminators().end())
138 return AMDGPU::NoRegister;
139
140 auto *And = TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister,
141 *I, MRI, LIS);
142 if (!And || And->getOpcode() != AndOpc ||
143 !And->getOperand(1).isReg() || !And->getOperand(2).isReg())
144 return AMDGPU::NoRegister;
145
146 MachineOperand *AndCC = &And->getOperand(1);
147 unsigned CmpReg = AndCC->getReg();
148 unsigned CmpSubReg = AndCC->getSubReg();
149 if (CmpReg == ExecReg) {
150 AndCC = &And->getOperand(2);
151 CmpReg = AndCC->getReg();
152 CmpSubReg = AndCC->getSubReg();
153 } else if (And->getOperand(2).getReg() != ExecReg) {
154 return AMDGPU::NoRegister;
155 }
156
157 auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, MRI, LIS);
158 if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
159 Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
160 Cmp->getParent() != And->getParent())
161 return AMDGPU::NoRegister;
162
163 MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);
164 MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);
165 if (Op1->isImm() && Op2->isReg())
166 std::swap(Op1, Op2);
167 if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1)
168 return AMDGPU::NoRegister;
169
170 unsigned SelReg = Op1->getReg();
171 auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, MRI, LIS);
172 if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
173 return AMDGPU::NoRegister;
174
175 Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
176 Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
177 MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
178 if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() ||
179 Op1->getImm() != 0 || Op2->getImm() != 1)
180 return AMDGPU::NoRegister;
181
182 LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t'
183 << *Cmp << '\t' << *And);
184
185 unsigned CCReg = CC->getReg();
186 LIS->RemoveMachineInstrFromMaps(*And);
187 MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(),
188 TII->get(Andn2Opc), And->getOperand(0).getReg())
189 .addReg(ExecReg)
190 .addReg(CCReg, CC->getSubReg());
191 And->eraseFromParent();
192 LIS->InsertMachineInstrInMaps(*Andn2);
193
194 LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
195
196 // Try to remove compare. Cmp value should not used in between of cmp
197 // and s_and_b64 if VCC or just unused if any other register.
198 if ((TargetRegisterInfo::isVirtualRegister(CmpReg) &&
199 MRI.use_nodbg_empty(CmpReg)) ||
200 (CmpReg == CondReg &&
201 std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
Stanislav Mekhanoshind933c2c2018-12-13 05:52:11 +0000202 [&](const MachineInstr &MI) {
Stanislav Mekhanoshin6071e1a2018-12-13 03:17:40 +0000203 return MI.readsRegister(CondReg, TRI); }))) {
204 LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
205
206 LIS->RemoveMachineInstrFromMaps(*Cmp);
207 Cmp->eraseFromParent();
208
209 // Try to remove v_cndmask_b32.
210 if (TargetRegisterInfo::isVirtualRegister(SelReg) &&
211 MRI.use_nodbg_empty(SelReg)) {
212 LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
213
214 LIS->RemoveMachineInstrFromMaps(*Sel);
215 Sel->eraseFromParent();
216 }
217 }
218
219 return CCReg;
220}
221
// Entry point. Walks every basic block and applies three transformations:
//  1. optimizeVcndVcmpPair() — fold the branch-negation pattern.
//  2. Dead-code removal backwards from s_endpgm in exit blocks.
//  3. Collapse of adjacent SI_END_CF (exec = S_OR_B64) pairs.
// Registers whose live intervals may have changed are collected in
// RecalcRegs and recomputed once at the end.
bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
  // Exec halves are always recomputed since the pass manipulates exec writes.
  DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {

    if (unsigned Reg = optimizeVcndVcmpPair(MBB, ST, MRI, LIS)) {
      // The fold rewrote a VCC definition; refresh everything it touches.
      RecalcRegs.insert(Reg);
      RecalcRegs.insert(AMDGPU::VCC_LO);
      RecalcRegs.insert(AMDGPU::VCC_HI);
      RecalcRegs.insert(AMDGPU::SCC);
      Changed = true;
    }

    // Try to remove unneeded instructions before s_endpgm.
    if (MBB.succ_empty()) {
      if (MBB.empty())
        continue;

      // Skip this if the endpgm has any implicit uses, otherwise we would need
      // to be careful to update / remove them.
      MachineInstr &Term = MBB.back();
      if (Term.getOpcode() != AMDGPU::S_ENDPGM ||
          Term.getNumOperands() != 0)
        continue;

      // Worklist of blocks to scan backwards, starting at the exit block.
      SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});

      while (!Blocks.empty()) {
        auto CurBB = Blocks.pop_back_val();
        auto I = CurBB->rbegin(), E = CurBB->rend();
        if (I != E) {
          // Step over the trailing s_endpgm / unconditional branch; any
          // other (conditional) branch means the block tail is live.
          if (I->isUnconditionalBranch() || I->getOpcode() == AMDGPU::S_ENDPGM)
            ++I;
          else if (I->isBranch())
            continue;
        }

        // Erase side-effect-free instructions backwards until one with an
        // observable effect is found.
        while (I != E) {
          if (I->isDebugInstr()) {
            I = std::next(I);
            continue;
          }

          if (I->mayStore() || I->isBarrier() || I->isCall() ||
              I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef())
            break;

          LLVM_DEBUG(dbgs()
                     << "Removing no effect instruction: " << *I << '\n');

          for (auto &Op : I->operands()) {
            if (Op.isReg())
              RecalcRegs.insert(Op.getReg());
          }

          auto Next = std::next(I);
          LIS->RemoveMachineInstrFromMaps(*I);
          I->eraseFromParent();
          I = Next;

          Changed = true;
        }

        if (I != E)
          continue;

        // Try to ascend predecessors.
        for (auto *Pred : CurBB->predecessors()) {
          // Only single-successor predecessors are safe: everything after
          // their terminator flows straight into the emptied block.
          if (Pred->succ_size() == 1)
            Blocks.push_back(Pred);
        }
      }
      continue;
    }

    // Try to collapse adjacent endifs.
    auto Lead = MBB.begin(), E = MBB.end();
    if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
      continue;

    const MachineBasicBlock* Succ = *MBB.succ_begin();
    if (!MBB.isLayoutSuccessor(Succ))
      continue;

    auto I = std::next(Lead);

    // The inner end-cf is only redundant if nothing between it and the outer
    // one depends on exec: scalar-only, and no exec reads.
    for ( ; I != E; ++I)
      if (!TII->isSALU(*I) || I->readsRegister(AMDGPU::EXEC, TRI))
        break;

    if (I != E)
      continue;

    // The successor must start with another end-cf whose saved-exec operand
    // is a full copy of exec (the outer SI_END_CF).
    const auto NextLead = Succ->begin();
    if (NextLead == Succ->end() || !isEndCF(*NextLead, TRI) ||
        !getOrExecSource(*NextLead, *TII, MRI))
      continue;

    LLVM_DEBUG(dbgs() << "Redundant EXEC = S_OR_B64 found: " << *Lead << '\n');

    auto SaveExec = getOrExecSource(*Lead, *TII, MRI);
    unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII);
    for (auto &Op : Lead->operands()) {
      if (Op.isReg())
        RecalcRegs.insert(Op.getReg());
    }

    LIS->RemoveMachineInstrFromMaps(*Lead);
    Lead->eraseFromParent();
    if (SaveExecReg) {
      LIS->removeInterval(SaveExecReg);
      LIS->createAndComputeVirtRegInterval(SaveExecReg);
    }

    Changed = true;

    // If the only use of saved exec in the removed instruction is S_AND_B64
    // fold the copy now.
    if (!SaveExec || !SaveExec->isFullCopy())
      continue;

    unsigned SavedExec = SaveExec->getOperand(0).getReg();
    bool SafeToReplace = true;
    // Replacing the copy with exec itself is only safe if all uses sit in
    // the defining block (no exec redefinition can intervene across blocks
    // given the structured CFG scan above).
    for (auto& U : MRI.use_nodbg_instructions(SavedExec)) {
      if (U.getParent() != SaveExec->getParent()) {
        SafeToReplace = false;
        break;
      }

      LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *SaveExec << '\n');
    }

    if (SafeToReplace) {
      LIS->RemoveMachineInstrFromMaps(*SaveExec);
      SaveExec->eraseFromParent();
      MRI.replaceRegWith(SavedExec, AMDGPU::EXEC);
      LIS->removeInterval(SavedExec);
    }
  }

  if (Changed) {
    // Recompute live intervals for every register the pass touched:
    // virtual registers get a fresh interval, physical registers have their
    // stale reg-unit intervals dropped (LIS recomputes them lazily).
    for (auto Reg : RecalcRegs) {
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        LIS->removeInterval(Reg);
        if (!MRI.reg_empty(Reg))
          LIS->createAndComputeVirtRegInterval(Reg);
      } else {
        for (MCRegUnitIterator U(Reg, TRI); U.isValid(); ++U)
          LIS->removeRegUnit(*U);
      }
    }
  }

  return Changed;
}